]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs4.patch
- 4.9.309
[packages/kernel.git] / kernel-aufs4.patch
CommitLineData
e8791d4f
AM
1diff -urNp -x '*.orig' linux-4.9/Documentation/ABI/testing/debugfs-aufs linux-4.9/Documentation/ABI/testing/debugfs-aufs
2--- linux-4.9/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
3+++ linux-4.9/Documentation/ABI/testing/debugfs-aufs 2021-02-24 16:15:09.518240088 +0100
86dc4139 4@@ -0,0 +1,50 @@
7f207e10
AM
5+What: /debug/aufs/si_<id>/
6+Date: March 2009
f6b6e03d 7+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
8+Description:
9+ Under /debug/aufs, a directory named si_<id> is created
10+ per aufs mount, where <id> is a unique id generated
11+ internally.
1facf9fc 12+
86dc4139
AM
13+What: /debug/aufs/si_<id>/plink
14+Date: Apr 2013
f6b6e03d 15+Contact: J. R. Okajima <hooanon05g@gmail.com>
86dc4139
AM
16+Description:
17+ It has three lines and shows the information about the
18+ pseudo-link. The first line is a single number
19+ representing a number of buckets. The second line is a
20+ number of pseudo-links per buckets (separated by a
21+ blank). The last line is a single number representing a
22+ total number of pseudo-links.
23+ When the aufs mount option 'noplink' is specified, it
24+ will show "1\n0\n0\n".
25+
7f207e10
AM
26+What: /debug/aufs/si_<id>/xib
27+Date: March 2009
f6b6e03d 28+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
29+Description:
30+ It shows the consumed blocks by xib (External Inode Number
31+ Bitmap), its block size and file size.
32+ When the aufs mount option 'noxino' is specified, it
33+ will be empty. About XINO files, see the aufs manual.
34+
35+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
36+Date: March 2009
f6b6e03d 37+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
38+Description:
39+ It shows the consumed blocks by xino (External Inode Number
40+ Translation Table), its link count, block size and file
41+ size.
42+ When the aufs mount option 'noxino' is specified, it
43+ will be empty. About XINO files, see the aufs manual.
44+
45+What: /debug/aufs/si_<id>/xigen
46+Date: March 2009
f6b6e03d 47+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
48+Description:
49+ It shows the consumed blocks by xigen (External Inode
50+ Generation Table), its block size and file size.
51+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
52+ be created.
53+ When the aufs mount option 'noxino' is specified, it
54+ will be empty. About XINO files, see the aufs manual.
e8791d4f
AM
55diff -urNp -x '*.orig' linux-4.9/Documentation/ABI/testing/sysfs-aufs linux-4.9/Documentation/ABI/testing/sysfs-aufs
56--- linux-4.9/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
57+++ linux-4.9/Documentation/ABI/testing/sysfs-aufs 2021-02-24 16:15:09.518240088 +0100
392086de 58@@ -0,0 +1,31 @@
7f207e10
AM
59+What: /sys/fs/aufs/si_<id>/
60+Date: March 2009
f6b6e03d 61+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
62+Description:
63+ Under /sys/fs/aufs, a directory named si_<id> is created
64+ per aufs mount, where <id> is a unique id generated
65+ internally.
66+
67+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
68+Date: March 2009
f6b6e03d 69+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
70+Description:
71+ It shows the absolute path of a member directory (which
72+ is called branch) in aufs, and its permission.
73+
392086de
AM
74+What: /sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
75+Date: July 2013
f6b6e03d 76+Contact: J. R. Okajima <hooanon05g@gmail.com>
392086de
AM
77+Description:
78+ It shows the id of a member directory (which is called
79+ branch) in aufs.
80+
7f207e10
AM
81+What: /sys/fs/aufs/si_<id>/xi_path
82+Date: March 2009
f6b6e03d 83+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
84+Description:
85+ It shows the absolute path of XINO (External Inode Number
86+ Bitmap, Translation Table and Generation Table) file
87+ even if it is the default path.
88+ When the aufs mount option 'noxino' is specified, it
89+ will be empty. About XINO files, see the aufs manual.
e8791d4f
AM
90diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/README linux-4.9/Documentation/filesystems/aufs/README
91--- linux-4.9/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
92+++ linux-4.9/Documentation/filesystems/aufs/README 2021-02-24 16:15:09.521573529 +0100
93@@ -0,0 +1,393 @@
94+
95+Aufs4 -- advanced multi layered unification filesystem version 4.x
96+http://aufs.sf.net
97+Junjiro R. Okajima
98+
99+
100+0. Introduction
101+----------------------------------------
102+In the early days, aufs was entirely re-designed and re-implemented
103+Unionfs Version 1.x series. Adding many original ideas, approaches,
104+improvements and implementations, it becomes totally different from
105+Unionfs while keeping the basic features.
106+Recently, Unionfs Version 2.x series begin taking some of the same
107+approaches to aufs1's.
108+Unionfs is being developed by Professor Erez Zadok at Stony Brook
109+University and his team.
110+
111+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
112+If you want older kernel version support, try aufs2-2.6.git or
113+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
114+
115+Note: it becomes clear that "Aufs was rejected. Let's give it up."
116+ According to Christoph Hellwig, linux rejects all union-type
117+ filesystems but UnionMount.
118+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
119+
120+PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
121+ UnionMount, and he pointed out an issue around a directory mutex
122+ lock and aufs addressed it. But it is still unsure whether aufs will
123+ be merged (or any other union solution).
124+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
125+
126+
127+1. Features
128+----------------------------------------
129+- unite several directories into a single virtual filesystem. The member
130+ directory is called as a branch.
131+- you can specify the permission flags to the branch, which are 'readonly',
132+ 'readwrite' and 'whiteout-able.'
133+- by upper writable branch, internal copyup and whiteout, files/dirs on
134+ readonly branch are modifiable logically.
135+- dynamic branch manipulation, add, del.
136+- etc...
137+
138+Also there are many enhancements in aufs, such as:
139+- test only the highest one for the directory permission (dirperm1)
140+- copyup on open (coo=)
141+- 'move' policy for copy-up between two writable branches, after
142+ checking free space.
143+- xattr, acl
144+- readdir(3) in userspace.
145+- keep inode number by external inode number table
146+- keep the timestamps of file/dir in internal copyup operation
147+- seekable directory, supporting NFS readdir.
148+- whiteout is hardlinked in order to reduce the consumption of inodes
149+ on branch
150+- do not copyup, nor create a whiteout when it is unnecessary
151+- revert a single systemcall when an error occurs in aufs
152+- remount interface instead of ioctl
153+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
154+- loopback mounted filesystem as a branch
155+- kernel thread for removing the dir which has plenty of whiteouts
156+- support copyup sparse file (a file which has a 'hole' in it)
157+- default permission flags for branches
158+- selectable permission flags for ro branch, whether whiteout can
159+ exist or not
160+- export via NFS.
161+- support <sysfs>/fs/aufs and <debugfs>/aufs.
162+- support multiple writable branches, some policies to select one
163+ among multiple writable branches.
164+- a new semantics for link(2) and rename(2) to support multiple
165+ writable branches.
166+- no glibc changes are required.
167+- pseudo hardlink (hardlink over branches)
168+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
169+ including NFS or remote filesystem branch.
170+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
171+- and more...
172+
173+Currently these features are dropped temporarily from aufs4.
174+See design/08plan.txt in detail.
175+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
176+ (robr)
177+- statistics of aufs thread (/sys/fs/aufs/stat)
178+
179+Features or just an idea in the future (see also design/*.txt),
180+- reorder the branch index without del/re-add.
181+- permanent xino files for NFSD
182+- an option for refreshing the opened files after add/del branches
183+- light version, without branch manipulation. (unnecessary?)
184+- copyup in userspace
185+- inotify in userspace
186+- readv/writev
187+
188+
189+2. Download
190+----------------------------------------
191+There are three GIT trees for aufs4, aufs4-linux.git,
192+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
193+"aufs-util.git."
194+While the aufs-util is always necessary, you need either of aufs4-linux
195+or aufs4-standalone.
196+
197+The aufs4-linux tree includes the whole linux mainline GIT tree,
198+git://git.kernel.org/.../torvalds/linux.git.
199+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
200+build aufs4 as an external kernel module.
201+Several extra patches are not included in this tree. Only
202+aufs4-standalone tree contains them. They are described in the later
203+section "Configuration and Compilation."
204+
205+On the other hand, the aufs4-standalone tree has only aufs source files
206+and necessary patches, and you can select CONFIG_AUFS_FS=m.
207+But you need to apply all aufs patches manually.
208+
209+You will find GIT branches whose name is in form of "aufs4.x" where "x"
210+represents the linux kernel version, "linux-4.x". For instance,
211+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
212+"aufs4.x-rcN" branch.
213+
214+o aufs4-linux tree
215+$ git clone --reference /your/linux/git/tree \
216+ git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
217+- if you don't have linux GIT tree, then remove "--reference ..."
218+$ cd aufs4-linux.git
219+$ git checkout origin/aufs4.0
220+
221+Or You may want to directly git-pull aufs into your linux GIT tree, and
222+leave the patch-work to GIT.
223+$ cd /your/linux/git/tree
224+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
225+$ git fetch aufs4
226+$ git checkout -b my4.0 v4.0
227+$ (add your local change...)
228+$ git pull aufs4 aufs4.0
229+- now you have v4.0 + your_changes + aufs4.0 in your my4.0 branch.
230+- you may need to solve some conflicts between your_changes and
231+ aufs4.0. in this case, git-rerere is recommended so that you can
232+ solve the similar conflicts automatically when you upgrade to 4.1 or
233+ later in the future.
234+
235+o aufs4-standalone tree
236+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
237+$ cd aufs4-standalone.git
238+$ git checkout origin/aufs4.0
239+
240+o aufs-util tree
241+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
242+- note that the public aufs-util.git is on SourceForge instead of
243+ GitHUB.
244+$ cd aufs-util.git
245+$ git checkout origin/aufs4.0
246+
247+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
248+The minor version number, 'x' in '4.x', of aufs may not always
249+follow the minor version number of the kernel.
250+Because changes in the kernel that cause the use of a new
251+minor version number do not always require changes to aufs-util.
252+
253+Since aufs-util has its own minor version number, you may not be
254+able to find a GIT branch in aufs-util for your kernel's
255+exact minor version number.
256+In this case, you should git-checkout the branch for the
257+nearest lower number.
258+
259+For (an unreleased) example:
260+If you are using "linux-4.10" and the "aufs4.10" branch
261+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
262+or something numerically smaller is the branch for your kernel.
263+
264+Also you can view all branches by
265+ $ git branch -a
266+
267+
268+3. Configuration and Compilation
269+----------------------------------------
270+Make sure you have git-checkout'ed the correct branch.
271+
272+For aufs4-linux tree,
273+- enable CONFIG_AUFS_FS.
274+- set other aufs configurations if necessary.
275+
276+For aufs4-standalone tree,
277+There are several ways to build.
278+
279+1.
280+- apply ./aufs4-kbuild.patch to your kernel source files.
281+- apply ./aufs4-base.patch too.
282+- apply ./aufs4-mmap.patch too.
283+- apply ./aufs4-standalone.patch too, if you have a plan to set
284+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
285+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
286+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
287+- enable CONFIG_AUFS_FS, you can select either
288+ =m or =y.
289+- and build your kernel as usual.
290+- install the built kernel.
291+ Note: Since linux-3.9, every filesystem module requires an alias
292+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
293+ modules.aliases file if you set CONFIG_AUFS_FS=m.
294+- install the header files too by "make headers_install" to the
295+ directory where you specify. By default, it is $PWD/usr.
296+ "make help" shows a brief note for headers_install.
297+- and reboot your system.
298+
299+2.
300+- module only (CONFIG_AUFS_FS=m).
301+- apply ./aufs4-base.patch to your kernel source files.
302+- apply ./aufs4-mmap.patch too.
303+- apply ./aufs4-standalone.patch too.
304+- build your kernel, don't forget "make headers_install", and reboot.
305+- edit ./config.mk and set other aufs configurations if necessary.
306+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
307+ every aufs configurations.
308+- build the module by simple "make".
309+ Note: Since linux-3.9, every filesystem module requires an alias
310+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
311+ modules.aliases file.
312+- you can specify ${KDIR} make variable which points to your kernel
313+ source tree.
314+- install the files
315+ + run "make install" to install the aufs module, or copy the built
316+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
317+ + run "make install_headers" (instead of headers_install) to install
318+ the modified aufs header file (you can specify DESTDIR which is
319+ available in aufs standalone version's Makefile only), or copy
320+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
321+ you like manually. By default, the target directory is $PWD/usr.
322+- no need to apply aufs4-kbuild.patch, nor copying source files to your
323+ kernel source tree.
324+
325+Note: The header file aufs_type.h is necessary to build aufs-util
326+ as well as "make headers_install" in the kernel source tree.
327+ headers_install is subject to be forgotten, but it is essentially
328+ necessary, not only for building aufs-util.
329+ You may not meet problems without headers_install in some older
330+ version though.
331+
332+And then,
333+- read README in aufs-util, build and install it
334+- note that your distribution may contain an obsoleted version of
335+ aufs_type.h in /usr/include/linux or something. When you build aufs
336+ utilities, make sure that your compiler refers to the correct aufs header
337+ file which is built by "make headers_install."
338+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
339+ then run "make install_ulib" too. And refer to the aufs manual in
340+ detail.
341+
342+There are several other patches in aufs4-standalone.git. They are all
343+optional. When you meet some problems, they will help you.
344+- aufs4-loopback.patch
345+ Supports a nested loopback mount in a branch-fs. This patch is
346+ unnecessary until aufs produces a message like "you may want to try
347+ another patch for loopback file".
348+- vfs-ino.patch
349+ Modifies a system global kernel internal function get_next_ino() in
350+ order to stop assigning 0 for an inode-number. Not directly related to
351+ aufs, but recommended generally.
352+- tmpfs-idr.patch
353+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
354+ the size of aufs XINO files for tmpfs branch. Also it prevents the
355+ duplication of inode number, which is important for backup tools and
356+ other utilities. When you find aufs XINO files for tmpfs branch
357+ growing too much, try this patch.
358+- lockdep-debug.patch
359+ Because aufs is not only an ordinary filesystem (callee of VFS), but
360+ also a caller of VFS functions for branch filesystems, subclassing of
361+ the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging
362+ feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will
363+ need to apply this debug patch to expand several constant values.
364+ If you don't know what LOCKDEP is, then you don't have to apply this patch.
365+
366+
367+4. Usage
368+----------------------------------------
369+At first, make sure aufs-util are installed, and please read the aufs
370+manual, aufs.5 in aufs-util.git tree.
371+$ man -l aufs.5
372+
373+And then,
374+$ mkdir /tmp/rw /tmp/aufs
375+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
376+
377+Here is another example. The result is equivalent.
378+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
379+ Or
380+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
381+# mount -o remount,append:${HOME} /tmp/aufs
382+
383+Then, you can see whole tree of your home dir through /tmp/aufs. If
384+you modify a file under /tmp/aufs, the one on your home directory is
385+not affected, instead the same named file will be newly created under
386+/tmp/rw. And all of your modification to a file will be applied to
387+the one under /tmp/rw. This is called the file based Copy on Write
388+(COW) method.
389+Aufs mount options are described in aufs.5.
390+If you run chroot or something and make your aufs as a root directory,
391+then you need to customize the shutdown script. See the aufs manual in
392+detail.
393+
394+Additionally, there are some sample usages of aufs which are a
395+diskless system with network booting, and LiveCD over NFS.
396+See sample dir in CVS tree on SourceForge.
397+
398+
399+5. Contact
400+----------------------------------------
401+When you have any problems or strange behaviour in aufs, please let me
402+know with:
403+- /proc/mounts (instead of the output of mount(8))
404+- /sys/module/aufs/*
405+- /sys/fs/aufs/* (if you have them)
406+- /debug/aufs/* (if you have them)
407+- linux kernel version
408+ if your kernel is not plain, for example modified by distributor,
409+ the url where i can download its source is necessary too.
410+- aufs version which was printed at loading the module or booting the
411+ system, instead of the date you downloaded.
412+- configuration (define/undefine CONFIG_AUFS_xxx)
413+- kernel configuration or /proc/config.gz (if you have it)
414+- behaviour which you think to be incorrect
415+- actual operation, reproducible one is better
416+- mailto: aufs-users at lists.sourceforge.net
417+
418+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
419+and Feature Requests) on SourceForge. Please join and write to
420+aufs-users ML.
421+
422+
423+6. Acknowledgements
424+----------------------------------------
425+Thanks to everyone who have tried and are using aufs, whoever
426+have reported a bug or any feedback.
427+
428+Especially donators:
429+Tomas Matejicek(slax.org) made a donation (much more than once).
430+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
431+ scripts) is making "doubling" donations.
432+ Unfortunately I cannot list all of the donators, but I really
433+ appreciate.
434+ It ends Aug 2010, but the ordinary donation URL is still available.
435+ <http://sourceforge.net/donate/index.php?group_id=167503>
436+Dai Itasaka made a donation (2007/8).
437+Chuck Smith made a donation (2008/4, 10 and 12).
438+Henk Schoneveld made a donation (2008/9).
439+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
440+Francois Dupoux made a donation (2008/11).
441+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
442+ aufs2 GIT tree (2009/2).
443+William Grant made a donation (2009/3).
444+Patrick Lane made a donation (2009/4).
445+The Mail Archive (mail-archive.com) made donations (2009/5).
446+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
447+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
448+Pavel Pronskiy made a donation (2011/2).
449+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
450+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
451+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
452+11).
453+Sam Liddicott made a donation (2011/9).
454+Era Scarecrow made a donation (2013/4).
455+Bor Ratajc made a donation (2013/4).
456+Alessandro Gorreta made a donation (2013/4).
457+POIRETTE Marc made a donation (2013/4).
458+Alessandro Gorreta made a donation (2013/4).
459+lauri kasvandik made a donation (2013/5).
460+"pemasu from Finland" made a donation (2013/7).
461+The Parted Magic Project made a donation (2013/9 and 11).
462+Pavel Barta made a donation (2013/10).
463+Nikolay Pertsev made a donation (2014/5).
464+James B made a donation (2014/7 and 2015/7).
465+Stefano Di Biase made a donation (2014/8).
466+Daniel Epellei made a donation (2015/1).
467+OmegaPhil made a donation (2016/1).
468+Tomasz Szewczyk made a donation (2016/4).
469+James Burry made a donation (2016/12).
470+
471+Thank you very much.
472+Donations are always, including future donations, very important and
473+helpful for me to keep on developing aufs.
474+
475+
476+7.
477+----------------------------------------
478+If you are an experienced user, no explanation is needed. Aufs is
479+just a linux filesystem.
480+
481+
482+Enjoy!
483+
484+# Local variables: ;
485+# mode: text;
486+# End: ;
487diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/01intro.txt linux-4.9/Documentation/filesystems/aufs/design/01intro.txt
488--- linux-4.9/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
489+++ linux-4.9/Documentation/filesystems/aufs/design/01intro.txt 2021-02-24 16:15:09.518240088 +0100
ae9dfd79 490@@ -0,0 +1,171 @@
53392da6 491+
ae9dfd79 492+# Copyright (C) 2005-2018 Junjiro R. Okajima
53392da6
AM
493+#
494+# This program is free software; you can redistribute it and/or modify
495+# it under the terms of the GNU General Public License as published by
496+# the Free Software Foundation; either version 2 of the License, or
497+# (at your option) any later version.
498+#
499+# This program is distributed in the hope that it will be useful,
500+# but WITHOUT ANY WARRANTY; without even the implied warranty of
501+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
502+# GNU General Public License for more details.
503+#
504+# You should have received a copy of the GNU General Public License
523b37e3 505+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
506+
507+Introduction
508+----------------------------------------
509+
ae9dfd79 510+aufs [ei ju: ef es] | /ey-yoo-ef-es/ | [a u f s]
53392da6
AM
511+1. abbrev. for "advanced multi-layered unification filesystem".
512+2. abbrev. for "another unionfs".
513+3. abbrev. for "auf das" in German which means "on the" in English.
514+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
515+ But "Filesystem aufs Filesystem" is hard to understand.
ae9dfd79 516+4. abbrev. for "African Urban Fashion Show".
53392da6
AM
517+
518+AUFS is a filesystem with features:
519+- multi layered stackable unification filesystem, the member directory
520+ is called as a branch.
521+- branch permission and attribute, 'readonly', 'real-readonly',
7e9cd9fe 522+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
53392da6
AM
523+ combination.
524+- internal "file copy-on-write".
525+- logical deletion, whiteout.
526+- dynamic branch manipulation, adding, deleting and changing permission.
527+- allow bypassing aufs, user's direct branch access.
528+- external inode number translation table and bitmap which maintains the
529+ persistent aufs inode number.
530+- seekable directory, including NFS readdir.
531+- file mapping, mmap and sharing pages.
532+- pseudo-link, hardlink over branches.
533+- loopback mounted filesystem as a branch.
534+- several policies to select one among multiple writable branches.
535+- revert a single systemcall when an error occurs in aufs.
536+- and more...
537+
538+
539+Multi Layered Stackable Unification Filesystem
540+----------------------------------------------------------------------
541+Most people already knows what it is.
542+It is a filesystem which unifies several directories and provides a
543+merged single directory. When users access a file, the access will be
544+passed/re-directed/converted (sorry, I am not sure which English word is
545+correct) to the real file on the member filesystem. The member
546+filesystem is called 'lower filesystem' or 'branch' and has a mode
547+'readonly' and 'readwrite.' And the deletion for a file on the lower
548+readonly branch is handled by creating 'whiteout' on the upper writable
549+branch.
550+
551+On LKML, there have been discussions about UnionMount (Jan Blunck,
552+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
553+different approaches to implement the merged-view.
554+The former tries putting it into VFS, and the latter implements as a
555+separate filesystem.
556+(If I misunderstand about these implementations, please let me know and
557+I shall correct it. Because it is a long time ago when I read their
558+source files last time).
559+
560+UnionMount's approach will be able to small, but may be hard to share
561+branches between several UnionMount since the whiteout in it is
562+implemented in the inode on branch filesystem and always
563+shared. According to Bharata's post, readdir does not seems to be
564+finished yet.
565+There are several missing features known in this implementations such as
566+- for users, the inode number may change silently. eg. copy-up.
567+- link(2) may break by copy-up.
568+- read(2) may get an obsoleted filedata (fstat(2) too).
569+- fcntl(F_SETLK) may be broken by copy-up.
570+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
571+ open(O_RDWR).
572+
7e9cd9fe
AM
573+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
574+merged into mainline. This is another implementation of UnionMount as a
575+separated filesystem. All the limitations and known problems which
576+UnionMount are equally inherited to "overlay" filesystem.
577+
578+Unionfs has a longer history. When I started implementing a stackable
579+filesystem (Aug 2005), it already existed. It has virtual super_block,
580+inode, dentry and file objects and they have an array pointing lower
581+same kind objects. After contributing many patches for Unionfs, I
582+re-started my project AUFS (Jun 2006).
53392da6
AM
583+
584+In AUFS, the structure of filesystem resembles to Unionfs, but I
585+implemented my own ideas, approaches and enhancements and it became
586+totally different one.
587+
588+Comparing DM snapshot and fs based implementation
589+- the number of bytes to be copied between devices is much smaller.
590+- the type of filesystem must be one and only.
591+- the fs must be writable, no readonly fs, even for the lower original
592+ device. so the compression fs will not be usable. but if we use
593+ loopback mount, we may address this issue.
594+ for instance,
595+ mount /cdrom/squashfs.img /sq
596+ losetup /sq/ext2.img
597+ losetup /somewhere/cow
598+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
599+- it will be difficult (or needs more operations) to extract the
600+ difference between the original device and COW.
601+- DM snapshot-merge may help a lot when users try merging. in the
602+ fs-layer union, users will use rsync(1).
603+
7e9cd9fe
AM
604+You may want to read my old paper "Filesystems in LiveCD"
605+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
53392da6 606+
7e9cd9fe
AM
607+
608+Several characters/aspects/persona of aufs
53392da6
AM
609+----------------------------------------------------------------------
610+
7e9cd9fe 611+Aufs has several characters, aspects or persona.
53392da6
AM
612+1. a filesystem, callee of VFS helper
613+2. sub-VFS, caller of VFS helper for branches
614+3. a virtual filesystem which maintains persistent inode number
615+4. reader/writer of files on branches such like an application
616+
617+1. Callee of VFS Helper
618+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
619+unlink(2) from an application reaches sys_unlink() kernel function and
620+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
621+calls filesystem specific unlink operation. Actually aufs implements the
622+unlink operation but it behaves like a redirector.
623+
624+2. Caller of VFS Helper for Branches
625+aufs_unlink() passes the unlink request to the branch filesystem as if
626+it were called from VFS. So the called unlink operation of the branch
627+filesystem acts as usual. As a caller of VFS helper, aufs should handle
628+every necessary pre/post operation for the branch filesystem.
629+- acquire the lock for the parent dir on a branch
630+- lookup in a branch
631+- revalidate dentry on a branch
632+- mnt_want_write() for a branch
633+- vfs_unlink() for a branch
634+- mnt_drop_write() for a branch
635+- release the lock on a branch
636+
637+3. Persistent Inode Number
638+One of the most important issue for a filesystem is to maintain inode
639+numbers. This is particularly important to support exporting a
640+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
641+backend block device for its own. But some storage is necessary to
7e9cd9fe
AM
642+keep and maintain the inode numbers. It may be a large space and may not
643+suit to keep in memory. Aufs rents some space from its first writable
644+branch filesystem (by default) and creates file(s) on it. These files
645+are created by aufs internally and removed soon (currently) keeping
646+opened.
53392da6
AM
647+Note: Because these files are removed, they are totally gone after
648+ unmounting aufs. It means the inode numbers are not persistent
649+ across unmount or reboot. I have a plan to make them really
650+ persistent which will be important for aufs on NFS server.
651+
652+4. Read/Write Files Internally (copy-on-write)
653+Because a branch can be readonly, when you write a file on it, aufs will
654+"copy-up" it to the upper writable branch internally. And then write the
655+originally requested thing to the file. Generally kernel doesn't
656+open/read/write file actively. In aufs, even a single write may cause a
657+internal "file copy". This behaviour is very similar to cp(1) command.
658+
659+Some people may think it is better to pass such work to user space
660+helper, instead of doing in kernel space. Actually I am still thinking
661+about it. But currently I have implemented it in kernel space.
e8791d4f
AM
662diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/02struct.txt linux-4.9/Documentation/filesystems/aufs/design/02struct.txt
663--- linux-4.9/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
664+++ linux-4.9/Documentation/filesystems/aufs/design/02struct.txt 2021-02-24 16:15:09.518240088 +0100
7e9cd9fe 665@@ -0,0 +1,258 @@
53392da6 666+
ae9dfd79 667+# Copyright (C) 2005-2018 Junjiro R. Okajima
53392da6
AM
668+#
669+# This program is free software; you can redistribute it and/or modify
670+# it under the terms of the GNU General Public License as published by
671+# the Free Software Foundation; either version 2 of the License, or
672+# (at your option) any later version.
673+#
674+# This program is distributed in the hope that it will be useful,
675+# but WITHOUT ANY WARRANTY; without even the implied warranty of
676+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
677+# GNU General Public License for more details.
678+#
679+# You should have received a copy of the GNU General Public License
523b37e3 680+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
681+
682+Basic Aufs Internal Structure
683+
684+Superblock/Inode/Dentry/File Objects
685+----------------------------------------------------------------------
686+As like an ordinary filesystem, aufs has its own
687+superblock/inode/dentry/file objects. All these objects have a
688+dynamically allocated array and store the same kind of pointers to the
689+lower filesystem, branch.
690+For example, when you build a union with one readwrite branch and one
691+readonly, mounted /au, /rw and /ro respectively.
692+- /au = /rw + /ro
693+- /ro/fileA exists but /rw/fileA does not
694+
695+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
696+pointers are stored in a aufs dentry. The array in aufs dentry will be,
7e9cd9fe 697+- [0] = NULL (because /rw/fileA doesn't exist)
53392da6
AM
698+- [1] = /ro/fileA
699+
700+This style of an array is essentially same to the aufs
701+superblock/inode/dentry/file objects.
702+
703+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
704+branches dynamically, each of these objects has its own generation. When
705+branches are changed, the generation in the aufs superblock is
706+incremented. The generation in another object is compared with it when
707+the object is accessed. When the generation in an object is obsolete, aufs
708+refreshes the internal array.
53392da6
AM
709+
710+
711+Superblock
712+----------------------------------------------------------------------
713+Additionally aufs superblock has some data for policies to select one
714+among multiple writable branches, XIB files, pseudo-links and kobject.
715+See below in detail.
7e9cd9fe
AM
716+About the policies which supports copy-down a directory, see
717+wbr_policy.txt too.
53392da6
AM
718+
719+
720+Branch and XINO(External Inode Number Translation Table)
721+----------------------------------------------------------------------
722+Every branch has its own xino (external inode number translation table)
723+file. The xino file is created and unlinked by aufs internally. When two
724+members of a union exist on the same filesystem, they share the single
725+xino file.
726+The struct of a xino file is simple, just a sequence of aufs inode
727+numbers which is indexed by the lower inode number.
728+In the above sample, assume the inode number of /ro/fileA is i111 and
729+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
730+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
731+
732+When the inode numbers are not contiguous, the xino file will be sparse
733+which has a hole in it and doesn't consume as much disk space as it
734+might appear. If your branch filesystem consumes disk space for such
735+holes, then you should specify 'xino=' option at mounting aufs.
736+
7e9cd9fe
AM
737+Aufs has a mount option to free the disk blocks for such holes in XINO
738+files on tmpfs or ramdisk. But it is not so effective actually. If you
739+meet a problem of disk shortage due to XINO files, then you should try
740+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
741+The patch localizes the assignment inumbers per tmpfs-mount and avoid
742+the holes in XINO files.
743+
53392da6 744+Also a writable branch has three kinds of "whiteout bases". All these
7e9cd9fe 745+are existed when the branch is joined to aufs, and their names are
53392da6
AM
746+whiteout-ed doubly, so that users will never see their names in aufs
747+hierarchy.
7e9cd9fe 748+1. a regular file which will be hardlinked to all whiteouts.
53392da6 749+2. a directory to store a pseudo-link.
7e9cd9fe 750+3. a directory to store an "orphan"-ed file temporarily.
53392da6
AM
751+
752+1. Whiteout Base
753+ When you remove a file on a readonly branch, aufs handles it as a
754+ logical deletion and creates a whiteout on the upper writable branch
755+ as a hardlink of this file in order not to consume inode on the
756+ writable branch.
757+2. Pseudo-link Dir
758+ See below, Pseudo-link.
759+3. Step-Parent Dir
760+ When "fileC" exists on the lower readonly branch only and it is
761+ opened and removed with its parent dir, and then user writes
762+ something into it, then aufs copies-up fileC to this
763+ directory. Because there is no other dir to store fileC. After
764+ creating a file under this dir, the file is unlinked.
765+
766+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
767+dynamically, a branch has its own id. When the branch order changes,
768+aufs finds the new index by searching the branch id.
53392da6
AM
769+
770+
771+Pseudo-link
772+----------------------------------------------------------------------
773+Assume "fileA" exists on the lower readonly branch only and it is
774+hardlinked to "fileB" on the branch. When you write something to fileA,
775+aufs copies-up it to the upper writable branch. Additionally aufs
776+creates a hardlink under the Pseudo-link Directory of the writable
777+branch. The inode of a pseudo-link is kept in aufs super_block as a
778+simple list. If fileB is read after unlinking fileA, aufs returns
779+filedata from the pseudo-link instead of the lower readonly
780+branch. Because the pseudo-link is based upon the inode, to keep the
7e9cd9fe 781+inode number by xino (see above) is essentially necessary.
53392da6
AM
782+
783+All the hardlinks under the Pseudo-link Directory of the writable branch
784+should be restored in a proper location later. Aufs provides a utility
785+to do this. The userspace helper is executed at remounting and unmounting
786+aufs by default.
787+While this utility is running, it puts aufs into the pseudo-link
788+maintenance mode. In this mode, only the process which began the
789+maintenance mode (and its child processes) is allowed to operate in
790+aufs. Some other processes which are not related to the pseudo-link will
791+be allowed to run too, but the rest have to return an error or wait
792+until the maintenance mode ends. If a process already acquires an inode
793+mutex (in VFS), it has to return an error.
794+
795+
796+XIB(external inode number bitmap)
797+----------------------------------------------------------------------
798+In addition to the per-branch xino file, aufs has an external inode number
7e9cd9fe
AM
799+bitmap in a superblock object. It is also an internal file such like a
800+xino file.
53392da6
AM
801+It is a simple bitmap to mark whether the aufs inode number is in-use or
802+not.
803+To reduce the file I/O, aufs prepares a single memory page to cache xib.
804+
7e9cd9fe 805+As well as XINO files, aufs has a feature to truncate/refresh XIB to
53392da6
AM
806+reduce the number of consumed disk blocks for these files.
807+
808+
809+Virtual or Vertical Dir, and Readdir in Userspace
810+----------------------------------------------------------------------
811+In order to support multiple layers (branches), aufs readdir operation
812+constructs a virtual dir block on memory. For readdir, aufs calls
813+vfs_readdir() internally for each dir on branches, merges their entries
814+with eliminating the whiteout-ed ones, and sets it to file (dir)
815+object. So the file object has its entry list until it is closed. The
816+entry list will be updated when the file position is zero and becomes
7e9cd9fe 817+obsoleted. This decision is made in aufs automatically.
53392da6
AM
818+
819+The dynamically allocated memory block for the name of entries has a
820+unit of 512 bytes (by default) and stores the names contiguously (no
821+padding). Another block for each entry is handled by kmem_cache too.
822+While building dir blocks, aufs creates a hash list and judges whether
823+the entry is whiteouted by its upper branch or already listed.
824+The merged result is cached in the corresponding inode object and
825+maintained by a customizable life-time option.
826+
827+Some people may call it can be a security hole or invite DoS attack
828+since the opened and once readdir-ed dir (file object) holds its entry
829+list and becomes a pressure for system memory. But I'd say it is similar
830+to files under /proc or /sys. The virtual files in them also hold a
831+memory page (generally) while they are opened. When an idea to reduce
832+memory for them is introduced, it will be applied to aufs too.
833+For those who really hate this situation, I've developed readdir(3)
834+library which operates this merging in userspace. You just need to set
835+LD_PRELOAD environment variable, and aufs will not consume any memory in
836+kernel space for readdir(3).
837+
838+
839+Workqueue
840+----------------------------------------------------------------------
841+Aufs sometimes requires privilege access to a branch. For instance,
842+in copy-up/down operation. When a user process is going to make changes
843+to a file which exists in the lower readonly branch only, and the mode
844+of one of ancestor directories may not be writable by a user
845+process. Here aufs copies-up the file with its ancestors and they may
846+require privilege to set its owner/group/mode/etc.
847+This is a typical case of an application character of aufs (see
848+Introduction).
849+
850+Aufs uses workqueue synchronously for this case. It creates its own
851+workqueue. The workqueue is a kernel thread and has privilege. Aufs
852+passes the request to call mkdir or write (for example), and wait for
853+its completion. This approach solves a problem of a signal handler
854+simply.
855+If aufs didn't adopt the workqueue and changed the privilege of the
7e9cd9fe
AM
856+process, then the process may receive the unexpected SIGXFSZ or other
857+signals.
53392da6
AM
858+
859+Also aufs uses the system global workqueue ("events" kernel thread) too
860+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
861+whiteout base and etc. This is unrelated to a privilege.
862+Most of aufs operation tries acquiring a rw_semaphore for aufs
863+superblock at the beginning, at the same time waits for the completion
864+of all queued asynchronous tasks.
865+
866+
867+Whiteout
868+----------------------------------------------------------------------
869+The whiteout in aufs is very similar to Unionfs's. That is represented
870+by its filename. UnionMount takes an approach of a file mode, but I am
871+afraid several utilities (find(1) or something) will have to support it.
872+
873+Basically the whiteout represents "logical deletion" which stops aufs to
874+lookup further, but also it represents "dir is opaque" which also stop
7e9cd9fe 875+further lookup.
53392da6
AM
876+
877+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
878+In order to make several functions in a single systemcall to be
879+revertible, aufs adopts an approach to rename a directory to a temporary
880+unique whiteouted name.
881+For example, in rename(2) dir where the target dir already existed, aufs
882+renames the target dir to a temporary unique whiteouted name before the
7e9cd9fe 883+actual rename on a branch, and then handles other actions (make it opaque,
53392da6
AM
884+update the attributes, etc). If an error happens in these actions, aufs
885+simply renames the whiteouted name back and returns an error. If all are
886+succeeded, aufs registers a function to remove the whiteouted unique
887+temporary name completely and asynchronously to the system global
888+workqueue.
889+
890+
891+Copy-up
892+----------------------------------------------------------------------
893+It is a well-known feature or concept.
894+When a user modifies a file on a readonly branch, aufs operates "copy-up"
895+internally and makes change to the new file on the upper writable branch.
896+When the trigger systemcall does not update the timestamps of the parent
897+dir, aufs reverts it after copy-up.
c2b27bf2
AM
898+
899+
900+Move-down (aufs3.9 and later)
901+----------------------------------------------------------------------
902+"Copy-up" is one of the essential features in aufs. It copies a file from
903+the lower readonly branch to the upper writable branch when a user
904+changes something about the file.
905+"Move-down" is an opposite action of copy-up. Basically this action is
906+run manually instead of automatically and internally.
076b876e
AM
907+For design and implementation, aufs has to consider these issues.
908+- whiteout for the file may exist on the lower branch.
909+- ancestor directories may not exist on the lower branch.
910+- diropq for the ancestor directories may exist on the upper branch.
911+- free space on the lower branch will reduce.
912+- another access to the file may happen during moving-down, including
7e9cd9fe 913+ UDBA (see "Revalidate Dentry and UDBA").
076b876e
AM
914+- the file should not be hard-linked nor pseudo-linked. they should be
915+ handled by auplink utility later.
c2b27bf2
AM
916+
917+Sometimes users want to move-down a file from the upper writable branch
918+to the lower readonly or writable branch. For instance,
919+- the free space of the upper writable branch is going to run out.
920+- create a new intermediate branch between the upper and lower branch.
921+- etc.
922+
923+For this purpose, use "aumvdown" command in aufs-util.git.
e8791d4f
AM
924diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/03atomic_open.txt linux-4.9/Documentation/filesystems/aufs/design/03atomic_open.txt
925--- linux-4.9/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 01:00:00.000000000 +0100
926+++ linux-4.9/Documentation/filesystems/aufs/design/03atomic_open.txt 2021-02-24 16:15:09.518240088 +0100
b912730e
AM
927@@ -0,0 +1,85 @@
928+
ae9dfd79 929+# Copyright (C) 2015-2018 Junjiro R. Okajima
b912730e
AM
930+#
931+# This program is free software; you can redistribute it and/or modify
932+# it under the terms of the GNU General Public License as published by
933+# the Free Software Foundation; either version 2 of the License, or
934+# (at your option) any later version.
935+#
936+# This program is distributed in the hope that it will be useful,
937+# but WITHOUT ANY WARRANTY; without even the implied warranty of
938+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
939+# GNU General Public License for more details.
940+#
941+# You should have received a copy of the GNU General Public License
942+# along with this program. If not, see <http://www.gnu.org/licenses/>.
943+
944+Support for a branch who has its ->atomic_open()
945+----------------------------------------------------------------------
946+The filesystems who implement its ->atomic_open() are not majority. For
947+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
948+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
949+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
950+sure whether all filesystems who have ->atomic_open() behave like this,
951+but NFSv4 surely returns the error.
952+
953+In order to support ->atomic_open() for aufs, there are a few
954+approaches.
955+
956+A. Introduce aufs_atomic_open()
957+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
958+ branch fs.
959+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
960+ an aufs user Pip Cet's approach
961+ - calls aufs_create(), VFS finish_open() and notify_change().
962+ - pass fake-mode to finish_open(), and then correct the mode by
963+ notify_change().
964+C. Extend aufs_open() to call branch fs's ->atomic_open()
965+ - no aufs_atomic_open().
966+ - aufs_lookup() registers the TID to an aufs internal object.
967+ - aufs_create() does nothing when the matching TID is registered, but
968+ registers the mode.
969+ - aufs_open() calls branch fs's ->atomic_open() when the matching
970+ TID is registered.
971+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
972+ credential
973+ - no aufs_atomic_open().
974+ - aufs_create() registers the TID to an internal object. this info
975+ represents "this process created this file just now."
976+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
977+ registered TID and re-try open() with superuser's credential.
978+
979+Pros and cons for each approach.
980+
981+A.
982+ - straightforward but highly depends upon VFS internal.
983+ - the atomic behaviour is kept.
984+ - some of parameters such as nameidata are hard to reproduce for
985+ branch fs.
986+ - large overhead.
987+B.
988+ - easy to implement.
989+ - the atomic behaviour is lost.
990+C.
991+ - the atomic behaviour is kept.
992+ - dirty and tricky.
993+ - VFS checks whether the file is created correctly after calling
994+ ->create(), which means this approach doesn't work.
995+D.
996+ - easy to implement.
997+ - the atomic behaviour is lost.
998+ - to open a file with superuser's credential and give it to a user
999+ process is a bad idea, since the file object keeps the credential
1000+ in it. It may affect LSM or something. This approach doesn't work
1001+ either.
1002+
1003+The approach A is ideal, but it is hard to implement. So here is a
1004+variation of A, which is to be implemented.
1005+
1006+A-1. Introduce aufs_atomic_open()
1007+ - calls branch fs ->atomic_open() if exists. otherwise calls
1008+ vfs_create() and finish_open().
1009+ - the demerit is that the several checks after branch fs
1010+ ->atomic_open() are lost. in the ordinary case, the checks are
1011+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
1012+ be implemented in aufs, but not all I am afraid.
e8791d4f
AM
1013diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/03lookup.txt linux-4.9/Documentation/filesystems/aufs/design/03lookup.txt
1014--- linux-4.9/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
1015+++ linux-4.9/Documentation/filesystems/aufs/design/03lookup.txt 2021-02-24 16:15:09.518240088 +0100
7e9cd9fe 1016@@ -0,0 +1,113 @@
53392da6 1017+
ae9dfd79 1018+# Copyright (C) 2005-2018 Junjiro R. Okajima
53392da6
AM
1019+#
1020+# This program is free software; you can redistribute it and/or modify
1021+# it under the terms of the GNU General Public License as published by
1022+# the Free Software Foundation; either version 2 of the License, or
1023+# (at your option) any later version.
1024+#
1025+# This program is distributed in the hope that it will be useful,
1026+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1027+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1028+# GNU General Public License for more details.
1029+#
1030+# You should have received a copy of the GNU General Public License
523b37e3 1031+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1032+
1033+Lookup in a Branch
1034+----------------------------------------------------------------------
1035+Since aufs has a character of sub-VFS (see Introduction), it operates
7e9cd9fe
AM
1036+lookup for branches as VFS does. It may be a heavy work. But almost all
1037+lookup operation in aufs is the simplest case, ie. lookup only an entry
1038+directly connected to its parent. Digging down the directory hierarchy
1039+is unnecessary. VFS has a function lookup_one_len() for that use, and
1040+aufs calls it.
1041+
1042+When a branch is a remote filesystem, aufs basically relies upon its
53392da6
AM
1043+->d_revalidate(), also aufs forces the hardest revalidate tests for
1044+them.
1045+For d_revalidate, aufs implements three levels of revalidate tests. See
1046+"Revalidate Dentry and UDBA" in detail.
1047+
1048+
076b876e
AM
1049+Test Only the Highest One for the Directory Permission (dirperm1 option)
1050+----------------------------------------------------------------------
1051+Let's try case study.
1052+- aufs has two branches, upper readwrite and lower readonly.
1053+ /au = /rw + /ro
1054+- "dirA" exists under /ro, but /rw. and its mode is 0700.
1055+- user invoked "chmod a+rx /au/dirA"
1056+- the internal copy-up is activated and "/rw/dirA" is created and its
7e9cd9fe 1057+ permission bits are set to world readable.
076b876e
AM
1058+- then "/au/dirA" becomes world readable?
1059+
1060+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1061+or it may be a natively readonly filesystem. If aufs respects the lower
1062+branch, it should not respond readdir request from other users. But user
1063+allowed it by chmod. Should aufs really reject showing the entries
1064+under /ro/dirA?
1065+
7e9cd9fe
AM
1066+To be honest, I don't have a good solution for this case. So aufs
1067+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
1068+users.
076b876e
AM
1069+When dirperm1 is specified, aufs checks only the highest one for the
1070+directory permission, and shows the entries. Otherwise, as usual, checks
1071+every dir existing on all branches and rejects the request.
1072+
1073+As a side effect, dirperm1 option improves the performance of aufs
1074+because the number of permission check is reduced when the number of
1075+branch is many.
1076+
1077+
53392da6
AM
1078+Revalidate Dentry and UDBA (User's Direct Branch Access)
1079+----------------------------------------------------------------------
1080+Generally VFS helpers re-validate a dentry as a part of lookup.
1081+0. digging down the directory hierarchy.
1082+1. lock the parent dir by its i_mutex.
1083+2. lookup the final (child) entry.
1084+3. revalidate it.
1085+4. call the actual operation (create, unlink, etc.)
1086+5. unlock the parent dir
1087+
1088+If the filesystem implements its ->d_revalidate() (step 3), then it is
1089+called. Actually aufs implements it and checks the dentry on a branch is
1090+still valid.
1091+But it is not enough. Because aufs has to release the lock for the
1092+parent dir on a branch at the end of ->lookup() (step 2) and
1093+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1094+held by VFS.
1095+If the file on a branch is changed directly, eg. bypassing aufs, after
1096+aufs released the lock, then the subsequent operation may cause
1097+some unpleasant result.
1098+
1099+This situation is a result of the VFS architecture, in which ->lookup() and
1100+->d_revalidate() are separated. But I never say it is wrong. It is a good
1101+design from VFS's point of view. It is just not suitable for sub-VFS
1102+character in aufs.
1103+
1104+Aufs supports such a case by three levels of revalidation which are
1105+selectable by the user.
1106+1. Simple Revalidate
1107+ Addition to the native flow in VFS's, confirm the child-parent
1108+ relationship on the branch just after locking the parent dir on the
1109+ branch in the "actual operation" (step 4). When this validation
1110+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1111+ checks the validation of the dentry on branches.
1112+2. Monitor Changes Internally by Inotify/Fsnotify
1113+ In addition to the above, in the "actual operation" (step 4) aufs re-looks up
1114+ the dentry on the branch, and returns EBUSY if it finds different
1115+ dentry.
1116+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
1117+ while it is in cache. When the event is notified, aufs registers a
1118+ function to kernel 'events' thread by schedule_work(). And the
1119+ function sets some special status to the cached aufs dentry and inode
1120+ private data. If they are not cached, then aufs has nothing to
1121+ do. When the same file is accessed through aufs (step 0-3) later,
1122+ aufs will detect the status and refresh all necessary data.
1123+ In this mode, aufs has to ignore the event which is fired by aufs
1124+ itself.
1125+3. No Extra Validation
1126+ This is the simplest test and doesn't add any additional revalidation
7e9cd9fe 1127+ test, and skip the revalidation in step 4. It is useful and improves
53392da6
AM
1128+ aufs performance when system surely hide the aufs branches from user,
1129+ by over-mounting something (or another method).
e8791d4f
AM
1130diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/04branch.txt linux-4.9/Documentation/filesystems/aufs/design/04branch.txt
1131--- linux-4.9/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
1132+++ linux-4.9/Documentation/filesystems/aufs/design/04branch.txt 2021-02-24 16:15:09.518240088 +0100
7e9cd9fe 1133@@ -0,0 +1,74 @@
53392da6 1134+
ae9dfd79 1135+# Copyright (C) 2005-2018 Junjiro R. Okajima
53392da6
AM
1136+#
1137+# This program is free software; you can redistribute it and/or modify
1138+# it under the terms of the GNU General Public License as published by
1139+# the Free Software Foundation; either version 2 of the License, or
1140+# (at your option) any later version.
1141+#
1142+# This program is distributed in the hope that it will be useful,
1143+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1144+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1145+# GNU General Public License for more details.
1146+#
1147+# You should have received a copy of the GNU General Public License
523b37e3 1148+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1149+
1150+Branch Manipulation
1151+
1152+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1153+and changing its permission/attribute, there are a lot of works to do.
1154+
1155+
1156+Add a Branch
1157+----------------------------------------------------------------------
1158+o Confirm the adding dir exists outside of aufs, including loopback
7e9cd9fe 1159+ mount, and its various attributes.
53392da6
AM
1160+o Initialize the xino file and whiteout bases if necessary.
1161+ See struct.txt.
1162+
1163+o Check the owner/group/mode of the directory
1164+ When the owner/group/mode of the adding directory differs from the
1165+ existing branch, aufs issues a warning because it may impose a
1166+ security risk.
1167+ For example, when an upper writable branch has a world writable empty
1168+ top directory, a malicious user can create any files on the writable
1169+ branch directly, like copy-up and modify manually. If something like
1170+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper
1171+ writable branch, and the writable branch is world-writable, then a
1172+ malicious guy may create /etc/passwd on the writable branch directly
1173+ and the infected file will be valid in aufs.
7e9cd9fe 1174+ I am afraid it can be a security issue, but aufs can do nothing except
53392da6
AM
1175+ producing a warning.
1176+
1177+
1178+Delete a Branch
1179+----------------------------------------------------------------------
1180+o Confirm the deleting branch is not busy
1181+ To be general, there is one merit to adopt "remount" interface to
1182+ manipulate branches. It is to discard caches. At deleting a branch,
1183+ aufs checks the still cached (and connected) dentries and inodes. If
1184+ there are any, then they are all in-use. An inode without its
1185+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
1186+
1187+ For the cached one, aufs checks whether the same named entry exists on
1188+ other branches.
1189+ If the cached one is a directory, because aufs provides a merged view
1190+ to users, as long as one dir is left on any branch aufs can show the
1191+ dir to users. In this case, the branch can be removed from aufs.
1192+ Otherwise aufs rejects deleting the branch.
1193+
1194+ If any file on the deleting branch is opened by aufs, then aufs
1195+ rejects deleting.
1196+
1197+
1198+Modify the Permission of a Branch
1199+----------------------------------------------------------------------
1200+o Re-initialize or remove the xino file and whiteout bases if necessary.
1201+ See struct.txt.
1202+
1203+o rw --> ro: Confirm the modifying branch is not busy
1204+ Aufs rejects the request if any of these conditions are true.
1205+ - a file on the branch is mmap-ed.
1206+ - a regular file on the branch is opened for write and there is no
1207+ same named entry on the upper branch.
e8791d4f
AM
1208diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/05wbr_policy.txt linux-4.9/Documentation/filesystems/aufs/design/05wbr_policy.txt
1209--- linux-4.9/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
1210+++ linux-4.9/Documentation/filesystems/aufs/design/05wbr_policy.txt 2021-02-24 16:15:09.521573529 +0100
523b37e3 1211@@ -0,0 +1,64 @@
53392da6 1212+
ae9dfd79 1213+# Copyright (C) 2005-2018 Junjiro R. Okajima
53392da6
AM
1214+#
1215+# This program is free software; you can redistribute it and/or modify
1216+# it under the terms of the GNU General Public License as published by
1217+# the Free Software Foundation; either version 2 of the License, or
1218+# (at your option) any later version.
1219+#
1220+# This program is distributed in the hope that it will be useful,
1221+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1222+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1223+# GNU General Public License for more details.
1224+#
1225+# You should have received a copy of the GNU General Public License
523b37e3 1226+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1227+
1228+Policies to Select One among Multiple Writable Branches
1229+----------------------------------------------------------------------
1230+When the number of writable branch is more than one, aufs has to decide
1231+the target branch for file creation or copy-up. By default, the highest
1232+writable branch which has the parent (or ancestor) dir of the target
1233+file is chosen (top-down-parent policy).
1234+By user's request, aufs implements some other policies to select the
7e9cd9fe
AM
1235+writable branch, for file creation several policies, round-robin,
1236+most-free-space, and other policies. For copy-up, top-down-parent,
1237+bottom-up-parent, bottom-up and others.
53392da6
AM
1238+
1239+As expected, the round-robin policy selects the branch in circular. When
1240+you have two writable branches and create 10 new files, 5 files will be
1241+created for each branch. mkdir(2) systemcall is an exception. When you
1242+create 10 new directories, all will be created on the same branch.
1243+And the most-free-space policy selects the one which has most free
1244+space among the writable branches. The amount of free space will be
1245+checked by aufs internally, and users can specify its time interval.
1246+
1247+The policies for copy-up are simpler,
1248+top-down-parent is equivalent to the same named one in create policy,
1249+bottom-up-parent selects the writable branch where the parent dir
1250+exists and the nearest upper one from the copyup-source,
1251+bottom-up selects the nearest upper writable branch from the
1252+copyup-source, regardless of the existence of the parent dir.
1253+
1254+There are some rules or exceptions to apply these policies.
1255+- If there is a readonly branch above the policy-selected branch and
1256+ the parent dir is marked as opaque (a variation of whiteout), or the
1257+ target (creating) file is whiteout-ed on the upper readonly branch,
1258+ then the result of the policy is ignored and the target file will be
1259+ created on the nearest upper writable branch than the readonly branch.
1260+- If there is a writable branch above the policy-selected branch and
1261+ the parent dir is marked as opaque or the target file is whiteouted
1262+ on the branch, then the result of the policy is ignored and the target
1263+ file will be created on the highest one among the upper writable
1264+ branches which have diropq or whiteout. In case of whiteout, aufs removes
1265+ it as usual.
1266+- link(2) and rename(2) systemcalls are exceptions in every policy.
1267+ They try selecting the branch where the source exists if possible
1268+ since copying-up a large file will take a long time. If it can't be,
1269+ ie. the branch where the source exists is readonly, then they will
1270+ follow the copyup policy.
1271+- There is an exception for rename(2) when the target exists.
1272+ If the rename target exists, aufs compares the index of the branches
1273+ where the source and the target exist and selects the higher
1274+ one. If the selected branch is readonly, then aufs follows the
1275+ copyup policy.
e8791d4f
AM
1276diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/06dirren.dot linux-4.9/Documentation/filesystems/aufs/design/06dirren.dot
1277--- linux-4.9/Documentation/filesystems/aufs/design/06dirren.dot 1970-01-01 01:00:00.000000000 +0100
1278+++ linux-4.9/Documentation/filesystems/aufs/design/06dirren.dot 2021-02-24 16:15:09.521573529 +0100
ae9dfd79
AM
1279@@ -0,0 +1,31 @@
1280+
1281+// to view this graph, run dot(1) command in GRAPHVIZ.
1282+
1283+digraph G {
1284+node [shape=box];
1285+whinfo [label="detailed info file\n(lower_brid_root-hinum, h_inum, namelen, old name)"];
1286+
1287+node [shape=oval];
1288+
1289+aufs_rename -> whinfo [label="store/remove"];
1290+
1291+node [shape=oval];
1292+inode_list [label="h_inum list in branch\ncache"];
1293+
1294+node [shape=box];
1295+whinode [label="h_inum list file"];
1296+
1297+node [shape=oval];
1298+brmgmt [label="br_add/del/mod/umount"];
1299+
1300+brmgmt -> inode_list [label="create/remove"];
1301+brmgmt -> whinode [label="load/store"];
1302+
1303+inode_list -> whinode [style=dashed,dir=both];
1304+
1305+aufs_rename -> inode_list [label="add/del"];
1306+
1307+aufs_lookup -> inode_list [label="search"];
1308+
1309+aufs_lookup -> whinfo [label="load/remove"];
1310+}
e8791d4f
AM
1311diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/06dirren.txt linux-4.9/Documentation/filesystems/aufs/design/06dirren.txt
1312--- linux-4.9/Documentation/filesystems/aufs/design/06dirren.txt 1970-01-01 01:00:00.000000000 +0100
1313+++ linux-4.9/Documentation/filesystems/aufs/design/06dirren.txt 2021-02-24 16:15:09.521573529 +0100
ae9dfd79
AM
1314@@ -0,0 +1,102 @@
1315+
1316+# Copyright (C) 2017-2018 Junjiro R. Okajima
1317+#
1318+# This program is free software; you can redistribute it and/or modify
1319+# it under the terms of the GNU General Public License as published by
1320+# the Free Software Foundation; either version 2 of the License, or
1321+# (at your option) any later version.
1322+#
1323+# This program is distributed in the hope that it will be useful,
1324+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1325+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1326+# GNU General Public License for more details.
1327+#
1328+# You should have received a copy of the GNU General Public License
1329+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1330+
1331+Special handling for renaming a directory (DIRREN)
1332+----------------------------------------------------------------------
1333+First, let's assume we have a simple usecase.
1334+
1335+- /u = /rw + /ro
1336+- /rw/dirA exists
1337+- /ro/dirA and /ro/dirA/file exist too
1338+- there is no dirB on both branches
1339+- a user issues rename("dirA", "dirB")
1340+
1341+Now, how should aufs behave against this rename(2)?
1342+There are a few possible cases.
1343+
1344+A. returns EROFS.
1345+ since dirA exists on a readonly branch which cannot be renamed.
1346+B. returns EXDEV.
1347+ it is possible to copy-up dirA (only the dir itself), but the child
1348+ entries ("file" in this case) should not be. it must be a bad
1349+ approach to copy-up recursively.
1350+C. returns a success.
1351+ even though the branch /ro is readonly, aufs tries renaming it. Obviously it
1352+ is a violation of aufs' policy.
1353+D. construct an extra information which indicates that /ro/dirA should
1354+ be handled as the name of dirB.
1355+ overlayfs has a similar feature called REDIRECT.
1356+
1357+Until now, aufs implements the case B only which returns EXDEV, and
1358+expects the userspace application behaves like mv(1) which tries
1359+issuing rename(2) recursively.
1360+
1361+A new aufs feature called DIRREN is introduced which implements the case
1362+D. There are several "extra information" added.
1363+
1364+1. detailed info per renamed directory
1365+ path: /rw/dirB/$AUFS_WH_DR_INFO_PFX.<lower branch-id>
1366+2. the inode-number list of directories on a branch
1367+ path: /rw/dirB/$AUFS_WH_DR_BRHINO
1368+
1369+The filename of "detailed info per directory" represents the lower
1370+branch, and its format is
1371+- a type of the branch id
1372+ one of these.
1373+ + uuid (not implemented yet)
1374+ + fsid
1375+ + dev
1376+- the inode-number of the branch root dir
1377+
1378+And it contains these info in a single regular file.
1379+- magic number
1380+- branch's inode-number of the logically renamed dir
1381+- the name of the before-renamed dir
1382+
1383+The "detailed info per directory" file is created in aufs rename(2), and
1384+loaded in any lookup.
1385+The info is considered in lookup for the matching case only. Here
1386+"matching" means that the root of branch (in the info filename) is the same
1387+as the current looking-up branch. After looking-up the before-renamed
1388+name, the inode-number is compared. And the matched dentry is used.
1389+
1390+The "inode-number list of directories" is a regular file which contains
1391+simply the inode-numbers on the branch. The file is created or updated
1392+in removing the branch, and loaded in adding the branch. Its lifetime is
1393+equal to the branch.
1394+The list is referred to in lookup, and when the current target inode is
1395+found in the list, the aufs tries loading the "detailed info per
1396+directory" and get the changed and valid name of the dir.
1397+
1398+Theoretically these "extra information" may be able to be put into XATTR
1399+in the dir inode. But aufs doesn't choose this way because
1400+1. XATTR may not be supported by the branch (or its configuration)
1401+2. XATTR may have its size limit.
1402+3. XATTR may be less easy to convert than a regular file, when the
1403+ format of the info is changed in the future.
1404+At the same time, I agree that the regular file approach is much slower
1405+than XATTR approach. So, in the future, aufs may take the XATTR or other
1406+better approach.
1407+
1408+This DIRREN feature is enabled by aufs configuration, and is activated
1409+by a new mount option.
1410+
1411+For the more complicated case, there is a work with UDBA option, which
1412+is to detect the direct access to the branches (by-passing aufs) and to
1413+maintain the caches in aufs. Since a single cached aufs dentry may
1414+contain two names, before- and after-rename, the name comparison in
1415+UDBA handler may not work correctly. In this case, the behaviour will be
1416+equivalent to udba=reval case.
e8791d4f
AM
1417diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/06fhsm.txt linux-4.9/Documentation/filesystems/aufs/design/06fhsm.txt
1418--- linux-4.9/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 01:00:00.000000000 +0100
1419+++ linux-4.9/Documentation/filesystems/aufs/design/06fhsm.txt 2021-02-24 16:15:09.521573529 +0100
076b876e
AM
1420@@ -0,0 +1,120 @@
1421+
ae9dfd79 1422+# Copyright (C) 2011-2018 Junjiro R. Okajima
076b876e
AM
1423+#
1424+# This program is free software; you can redistribute it and/or modify
1425+# it under the terms of the GNU General Public License as published by
1426+# the Free Software Foundation; either version 2 of the License, or
1427+# (at your option) any later version.
1428+#
1429+# This program is distributed in the hope that it will be useful,
1430+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1431+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1432+# GNU General Public License for more details.
1433+#
1434+# You should have received a copy of the GNU General Public License
1435+# along with this program; if not, write to the Free Software
1436+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1437+
1438+
1439+File-based Hierarchical Storage Management (FHSM)
1440+----------------------------------------------------------------------
1441+Hierarchical Storage Management (or HSM) is a well-known feature in the
1442+storage world. Aufs provides this feature as file-based with multiple
7e9cd9fe 1443+writable branches, based upon the principle of "Colder, the Lower".
076b876e 1444+Here the word "colder" means the less used files, and "lower" means
7e9cd9fe 1445+the position in the order of the stacked branches vertically.
076b876e
AM
1446+These multiple writable branches are prioritized, ie. the topmost one
1447+should be the fastest drive and be used heavily.
1448+
1449+o Characters in aufs FHSM story
1450+- aufs itself and a new branch attribute.
1451+- a new ioctl interface to move-down and to establish a connection with
1452+ the daemon ("move-down" is a converse of "copy-up").
1453+- userspace tool and daemon.
1454+
1455+The userspace daemon establishes a connection with aufs and waits for
1456+the notification. The notified information is very similar to struct
1457+statfs containing the number of consumed blocks and inodes.
1458+When the consumed blocks/inodes of a branch exceeds the user-specified
1459+upper watermark, the daemon activates its move-down process until the
1460+consumed blocks/inodes reaches the user-specified lower watermark.
1461+
1462+The actual move-down is done by aufs based upon the request from
1463+user-space since we need to maintain the inode number and the internal
1464+pointer arrays in aufs.
1465+
1466+Currently aufs FHSM handles the regular files only. Additionally they
1467+must not be hard-linked nor pseudo-linked.
1468+
1469+
1470+o Cowork of aufs and the user-space daemon
1471+ While the userspace daemon keeps the connection established, aufs sends a
1472+ small notification to it whenever aufs writes something into the
1473+ writable branch. But it may cost high since aufs issues statfs(2)
1474+ internally. So user can specify a new option to cache the
1475+ info. Actually the notification is controlled by these factors.
1476+ + the specified cache time.
1477+ + classified as "force" by aufs internally.
1478+ Until the specified time expires, aufs doesn't send the info
1479+ except the forced cases. When aufs decides forcing, the info is always
1480+ notified to userspace.
1481+ For example, the number of free inodes is generally large enough and
1482+ the shortage of it happens rarely. So aufs doesn't force the
1483+ notification when creating a new file, directory and others. This is
1484+ the typical case which aufs doesn't force.
1485+ When aufs writes the actual filedata and the file consumes any new
1486+ blocks, the aufs forces notifying.
1487+
1488+
1489+o Interfaces in aufs
1490+- New branch attribute.
1491+ + fhsm
1492+ Specifies that the branch is managed by FHSM feature. In other words,
1493+ participant in the FHSM.
1494+ When nofhsm is set to the branch, it will not be the source/target
1495+ branch of the move-down operation. This attribute is set
1496+ independently from coo and moo attributes, and if you want full
1497+ FHSM, you should specify them as well.
1498+- New mount option.
1499+ + fhsm_sec
1500+ Specifies a second to suppress many less important info to be
1501+ notified.
1502+- New ioctl.
1503+ + AUFS_CTL_FHSM_FD
1504+ create a new file descriptor which userspace can read the notification
1505+ (a subset of struct statfs) from aufs.
1506+- Module parameter 'brs'
1507+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
1508+ be set.
1509+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
1510+ When there are two or more branches with fhsm attributes,
1511+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
1512+ terminates it. As a result of remounting and branch-manipulation, the
1513+ number of branches with fhsm attribute can be one. In this case,
1514+ /sbin/mount.aufs will terminate the user-space daemon.
1515+
1516+
1517+Finally the operation is done as these steps in kernel-space.
1518+- make sure that,
1519+ + no one else is using the file.
1520+ + the file is not hard-linked.
1521+ + the file is not pseudo-linked.
1522+ + the file is a regular file.
1523+ + the parent dir is not opaqued.
1524+- find the target writable branch.
1525+- make sure the file is not whiteout-ed by the upper (than the target)
1526+ branch.
1527+- make the parent dir on the target branch.
1528+- mutex lock the inode on the branch.
1529+- unlink the whiteout on the target branch (if exists).
1530+- lookup and create the whiteout-ed temporary name on the target branch.
1531+- copy the file as the whiteout-ed temporary name on the target branch.
1532+- rename the whiteout-ed temporary name to the original name.
1533+- unlink the file on the source branch.
1534+- maintain the internal pointer array and the external inode number
1535+ table (XINO).
1536+- maintain the timestamps and other attributes of the parent dir and the
1537+ file.
1538+
1539+And of course, in every step, an error may happen. So the operation
1540+should restore the original file state after an error happens.
e8791d4f
AM
1541diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/06mmap.txt linux-4.9/Documentation/filesystems/aufs/design/06mmap.txt
1542--- linux-4.9/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
1543+++ linux-4.9/Documentation/filesystems/aufs/design/06mmap.txt 2021-02-24 16:15:09.521573529 +0100
b912730e 1544@@ -0,0 +1,72 @@
53392da6 1545+
ae9dfd79 1546+# Copyright (C) 2005-2018 Junjiro R. Okajima
53392da6
AM
1547+#
1548+# This program is free software; you can redistribute it and/or modify
1549+# it under the terms of the GNU General Public License as published by
1550+# the Free Software Foundation; either version 2 of the License, or
1551+# (at your option) any later version.
1552+#
1553+# This program is distributed in the hope that it will be useful,
1554+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1555+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1556+# GNU General Public License for more details.
1557+#
1558+# You should have received a copy of the GNU General Public License
523b37e3 1559+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1560+
1561+mmap(2) -- File Memory Mapping
1562+----------------------------------------------------------------------
1563+In aufs, the file-mapped pages are handled by a branch fs directly, no
1564+interaction with aufs. It means aufs_mmap() calls the branch fs's
1565+->mmap().
1566+This approach is simple and good, but there is one problem.
7e9cd9fe 1567+Under /proc, several entries show the mmapped files by its path (with
53392da6
AM
1568+device and inode number), and the printed path will be the path on the
1569+branch fs's instead of virtual aufs's.
1570+This is not a problem in most cases, but some utilities such as lsof(1) (and its
1571+user) may expect the path on aufs.
1572+
1573+To address this issue, aufs adds a new member called vm_prfile in struct
1574+vm_area_struct (and struct vm_region). The original vm_file points to
1575+the file on the branch fs in order to handle everything correctly as
1576+usual. The new vm_prfile points to a virtual file in aufs, and the
1577+show-functions in procfs refer to vm_prfile if it is set.
1578+Also we need to maintain several other places which touch vm_file,
1579+such as
1580+- fork()/clone() copies vma and the reference count of vm_file is
1581+ incremented.
1582+- merging vma maintains the ref count too.
1583+
7e9cd9fe 1584+This is not a good approach. It just fakes the printed path. But it
53392da6
AM
1585+leaves all behaviour around f_mapping unchanged. This is surely an
1586+advantage.
1587+Actually aufs had adopted another complicated approach which calls
1588+generic_file_mmap() and handles struct vm_operations_struct. In this
1589+approach, aufs met a hard problem and I could not solve it without
1590+switching the approach.
b912730e
AM
1591+
1592+There may be one more approach which is
1593+- bind-mount the branch-root onto the aufs-root internally
1594+- grab the new vfsmount (ie. struct mount)
1595+- lazy-umount the branch-root internally
1596+- in open(2) the aufs-file, open the branch-file with the hidden
1597+ vfsmount (instead of the original branch's vfsmount)
1598+- ideally this "bind-mount and lazy-umount" should be done atomically,
1599+ but it may be possible from userspace by the mount helper.
1600+
1601+Adding the internal hidden vfsmount and using it in opening a file, the
1602+file path under /proc will be printed correctly. This approach looks
1603+smarter, but is not possible I am afraid.
1604+- aufs-root may be bind-mount later. when it happens, another hidden
1605+ vfsmount will be required.
1606+- it is hard to get the chance to bind-mount and lazy-umount
1607+ + in kernel-space, FS can have vfsmount in open(2) via
1608+ file->f_path, and aufs can know its vfsmount. But several locks are
1609+ already acquired, and if aufs tries to bind-mount and lazy-umount
1610+ here, then it may cause a deadlock.
1611+ + in user-space, bind-mount doesn't invoke the mount helper.
1612+- since /proc shows dev and ino, aufs has to give vma these info. it
1613+ means a new member vm_prinode will be necessary. this is essentially
1614+ equivalent to vm_prfile described above.
1615+
1616+I have to give up this "looks-smarter" approach.
e8791d4f
AM
1617diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/06xattr.txt linux-4.9/Documentation/filesystems/aufs/design/06xattr.txt
1618--- linux-4.9/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 01:00:00.000000000 +0100
1619+++ linux-4.9/Documentation/filesystems/aufs/design/06xattr.txt 2021-02-24 16:15:09.521573529 +0100
c1595e42
JR
1620@@ -0,0 +1,96 @@
1621+
ae9dfd79 1622+# Copyright (C) 2014-2018 Junjiro R. Okajima
c1595e42
JR
1623+#
1624+# This program is free software; you can redistribute it and/or modify
1625+# it under the terms of the GNU General Public License as published by
1626+# the Free Software Foundation; either version 2 of the License, or
1627+# (at your option) any later version.
1628+#
1629+# This program is distributed in the hope that it will be useful,
1630+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1631+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1632+# GNU General Public License for more details.
1633+#
1634+# You should have received a copy of the GNU General Public License
1635+# along with this program; if not, write to the Free Software
1636+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1637+
1638+
1639+Listing XATTR/EA and getting the value
1640+----------------------------------------------------------------------
1641+For the inode standard attributes (owner, group, timestamps, etc.), aufs
1642+shows the values from the topmost existing file. This behaviour is good
7e9cd9fe 1643+for the non-dir entries since the behaviour exactly matches the shown
c1595e42
JR
1644+information. But for the directories, aufs considers all the same named
1645+entries on the lower branches. Which means, if one of the lower entries
1646+rejects readdir call, then aufs returns an error even if the topmost
1647+entry allows it. This behaviour is necessary to respect the branch fs's
1648+security, but can make users confused since the user-visible standard
1649+attributes don't match the behaviour.
1650+To address this issue, aufs has a mount option called dirperm1 which
1651+checks the permission for the topmost entry only, and ignores the lower
1652+entry's permission.
1653+
1654+A similar issue can happen around XATTR.
1655+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
7e9cd9fe
AM
1656+always set. Otherwise these very unpleasant situations would happen.
1657+- listxattr(2) may return the duplicated entries.
c1595e42
JR
1658+- users may not be able to remove or reset the XATTR forever.
1659+
1660+
1661+XATTR/EA support in the internal (copy,move)-(up,down)
1662+----------------------------------------------------------------------
7e9cd9fe 1663+Generally the extended attributes of inode are categorized as these.
c1595e42
JR
1664+- "security" for LSM and capability.
1665+- "system" for posix ACL, 'acl' mount option is required for the branch
1666+ fs generally.
1667+- "trusted" for userspace, CAP_SYS_ADMIN is required.
1668+- "user" for userspace, 'user_xattr' mount option is required for the
1669+ branch fs generally.
1670+
1671+Moreover there are some other categories. Aufs handles these rather
1672+unpopular categories as the ordinary ones, ie. there is no special
1673+condition nor exception.
1674+
1675+In copy-up, the support for XATTR on the dst branch may differ from the
1676+src branch. In this case, the copy-up operation will get an error and
7e9cd9fe
AM
1677+the original user operation which triggered the copy-up will fail. It
1678+can happen that even all copy-up will fail.
c1595e42
JR
1679+When both of src and dst branches support XATTR and if an error occurs
1680+during copying XATTR, then the copy-up should fail obviously. That is a
1681+good reason and aufs should return an error to userspace. But when only
7e9cd9fe 1682+the src branch supports that XATTR, aufs should not return an error.
c1595e42
JR
1683+For example, the src branch supports ACL but the dst branch doesn't
1684+because the dst branch may natively un-support it or temporarily
1685+un-support it due to "noacl" mount option. Of course, the dst branch fs
1686+may NOT return an error even if the XATTR is not supported. It is
1687+totally up to the branch fs.
1688+
1689+Anyway when the aufs internal copy-up gets an error from the dst branch
1690+fs, then aufs tries removing the just copied entry and returns the error
1691+to the userspace. The worst case of this situation will be all copy-up
1692+will fail.
1693+
1694+For the copy-up operation, there are two basic approaches.
1695+- copy the specified XATTR only (by category above), and return the
7e9cd9fe 1696+ error unconditionally if it happens.
c1595e42
JR
1697+- copy all XATTR, and ignore the error on the specified category only.
1698+
1699+In order to support XATTR and to implement the correct behaviour, aufs
7e9cd9fe
AM
1700+chooses the latter approach and introduces some new branch attributes,
1701+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
c1595e42 1702+They correspond to the XATTR namespaces (see above). Additionally, to be
7e9cd9fe
AM
1703+convenient, "icex" is also provided which means all "icex*" attributes
1704+are set (here the word "icex" stands for "ignore copy-error on XATTR").
c1595e42
JR
1705+
1706+The meaning of these attributes is to ignore the error from setting
1707+XATTR on that branch.
1708+Note that aufs tries copying all XATTR unconditionally, and ignores the
1709+error from the dst branch according to the specified attributes.
1710+
1711+Some XATTR may have its default value. The default value may come from
1712+the parent dir or the environment. If the default value is set at the
1713+file creating-time, it will be overwritten by copy-up.
1714+Some contradiction may happen I am afraid.
1715+Do we need another attribute to stop copying XATTR? I am unsure. For
1716+now, aufs implements the branch attributes to ignore the error.
e8791d4f
AM
1717diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/07export.txt linux-4.9/Documentation/filesystems/aufs/design/07export.txt
1718--- linux-4.9/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
1719+++ linux-4.9/Documentation/filesystems/aufs/design/07export.txt 2021-02-24 16:15:09.521573529 +0100
523b37e3 1720@@ -0,0 +1,58 @@
53392da6 1721+
ae9dfd79 1722+# Copyright (C) 2005-2018 Junjiro R. Okajima
53392da6
AM
1723+#
1724+# This program is free software; you can redistribute it and/or modify
1725+# it under the terms of the GNU General Public License as published by
1726+# the Free Software Foundation; either version 2 of the License, or
1727+# (at your option) any later version.
1728+#
1729+# This program is distributed in the hope that it will be useful,
1730+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1731+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1732+# GNU General Public License for more details.
1733+#
1734+# You should have received a copy of the GNU General Public License
523b37e3 1735+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1736+
1737+Export Aufs via NFS
1738+----------------------------------------------------------------------
1739+Here is an approach.
1740+- like xino/xib, add a new file 'xigen' which stores aufs inode
1741+ generation.
1742+- iget_locked(): initialize aufs inode generation for a new inode, and
1743+ store it in xigen file.
1744+- destroy_inode(): increment aufs inode generation and store it in xigen
1745+ file. it is necessary even if it is not unlinked, because any data of
1746+ inode may be changed by UDBA.
1747+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
1748+ build file handle by
1749+ + branch id (4 bytes)
1750+ + superblock generation (4 bytes)
1751+ + inode number (4 or 8 bytes)
1752+ + parent dir inode number (4 or 8 bytes)
1753+ + inode generation (4 bytes)
1754+ + return value of exportfs_encode_fh() for the parent on a branch (4
1755+ bytes)
1756+ + file handle for a branch (by exportfs_encode_fh())
1757+- fh_to_dentry():
1758+ + find the index of a branch from its id in handle, and check it
1759+ still exists in aufs.
1760+ + 1st level: get the inode number from handle and search it in cache.
7e9cd9fe
AM
1761+ + 2nd level: if not found in cache, get the parent inode number from
1762+ the handle and search it in cache. and then open the found parent
1763+ dir, find the matching inode number by vfs_readdir() and get its
1764+ name, and call lookup_one_len() for the target dentry.
53392da6
AM
1765+ + 3rd level: if the parent dir is not cached, call
1766+ exportfs_decode_fh() for a branch and get the parent on a branch,
1767+ build a pathname of it, convert it to a pathname in aufs, call
1768+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
1769+ the 2nd level.
1770+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
1771+ for every branch, but not itself. to get this, (currently) aufs
1772+ searches in current->nsproxy->mnt_ns list. it may not be a good
1773+ idea, but I didn't find another approach.
1774+ + test the generation of the gotten inode.
1775+- every inode operation: they may get EBUSY due to UDBA. in this case,
1776+ convert it into ESTALE for NFSD.
1777+- readdir(): call lockdep_on/off() because filldir in NFSD calls
1778+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
e8791d4f
AM
1779diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/08shwh.txt linux-4.9/Documentation/filesystems/aufs/design/08shwh.txt
1780--- linux-4.9/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
1781+++ linux-4.9/Documentation/filesystems/aufs/design/08shwh.txt 2021-02-24 16:15:09.521573529 +0100
523b37e3 1782@@ -0,0 +1,52 @@
53392da6 1783+
ae9dfd79 1784+# Copyright (C) 2005-2018 Junjiro R. Okajima
53392da6
AM
1785+#
1786+# This program is free software; you can redistribute it and/or modify
1787+# it under the terms of the GNU General Public License as published by
1788+# the Free Software Foundation; either version 2 of the License, or
1789+# (at your option) any later version.
1790+#
1791+# This program is distributed in the hope that it will be useful,
1792+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1793+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1794+# GNU General Public License for more details.
1795+#
1796+# You should have received a copy of the GNU General Public License
523b37e3 1797+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1798+
1799+Show Whiteout Mode (shwh)
1800+----------------------------------------------------------------------
1801+Generally aufs hides the name of whiteouts. But in some cases, to show
1802+them is very useful for users. For instance, creating a new middle layer
1803+(branch) by merging existing layers.
1804+
1805+(borrowing aufs1 HOW-TO from a user, Michael Towers)
1806+When you have three branches,
1807+- Bottom: 'system', squashfs (underlying base system), read-only
1808+- Middle: 'mods', squashfs, read-only
1809+- Top: 'overlay', ram (tmpfs), read-write
1810+
1811+The top layer is loaded at boot time and saved at shutdown, to preserve
1812+the changes made to the system during the session.
1813+When larger changes have been made, or smaller changes have accumulated,
1814+the size of the saved top layer data grows. At this point, it would be
1815+nice to be able to merge the two overlay branches ('mods' and 'overlay')
1816+and rewrite the 'mods' squashfs, clearing the top layer and thus
1817+restoring save and load speed.
1818+
1819+This merging is simplified by the use of another aufs mount, of just the
1820+two overlay branches using the 'shwh' option.
1821+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
1822+ aufs /livesys/merge_union
1823+
1824+A merged view of these two branches is then available at
1825+/livesys/merge_union, and the new feature is that the whiteouts are
1826+visible!
1827+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
1828+writing to all branches. Also the default mode for all branches is 'ro'.
1829+It is now possible to save the combined contents of the two overlay
1830+branches to a new squashfs, e.g.:
1831+# mksquashfs /livesys/merge_union /path/to/newmods.squash
1832+
1833+This new squashfs archive can be stored on the boot device and the
1834+initramfs will use it to replace the old one at the next boot.
e8791d4f
AM
1835diff -urNp -x '*.orig' linux-4.9/Documentation/filesystems/aufs/design/10dynop.txt linux-4.9/Documentation/filesystems/aufs/design/10dynop.txt
1836--- linux-4.9/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
1837+++ linux-4.9/Documentation/filesystems/aufs/design/10dynop.txt 2021-02-24 16:15:09.521573529 +0100
7e9cd9fe 1838@@ -0,0 +1,47 @@
53392da6 1839+
ae9dfd79 1840+# Copyright (C) 2010-2018 Junjiro R. Okajima
53392da6
AM
1841+#
1842+# This program is free software; you can redistribute it and/or modify
1843+# it under the terms of the GNU General Public License as published by
1844+# the Free Software Foundation; either version 2 of the License, or
1845+# (at your option) any later version.
1846+#
1847+# This program is distributed in the hope that it will be useful,
1848+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1849+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1850+# GNU General Public License for more details.
1851+#
1852+# You should have received a copy of the GNU General Public License
523b37e3 1853+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1854+
1855+Dynamically customizable FS operations
1856+----------------------------------------------------------------------
1857+Generally FS operations (struct inode_operations, struct
1858+address_space_operations, struct file_operations, etc.) are defined as
1859+"static const", but it never means that FS have only one set of
1860+operations. Some FS have multiple sets of them. For instance, ext2 has
1861+three sets, one for XIP, for NOBH, and for normal.
1862+Since aufs overrides and redirects these operations, sometimes aufs has
7e9cd9fe 1863+to change its behaviour according to the branch FS type. More importantly
53392da6
AM
1864+VFS acts differently if a function (member in the struct) is set or
1865+not. It means aufs should have several sets of operations and select one
1866+among them according to the branch FS definition.
1867+
7e9cd9fe 1868+In order to solve this problem and not to affect the behaviour of VFS,
53392da6 1869+aufs defines these operations dynamically. For instance, aufs defines
7e9cd9fe
AM
1870+dummy direct_IO function for struct address_space_operations, but it may
1871+not be set to the address_space_operations actually. When the branch FS
1872+doesn't have it, aufs doesn't set it to its address_space_operations
1873+while the function definition itself is still alive. So the behaviour
1874+itself will not change, and it will return an error when direct_IO is
1875+not set.
53392da6
AM
1876+
1877+The lifetime of these dynamically generated operation objects is
1878+maintained by aufs branch object. When the branch is removed from aufs,
1879+the reference counter of the object is decremented. When it reaches
1880+zero, the dynamically generated operation object will be freed.
1881+
7e9cd9fe
AM
1882+This approach is designed to support AIO (io_submit), Direct I/O and
1883+XIP (DAX) mainly.
1884+Currently this approach is applied to address_space_operations for
1885+regular files only.
e8791d4f
AM
1886diff -urNp -x '*.orig' linux-4.9/MAINTAINERS linux-4.9/MAINTAINERS
1887--- linux-4.9/MAINTAINERS 2021-02-24 16:14:54.924432098 +0100
1888+++ linux-4.9/MAINTAINERS 2021-02-24 16:15:09.504906321 +0100
1889@@ -2293,6 +2293,19 @@ F: include/linux/audit.h
1890 F: include/uapi/linux/audit.h
1891 F: kernel/audit*
1892
1893+AUFS (advanced multi layered unification filesystem) FILESYSTEM
1894+M: "J. R. Okajima" <hooanon05g@gmail.com>
1895+L: linux-unionfs@vger.kernel.org
1896+L: aufs-users@lists.sourceforge.net (members only)
1897+W: http://aufs.sourceforge.net
1898+T: git://github.com/sfjro/aufs4-linux.git
1899+S: Supported
1900+F: Documentation/filesystems/aufs/
1901+F: Documentation/ABI/testing/debugfs-aufs
1902+F: Documentation/ABI/testing/sysfs-aufs
1903+F: fs/aufs/
1904+F: include/uapi/linux/aufs_type.h
53392da6 1905+
e8791d4f
AM
1906 AUXILIARY DISPLAY DRIVERS
1907 M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
1908 W: http://miguelojeda.es/auxdisplay.htm
1909diff -urNp -x '*.orig' linux-4.9/drivers/block/loop.c linux-4.9/drivers/block/loop.c
1910--- linux-4.9/drivers/block/loop.c 2021-02-24 16:14:57.284508877 +0100
1911+++ linux-4.9/drivers/block/loop.c 2021-02-24 16:15:09.538240738 +0100
1912@@ -552,7 +552,7 @@ static int do_req_filebacked(struct loop
1913 }
1914
1915 struct switch_request {
1916- struct file *file;
1917+ struct file *file, *virt_file;
1918 struct completion wait;
1919 };
1920
1921@@ -578,6 +578,7 @@ static void do_loop_switch(struct loop_d
1922 mapping = file->f_mapping;
1923 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
1924 lo->lo_backing_file = file;
1925+ lo->lo_backing_virt_file = p->virt_file;
1926 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
1927 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
1928 lo->old_gfp_mask = mapping_gfp_mask(mapping);
1929@@ -590,11 +591,13 @@ static void do_loop_switch(struct loop_d
1930 * First it needs to flush existing IO, it does this by sending a magic
1931 * BIO down the pipe. The completion of this BIO does the actual switch.
1932 */
1933-static int loop_switch(struct loop_device *lo, struct file *file)
1934+static int loop_switch(struct loop_device *lo, struct file *file,
1935+ struct file *virt_file)
1936 {
1937 struct switch_request w;
1938
1939 w.file = file;
1940+ w.virt_file = virt_file;
1941
1942 /* freeze queue and wait for completion of scheduled requests */
1943 blk_mq_freeze_queue(lo->lo_queue);
1944@@ -616,7 +619,16 @@ static int loop_flush(struct loop_device
1945 /* loop not yet configured, no running thread, nothing to flush */
1946 if (lo->lo_state != Lo_bound)
1947 return 0;
1948- return loop_switch(lo, NULL);
1949+ return loop_switch(lo, NULL, NULL);
1950+}
53392da6 1951+
e8791d4f
AM
1952+static struct file *loop_real_file(struct file *file)
1953+{
1954+ struct file *f = NULL;
53392da6 1955+
e8791d4f
AM
1956+ if (file->f_path.dentry->d_sb->s_op->real_loop)
1957+ f = file->f_path.dentry->d_sb->s_op->real_loop(file);
1958+ return f;
1959 }
1960
1961 static void loop_reread_partitions(struct loop_device *lo,
1962@@ -683,6 +695,7 @@ static int loop_change_fd(struct loop_de
1963 unsigned int arg)
1964 {
1965 struct file *file, *old_file;
1966+ struct file *f, *virt_file = NULL, *old_virt_file;
1967 struct inode *inode;
1968 int error;
1969
1970@@ -699,6 +712,12 @@ static int loop_change_fd(struct loop_de
1971 file = fget(arg);
1972 if (!file)
1973 goto out;
1974+ f = loop_real_file(file);
1975+ if (f) {
1976+ virt_file = file;
1977+ file = f;
1978+ get_file(file);
1979+ }
1980
1981 error = loop_validate_file(file, bdev);
1982 if (error)
1983@@ -706,6 +725,7 @@ static int loop_change_fd(struct loop_de
1984
1985 inode = file->f_mapping->host;
1986 old_file = lo->lo_backing_file;
1987+ old_virt_file = lo->lo_backing_virt_file;
1988
1989 error = -EINVAL;
1990
1991@@ -714,21 +734,43 @@ static int loop_change_fd(struct loop_de
1992 goto out_putf;
1993
1994 /* and ... switch */
1995- error = loop_switch(lo, file);
1996+ error = loop_switch(lo, file, virt_file);
1997 if (error)
1998 goto out_putf;
1999
2000 fput(old_file);
2001+ if (old_virt_file)
2002+ fput(old_virt_file);
2003 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
2004 loop_reread_partitions(lo, bdev);
2005 return 0;
2006
2007 out_putf:
2008 fput(file);
2009+ if (virt_file)
2010+ fput(virt_file);
2011 out:
2012 return error;
2013 }
2014
2015+/*
2016+ * for AUFS
2017+ * no get/put for file.
2018+ */
2019+struct file *loop_backing_file(struct super_block *sb)
2020+{
2021+ struct file *ret;
2022+ struct loop_device *l;
1e00d052 2023+
e8791d4f
AM
2024+ ret = NULL;
2025+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
2026+ l = sb->s_bdev->bd_disk->private_data;
2027+ ret = l->lo_backing_file;
2028+ }
2029+ return ret;
2030+}
2031+EXPORT_SYMBOL_GPL(loop_backing_file);
53392da6 2032+
e8791d4f
AM
2033 /* loop sysfs attributes */
2034
2035 static ssize_t loop_attr_show(struct device *dev, char *page,
2036@@ -887,7 +929,7 @@ static int loop_prepare_queue(struct loo
2037 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
2038 struct block_device *bdev, unsigned int arg)
2039 {
2040- struct file *file;
2041+ struct file *file, *f, *virt_file = NULL;
2042 struct inode *inode;
2043 struct address_space *mapping;
2044 unsigned lo_blocksize;
2045@@ -902,6 +944,12 @@ static int loop_set_fd(struct loop_devic
2046 file = fget(arg);
2047 if (!file)
2048 goto out;
2049+ f = loop_real_file(file);
2050+ if (f) {
2051+ virt_file = file;
2052+ file = f;
2053+ get_file(file);
2054+ }
2055
2056 error = -EBUSY;
2057 if (lo->lo_state != Lo_unbound)
2058@@ -939,6 +987,7 @@ static int loop_set_fd(struct loop_devic
2059 lo->lo_flags = lo_flags;
2060 lo->lo_xid = vx_current_xid();
2061 lo->lo_backing_file = file;
2062+ lo->lo_backing_virt_file = virt_file;
2063 lo->transfer = NULL;
2064 lo->ioctl = NULL;
2065 lo->lo_sizelimit = 0;
2066@@ -971,6 +1020,8 @@ static int loop_set_fd(struct loop_devic
2067
2068 out_putf:
2069 fput(file);
2070+ if (virt_file)
2071+ fput(virt_file);
2072 out:
2073 /* This is safe: open() is still holding a reference. */
2074 module_put(THIS_MODULE);
2075@@ -1017,6 +1068,7 @@ loop_init_xfer(struct loop_device *lo, s
2076 static int loop_clr_fd(struct loop_device *lo)
2077 {
2078 struct file *filp = lo->lo_backing_file;
2079+ struct file *virt_filp = lo->lo_backing_virt_file;
2080 gfp_t gfp = lo->old_gfp_mask;
2081 struct block_device *bdev = lo->lo_device;
2082
2083@@ -1048,6 +1100,7 @@ static int loop_clr_fd(struct loop_devic
2084 spin_lock_irq(&lo->lo_lock);
2085 lo->lo_state = Lo_rundown;
2086 lo->lo_backing_file = NULL;
2087+ lo->lo_backing_virt_file = NULL;
2088 spin_unlock_irq(&lo->lo_lock);
2089
2090 loop_release_xfer(lo);
2091@@ -1093,6 +1146,8 @@ static int loop_clr_fd(struct loop_devic
2092 * bd_mutex which is usually taken before lo_ctl_mutex.
2093 */
2094 fput(filp);
2095+ if (virt_filp)
2096+ fput(virt_filp);
2097 return 0;
2098 }
2099
2100diff -urNp -x '*.orig' linux-4.9/drivers/block/loop.h linux-4.9/drivers/block/loop.h
2101--- linux-4.9/drivers/block/loop.h 2021-02-24 16:14:57.284508877 +0100
2102+++ linux-4.9/drivers/block/loop.h 2021-02-24 16:15:09.538240738 +0100
2103@@ -47,7 +47,7 @@ struct loop_device {
2104 int (*ioctl)(struct loop_device *, int cmd,
2105 unsigned long arg);
2106
2107- struct file * lo_backing_file;
2108+ struct file * lo_backing_file, *lo_backing_virt_file;
2109 struct block_device *lo_device;
2110 unsigned lo_blocksize;
2111 void *key_data;
2112diff -urNp -x '*.orig' linux-4.9/fs/Kconfig linux-4.9/fs/Kconfig
2113--- linux-4.9/fs/Kconfig 2016-12-11 20:17:54.000000000 +0100
2114+++ linux-4.9/fs/Kconfig 2021-02-24 16:15:09.501572879 +0100
2115@@ -249,6 +249,7 @@ source "fs/pstore/Kconfig"
2116 source "fs/sysv/Kconfig"
2117 source "fs/ufs/Kconfig"
2118 source "fs/exofs/Kconfig"
2119+source "fs/aufs/Kconfig"
2120
2121 endif # MISC_FILESYSTEMS
2122
2123diff -urNp -x '*.orig' linux-4.9/fs/Makefile linux-4.9/fs/Makefile
2124--- linux-4.9/fs/Makefile 2016-12-11 20:17:54.000000000 +0100
2125+++ linux-4.9/fs/Makefile 2021-02-24 16:15:09.501572879 +0100
2126@@ -129,3 +129,4 @@ obj-y += exofs/ # Multiple modules
2127 obj-$(CONFIG_CEPH_FS) += ceph/
2128 obj-$(CONFIG_PSTORE) += pstore/
2129 obj-$(CONFIG_EFIVAR_FS) += efivarfs/
2130+obj-$(CONFIG_AUFS_FS) += aufs/
2131diff -urNp -x '*.orig' linux-4.9/fs/aufs/Kconfig linux-4.9/fs/aufs/Kconfig
2132--- linux-4.9/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
2133+++ linux-4.9/fs/aufs/Kconfig 2021-02-24 16:15:09.531573855 +0100
2134@@ -0,0 +1,198 @@
2135+config AUFS_FS
2136+ tristate "Aufs (Advanced multi layered unification filesystem) support"
2137+ help
2138+ Aufs is a stackable unification filesystem such as Unionfs,
2139+ which unifies several directories and provides a merged single
2140+ directory.
2141+ In the early days, aufs was entirely re-designed and
2142+ re-implemented Unionfs Version 1.x series. Introducing many
2143+ original ideas, approaches and improvements, it becomes totally
2144+ different from Unionfs while keeping the basic features.
53392da6 2145+
e8791d4f
AM
2146+if AUFS_FS
2147+choice
2148+ prompt "Maximum number of branches"
2149+ default AUFS_BRANCH_MAX_127
2150+ help
2151+ Specifies the maximum number of branches (or member directories)
2152+ in a single aufs. The larger value consumes more system
2153+ resources and has a minor impact to performance.
2154+config AUFS_BRANCH_MAX_127
2155+ bool "127"
2156+ help
2157+ Specifies the maximum number of branches (or member directories)
2158+ in a single aufs. The larger value consumes more system
2159+ resources and has a minor impact to performance.
2160+config AUFS_BRANCH_MAX_511
2161+ bool "511"
2162+ help
2163+ Specifies the maximum number of branches (or member directories)
2164+ in a single aufs. The larger value consumes more system
2165+ resources and has a minor impact to performance.
2166+config AUFS_BRANCH_MAX_1023
2167+ bool "1023"
2168+ help
2169+ Specifies the maximum number of branches (or member directories)
2170+ in a single aufs. The larger value consumes more system
2171+ resources and has a minor impact to performance.
2172+config AUFS_BRANCH_MAX_32767
2173+ bool "32767"
2174+ help
2175+ Specifies the maximum number of branches (or member directories)
2176+ in a single aufs. The larger value consumes more system
2177+ resources and has a minor impact to performance.
2178+endchoice
53392da6 2179+
e8791d4f
AM
2180+config AUFS_SBILIST
2181+ bool
2182+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
2183+ default y
2184+ help
2185+ Automatic configuration for internal use.
2186+ When aufs supports Magic SysRq or /proc, enabled automatically.
53392da6 2187+
e8791d4f
AM
2188+config AUFS_HNOTIFY
2189+ bool "Detect direct branch access (bypassing aufs)"
2190+ help
2191+ If you want to modify files on branches directly, eg. bypassing aufs,
2192+ and want aufs to detect the changes of them fully, then enable this
2193+ option and use 'udba=notify' mount option.
2194+ Currently there is only one available configuration, "fsnotify".
2195+ It will have a negative impact to the performance.
2196+ See detail in aufs.5.
53392da6 2197+
e8791d4f
AM
2198+choice
2199+ prompt "method" if AUFS_HNOTIFY
2200+ default AUFS_HFSNOTIFY
2201+config AUFS_HFSNOTIFY
2202+ bool "fsnotify"
2203+ select FSNOTIFY
2204+endchoice
38d290e6 2205+
e8791d4f
AM
2206+config AUFS_EXPORT
2207+ bool "NFS-exportable aufs"
2208+ depends on EXPORTFS
2209+ help
2210+ If you want to export your mounted aufs via NFS, then enable this
2211+ option. There are several requirements for this configuration.
2212+ See detail in aufs.5.
53392da6 2213+
e8791d4f
AM
2214+config AUFS_INO_T_64
2215+ bool
2216+ depends on AUFS_EXPORT
2217+ depends on 64BIT && !(ALPHA || S390)
2218+ default y
2219+ help
2220+ Automatic configuration for internal use.
2221+ /* typedef unsigned long/int __kernel_ino_t */
2222+ /* alpha and s390x are int */
53392da6 2223+
e8791d4f
AM
2224+config AUFS_XATTR
2225+ bool "support for XATTR/EA (including Security Labels)"
2226+ help
2227+ If your branch fs supports XATTR/EA and you want to make them
2228+ available in aufs too, then enable this option and specify the
2229+ branch attributes for EA.
2230+ See detail in aufs.5.
53392da6 2231+
e8791d4f
AM
2232+config AUFS_FHSM
2233+ bool "File-based Hierarchical Storage Management"
2234+ help
2235+ Hierarchical Storage Management (or HSM) is a well-known feature
2236+ in the storage world. Aufs provides this feature as file-based
2237+ with multiple branches.
2238+ These multiple branches are prioritized, ie. the topmost one
2239+ should be the fastest drive and be used heavily.
53392da6 2240+
e8791d4f
AM
2241+config AUFS_RDU
2242+ bool "Readdir in userspace"
2243+ help
2244+ Aufs has two methods to provide a merged view for a directory,
2245+ by a user-space library and by kernel-space natively. The latter
2246+ is always enabled but sometimes large and slow.
2247+ If you enable this option, install the library in aufs2-util
2248+ package, and set some environment variables for your readdir(3),
2249+ then the work will be handled in user-space which generally
2250+ shows better performance in most cases.
2251+ See detail in aufs.5.
53392da6 2252+
e8791d4f
AM
2253+config AUFS_DIRREN
2254+ bool "Workaround for rename(2)-ing a directory"
2255+ help
2256+ By default, aufs returns EXDEV error in renaming a dir who has
2257+ his child on the lower branch, since it is a bad idea to issue
2258+ rename(2) internally for every lower branch. But user may not
2259+ accept this behaviour. So here is a workaround to allow such
2260+ rename(2) and store some extra information on the writable
2261+ branch. Obviously this costs high (and I don't like it).
2262+ To use this feature, you need to enable this configuration AND
2263+ to specify the mount option `dirren.'
2264+ See details in aufs.5 and the design documents.
53392da6 2265+
e8791d4f
AM
2266+config AUFS_SHWH
2267+ bool "Show whiteouts"
2268+ help
2269+ If you want to make the whiteouts in aufs visible, then enable
2270+ this option and specify 'shwh' mount option. Although it may
2271+ sound like philosophy or something, technically it
2272+ simply shows the name of whiteout with keeping its behaviour.
53392da6 2273+
e8791d4f
AM
2274+config AUFS_BR_RAMFS
2275+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
2276+ help
2277+ If you want to use ramfs as an aufs branch fs, then enable this
2278+ option. Generally tmpfs is recommended.
2279+ Aufs prohibited them to be a branch fs by default, because
2280+ initramfs becomes unusable after switch_root or something
2281+ generally. If you set initramfs as an aufs branch and boot your
2282+ system by switch_root, you will meet a problem easily since the
2283+ files in initramfs may be inaccessible.
2284+ Unless you are going to use ramfs as an aufs branch fs without
2285+ switch_root or something, leave it N.
53392da6 2286+
e8791d4f
AM
2287+config AUFS_BR_FUSE
2288+ bool "Fuse fs as an aufs branch"
2289+ depends on FUSE_FS
2290+ select AUFS_POLL
2291+ help
2292+ If you want to use fuse-based userspace filesystem as an aufs
2293+ branch fs, then enable this option.
2294+ It implements the internal poll(2) operation which is
2295+ implemented by fuse only (currently).
53392da6 2296+
e8791d4f
AM
2297+config AUFS_POLL
2298+ bool
2299+ help
2300+ Automatic configuration for internal use.
53392da6 2301+
e8791d4f
AM
2302+config AUFS_BR_HFSPLUS
2303+ bool "Hfsplus as an aufs branch"
2304+ depends on HFSPLUS_FS
2305+ default y
2306+ help
2307+ If you want to use hfsplus fs as an aufs branch fs, then enable
2308+ this option. This option introduces a small overhead at
2309+ copying-up a file on hfsplus.
53392da6 2310+
e8791d4f
AM
2311+config AUFS_BDEV_LOOP
2312+ bool
2313+ depends on BLK_DEV_LOOP
2314+ default y
2315+ help
2316+ Automatic configuration for internal use.
2317+ Convert =[ym] into =y.
53392da6 2318+
e8791d4f
AM
2319+config AUFS_DEBUG
2320+ bool "Debug aufs"
2321+ help
2322+ Enable this to compile aufs internal debug code.
2323+ It will have a negative impact to the performance.
53392da6 2324+
e8791d4f
AM
2325+config AUFS_MAGIC_SYSRQ
2326+ bool
2327+ depends on AUFS_DEBUG && MAGIC_SYSRQ
2328+ default y
2329+ help
2330+ Automatic configuration for internal use.
2331+ When aufs supports Magic SysRq, enabled automatically.
2332+endif
2333diff -urNp -x '*.orig' linux-4.9/fs/aufs/Makefile linux-4.9/fs/aufs/Makefile
2334--- linux-4.9/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
2335+++ linux-4.9/fs/aufs/Makefile 2021-02-24 16:15:09.531573855 +0100
2336@@ -0,0 +1,45 @@
53392da6 2337+
e8791d4f
AM
2338+include ${src}/magic.mk
2339+ifeq (${CONFIG_AUFS_FS},m)
2340+include ${src}/conf.mk
2341+endif
2342+-include ${src}/priv_def.mk
53392da6 2343+
e8791d4f
AM
2344+# cf. include/linux/kernel.h
2345+# enable pr_debug
2346+ccflags-y += -DDEBUG
2347+# sparse requires the full pathname
2348+ifdef M
2349+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
2350+else
2351+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
2352+endif
53392da6 2353+
e8791d4f
AM
2354+obj-$(CONFIG_AUFS_FS) += aufs.o
2355+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
2356+ wkq.o vfsub.o dcsub.o \
2357+ cpup.o whout.o wbr_policy.o \
2358+ dinfo.o dentry.o \
2359+ dynop.o \
2360+ finfo.o file.o f_op.o \
2361+ dir.o vdir.o \
2362+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
2363+ mvdown.o ioctl.o
53392da6 2364+
e8791d4f
AM
2365+# all are boolean
2366+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
2367+aufs-$(CONFIG_SYSFS) += sysfs.o
2368+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
2369+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
2370+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
2371+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
2372+aufs-$(CONFIG_AUFS_EXPORT) += export.o
2373+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
2374+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
2375+aufs-$(CONFIG_AUFS_DIRREN) += dirren.o
2376+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
2377+aufs-$(CONFIG_AUFS_POLL) += poll.o
2378+aufs-$(CONFIG_AUFS_RDU) += rdu.o
2379+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
2380+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
2381+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
2382diff -urNp -x '*.orig' linux-4.9/fs/aufs/aufs.h linux-4.9/fs/aufs/aufs.h
2383--- linux-4.9/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
2384+++ linux-4.9/fs/aufs/aufs.h 2021-02-24 16:15:09.521573529 +0100
ae9dfd79 2385@@ -0,0 +1,60 @@
7f207e10 2386+/*
ae9dfd79 2387+ * Copyright (C) 2005-2018 Junjiro R. Okajima
7f207e10
AM
2388+ *
2389+ * This program, aufs is free software; you can redistribute it and/or modify
2390+ * it under the terms of the GNU General Public License as published by
2391+ * the Free Software Foundation; either version 2 of the License, or
2392+ * (at your option) any later version.
2393+ *
2394+ * This program is distributed in the hope that it will be useful,
2395+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2396+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2397+ * GNU General Public License for more details.
2398+ *
2399+ * You should have received a copy of the GNU General Public License
523b37e3 2400+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2401+ */
2402+
2403+/*
2404+ * all header files
2405+ */
2406+
2407+#ifndef __AUFS_H__
2408+#define __AUFS_H__
2409+
2410+#ifdef __KERNEL__
2411+
2412+#define AuStub(type, name, body, ...) \
2413+ static inline type name(__VA_ARGS__) { body; }
2414+
2415+#define AuStubVoid(name, ...) \
2416+ AuStub(void, name, , __VA_ARGS__)
2417+#define AuStubInt0(name, ...) \
2418+ AuStub(int, name, return 0, __VA_ARGS__)
2419+
2420+#include "debug.h"
2421+
2422+#include "branch.h"
2423+#include "cpup.h"
2424+#include "dcsub.h"
2425+#include "dbgaufs.h"
2426+#include "dentry.h"
2427+#include "dir.h"
ae9dfd79 2428+#include "dirren.h"
7f207e10
AM
2429+#include "dynop.h"
2430+#include "file.h"
2431+#include "fstype.h"
ae9dfd79 2432+#include "hbl.h"
7f207e10
AM
2433+#include "inode.h"
2434+#include "loop.h"
2435+#include "module.h"
7f207e10
AM
2436+#include "opts.h"
2437+#include "rwsem.h"
7f207e10
AM
2438+#include "super.h"
2439+#include "sysaufs.h"
2440+#include "vfsub.h"
2441+#include "whout.h"
2442+#include "wkq.h"
2443+
2444+#endif /* __KERNEL__ */
2445+#endif /* __AUFS_H__ */
e8791d4f
AM
2446diff -urNp -x '*.orig' linux-4.9/fs/aufs/branch.c linux-4.9/fs/aufs/branch.c
2447--- linux-4.9/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
2448+++ linux-4.9/fs/aufs/branch.c 2021-02-24 16:15:09.521573529 +0100
ae9dfd79 2449@@ -0,0 +1,1432 @@
7f207e10 2450+/*
ae9dfd79 2451+ * Copyright (C) 2005-2018 Junjiro R. Okajima
7f207e10
AM
2452+ *
2453+ * This program, aufs is free software; you can redistribute it and/or modify
2454+ * it under the terms of the GNU General Public License as published by
2455+ * the Free Software Foundation; either version 2 of the License, or
2456+ * (at your option) any later version.
2457+ *
2458+ * This program is distributed in the hope that it will be useful,
2459+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2460+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2461+ * GNU General Public License for more details.
2462+ *
2463+ * You should have received a copy of the GNU General Public License
523b37e3 2464+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2465+ */
2466+
2467+/*
2468+ * branch management
2469+ */
2470+
027c5e7a 2471+#include <linux/compat.h>
7f207e10
AM
2472+#include <linux/statfs.h>
2473+#include "aufs.h"
2474+
2475+/*
2476+ * free a single branch
1facf9fc 2477+ */
2478+static void au_br_do_free(struct au_branch *br)
2479+{
2480+ int i;
2481+ struct au_wbr *wbr;
4a4d8108 2482+ struct au_dykey **key;
1facf9fc 2483+
027c5e7a 2484+ au_hnotify_fin_br(br);
ae9dfd79
AM
2485+ /* always, regardless the mount option */
2486+ au_dr_hino_free(&br->br_dirren);
027c5e7a 2487+
1facf9fc 2488+ if (br->br_xino.xi_file)
2489+ fput(br->br_xino.xi_file);
ae9dfd79
AM
2490+ for (i = br->br_xino.xi_nondir.total - 1; i >= 0; i--)
2491+ AuDebugOn(br->br_xino.xi_nondir.array[i]);
2492+ kfree(br->br_xino.xi_nondir.array);
1facf9fc 2493+
5afbbe0d
AM
2494+ AuDebugOn(au_br_count(br));
2495+ au_br_count_fin(br);
1facf9fc 2496+
2497+ wbr = br->br_wbr;
2498+ if (wbr) {
2499+ for (i = 0; i < AuBrWh_Last; i++)
2500+ dput(wbr->wbr_wh[i]);
2501+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 2502+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 2503+ }
2504+
076b876e
AM
2505+ if (br->br_fhsm) {
2506+ au_br_fhsm_fin(br->br_fhsm);
ae9dfd79 2507+ kfree(br->br_fhsm);
076b876e
AM
2508+ }
2509+
4a4d8108
AM
2510+ key = br->br_dykey;
2511+ for (i = 0; i < AuBrDynOp; i++, key++)
2512+ if (*key)
2513+ au_dy_put(*key);
2514+ else
2515+ break;
2516+
537831f9
AM
2517+ /* recursive lock, s_umount of branch's */
2518+ lockdep_off();
86dc4139 2519+ path_put(&br->br_path);
537831f9 2520+ lockdep_on();
ae9dfd79
AM
2521+ kfree(wbr);
2522+ kfree(br);
1facf9fc 2523+}
2524+
2525+/*
2526+ * frees all branches
2527+ */
2528+void au_br_free(struct au_sbinfo *sbinfo)
2529+{
2530+ aufs_bindex_t bmax;
2531+ struct au_branch **br;
2532+
dece6358
AM
2533+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2534+
5afbbe0d 2535+ bmax = sbinfo->si_bbot + 1;
1facf9fc 2536+ br = sbinfo->si_branch;
2537+ while (bmax--)
2538+ au_br_do_free(*br++);
2539+}
2540+
2541+/*
2542+ * find the index of a branch which is specified by @br_id.
2543+ */
2544+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
2545+{
5afbbe0d 2546+ aufs_bindex_t bindex, bbot;
1facf9fc 2547+
5afbbe0d
AM
2548+ bbot = au_sbbot(sb);
2549+ for (bindex = 0; bindex <= bbot; bindex++)
1facf9fc 2550+ if (au_sbr_id(sb, bindex) == br_id)
2551+ return bindex;
2552+ return -1;
2553+}
2554+
2555+/* ---------------------------------------------------------------------- */
2556+
2557+/*
2558+ * add a branch
2559+ */
2560+
b752ccd1
AM
2561+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
2562+ struct dentry *h_root)
1facf9fc 2563+{
b752ccd1
AM
2564+ if (unlikely(h_adding == h_root
2565+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 2566+ return 1;
b752ccd1
AM
2567+ if (h_adding->d_sb != h_root->d_sb)
2568+ return 0;
2569+ return au_test_subdir(h_adding, h_root)
2570+ || au_test_subdir(h_root, h_adding);
1facf9fc 2571+}
2572+
2573+/*
2574+ * returns a newly allocated branch. @new_nbranch is a number of branches
2575+ * after adding a branch.
2576+ */
2577+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
2578+ int perm)
2579+{
2580+ struct au_branch *add_branch;
2581+ struct dentry *root;
5527c038 2582+ struct inode *inode;
4a4d8108 2583+ int err;
1facf9fc 2584+
4a4d8108 2585+ err = -ENOMEM;
be52b249 2586+ add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
1facf9fc 2587+ if (unlikely(!add_branch))
2588+ goto out;
ae9dfd79
AM
2589+ add_branch->br_xino.xi_nondir.total = 8; /* initial size */
2590+ add_branch->br_xino.xi_nondir.array
2591+ = kcalloc(add_branch->br_xino.xi_nondir.total, sizeof(ino_t),
2592+ GFP_NOFS);
2593+ if (unlikely(!add_branch->br_xino.xi_nondir.array))
2594+ goto out_br;
1facf9fc 2595+
027c5e7a
AM
2596+ err = au_hnotify_init_br(add_branch, perm);
2597+ if (unlikely(err))
ae9dfd79 2598+ goto out_xinondir;
027c5e7a 2599+
1facf9fc 2600+ if (au_br_writable(perm)) {
2601+ /* may be freed separately at changing the branch permission */
be52b249 2602+ add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
1facf9fc 2603+ GFP_NOFS);
2604+ if (unlikely(!add_branch->br_wbr))
027c5e7a 2605+ goto out_hnotify;
1facf9fc 2606+ }
2607+
076b876e
AM
2608+ if (au_br_fhsm(perm)) {
2609+ err = au_fhsm_br_alloc(add_branch);
2610+ if (unlikely(err))
2611+ goto out_wbr;
2612+ }
2613+
ae9dfd79 2614+ root = sb->s_root;
e2f27e51 2615+ err = au_sbr_realloc(au_sbi(sb), new_nbranch, /*may_shrink*/0);
4a4d8108 2616+ if (!err)
e2f27e51 2617+ err = au_di_realloc(au_di(root), new_nbranch, /*may_shrink*/0);
5527c038
JR
2618+ if (!err) {
2619+ inode = d_inode(root);
ae9dfd79
AM
2620+ err = au_hinode_realloc(au_ii(inode), new_nbranch,
2621+ /*may_shrink*/0);
5527c038 2622+ }
4a4d8108
AM
2623+ if (!err)
2624+ return add_branch; /* success */
1facf9fc 2625+
076b876e 2626+out_wbr:
ae9dfd79 2627+ kfree(add_branch->br_wbr);
027c5e7a
AM
2628+out_hnotify:
2629+ au_hnotify_fin_br(add_branch);
ae9dfd79
AM
2630+out_xinondir:
2631+ kfree(add_branch->br_xino.xi_nondir.array);
4f0767ce 2632+out_br:
ae9dfd79 2633+ kfree(add_branch);
4f0767ce 2634+out:
4a4d8108 2635+ return ERR_PTR(err);
1facf9fc 2636+}
2637+
2638+/*
2639+ * test if the branch permission is legal or not.
2640+ */
2641+static int test_br(struct inode *inode, int brperm, char *path)
2642+{
2643+ int err;
2644+
4a4d8108
AM
2645+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
2646+ if (!err)
2647+ goto out;
1facf9fc 2648+
4a4d8108
AM
2649+ err = -EINVAL;
2650+ pr_err("write permission for readonly mount or inode, %s\n", path);
2651+
4f0767ce 2652+out:
1facf9fc 2653+ return err;
2654+}
2655+
2656+/*
2657+ * returns:
2658+ * 0: success, the caller will add it
2659+ * plus: success, it is already unified, the caller should ignore it
2660+ * minus: error
2661+ */
2662+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
2663+{
2664+ int err;
5afbbe0d 2665+ aufs_bindex_t bbot, bindex;
5527c038 2666+ struct dentry *root, *h_dentry;
1facf9fc 2667+ struct inode *inode, *h_inode;
2668+
2669+ root = sb->s_root;
5afbbe0d
AM
2670+ bbot = au_sbbot(sb);
2671+ if (unlikely(bbot >= 0
1facf9fc 2672+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
2673+ err = 1;
2674+ if (!remount) {
2675+ err = -EINVAL;
4a4d8108 2676+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 2677+ }
2678+ goto out;
2679+ }
2680+
2681+ err = -ENOSPC; /* -E2BIG; */
2682+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
5afbbe0d 2683+ || AUFS_BRANCH_MAX - 1 <= bbot)) {
4a4d8108 2684+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 2685+ goto out;
2686+ }
2687+
2688+ err = -EDOM;
5afbbe0d 2689+ if (unlikely(add->bindex < 0 || bbot + 1 < add->bindex)) {
4a4d8108 2690+ pr_err("bad index %d\n", add->bindex);
1facf9fc 2691+ goto out;
2692+ }
2693+
5527c038 2694+ inode = d_inode(add->path.dentry);
1facf9fc 2695+ err = -ENOENT;
2696+ if (unlikely(!inode->i_nlink)) {
4a4d8108 2697+ pr_err("no existence %s\n", add->pathname);
1facf9fc 2698+ goto out;
2699+ }
2700+
2701+ err = -EINVAL;
2702+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 2703+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 2704+ goto out;
2705+ }
2706+
2707+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
2708+ pr_err("unsupported filesystem, %s (%s)\n",
2709+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 2710+ goto out;
2711+ }
2712+
c1595e42
JR
2713+ if (unlikely(inode->i_sb->s_stack_depth)) {
2714+ pr_err("already stacked, %s (%s)\n",
2715+ add->pathname, au_sbtype(inode->i_sb));
2716+ goto out;
2717+ }
2718+
5527c038 2719+ err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
1facf9fc 2720+ if (unlikely(err))
2721+ goto out;
2722+
5afbbe0d 2723+ if (bbot < 0)
1facf9fc 2724+ return 0; /* success */
2725+
2726+ err = -EINVAL;
5afbbe0d 2727+ for (bindex = 0; bindex <= bbot; bindex++)
1facf9fc 2728+ if (unlikely(test_overlap(sb, add->path.dentry,
2729+ au_h_dptr(root, bindex)))) {
4a4d8108 2730+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 2731+ goto out;
2732+ }
2733+
2734+ err = 0;
2735+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
5527c038
JR
2736+ h_dentry = au_h_dptr(root, 0);
2737+ h_inode = d_inode(h_dentry);
1facf9fc 2738+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
0c3ec466
AM
2739+ || !uid_eq(h_inode->i_uid, inode->i_uid)
2740+ || !gid_eq(h_inode->i_gid, inode->i_gid))
2741+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
2742+ add->pathname,
2743+ i_uid_read(inode), i_gid_read(inode),
2744+ (inode->i_mode & S_IALLUGO),
2745+ i_uid_read(h_inode), i_gid_read(h_inode),
2746+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 2747+ }
2748+
4f0767ce 2749+out:
1facf9fc 2750+ return err;
2751+}
2752+
2753+/*
2754+ * initialize or clean the whiteouts for an adding branch
2755+ */
2756+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
86dc4139 2757+ int new_perm)
1facf9fc 2758+{
2759+ int err, old_perm;
2760+ aufs_bindex_t bindex;
febd17d6 2761+ struct inode *h_inode;
1facf9fc 2762+ struct au_wbr *wbr;
2763+ struct au_hinode *hdir;
5527c038 2764+ struct dentry *h_dentry;
1facf9fc 2765+
86dc4139
AM
2766+ err = vfsub_mnt_want_write(au_br_mnt(br));
2767+ if (unlikely(err))
2768+ goto out;
2769+
1facf9fc 2770+ wbr = br->br_wbr;
2771+ old_perm = br->br_perm;
2772+ br->br_perm = new_perm;
2773+ hdir = NULL;
febd17d6 2774+ h_inode = NULL;
1facf9fc 2775+ bindex = au_br_index(sb, br->br_id);
2776+ if (0 <= bindex) {
5527c038 2777+ hdir = au_hi(d_inode(sb->s_root), bindex);
5afbbe0d 2778+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 2779+ } else {
5527c038 2780+ h_dentry = au_br_dentry(br);
febd17d6
JR
2781+ h_inode = d_inode(h_dentry);
2782+ inode_lock_nested(h_inode, AuLsc_I_PARENT);
1facf9fc 2783+ }
2784+ if (!wbr)
86dc4139 2785+ err = au_wh_init(br, sb);
1facf9fc 2786+ else {
2787+ wbr_wh_write_lock(wbr);
86dc4139 2788+ err = au_wh_init(br, sb);
1facf9fc 2789+ wbr_wh_write_unlock(wbr);
2790+ }
2791+ if (hdir)
5afbbe0d 2792+ au_hn_inode_unlock(hdir);
1facf9fc 2793+ else
febd17d6 2794+ inode_unlock(h_inode);
86dc4139 2795+ vfsub_mnt_drop_write(au_br_mnt(br));
1facf9fc 2796+ br->br_perm = old_perm;
2797+
2798+ if (!err && wbr && !au_br_writable(new_perm)) {
ae9dfd79 2799+ kfree(wbr);
1facf9fc 2800+ br->br_wbr = NULL;
2801+ }
2802+
86dc4139 2803+out:
1facf9fc 2804+ return err;
2805+}
2806+
2807+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
86dc4139 2808+ int perm)
1facf9fc 2809+{
2810+ int err;
4a4d8108 2811+ struct kstatfs kst;
1facf9fc 2812+ struct au_wbr *wbr;
2813+
2814+ wbr = br->br_wbr;
dece6358 2815+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 2816+ atomic_set(&wbr->wbr_wh_running, 0);
1facf9fc 2817+
4a4d8108
AM
2818+ /*
2819+ * a limit for rmdir/rename a dir
523b37e3 2820+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
4a4d8108 2821+ */
86dc4139 2822+ err = vfs_statfs(&br->br_path, &kst);
4a4d8108
AM
2823+ if (unlikely(err))
2824+ goto out;
2825+ err = -EINVAL;
2826+ if (kst.f_namelen >= NAME_MAX)
86dc4139 2827+ err = au_br_init_wh(sb, br, perm);
4a4d8108 2828+ else
523b37e3
AM
2829+ pr_err("%pd(%s), unsupported namelen %ld\n",
2830+ au_br_dentry(br),
86dc4139 2831+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
1facf9fc 2832+
4f0767ce 2833+out:
1facf9fc 2834+ return err;
2835+}
2836+
c1595e42 2837+/* initialize a new branch */
1facf9fc 2838+static int au_br_init(struct au_branch *br, struct super_block *sb,
2839+ struct au_opt_add *add)
2840+{
2841+ int err;
5527c038 2842+ struct inode *h_inode;
1facf9fc 2843+
2844+ err = 0;
ae9dfd79
AM
2845+ spin_lock_init(&br->br_xino.xi_nondir.spin);
2846+ init_waitqueue_head(&br->br_xino.xi_nondir.wqh);
1facf9fc 2847+ br->br_perm = add->perm;
86dc4139 2848+ br->br_path = add->path; /* set first, path_get() later */
4a4d8108 2849+ spin_lock_init(&br->br_dykey_lock);
5afbbe0d 2850+ au_br_count_init(br);
1facf9fc 2851+ atomic_set(&br->br_xino_running, 0);
2852+ br->br_id = au_new_br_id(sb);
7f207e10 2853+ AuDebugOn(br->br_id < 0);
1facf9fc 2854+
ae9dfd79
AM
2855+ /* always, regardless the given option */
2856+ err = au_dr_br_init(sb, br, &add->path);
2857+ if (unlikely(err))
2858+ goto out_err;
2859+
1facf9fc 2860+ if (au_br_writable(add->perm)) {
86dc4139 2861+ err = au_wbr_init(br, sb, add->perm);
1facf9fc 2862+ if (unlikely(err))
b752ccd1 2863+ goto out_err;
1facf9fc 2864+ }
2865+
2866+ if (au_opt_test(au_mntflags(sb), XINO)) {
5527c038
JR
2867+ h_inode = d_inode(add->path.dentry);
2868+ err = au_xino_br(sb, br, h_inode->i_ino,
1facf9fc 2869+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
2870+ if (unlikely(err)) {
2871+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 2872+ goto out_err;
1facf9fc 2873+ }
2874+ }
2875+
2876+ sysaufs_br_init(br);
86dc4139 2877+ path_get(&br->br_path);
b752ccd1 2878+ goto out; /* success */
1facf9fc 2879+
4f0767ce 2880+out_err:
86dc4139 2881+ memset(&br->br_path, 0, sizeof(br->br_path));
4f0767ce 2882+out:
1facf9fc 2883+ return err;
2884+}
2885+
2886+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
5afbbe0d 2887+ struct au_branch *br, aufs_bindex_t bbot,
1facf9fc 2888+ aufs_bindex_t amount)
2889+{
2890+ struct au_branch **brp;
2891+
dece6358
AM
2892+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2893+
1facf9fc 2894+ brp = sbinfo->si_branch + bindex;
2895+ memmove(brp + 1, brp, sizeof(*brp) * amount);
2896+ *brp = br;
5afbbe0d
AM
2897+ sbinfo->si_bbot++;
2898+ if (unlikely(bbot < 0))
2899+ sbinfo->si_bbot = 0;
1facf9fc 2900+}
2901+
2902+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
5afbbe0d 2903+ aufs_bindex_t bbot, aufs_bindex_t amount)
1facf9fc 2904+{
2905+ struct au_hdentry *hdp;
2906+
1308ab2a 2907+ AuRwMustWriteLock(&dinfo->di_rwsem);
2908+
5afbbe0d 2909+ hdp = au_hdentry(dinfo, bindex);
1facf9fc 2910+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
2911+ au_h_dentry_init(hdp);
5afbbe0d
AM
2912+ dinfo->di_bbot++;
2913+ if (unlikely(bbot < 0))
2914+ dinfo->di_btop = 0;
1facf9fc 2915+}
2916+
2917+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
5afbbe0d 2918+ aufs_bindex_t bbot, aufs_bindex_t amount)
1facf9fc 2919+{
2920+ struct au_hinode *hip;
2921+
1308ab2a 2922+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2923+
5afbbe0d 2924+ hip = au_hinode(iinfo, bindex);
1facf9fc 2925+ memmove(hip + 1, hip, sizeof(*hip) * amount);
5afbbe0d
AM
2926+ au_hinode_init(hip);
2927+ iinfo->ii_bbot++;
2928+ if (unlikely(bbot < 0))
2929+ iinfo->ii_btop = 0;
1facf9fc 2930+}
2931+
86dc4139
AM
2932+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
2933+ aufs_bindex_t bindex)
1facf9fc 2934+{
86dc4139 2935+ struct dentry *root, *h_dentry;
5527c038 2936+ struct inode *root_inode, *h_inode;
5afbbe0d 2937+ aufs_bindex_t bbot, amount;
1facf9fc 2938+
2939+ root = sb->s_root;
5527c038 2940+ root_inode = d_inode(root);
5afbbe0d
AM
2941+ bbot = au_sbbot(sb);
2942+ amount = bbot + 1 - bindex;
86dc4139 2943+ h_dentry = au_br_dentry(br);
53392da6 2944+ au_sbilist_lock();
5afbbe0d
AM
2945+ au_br_do_add_brp(au_sbi(sb), bindex, br, bbot, amount);
2946+ au_br_do_add_hdp(au_di(root), bindex, bbot, amount);
2947+ au_br_do_add_hip(au_ii(root_inode), bindex, bbot, amount);
1facf9fc 2948+ au_set_h_dptr(root, bindex, dget(h_dentry));
5527c038
JR
2949+ h_inode = d_inode(h_dentry);
2950+ au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
53392da6 2951+ au_sbilist_unlock();
1facf9fc 2952+}
2953+
2954+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
2955+{
2956+ int err;
5afbbe0d 2957+ aufs_bindex_t bbot, add_bindex;
1facf9fc 2958+ struct dentry *root, *h_dentry;
2959+ struct inode *root_inode;
2960+ struct au_branch *add_branch;
2961+
2962+ root = sb->s_root;
5527c038 2963+ root_inode = d_inode(root);
1facf9fc 2964+ IMustLock(root_inode);
5afbbe0d 2965+ IiMustWriteLock(root_inode);
1facf9fc 2966+ err = test_add(sb, add, remount);
2967+ if (unlikely(err < 0))
2968+ goto out;
2969+ if (err) {
2970+ err = 0;
2971+ goto out; /* success */
2972+ }
2973+
5afbbe0d
AM
2974+ bbot = au_sbbot(sb);
2975+ add_branch = au_br_alloc(sb, bbot + 2, add->perm);
1facf9fc 2976+ err = PTR_ERR(add_branch);
2977+ if (IS_ERR(add_branch))
2978+ goto out;
2979+
2980+ err = au_br_init(add_branch, sb, add);
2981+ if (unlikely(err)) {
2982+ au_br_do_free(add_branch);
2983+ goto out;
2984+ }
2985+
2986+ add_bindex = add->bindex;
1facf9fc 2987+ if (!remount)
86dc4139 2988+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 2989+ else {
2990+ sysaufs_brs_del(sb, add_bindex);
86dc4139 2991+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 2992+ sysaufs_brs_add(sb, add_bindex);
2993+ }
2994+
86dc4139 2995+ h_dentry = add->path.dentry;
1308ab2a 2996+ if (!add_bindex) {
1facf9fc 2997+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 2998+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
2999+ } else
5527c038 3000+ au_add_nlink(root_inode, d_inode(h_dentry));
1facf9fc 3001+
3002+ /*
4a4d8108 3003+ * this test/set prevents aufs from handling unnecesary notify events
027c5e7a 3004+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 3005+ * once detached from aufs.
3006+ */
3007+ if (au_xino_brid(sb) < 0
3008+ && au_br_writable(add_branch->br_perm)
3009+ && !au_test_fs_bad_xino(h_dentry->d_sb)
3010+ && add_branch->br_xino.xi_file
2000de60 3011+ && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
1facf9fc 3012+ au_xino_brid_set(sb, add_branch->br_id);
3013+
4f0767ce 3014+out:
1facf9fc 3015+ return err;
3016+}
3017+
3018+/* ---------------------------------------------------------------------- */
3019+
79b8bda9 3020+static unsigned long long au_farray_cb(struct super_block *sb, void *a,
076b876e
AM
3021+ unsigned long long max __maybe_unused,
3022+ void *arg)
3023+{
3024+ unsigned long long n;
3025+ struct file **p, *f;
ae9dfd79
AM
3026+ struct hlist_bl_head *files;
3027+ struct hlist_bl_node *pos;
076b876e 3028+ struct au_finfo *finfo;
076b876e
AM
3029+
3030+ n = 0;
3031+ p = a;
3032+ files = &au_sbi(sb)->si_files;
ae9dfd79
AM
3033+ hlist_bl_lock(files);
3034+ hlist_bl_for_each_entry(finfo, pos, files, fi_hlist) {
076b876e
AM
3035+ f = finfo->fi_file;
3036+ if (file_count(f)
3037+ && !special_file(file_inode(f)->i_mode)) {
3038+ get_file(f);
3039+ *p++ = f;
3040+ n++;
3041+ AuDebugOn(n > max);
3042+ }
3043+ }
ae9dfd79 3044+ hlist_bl_unlock(files);
076b876e
AM
3045+
3046+ return n;
3047+}
3048+
3049+static struct file **au_farray_alloc(struct super_block *sb,
3050+ unsigned long long *max)
3051+{
5afbbe0d 3052+ *max = au_nfiles(sb);
79b8bda9 3053+ return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
076b876e
AM
3054+}
3055+
/*
 * Release a file array: fput() every non-NULL entry among the first @max
 * slots of @a, then free the array itself with kvfree().
 */
static void au_farray_free(struct file **a, unsigned long long max)
{
	unsigned long long idx = 0;

	while (idx < max) {
		if (a[idx])
			fput(a[idx]);
		idx++;
	}
	kvfree(a);
}
3065+
3066+/* ---------------------------------------------------------------------- */
3067+
1facf9fc 3068+/*
3069+ * delete a branch
3070+ */
3071+
/*
 * Print an informational message only when @do_info is true.
 * Kept as a macro: to show the line number, do not make it an inlined
 * function.
 */
#define AuVerbose(do_info, fmt, ...)			\
	do {						\
		if (do_info)				\
			pr_info(fmt, ##__VA_ARGS__);	\
	} while (0)
3077+
5afbbe0d
AM
3078+static int au_test_ibusy(struct inode *inode, aufs_bindex_t btop,
3079+ aufs_bindex_t bbot)
027c5e7a 3080+{
5afbbe0d 3081+ return (inode && !S_ISDIR(inode->i_mode)) || btop == bbot;
027c5e7a
AM
3082+}
3083+
5afbbe0d
AM
3084+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t btop,
3085+ aufs_bindex_t bbot)
027c5e7a 3086+{
5afbbe0d 3087+ return au_test_ibusy(d_inode(dentry), btop, bbot);
027c5e7a
AM
3088+}
3089+
1facf9fc 3090+/*
3091+ * test if the branch is deletable or not.
3092+ */
3093+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 3094+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3095+{
3096+ int err, i, j, ndentry;
5afbbe0d 3097+ aufs_bindex_t btop, bbot;
1facf9fc 3098+ struct au_dcsub_pages dpages;
3099+ struct au_dpage *dpage;
3100+ struct dentry *d;
1facf9fc 3101+
3102+ err = au_dpages_init(&dpages, GFP_NOFS);
3103+ if (unlikely(err))
3104+ goto out;
3105+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
3106+ if (unlikely(err))
3107+ goto out_dpages;
3108+
1facf9fc 3109+ for (i = 0; !err && i < dpages.ndpage; i++) {
3110+ dpage = dpages.dpages + i;
3111+ ndentry = dpage->ndentry;
3112+ for (j = 0; !err && j < ndentry; j++) {
3113+ d = dpage->dentries[j];
c1595e42 3114+ AuDebugOn(au_dcount(d) <= 0);
027c5e7a 3115+ if (!au_digen_test(d, sigen)) {
1facf9fc 3116+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
3117+ if (unlikely(au_dbrange_test(d))) {
3118+ di_read_unlock(d, AuLock_IR);
3119+ continue;
3120+ }
3121+ } else {
1facf9fc 3122+ di_write_lock_child(d);
027c5e7a
AM
3123+ if (unlikely(au_dbrange_test(d))) {
3124+ di_write_unlock(d);
3125+ continue;
3126+ }
1facf9fc 3127+ err = au_reval_dpath(d, sigen);
3128+ if (!err)
3129+ di_downgrade_lock(d, AuLock_IR);
3130+ else {
3131+ di_write_unlock(d);
3132+ break;
3133+ }
3134+ }
3135+
027c5e7a 3136+ /* AuDbgDentry(d); */
5afbbe0d
AM
3137+ btop = au_dbtop(d);
3138+ bbot = au_dbbot(d);
3139+ if (btop <= bindex
3140+ && bindex <= bbot
1facf9fc 3141+ && au_h_dptr(d, bindex)
5afbbe0d 3142+ && au_test_dbusy(d, btop, bbot)) {
1facf9fc 3143+ err = -EBUSY;
523b37e3 3144+ AuVerbose(verbose, "busy %pd\n", d);
027c5e7a 3145+ AuDbgDentry(d);
1facf9fc 3146+ }
3147+ di_read_unlock(d, AuLock_IR);
3148+ }
3149+ }
3150+
4f0767ce 3151+out_dpages:
1facf9fc 3152+ au_dpages_free(&dpages);
4f0767ce 3153+out:
1facf9fc 3154+ return err;
3155+}
3156+
3157+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 3158+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3159+{
3160+ int err;
7f207e10
AM
3161+ unsigned long long max, ull;
3162+ struct inode *i, **array;
5afbbe0d 3163+ aufs_bindex_t btop, bbot;
1facf9fc 3164+
7f207e10
AM
3165+ array = au_iarray_alloc(sb, &max);
3166+ err = PTR_ERR(array);
3167+ if (IS_ERR(array))
3168+ goto out;
3169+
1facf9fc 3170+ err = 0;
7f207e10
AM
3171+ AuDbg("b%d\n", bindex);
3172+ for (ull = 0; !err && ull < max; ull++) {
3173+ i = array[ull];
076b876e
AM
3174+ if (unlikely(!i))
3175+ break;
7f207e10 3176+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 3177+ continue;
3178+
7f207e10 3179+ /* AuDbgInode(i); */
537831f9 3180+ if (au_iigen(i, NULL) == sigen)
1facf9fc 3181+ ii_read_lock_child(i);
3182+ else {
3183+ ii_write_lock_child(i);
027c5e7a
AM
3184+ err = au_refresh_hinode_self(i);
3185+ au_iigen_dec(i);
1facf9fc 3186+ if (!err)
3187+ ii_downgrade_lock(i);
3188+ else {
3189+ ii_write_unlock(i);
3190+ break;
3191+ }
3192+ }
3193+
5afbbe0d
AM
3194+ btop = au_ibtop(i);
3195+ bbot = au_ibbot(i);
3196+ if (btop <= bindex
3197+ && bindex <= bbot
1facf9fc 3198+ && au_h_iptr(i, bindex)
5afbbe0d 3199+ && au_test_ibusy(i, btop, bbot)) {
1facf9fc 3200+ err = -EBUSY;
3201+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 3202+ AuDbgInode(i);
1facf9fc 3203+ }
3204+ ii_read_unlock(i);
3205+ }
7f207e10 3206+ au_iarray_free(array, max);
1facf9fc 3207+
7f207e10 3208+out:
1facf9fc 3209+ return err;
3210+}
3211+
b752ccd1
AM
3212+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
3213+ const unsigned int verbose)
1facf9fc 3214+{
3215+ int err;
3216+ unsigned int sigen;
3217+
3218+ sigen = au_sigen(root->d_sb);
3219+ DiMustNoWaiters(root);
5527c038 3220+ IiMustNoWaiters(d_inode(root));
1facf9fc 3221+ di_write_unlock(root);
b752ccd1 3222+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 3223+ if (!err)
b752ccd1 3224+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 3225+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
3226+
3227+ return err;
3228+}
3229+
076b876e
AM
3230+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
3231+ struct file **to_free, int *idx)
3232+{
3233+ int err;
c1595e42 3234+ unsigned char matched, root;
5afbbe0d 3235+ aufs_bindex_t bindex, bbot;
076b876e
AM
3236+ struct au_fidir *fidir;
3237+ struct au_hfile *hfile;
3238+
3239+ err = 0;
2000de60 3240+ root = IS_ROOT(file->f_path.dentry);
c1595e42
JR
3241+ if (root) {
3242+ get_file(file);
3243+ to_free[*idx] = file;
3244+ (*idx)++;
3245+ goto out;
3246+ }
3247+
076b876e 3248+ matched = 0;
076b876e
AM
3249+ fidir = au_fi(file)->fi_hdir;
3250+ AuDebugOn(!fidir);
5afbbe0d
AM
3251+ bbot = au_fbbot_dir(file);
3252+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++) {
076b876e
AM
3253+ hfile = fidir->fd_hfile + bindex;
3254+ if (!hfile->hf_file)
3255+ continue;
3256+
c1595e42 3257+ if (hfile->hf_br->br_id == br_id) {
076b876e 3258+ matched = 1;
076b876e 3259+ break;
c1595e42 3260+ }
076b876e 3261+ }
c1595e42 3262+ if (matched)
076b876e
AM
3263+ err = -EBUSY;
3264+
3265+out:
3266+ return err;
3267+}
3268+
3269+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
3270+ struct file **to_free, int opened)
3271+{
3272+ int err, idx;
3273+ unsigned long long ull, max;
5afbbe0d 3274+ aufs_bindex_t btop;
076b876e 3275+ struct file *file, **array;
076b876e
AM
3276+ struct dentry *root;
3277+ struct au_hfile *hfile;
3278+
3279+ array = au_farray_alloc(sb, &max);
3280+ err = PTR_ERR(array);
3281+ if (IS_ERR(array))
3282+ goto out;
3283+
3284+ err = 0;
3285+ idx = 0;
3286+ root = sb->s_root;
3287+ di_write_unlock(root);
3288+ for (ull = 0; ull < max; ull++) {
3289+ file = array[ull];
3290+ if (unlikely(!file))
3291+ break;
3292+
3293+ /* AuDbg("%pD\n", file); */
3294+ fi_read_lock(file);
5afbbe0d 3295+ btop = au_fbtop(file);
2000de60 3296+ if (!d_is_dir(file->f_path.dentry)) {
076b876e
AM
3297+ hfile = &au_fi(file)->fi_htop;
3298+ if (hfile->hf_br->br_id == br_id)
3299+ err = -EBUSY;
3300+ } else
3301+ err = test_dir_busy(file, br_id, to_free, &idx);
3302+ fi_read_unlock(file);
3303+ if (unlikely(err))
3304+ break;
3305+ }
3306+ di_write_lock_child(root);
3307+ au_farray_free(array, max);
3308+ AuDebugOn(idx > opened);
3309+
3310+out:
3311+ return err;
3312+}
3313+
3314+static void br_del_file(struct file **to_free, unsigned long long opened,
3315+ aufs_bindex_t br_id)
3316+{
3317+ unsigned long long ull;
5afbbe0d 3318+ aufs_bindex_t bindex, btop, bbot, bfound;
076b876e
AM
3319+ struct file *file;
3320+ struct au_fidir *fidir;
3321+ struct au_hfile *hfile;
3322+
3323+ for (ull = 0; ull < opened; ull++) {
3324+ file = to_free[ull];
3325+ if (unlikely(!file))
3326+ break;
3327+
3328+ /* AuDbg("%pD\n", file); */
2000de60 3329+ AuDebugOn(!d_is_dir(file->f_path.dentry));
076b876e
AM
3330+ bfound = -1;
3331+ fidir = au_fi(file)->fi_hdir;
3332+ AuDebugOn(!fidir);
3333+ fi_write_lock(file);
5afbbe0d
AM
3334+ btop = au_fbtop(file);
3335+ bbot = au_fbbot_dir(file);
3336+ for (bindex = btop; bindex <= bbot; bindex++) {
076b876e
AM
3337+ hfile = fidir->fd_hfile + bindex;
3338+ if (!hfile->hf_file)
3339+ continue;
3340+
3341+ if (hfile->hf_br->br_id == br_id) {
3342+ bfound = bindex;
3343+ break;
3344+ }
3345+ }
3346+ AuDebugOn(bfound < 0);
3347+ au_set_h_fptr(file, bfound, NULL);
5afbbe0d
AM
3348+ if (bfound == btop) {
3349+ for (btop++; btop <= bbot; btop++)
3350+ if (au_hf_dir(file, btop)) {
3351+ au_set_fbtop(file, btop);
076b876e
AM
3352+ break;
3353+ }
3354+ }
3355+ fi_write_unlock(file);
3356+ }
3357+}
3358+
1facf9fc 3359+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
3360+ const aufs_bindex_t bindex,
5afbbe0d 3361+ const aufs_bindex_t bbot)
1facf9fc 3362+{
3363+ struct au_branch **brp, **p;
3364+
dece6358
AM
3365+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3366+
1facf9fc 3367+ brp = sbinfo->si_branch + bindex;
5afbbe0d
AM
3368+ if (bindex < bbot)
3369+ memmove(brp, brp + 1, sizeof(*brp) * (bbot - bindex));
3370+ sbinfo->si_branch[0 + bbot] = NULL;
3371+ sbinfo->si_bbot--;
1facf9fc 3372+
e2f27e51
AM
3373+ p = au_krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST,
3374+ /*may_shrink*/1);
1facf9fc 3375+ if (p)
3376+ sbinfo->si_branch = p;
4a4d8108 3377+ /* harmless error */
1facf9fc 3378+}
3379+
3380+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
5afbbe0d 3381+ const aufs_bindex_t bbot)
1facf9fc 3382+{
3383+ struct au_hdentry *hdp, *p;
3384+
1308ab2a 3385+ AuRwMustWriteLock(&dinfo->di_rwsem);
3386+
5afbbe0d
AM
3387+ hdp = au_hdentry(dinfo, bindex);
3388+ if (bindex < bbot)
3389+ memmove(hdp, hdp + 1, sizeof(*hdp) * (bbot - bindex));
3390+ /* au_h_dentry_init(au_hdentry(dinfo, bbot); */
3391+ dinfo->di_bbot--;
1facf9fc 3392+
e2f27e51
AM
3393+ p = au_krealloc(dinfo->di_hdentry, sizeof(*p) * bbot, AuGFP_SBILIST,
3394+ /*may_shrink*/1);
1facf9fc 3395+ if (p)
3396+ dinfo->di_hdentry = p;
4a4d8108 3397+ /* harmless error */
1facf9fc 3398+}
3399+
3400+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
5afbbe0d 3401+ const aufs_bindex_t bbot)
1facf9fc 3402+{
3403+ struct au_hinode *hip, *p;
3404+
1308ab2a 3405+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3406+
5afbbe0d
AM
3407+ hip = au_hinode(iinfo, bindex);
3408+ if (bindex < bbot)
3409+ memmove(hip, hip + 1, sizeof(*hip) * (bbot - bindex));
3410+ /* au_hinode_init(au_hinode(iinfo, bbot)); */
3411+ iinfo->ii_bbot--;
1facf9fc 3412+
e2f27e51
AM
3413+ p = au_krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST,
3414+ /*may_shrink*/1);
1facf9fc 3415+ if (p)
3416+ iinfo->ii_hinode = p;
4a4d8108 3417+ /* harmless error */
1facf9fc 3418+}
3419+
3420+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
3421+ struct au_branch *br)
3422+{
5afbbe0d 3423+ aufs_bindex_t bbot;
1facf9fc 3424+ struct au_sbinfo *sbinfo;
53392da6
AM
3425+ struct dentry *root, *h_root;
3426+ struct inode *inode, *h_inode;
3427+ struct au_hinode *hinode;
1facf9fc 3428+
dece6358
AM
3429+ SiMustWriteLock(sb);
3430+
1facf9fc 3431+ root = sb->s_root;
5527c038 3432+ inode = d_inode(root);
1facf9fc 3433+ sbinfo = au_sbi(sb);
5afbbe0d 3434+ bbot = sbinfo->si_bbot;
1facf9fc 3435+
53392da6
AM
3436+ h_root = au_h_dptr(root, bindex);
3437+ hinode = au_hi(inode, bindex);
3438+ h_inode = au_igrab(hinode->hi_inode);
3439+ au_hiput(hinode);
1facf9fc 3440+
53392da6 3441+ au_sbilist_lock();
5afbbe0d
AM
3442+ au_br_do_del_brp(sbinfo, bindex, bbot);
3443+ au_br_do_del_hdp(au_di(root), bindex, bbot);
3444+ au_br_do_del_hip(au_ii(inode), bindex, bbot);
53392da6
AM
3445+ au_sbilist_unlock();
3446+
ae9dfd79
AM
3447+ /* ignore an error */
3448+ au_dr_br_fin(sb, br); /* always, regardless the mount option */
3449+
53392da6
AM
3450+ dput(h_root);
3451+ iput(h_inode);
3452+ au_br_do_free(br);
1facf9fc 3453+}
3454+
79b8bda9
AM
/*
 * Callback for au_array_alloc() which stores nothing into @array and simply
 * reports @max as the number of entries.  @sb, @array and @arg are unused.
 */
static unsigned long long empty_cb(struct super_block *sb, void *array,
				   unsigned long long max, void *arg)
{
	unsigned long long nr = max;

	return nr;
}
3460+
1facf9fc 3461+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
3462+{
3463+ int err, rerr, i;
076b876e 3464+ unsigned long long opened;
1facf9fc 3465+ unsigned int mnt_flags;
5afbbe0d 3466+ aufs_bindex_t bindex, bbot, br_id;
1facf9fc 3467+ unsigned char do_wh, verbose;
3468+ struct au_branch *br;
3469+ struct au_wbr *wbr;
076b876e
AM
3470+ struct dentry *root;
3471+ struct file **to_free;
1facf9fc 3472+
3473+ err = 0;
076b876e
AM
3474+ opened = 0;
3475+ to_free = NULL;
3476+ root = sb->s_root;
3477+ bindex = au_find_dbindex(root, del->h_path.dentry);
1facf9fc 3478+ if (bindex < 0) {
3479+ if (remount)
3480+ goto out; /* success */
3481+ err = -ENOENT;
4a4d8108 3482+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 3483+ goto out;
3484+ }
3485+ AuDbg("bindex b%d\n", bindex);
3486+
3487+ err = -EBUSY;
3488+ mnt_flags = au_mntflags(sb);
3489+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
5afbbe0d
AM
3490+ bbot = au_sbbot(sb);
3491+ if (unlikely(!bbot)) {
1facf9fc 3492+ AuVerbose(verbose, "no more branches left\n");
3493+ goto out;
3494+ }
3495+ br = au_sbr(sb, bindex);
86dc4139 3496+ AuDebugOn(!path_equal(&br->br_path, &del->h_path));
076b876e
AM
3497+
3498+ br_id = br->br_id;
5afbbe0d 3499+ opened = au_br_count(br);
076b876e 3500+ if (unlikely(opened)) {
79b8bda9 3501+ to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
076b876e
AM
3502+ err = PTR_ERR(to_free);
3503+ if (IS_ERR(to_free))
3504+ goto out;
3505+
3506+ err = test_file_busy(sb, br_id, to_free, opened);
3507+ if (unlikely(err)) {
3508+ AuVerbose(verbose, "%llu file(s) opened\n", opened);
3509+ goto out;
3510+ }
1facf9fc 3511+ }
3512+
3513+ wbr = br->br_wbr;
3514+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
3515+ if (do_wh) {
1308ab2a 3516+ /* instead of WbrWhMustWriteLock(wbr) */
3517+ SiMustWriteLock(sb);
1facf9fc 3518+ for (i = 0; i < AuBrWh_Last; i++) {
3519+ dput(wbr->wbr_wh[i]);
3520+ wbr->wbr_wh[i] = NULL;
3521+ }
3522+ }
3523+
076b876e 3524+ err = test_children_busy(root, bindex, verbose);
1facf9fc 3525+ if (unlikely(err)) {
3526+ if (do_wh)
3527+ goto out_wh;
3528+ goto out;
3529+ }
3530+
3531+ err = 0;
076b876e
AM
3532+ if (to_free) {
3533+ /*
3534+ * now we confirmed the branch is deletable.
3535+ * let's free the remaining opened dirs on the branch.
3536+ */
3537+ di_write_unlock(root);
3538+ br_del_file(to_free, opened, br_id);
3539+ di_write_lock_child(root);
3540+ }
3541+
1facf9fc 3542+ if (!remount)
3543+ au_br_do_del(sb, bindex, br);
3544+ else {
3545+ sysaufs_brs_del(sb, bindex);
3546+ au_br_do_del(sb, bindex, br);
3547+ sysaufs_brs_add(sb, bindex);
3548+ }
3549+
1308ab2a 3550+ if (!bindex) {
5527c038 3551+ au_cpup_attr_all(d_inode(root), /*force*/1);
1308ab2a 3552+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
3553+ } else
5527c038 3554+ au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
1facf9fc 3555+ if (au_opt_test(mnt_flags, PLINK))
3556+ au_plink_half_refresh(sb, br_id);
3557+
b752ccd1 3558+ if (au_xino_brid(sb) == br_id)
1facf9fc 3559+ au_xino_brid_set(sb, -1);
3560+ goto out; /* success */
3561+
4f0767ce 3562+out_wh:
1facf9fc 3563+ /* revert */
86dc4139 3564+ rerr = au_br_init_wh(sb, br, br->br_perm);
1facf9fc 3565+ if (rerr)
0c3ec466
AM
3566+ pr_warn("failed re-creating base whiteout, %s. (%d)\n",
3567+ del->pathname, rerr);
4f0767ce 3568+out:
076b876e
AM
3569+ if (to_free)
3570+ au_farray_free(to_free, opened);
1facf9fc 3571+ return err;
3572+}
3573+
3574+/* ---------------------------------------------------------------------- */
3575+
027c5e7a
AM
3576+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
3577+{
3578+ int err;
5afbbe0d 3579+ aufs_bindex_t btop, bbot;
027c5e7a
AM
3580+ struct aufs_ibusy ibusy;
3581+ struct inode *inode, *h_inode;
3582+
3583+ err = -EPERM;
3584+ if (unlikely(!capable(CAP_SYS_ADMIN)))
3585+ goto out;
3586+
3587+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
3588+ if (!err)
3589+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
3590+ if (unlikely(err)) {
3591+ err = -EFAULT;
3592+ AuTraceErr(err);
3593+ goto out;
3594+ }
3595+
3596+ err = -EINVAL;
3597+ si_read_lock(sb, AuLock_FLUSH);
5afbbe0d 3598+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbbot(sb)))
027c5e7a
AM
3599+ goto out_unlock;
3600+
3601+ err = 0;
3602+ ibusy.h_ino = 0; /* invalid */
3603+ inode = ilookup(sb, ibusy.ino);
3604+ if (!inode
3605+ || inode->i_ino == AUFS_ROOT_INO
5afbbe0d 3606+ || au_is_bad_inode(inode))
027c5e7a
AM
3607+ goto out_unlock;
3608+
3609+ ii_read_lock_child(inode);
5afbbe0d
AM
3610+ btop = au_ibtop(inode);
3611+ bbot = au_ibbot(inode);
3612+ if (btop <= ibusy.bindex && ibusy.bindex <= bbot) {
027c5e7a 3613+ h_inode = au_h_iptr(inode, ibusy.bindex);
5afbbe0d 3614+ if (h_inode && au_test_ibusy(inode, btop, bbot))
027c5e7a
AM
3615+ ibusy.h_ino = h_inode->i_ino;
3616+ }
3617+ ii_read_unlock(inode);
3618+ iput(inode);
3619+
3620+out_unlock:
3621+ si_read_unlock(sb);
3622+ if (!err) {
3623+ err = __put_user(ibusy.h_ino, &arg->h_ino);
3624+ if (unlikely(err)) {
3625+ err = -EFAULT;
3626+ AuTraceErr(err);
3627+ }
3628+ }
3629+out:
3630+ return err;
3631+}
3632+
3633+long au_ibusy_ioctl(struct file *file, unsigned long arg)
3634+{
2000de60 3635+ return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
027c5e7a
AM
3636+}
3637+
#ifdef CONFIG_COMPAT
/* compat ioctl entry point: @arg is converted with compat_ptr() first. */
long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
{
	struct super_block *sb = file->f_path.dentry->d_sb;

	return au_ibusy(sb, compat_ptr(arg));
}
#endif
3644+
3645+/* ---------------------------------------------------------------------- */
3646+
1facf9fc 3647+/*
3648+ * change a branch permission
3649+ */
3650+
dece6358
AM
/*
 * Emit a warning about IMA when a branch turns from RW to RO.
 * Compiles to an empty function unless CONFIG_IMA is set.
 */
static void au_warn_ima(void)
{
#ifdef CONFIG_IMA
	/* since it doesn't support mark_files_ro() */
	AuWarn1("RW -> RO makes IMA to produce wrong message\n");
#endif
}
3658+
/* True (1) when permission @a is whiteout-able while @b is not, else 0. */
static int do_need_sigen_inc(int a, int b)
{
	if (!au_br_whable(a))
		return 0;
	return !au_br_whable(b);
}
3663+
/*
 * True (1) when the change of the branch permission from @old to @new flips
 * its whiteout-able state in either direction, else 0.
 */
static int need_sigen_inc(int old, int new)
{
	if (do_need_sigen_inc(old, new))
		return 1;
	return do_need_sigen_inc(new, old) ? 1 : 0;
}
3669+
3670+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
3671+{
7f207e10 3672+ int err, do_warn;
027c5e7a 3673+ unsigned int mnt_flags;
7f207e10 3674+ unsigned long long ull, max;
e49829fe 3675+ aufs_bindex_t br_id;
38d290e6 3676+ unsigned char verbose, writer;
7f207e10 3677+ struct file *file, *hf, **array;
e49829fe 3678+ struct au_hfile *hfile;
1facf9fc 3679+
027c5e7a
AM
3680+ mnt_flags = au_mntflags(sb);
3681+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3682+
7f207e10
AM
3683+ array = au_farray_alloc(sb, &max);
3684+ err = PTR_ERR(array);
3685+ if (IS_ERR(array))
1facf9fc 3686+ goto out;
3687+
7f207e10 3688+ do_warn = 0;
e49829fe 3689+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
3690+ for (ull = 0; ull < max; ull++) {
3691+ file = array[ull];
076b876e
AM
3692+ if (unlikely(!file))
3693+ break;
1facf9fc 3694+
523b37e3 3695+ /* AuDbg("%pD\n", file); */
1facf9fc 3696+ fi_read_lock(file);
3697+ if (unlikely(au_test_mmapped(file))) {
3698+ err = -EBUSY;
523b37e3 3699+ AuVerbose(verbose, "mmapped %pD\n", file);
7f207e10 3700+ AuDbgFile(file);
1facf9fc 3701+ FiMustNoWaiters(file);
3702+ fi_read_unlock(file);
7f207e10 3703+ goto out_array;
1facf9fc 3704+ }
3705+
e49829fe
JR
3706+ hfile = &au_fi(file)->fi_htop;
3707+ hf = hfile->hf_file;
7e9cd9fe 3708+ if (!d_is_reg(file->f_path.dentry)
1facf9fc 3709+ || !(file->f_mode & FMODE_WRITE)
e49829fe 3710+ || hfile->hf_br->br_id != br_id
7f207e10
AM
3711+ || !(hf->f_mode & FMODE_WRITE))
3712+ array[ull] = NULL;
3713+ else {
3714+ do_warn = 1;
3715+ get_file(file);
1facf9fc 3716+ }
3717+
1facf9fc 3718+ FiMustNoWaiters(file);
3719+ fi_read_unlock(file);
7f207e10
AM
3720+ fput(file);
3721+ }
1facf9fc 3722+
3723+ err = 0;
7f207e10 3724+ if (do_warn)
dece6358 3725+ au_warn_ima();
7f207e10
AM
3726+
3727+ for (ull = 0; ull < max; ull++) {
3728+ file = array[ull];
3729+ if (!file)
3730+ continue;
3731+
1facf9fc 3732+ /* todo: already flushed? */
523b37e3
AM
3733+ /*
3734+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its
3735+ * approach which resets f_mode and calls mnt_drop_write() and
3736+ * file_release_write() for each file, because the branch
3737+ * attribute in aufs world is totally different from the native
3738+ * fs rw/ro mode.
3739+ */
7f207e10
AM
3740+ /* fi_read_lock(file); */
3741+ hfile = &au_fi(file)->fi_htop;
3742+ hf = hfile->hf_file;
3743+ /* fi_read_unlock(file); */
027c5e7a 3744+ spin_lock(&hf->f_lock);
38d290e6
JR
3745+ writer = !!(hf->f_mode & FMODE_WRITER);
3746+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
027c5e7a 3747+ spin_unlock(&hf->f_lock);
38d290e6
JR
3748+ if (writer) {
3749+ put_write_access(file_inode(hf));
c06a8ce3 3750+ __mnt_drop_write(hf->f_path.mnt);
1facf9fc 3751+ }
3752+ }
3753+
7f207e10
AM
3754+out_array:
3755+ au_farray_free(array, max);
4f0767ce 3756+out:
7f207e10 3757+ AuTraceErr(err);
1facf9fc 3758+ return err;
3759+}
3760+
3761+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 3762+ int *do_refresh)
1facf9fc 3763+{
3764+ int err, rerr;
3765+ aufs_bindex_t bindex;
3766+ struct dentry *root;
3767+ struct au_branch *br;
076b876e 3768+ struct au_br_fhsm *bf;
1facf9fc 3769+
3770+ root = sb->s_root;
1facf9fc 3771+ bindex = au_find_dbindex(root, mod->h_root);
3772+ if (bindex < 0) {
3773+ if (remount)
3774+ return 0; /* success */
3775+ err = -ENOENT;
4a4d8108 3776+ pr_err("%s no such branch\n", mod->path);
1facf9fc 3777+ goto out;
3778+ }
3779+ AuDbg("bindex b%d\n", bindex);
3780+
5527c038 3781+ err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
1facf9fc 3782+ if (unlikely(err))
3783+ goto out;
3784+
3785+ br = au_sbr(sb, bindex);
86dc4139 3786+ AuDebugOn(mod->h_root != au_br_dentry(br));
1facf9fc 3787+ if (br->br_perm == mod->perm)
3788+ return 0; /* success */
3789+
076b876e
AM
3790+ /* pre-allocate for non-fhsm --> fhsm */
3791+ bf = NULL;
3792+ if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
3793+ err = au_fhsm_br_alloc(br);
3794+ if (unlikely(err))
3795+ goto out;
3796+ bf = br->br_fhsm;
3797+ br->br_fhsm = NULL;
3798+ }
3799+
1facf9fc 3800+ if (au_br_writable(br->br_perm)) {
3801+ /* remove whiteout base */
86dc4139 3802+ err = au_br_init_wh(sb, br, mod->perm);
1facf9fc 3803+ if (unlikely(err))
076b876e 3804+ goto out_bf;
1facf9fc 3805+
3806+ if (!au_br_writable(mod->perm)) {
3807+ /* rw --> ro, file might be mmapped */
3808+ DiMustNoWaiters(root);
5527c038 3809+ IiMustNoWaiters(d_inode(root));
1facf9fc 3810+ di_write_unlock(root);
3811+ err = au_br_mod_files_ro(sb, bindex);
3812+ /* aufs_write_lock() calls ..._child() */
3813+ di_write_lock_child(root);
3814+
3815+ if (unlikely(err)) {
3816+ rerr = -ENOMEM;
be52b249 3817+ br->br_wbr = kzalloc(sizeof(*br->br_wbr),
1facf9fc 3818+ GFP_NOFS);
86dc4139
AM
3819+ if (br->br_wbr)
3820+ rerr = au_wbr_init(br, sb, br->br_perm);
1facf9fc 3821+ if (unlikely(rerr)) {
3822+ AuIOErr("nested error %d (%d)\n",
3823+ rerr, err);
3824+ br->br_perm = mod->perm;
3825+ }
3826+ }
3827+ }
3828+ } else if (au_br_writable(mod->perm)) {
3829+ /* ro --> rw */
3830+ err = -ENOMEM;
be52b249 3831+ br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
1facf9fc 3832+ if (br->br_wbr) {
86dc4139 3833+ err = au_wbr_init(br, sb, mod->perm);
1facf9fc 3834+ if (unlikely(err)) {
ae9dfd79 3835+ kfree(br->br_wbr);
1facf9fc 3836+ br->br_wbr = NULL;
3837+ }
3838+ }
3839+ }
076b876e
AM
3840+ if (unlikely(err))
3841+ goto out_bf;
3842+
3843+ if (au_br_fhsm(br->br_perm)) {
3844+ if (!au_br_fhsm(mod->perm)) {
3845+ /* fhsm --> non-fhsm */
3846+ au_br_fhsm_fin(br->br_fhsm);
ae9dfd79 3847+ kfree(br->br_fhsm);
076b876e
AM
3848+ br->br_fhsm = NULL;
3849+ }
3850+ } else if (au_br_fhsm(mod->perm))
3851+ /* non-fhsm --> fhsm */
3852+ br->br_fhsm = bf;
3853+
076b876e
AM
3854+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
3855+ br->br_perm = mod->perm;
3856+ goto out; /* success */
1facf9fc 3857+
076b876e 3858+out_bf:
ae9dfd79 3859+ kfree(bf);
076b876e
AM
3860+out:
3861+ AuTraceErr(err);
3862+ return err;
3863+}
3864+
3865+/* ---------------------------------------------------------------------- */
3866+
3867+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
3868+{
3869+ int err;
3870+ struct kstatfs kstfs;
3871+
3872+ err = vfs_statfs(&br->br_path, &kstfs);
1facf9fc 3873+ if (!err) {
076b876e
AM
3874+ stfs->f_blocks = kstfs.f_blocks;
3875+ stfs->f_bavail = kstfs.f_bavail;
3876+ stfs->f_files = kstfs.f_files;
3877+ stfs->f_ffree = kstfs.f_ffree;
1facf9fc 3878+ }
3879+
1facf9fc 3880+ return err;
3881+}
e8791d4f
AM
3882diff -urNp -x '*.orig' linux-4.9/fs/aufs/branch.h linux-4.9/fs/aufs/branch.h
3883--- linux-4.9/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
3884+++ linux-4.9/fs/aufs/branch.h 2021-02-24 16:15:09.521573529 +0100
ae9dfd79 3885@@ -0,0 +1,324 @@
1facf9fc 3886+/*
ae9dfd79 3887+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 3888+ *
3889+ * This program, aufs is free software; you can redistribute it and/or modify
3890+ * it under the terms of the GNU General Public License as published by
3891+ * the Free Software Foundation; either version 2 of the License, or
3892+ * (at your option) any later version.
dece6358
AM
3893+ *
3894+ * This program is distributed in the hope that it will be useful,
3895+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3896+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3897+ * GNU General Public License for more details.
3898+ *
3899+ * You should have received a copy of the GNU General Public License
523b37e3 3900+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 3901+ */
3902+
3903+/*
3904+ * branch filesystems and xino for them
3905+ */
3906+
3907+#ifndef __AUFS_BRANCH_H__
3908+#define __AUFS_BRANCH_H__
3909+
3910+#ifdef __KERNEL__
3911+
1facf9fc 3912+#include <linux/mount.h>
ae9dfd79 3913+#include "dirren.h"
4a4d8108 3914+#include "dynop.h"
1facf9fc 3915+#include "rwsem.h"
3916+#include "super.h"
3917+
3918+/* ---------------------------------------------------------------------- */
3919+
3920+/* a xino file */
3921+struct au_xino_file {
3922+ struct file *xi_file;
ae9dfd79
AM
3923+ struct {
3924+ spinlock_t spin;
3925+ ino_t *array;
3926+ int total;
3927+ /* reserved for future use */
3928+ /* unsigned long *bitmap; */
3929+ wait_queue_head_t wqh;
3930+ } xi_nondir;
1facf9fc 3931+
3932+ /* todo: make xino files an array to support huge inode number */
3933+
3934+#ifdef CONFIG_DEBUG_FS
3935+ struct dentry *xi_dbgaufs;
3936+#endif
3937+};
3938+
076b876e
AM
3939+/* File-based Hierarchical Storage Management */
3940+struct au_br_fhsm {
3941+#ifdef CONFIG_AUFS_FHSM
3942+ struct mutex bf_lock;
3943+ unsigned long bf_jiffy;
3944+ struct aufs_stfs bf_stfs;
3945+ int bf_readable;
3946+#endif
3947+};
3948+
1facf9fc 3949+/* members for writable branch only */
3950+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
3951+struct au_wbr {
dece6358 3952+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 3953+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 3954+ atomic_t wbr_wh_running;
1facf9fc 3955+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
3956+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
3957+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
3958+
3959+ /* mfs mode */
3960+ unsigned long long wbr_bytes;
3961+};
3962+
4a4d8108
AM
3963+/* ext2 has 3 types of operations at least, ext3 has 4 */
3964+#define AuBrDynOp (AuDyLast * 4)
3965+
1716fcea
AM
3966+#ifdef CONFIG_AUFS_HFSNOTIFY
3967+/* support for asynchronous destruction */
3968+struct au_br_hfsnotify {
3969+ struct fsnotify_group *hfsn_group;
3970+};
3971+#endif
3972+
392086de
AM
3973+/* sysfs entries */
3974+struct au_brsysfs {
3975+ char name[16];
3976+ struct attribute attr;
3977+};
3978+
3979+enum {
3980+ AuBrSysfs_BR,
3981+ AuBrSysfs_BRID,
3982+ AuBrSysfs_Last
3983+};
3984+
1facf9fc 3985+/* protected by superblock rwsem */
3986+struct au_branch {
3987+ struct au_xino_file br_xino;
3988+
3989+ aufs_bindex_t br_id;
3990+
3991+ int br_perm;
86dc4139 3992+ struct path br_path;
4a4d8108
AM
3993+ spinlock_t br_dykey_lock;
3994+ struct au_dykey *br_dykey[AuBrDynOp];
5afbbe0d 3995+ struct percpu_counter br_count;
1facf9fc 3996+
3997+ struct au_wbr *br_wbr;
076b876e 3998+ struct au_br_fhsm *br_fhsm;
1facf9fc 3999+
4000+ /* xino truncation */
1facf9fc 4001+ atomic_t br_xino_running;
4002+
027c5e7a 4003+#ifdef CONFIG_AUFS_HFSNOTIFY
1716fcea 4004+ struct au_br_hfsnotify *br_hfsn;
027c5e7a
AM
4005+#endif
4006+
1facf9fc 4007+#ifdef CONFIG_SYSFS
392086de
AM
4008+ /* entries under sysfs per mount-point */
4009+ struct au_brsysfs br_sysfs[AuBrSysfs_Last];
1facf9fc 4010+#endif
ae9dfd79
AM
4011+
4012+ struct au_dr_br br_dirren;
1facf9fc 4013+};
4014+
4015+/* ---------------------------------------------------------------------- */
4016+
86dc4139
AM
4017+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
4018+{
4019+ return br->br_path.mnt;
4020+}
4021+
4022+static inline struct dentry *au_br_dentry(struct au_branch *br)
4023+{
4024+ return br->br_path.dentry;
4025+}
4026+
4027+static inline struct super_block *au_br_sb(struct au_branch *br)
4028+{
4029+ return au_br_mnt(br)->mnt_sb;
4030+}
4031+
5afbbe0d
AM
4032+static inline void au_br_get(struct au_branch *br)
4033+{
4034+ percpu_counter_inc(&br->br_count);
4035+}
4036+
4037+static inline void au_br_put(struct au_branch *br)
4038+{
4039+ percpu_counter_dec(&br->br_count);
4040+}
4041+
4042+static inline s64 au_br_count(struct au_branch *br)
4043+{
4044+ return percpu_counter_sum(&br->br_count);
4045+}
4046+
4047+static inline void au_br_count_init(struct au_branch *br)
4048+{
4049+ percpu_counter_init(&br->br_count, 0, GFP_NOFS);
4050+}
4051+
4052+static inline void au_br_count_fin(struct au_branch *br)
4053+{
4054+ percpu_counter_destroy(&br->br_count);
4055+}
4056+
1facf9fc 4057+static inline int au_br_rdonly(struct au_branch *br)
4058+{
86dc4139 4059+ return ((au_br_sb(br)->s_flags & MS_RDONLY)
1facf9fc 4060+ || !au_br_writable(br->br_perm))
4061+ ? -EROFS : 0;
4062+}
4063+
4a4d8108 4064+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 4065+{
4a4d8108 4066+#ifdef CONFIG_AUFS_HNOTIFY
1e00d052 4067+ return !(brperm & AuBrPerm_RR);
1facf9fc 4068+#else
4069+ return 0;
4070+#endif
4071+}
4072+
b912730e
AM
4073+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
4074+{
4075+ int err, exec_flag;
4076+
4077+ err = 0;
4078+ exec_flag = oflag & __FMODE_EXEC;
79b8bda9 4079+ if (unlikely(exec_flag && path_noexec(&br->br_path)))
b912730e
AM
4080+ err = -EACCES;
4081+
4082+ return err;
4083+}
4084+
1facf9fc 4085+/* ---------------------------------------------------------------------- */
4086+
4087+/* branch.c */
4088+struct au_sbinfo;
4089+void au_br_free(struct au_sbinfo *sinfo);
4090+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
4091+struct au_opt_add;
4092+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
4093+struct au_opt_del;
4094+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
4095+long au_ibusy_ioctl(struct file *file, unsigned long arg);
4096+#ifdef CONFIG_COMPAT
4097+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
4098+#endif
1facf9fc 4099+struct au_opt_mod;
4100+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4101+ int *do_refresh);
076b876e
AM
4102+struct aufs_stfs;
4103+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
1facf9fc 4104+
4105+/* xino.c */
4106+static const loff_t au_loff_max = LLONG_MAX;
4107+
4108+int au_xib_trunc(struct super_block *sb);
5527c038 4109+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
1facf9fc 4110+ loff_t *pos);
5527c038
JR
4111+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
4112+ size_t size, loff_t *pos);
1facf9fc 4113+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
4114+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
4115+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 4116+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 4117+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4118+ ino_t ino);
4119+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4120+ ino_t *ino);
4121+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
4122+ struct file *base_file, int do_test);
4123+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
4124+
4125+struct au_opt_xino;
4126+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
4127+void au_xino_clr(struct super_block *sb);
4128+struct file *au_xino_def(struct super_block *sb);
4129+int au_xino_path(struct seq_file *seq, struct file *file);
4130+
ae9dfd79
AM
4131+void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
4132+ ino_t h_ino, int idx);
4133+int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4134+ int *idx);
4135+
1facf9fc 4136+/* ---------------------------------------------------------------------- */
4137+
4138+/* Superblock to branch */
4139+static inline
4140+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
4141+{
4142+ return au_sbr(sb, bindex)->br_id;
4143+}
4144+
4145+static inline
4146+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
4147+{
86dc4139 4148+ return au_br_mnt(au_sbr(sb, bindex));
1facf9fc 4149+}
4150+
4151+static inline
4152+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
4153+{
86dc4139 4154+ return au_br_sb(au_sbr(sb, bindex));
1facf9fc 4155+}
4156+
5afbbe0d
AM
4157+static inline void au_sbr_get(struct super_block *sb, aufs_bindex_t bindex)
4158+{
4159+ au_br_get(au_sbr(sb, bindex));
4160+}
4161+
1facf9fc 4162+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
4163+{
5afbbe0d 4164+ au_br_put(au_sbr(sb, bindex));
1facf9fc 4165+}
4166+
4167+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
4168+{
4169+ return au_sbr(sb, bindex)->br_perm;
4170+}
4171+
4172+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
4173+{
4174+ return au_br_whable(au_sbr_perm(sb, bindex));
4175+}
4176+
4177+/* ---------------------------------------------------------------------- */
4178+
4179+/*
4180+ * wbr_wh_read_lock, wbr_wh_write_lock
4181+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
4182+ */
4183+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
4184+
dece6358
AM
4185+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
4186+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
4187+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
4188+
076b876e
AM
4189+/* ---------------------------------------------------------------------- */
4190+
4191+#ifdef CONFIG_AUFS_FHSM
4192+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
4193+{
4194+ mutex_init(&brfhsm->bf_lock);
4195+ brfhsm->bf_jiffy = 0;
4196+ brfhsm->bf_readable = 0;
4197+}
4198+
4199+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
4200+{
4201+ mutex_destroy(&brfhsm->bf_lock);
4202+}
4203+#else
4204+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
4205+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
4206+#endif
4207+
1facf9fc 4208+#endif /* __KERNEL__ */
4209+#endif /* __AUFS_BRANCH_H__ */
e8791d4f
AM
4210diff -urNp -x '*.orig' linux-4.9/fs/aufs/conf.mk linux-4.9/fs/aufs/conf.mk
4211--- linux-4.9/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
4212+++ linux-4.9/fs/aufs/conf.mk 2021-02-24 16:15:09.521573529 +0100
ae9dfd79 4213@@ -0,0 +1,39 @@
4a4d8108
AM
4214+
4215+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
4216+
4217+define AuConf
4218+ifdef ${1}
4219+AuConfStr += ${1}=${${1}}
4220+endif
4221+endef
4222+
b752ccd1 4223+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 4224+ SBILIST \
7f207e10 4225+ HNOTIFY HFSNOTIFY \
4a4d8108 4226+ EXPORT INO_T_64 \
c1595e42 4227+ XATTR \
076b876e 4228+ FHSM \
4a4d8108 4229+ RDU \
ae9dfd79 4230+ DIRREN \
4a4d8108
AM
4231+ SHWH \
4232+ BR_RAMFS \
4233+ BR_FUSE POLL \
4234+ BR_HFSPLUS \
4235+ BDEV_LOOP \
b752ccd1
AM
4236+ DEBUG MAGIC_SYSRQ
4237+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
4238+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
4239+
4240+AuConfName = ${obj}/conf.str
4241+${AuConfName}.tmp: FORCE
4242+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
4243+${AuConfName}: ${AuConfName}.tmp
4244+ @diff -q $< $@ > /dev/null 2>&1 || { \
4245+ echo ' GEN ' $@; \
4246+ cp -p $< $@; \
4247+ }
4248+FORCE:
4249+clean-files += ${AuConfName} ${AuConfName}.tmp
4250+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
4251+
4252+-include ${srctree}/${src}/conf_priv.mk
e8791d4f
AM
4253diff -urNp -x '*.orig' linux-4.9/fs/aufs/cpup.c linux-4.9/fs/aufs/cpup.c
4254--- linux-4.9/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
4255+++ linux-4.9/fs/aufs/cpup.c 2021-02-24 16:15:09.521573529 +0100
ae9dfd79 4256@@ -0,0 +1,1414 @@
1facf9fc 4257+/*
ae9dfd79 4258+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 4259+ *
4260+ * This program, aufs is free software; you can redistribute it and/or modify
4261+ * it under the terms of the GNU General Public License as published by
4262+ * the Free Software Foundation; either version 2 of the License, or
4263+ * (at your option) any later version.
dece6358
AM
4264+ *
4265+ * This program is distributed in the hope that it will be useful,
4266+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4267+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4268+ * GNU General Public License for more details.
4269+ *
4270+ * You should have received a copy of the GNU General Public License
523b37e3 4271+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4272+ */
4273+
4274+/*
4275+ * copy-up functions, see wbr_policy.c for copy-down
4276+ */
4277+
4278+#include <linux/fs_stack.h>
dece6358 4279+#include <linux/mm.h>
8cdd5066 4280+#include <linux/task_work.h>
1facf9fc 4281+#include "aufs.h"
4282+
86dc4139 4283+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
1facf9fc 4284+{
4285+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
367653fa 4286+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
1facf9fc 4287+
86dc4139
AM
4288+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
4289+
4290+ dst->i_flags |= iflags & ~mask;
1facf9fc 4291+ if (au_test_fs_notime(dst->i_sb))
4292+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
4293+}
4294+
4295+void au_cpup_attr_timesizes(struct inode *inode)
4296+{
4297+ struct inode *h_inode;
4298+
5afbbe0d 4299+ h_inode = au_h_iptr(inode, au_ibtop(inode));
1facf9fc 4300+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 4301+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 4302+}
4303+
4304+void au_cpup_attr_nlink(struct inode *inode, int force)
4305+{
4306+ struct inode *h_inode;
4307+ struct super_block *sb;
5afbbe0d 4308+ aufs_bindex_t bindex, bbot;
1facf9fc 4309+
4310+ sb = inode->i_sb;
5afbbe0d 4311+ bindex = au_ibtop(inode);
1facf9fc 4312+ h_inode = au_h_iptr(inode, bindex);
4313+ if (!force
4314+ && !S_ISDIR(h_inode->i_mode)
4315+ && au_opt_test(au_mntflags(sb), PLINK)
4316+ && au_plink_test(inode))
4317+ return;
4318+
7eafdf33
AM
4319+ /*
4320+ * 0 can happen in revalidating.
38d290e6
JR
4321+ * h_inode->i_mutex may not be held here, but it is harmless since once
4322+ * i_nlink reaches 0, it will never become positive except O_TMPFILE
4323+ * case.
4324+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
4325+ * the incorrect link count.
7eafdf33 4326+ */
92d182d2 4327+ set_nlink(inode, h_inode->i_nlink);
1facf9fc 4328+
4329+ /*
4330+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
4331+ * it may includes whplink directory.
4332+ */
4333+ if (S_ISDIR(h_inode->i_mode)) {
5afbbe0d
AM
4334+ bbot = au_ibbot(inode);
4335+ for (bindex++; bindex <= bbot; bindex++) {
1facf9fc 4336+ h_inode = au_h_iptr(inode, bindex);
4337+ if (h_inode)
4338+ au_add_nlink(inode, h_inode);
4339+ }
4340+ }
4341+}
4342+
4343+void au_cpup_attr_changeable(struct inode *inode)
4344+{
4345+ struct inode *h_inode;
4346+
5afbbe0d 4347+ h_inode = au_h_iptr(inode, au_ibtop(inode));
1facf9fc 4348+ inode->i_mode = h_inode->i_mode;
4349+ inode->i_uid = h_inode->i_uid;
4350+ inode->i_gid = h_inode->i_gid;
4351+ au_cpup_attr_timesizes(inode);
86dc4139 4352+ au_cpup_attr_flags(inode, h_inode->i_flags);
1facf9fc 4353+}
4354+
4355+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
4356+{
4357+ struct au_iinfo *iinfo = au_ii(inode);
4358+
1308ab2a 4359+ IiMustWriteLock(inode);
4360+
1facf9fc 4361+ iinfo->ii_higen = h_inode->i_generation;
4362+ iinfo->ii_hsb1 = h_inode->i_sb;
4363+}
4364+
4365+void au_cpup_attr_all(struct inode *inode, int force)
4366+{
4367+ struct inode *h_inode;
4368+
5afbbe0d 4369+ h_inode = au_h_iptr(inode, au_ibtop(inode));
1facf9fc 4370+ au_cpup_attr_changeable(inode);
4371+ if (inode->i_nlink > 0)
4372+ au_cpup_attr_nlink(inode, force);
4373+ inode->i_rdev = h_inode->i_rdev;
4374+ inode->i_blkbits = h_inode->i_blkbits;
4375+ au_cpup_igen(inode, h_inode);
4376+}
4377+
4378+/* ---------------------------------------------------------------------- */
4379+
4380+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
4381+
4382+/* keep the timestamps of the parent dir when cpup */
4383+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4384+ struct path *h_path)
4385+{
4386+ struct inode *h_inode;
4387+
4388+ dt->dt_dentry = dentry;
4389+ dt->dt_h_path = *h_path;
5527c038 4390+ h_inode = d_inode(h_path->dentry);
1facf9fc 4391+ dt->dt_atime = h_inode->i_atime;
4392+ dt->dt_mtime = h_inode->i_mtime;
4393+ /* smp_mb(); */
4394+}
4395+
4396+void au_dtime_revert(struct au_dtime *dt)
4397+{
4398+ struct iattr attr;
4399+ int err;
4400+
4401+ attr.ia_atime = dt->dt_atime;
4402+ attr.ia_mtime = dt->dt_mtime;
4403+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
4404+ | ATTR_ATIME | ATTR_ATIME_SET;
4405+
523b37e3
AM
4406+ /* no delegation since this is a directory */
4407+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
1facf9fc 4408+ if (unlikely(err))
0c3ec466 4409+ pr_warn("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 4410+}
4411+
4412+/* ---------------------------------------------------------------------- */
4413+
86dc4139
AM
4414+/* internal use only */
4415+struct au_cpup_reg_attr {
4416+ int valid;
4417+ struct kstat st;
4418+ unsigned int iflags; /* inode->i_flags */
4419+};
4420+
1facf9fc 4421+static noinline_for_stack
86dc4139
AM
4422+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
4423+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4424+{
c1595e42 4425+ int err, sbits, icex;
7e9cd9fe
AM
4426+ unsigned int mnt_flags;
4427+ unsigned char verbose;
1facf9fc 4428+ struct iattr ia;
4429+ struct path h_path;
1308ab2a 4430+ struct inode *h_isrc, *h_idst;
86dc4139 4431+ struct kstat *h_st;
c1595e42 4432+ struct au_branch *br;
1facf9fc 4433+
4434+ h_path.dentry = au_h_dptr(dst, bindex);
5527c038 4435+ h_idst = d_inode(h_path.dentry);
c1595e42
JR
4436+ br = au_sbr(dst->d_sb, bindex);
4437+ h_path.mnt = au_br_mnt(br);
5527c038 4438+ h_isrc = d_inode(h_src);
1308ab2a 4439+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 4440+ | ATTR_ATIME | ATTR_MTIME
4441+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
86dc4139
AM
4442+ if (h_src_attr && h_src_attr->valid) {
4443+ h_st = &h_src_attr->st;
4444+ ia.ia_uid = h_st->uid;
4445+ ia.ia_gid = h_st->gid;
4446+ ia.ia_atime = h_st->atime;
4447+ ia.ia_mtime = h_st->mtime;
4448+ if (h_idst->i_mode != h_st->mode
4449+ && !S_ISLNK(h_idst->i_mode)) {
4450+ ia.ia_valid |= ATTR_MODE;
4451+ ia.ia_mode = h_st->mode;
4452+ }
4453+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
4454+ au_cpup_attr_flags(h_idst, h_src_attr->iflags);
4455+ } else {
4456+ ia.ia_uid = h_isrc->i_uid;
4457+ ia.ia_gid = h_isrc->i_gid;
4458+ ia.ia_atime = h_isrc->i_atime;
4459+ ia.ia_mtime = h_isrc->i_mtime;
4460+ if (h_idst->i_mode != h_isrc->i_mode
4461+ && !S_ISLNK(h_idst->i_mode)) {
4462+ ia.ia_valid |= ATTR_MODE;
4463+ ia.ia_mode = h_isrc->i_mode;
4464+ }
4465+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
4466+ au_cpup_attr_flags(h_idst, h_isrc->i_flags);
1308ab2a 4467+ }
523b37e3
AM
4468+ /* no delegation since it is just created */
4469+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4470+
4471+ /* is this nfs only? */
4472+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
4473+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
4474+ ia.ia_mode = h_isrc->i_mode;
523b37e3 4475+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4476+ }
4477+
c1595e42 4478+ icex = br->br_perm & AuBrAttr_ICEX;
7e9cd9fe
AM
4479+ if (!err) {
4480+ mnt_flags = au_mntflags(dst->d_sb);
4481+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4482+ err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
4483+ }
c1595e42 4484+
1facf9fc 4485+ return err;
4486+}
4487+
4488+/* ---------------------------------------------------------------------- */
4489+
4490+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
4491+ char *buf, unsigned long blksize)
4492+{
4493+ int err;
4494+ size_t sz, rbytes, wbytes;
4495+ unsigned char all_zero;
4496+ char *p, *zp;
febd17d6 4497+ struct inode *h_inode;
1facf9fc 4498+ /* reduce stack usage */
4499+ struct iattr *ia;
4500+
4501+ zp = page_address(ZERO_PAGE(0));
4502+ if (unlikely(!zp))
4503+ return -ENOMEM; /* possible? */
4504+
4505+ err = 0;
4506+ all_zero = 0;
4507+ while (len) {
4508+ AuDbg("len %lld\n", len);
4509+ sz = blksize;
4510+ if (len < blksize)
4511+ sz = len;
4512+
4513+ rbytes = 0;
4514+ /* todo: signal_pending? */
4515+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
4516+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
4517+ err = rbytes;
4518+ }
4519+ if (unlikely(err < 0))
4520+ break;
4521+
4522+ all_zero = 0;
4523+ if (len >= rbytes && rbytes == blksize)
4524+ all_zero = !memcmp(buf, zp, rbytes);
4525+ if (!all_zero) {
4526+ wbytes = rbytes;
4527+ p = buf;
4528+ while (wbytes) {
4529+ size_t b;
4530+
4531+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
4532+ err = b;
4533+ /* todo: signal_pending? */
4534+ if (unlikely(err == -EAGAIN || err == -EINTR))
4535+ continue;
4536+ if (unlikely(err < 0))
4537+ break;
4538+ wbytes -= b;
4539+ p += b;
4540+ }
392086de
AM
4541+ if (unlikely(err < 0))
4542+ break;
1facf9fc 4543+ } else {
4544+ loff_t res;
4545+
4546+ AuLabel(hole);
4547+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
4548+ err = res;
4549+ if (unlikely(res < 0))
4550+ break;
4551+ }
4552+ len -= rbytes;
4553+ err = 0;
4554+ }
4555+
4556+ /* the last block may be a hole */
4557+ if (!err && all_zero) {
4558+ AuLabel(last hole);
4559+
4560+ err = 1;
2000de60 4561+ if (au_test_nfs(dst->f_path.dentry->d_sb)) {
1facf9fc 4562+ /* nfs requires this step to make last hole */
4563+ /* is this only nfs? */
4564+ do {
4565+ /* todo: signal_pending? */
4566+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
4567+ } while (err == -EAGAIN || err == -EINTR);
4568+ if (err == 1)
4569+ dst->f_pos--;
4570+ }
4571+
4572+ if (err == 1) {
4573+ ia = (void *)buf;
4574+ ia->ia_size = dst->f_pos;
4575+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
4576+ ia->ia_file = dst;
febd17d6
JR
4577+ h_inode = file_inode(dst);
4578+ inode_lock_nested(h_inode, AuLsc_I_CHILD2);
523b37e3
AM
4579+ /* no delegation since it is just created */
4580+ err = vfsub_notify_change(&dst->f_path, ia,
4581+ /*delegated*/NULL);
febd17d6 4582+ inode_unlock(h_inode);
1facf9fc 4583+ }
4584+ }
4585+
4586+ return err;
4587+}
4588+
4589+int au_copy_file(struct file *dst, struct file *src, loff_t len)
4590+{
4591+ int err;
4592+ unsigned long blksize;
4593+ unsigned char do_kfree;
4594+ char *buf;
4595+
4596+ err = -ENOMEM;
2000de60 4597+ blksize = dst->f_path.dentry->d_sb->s_blocksize;
1facf9fc 4598+ if (!blksize || PAGE_SIZE < blksize)
4599+ blksize = PAGE_SIZE;
4600+ AuDbg("blksize %lu\n", blksize);
4601+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
4602+ if (do_kfree)
4603+ buf = kmalloc(blksize, GFP_NOFS);
4604+ else
4605+ buf = (void *)__get_free_page(GFP_NOFS);
4606+ if (unlikely(!buf))
4607+ goto out;
4608+
4609+ if (len > (1 << 22))
4610+ AuDbg("copying a large file %lld\n", (long long)len);
4611+
4612+ src->f_pos = 0;
4613+ dst->f_pos = 0;
4614+ err = au_do_copy_file(dst, src, len, buf, blksize);
4615+ if (do_kfree)
ae9dfd79 4616+ kfree(buf);
1facf9fc 4617+ else
ae9dfd79 4618+ free_page((unsigned long)buf);
1facf9fc 4619+
4f0767ce 4620+out:
1facf9fc 4621+ return err;
4622+}
4623+
4624+/*
4625+ * to support a sparse file which is opened with O_APPEND,
4626+ * we need to close the file.
4627+ */
c2b27bf2 4628+static int au_cp_regular(struct au_cp_generic *cpg)
1facf9fc 4629+{
4630+ int err, i;
4631+ enum { SRC, DST };
4632+ struct {
4633+ aufs_bindex_t bindex;
4634+ unsigned int flags;
4635+ struct dentry *dentry;
392086de 4636+ int force_wr;
1facf9fc 4637+ struct file *file;
523b37e3 4638+ void *label;
1facf9fc 4639+ } *f, file[] = {
4640+ {
c2b27bf2 4641+ .bindex = cpg->bsrc,
1facf9fc 4642+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
523b37e3 4643+ .label = &&out
1facf9fc 4644+ },
4645+ {
c2b27bf2 4646+ .bindex = cpg->bdst,
1facf9fc 4647+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
392086de 4648+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
523b37e3 4649+ .label = &&out_src
1facf9fc 4650+ }
4651+ };
ae9dfd79 4652+ struct super_block *sb, *h_src_sb;
e2f27e51 4653+ struct inode *h_src_inode;
8cdd5066 4654+ struct task_struct *tsk = current;
1facf9fc 4655+
4656+ /* bsrc branch can be ro/rw. */
c2b27bf2 4657+ sb = cpg->dentry->d_sb;
1facf9fc 4658+ f = file;
4659+ for (i = 0; i < 2; i++, f++) {
c2b27bf2
AM
4660+ f->dentry = au_h_dptr(cpg->dentry, f->bindex);
4661+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
392086de 4662+ /*file*/NULL, f->force_wr);
1facf9fc 4663+ err = PTR_ERR(f->file);
4664+ if (IS_ERR(f->file))
4665+ goto *f->label;
1facf9fc 4666+ }
4667+
4668+ /* try to stop updates while we copy up */
e2f27e51 4669+ h_src_inode = d_inode(file[SRC].dentry);
ae9dfd79
AM
4670+ h_src_sb = h_src_inode->i_sb;
4671+ if (!au_test_nfs(h_src_sb))
e2f27e51 4672+ IMustLock(h_src_inode);
ae9dfd79
AM
4673+
4674+ if (h_src_sb != file_inode(file[DST].file)->i_sb
4675+ || !file[DST].file->f_op->clone_file_range)
4676+ err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
4677+ else {
4678+ if (!au_test_nfs(h_src_sb)) {
4679+ inode_unlock_shared(h_src_inode);
4680+ err = vfsub_clone_file_range(file[SRC].file,
4681+ file[DST].file, cpg->len);
4682+ vfsub_inode_lock_shared_nested(h_src_inode,
4683+ AuLsc_I_CHILD);
4684+ } else
4685+ err = vfsub_clone_file_range(file[SRC].file,
4686+ file[DST].file, cpg->len);
4687+ if (unlikely(err == -EOPNOTSUPP && au_test_nfs(h_src_sb)))
4688+ /* the backend fs on NFS may not support cloning */
4689+ err = au_copy_file(file[DST].file, file[SRC].file,
4690+ cpg->len);
4691+ AuTraceErr(err);
4692+ }
1facf9fc 4693+
8cdd5066
JR
4694+ /* i wonder if we had O_NO_DELAY_FPUT flag */
4695+ if (tsk->flags & PF_KTHREAD)
4696+ __fput_sync(file[DST].file);
4697+ else {
ae9dfd79 4698+ /* it happend actually */
8cdd5066
JR
4699+ fput(file[DST].file);
4700+ /*
4701+ * too bad.
4702+ * we have to call both since we don't know which place the file
4703+ * was added to.
4704+ */
4705+ task_work_run();
4706+ flush_delayed_fput();
4707+ }
1facf9fc 4708+ au_sbr_put(sb, file[DST].bindex);
523b37e3 4709+
4f0767ce 4710+out_src:
1facf9fc 4711+ fput(file[SRC].file);
4712+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 4713+out:
1facf9fc 4714+ return err;
4715+}
4716+
c2b27bf2 4717+static int au_do_cpup_regular(struct au_cp_generic *cpg,
86dc4139 4718+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4719+{
4720+ int err, rerr;
4721+ loff_t l;
86dc4139 4722+ struct path h_path;
38d290e6 4723+ struct inode *h_src_inode, *h_dst_inode;
1facf9fc 4724+
4725+ err = 0;
5527c038 4726+ h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
86dc4139 4727+ l = i_size_read(h_src_inode);
c2b27bf2
AM
4728+ if (cpg->len == -1 || l < cpg->len)
4729+ cpg->len = l;
4730+ if (cpg->len) {
86dc4139 4731+ /* try to stop updates while we are referencing */
ae9dfd79 4732+ vfsub_inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
c2b27bf2 4733+ au_pin_hdir_unlock(cpg->pin);
1facf9fc 4734+
c2b27bf2
AM
4735+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
4736+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
86dc4139 4737+ h_src_attr->iflags = h_src_inode->i_flags;
5527c038
JR
4738+ if (!au_test_nfs(h_src_inode->i_sb))
4739+ err = vfs_getattr(&h_path, &h_src_attr->st);
4740+ else {
ae9dfd79 4741+ inode_unlock_shared(h_src_inode);
5527c038 4742+ err = vfs_getattr(&h_path, &h_src_attr->st);
ae9dfd79
AM
4743+ vfsub_inode_lock_shared_nested(h_src_inode,
4744+ AuLsc_I_CHILD);
5527c038 4745+ }
86dc4139 4746+ if (unlikely(err)) {
ae9dfd79 4747+ inode_unlock_shared(h_src_inode);
86dc4139
AM
4748+ goto out;
4749+ }
4750+ h_src_attr->valid = 1;
e2f27e51
AM
4751+ if (!au_test_nfs(h_src_inode->i_sb)) {
4752+ err = au_cp_regular(cpg);
ae9dfd79 4753+ inode_unlock_shared(h_src_inode);
e2f27e51 4754+ } else {
ae9dfd79 4755+ inode_unlock_shared(h_src_inode);
e2f27e51
AM
4756+ err = au_cp_regular(cpg);
4757+ }
c2b27bf2 4758+ rerr = au_pin_hdir_relock(cpg->pin);
86dc4139
AM
4759+ if (!err && rerr)
4760+ err = rerr;
1facf9fc 4761+ }
38d290e6
JR
4762+ if (!err && (h_src_inode->i_state & I_LINKABLE)) {
4763+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
5527c038 4764+ h_dst_inode = d_inode(h_path.dentry);
38d290e6
JR
4765+ spin_lock(&h_dst_inode->i_lock);
4766+ h_dst_inode->i_state |= I_LINKABLE;
4767+ spin_unlock(&h_dst_inode->i_lock);
4768+ }
1facf9fc 4769+
4f0767ce 4770+out:
1facf9fc 4771+ return err;
4772+}
4773+
4774+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
4775+ struct inode *h_dir)
4776+{
4777+ int err, symlen;
4778+ mm_segment_t old_fs;
b752ccd1
AM
4779+ union {
4780+ char *k;
4781+ char __user *u;
4782+ } sym;
5527c038
JR
4783+ struct inode *h_inode = d_inode(h_src);
4784+ const struct inode_operations *h_iop = h_inode->i_op;
1facf9fc 4785+
4786+ err = -ENOSYS;
5527c038 4787+ if (unlikely(!h_iop->readlink))
1facf9fc 4788+ goto out;
4789+
4790+ err = -ENOMEM;
537831f9 4791+ sym.k = (void *)__get_free_page(GFP_NOFS);
b752ccd1 4792+ if (unlikely(!sym.k))
1facf9fc 4793+ goto out;
4794+
9dbd164d 4795+ /* unnecessary to support mmap_sem since symlink is not mmap-able */
1facf9fc 4796+ old_fs = get_fs();
4797+ set_fs(KERNEL_DS);
5527c038 4798+ symlen = h_iop->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 4799+ err = symlen;
4800+ set_fs(old_fs);
4801+
4802+ if (symlen > 0) {
b752ccd1
AM
4803+ sym.k[symlen] = 0;
4804+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 4805+ }
ae9dfd79 4806+ free_page((unsigned long)sym.k);
1facf9fc 4807+
4f0767ce 4808+out:
1facf9fc 4809+ return err;
4810+}
4811+
8cdd5066
JR
4812+/*
4813+ * regardless of the 'acl' option, reset all ACLs.
4814+ * All ACLs will be copied up later from the original entry on the lower branch.
4815+ */
4816+static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode)
4817+{
4818+ int err;
4819+ struct dentry *h_dentry;
4820+ struct inode *h_inode;
4821+
4822+ h_dentry = h_path->dentry;
4823+ h_inode = d_inode(h_dentry);
4824+ /* forget_all_cached_acls(h_inode)); */
4825+ err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
4826+ AuTraceErr(err);
4827+ if (err == -EOPNOTSUPP)
4828+ err = 0;
4829+ if (!err)
4830+ err = vfsub_acl_chmod(h_inode, mode);
4831+
4832+ AuTraceErr(err);
4833+ return err;
4834+}
4835+
4836+static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
4837+ struct inode *h_dir, struct path *h_path)
4838+{
4839+ int err;
4840+ struct inode *dir, *inode;
4841+
4842+ err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT);
4843+ AuTraceErr(err);
4844+ if (err == -EOPNOTSUPP)
4845+ err = 0;
4846+ if (unlikely(err))
4847+ goto out;
4848+
4849+ /*
4850+ * strange behaviour from the user's view,
4851+ * particularly in the setattr case
4852+ */
4853+ dir = d_inode(dst_parent);
5afbbe0d 4854+ if (au_ibtop(dir) == cpg->bdst)
8cdd5066
JR
4855+ au_cpup_attr_nlink(dir, /*force*/1);
4856+ inode = d_inode(cpg->dentry);
4857+ au_cpup_attr_nlink(inode, /*force*/1);
4858+
4859+out:
4860+ return err;
4861+}
4862+
1facf9fc 4863+static noinline_for_stack
c2b27bf2 4864+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
86dc4139 4865+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4866+{
4867+ int err;
4868+ umode_t mode;
4869+ unsigned int mnt_flags;
076b876e 4870+ unsigned char isdir, isreg, force;
c2b27bf2 4871+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 4872+ struct au_dtime dt;
4873+ struct path h_path;
4874+ struct dentry *h_src, *h_dst, *h_parent;
8cdd5066 4875+ struct inode *h_inode, *h_dir;
1facf9fc 4876+ struct super_block *sb;
4877+
4878+ /* bsrc branch can be ro/rw. */
c2b27bf2 4879+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038
JR
4880+ h_inode = d_inode(h_src);
4881+ AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
1facf9fc 4882+
4883+ /* try to stop it from being referenced while we are creating it */
c2b27bf2
AM
4884+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
4885+ if (au_ftest_cpup(cpg->flags, RENAME))
86dc4139
AM
4886+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
4887+ AUFS_WH_PFX_LEN));
1facf9fc 4888+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 4889+ h_dir = d_inode(h_parent);
1facf9fc 4890+ IMustLock(h_dir);
4891+ AuDebugOn(h_parent != h_dst->d_parent);
4892+
c2b27bf2
AM
4893+ sb = cpg->dentry->d_sb;
4894+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
1facf9fc 4895+ if (do_dt) {
4896+ h_path.dentry = h_parent;
4897+ au_dtime_store(&dt, dst_parent, &h_path);
4898+ }
4899+ h_path.dentry = h_dst;
4900+
076b876e 4901+ isreg = 0;
1facf9fc 4902+ isdir = 0;
4903+ mode = h_inode->i_mode;
4904+ switch (mode & S_IFMT) {
4905+ case S_IFREG:
076b876e 4906+ isreg = 1;
8cdd5066 4907+ err = vfsub_create(h_dir, &h_path, S_IRUSR | S_IWUSR,
b4510431 4908+ /*want_excl*/true);
1facf9fc 4909+ if (!err)
c2b27bf2 4910+ err = au_do_cpup_regular(cpg, h_src_attr);
1facf9fc 4911+ break;
4912+ case S_IFDIR:
4913+ isdir = 1;
4914+ err = vfsub_mkdir(h_dir, &h_path, mode);
8cdd5066
JR
4915+ if (!err)
4916+ err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
1facf9fc 4917+ break;
4918+ case S_IFLNK:
4919+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
4920+ break;
4921+ case S_IFCHR:
4922+ case S_IFBLK:
4923+ AuDebugOn(!capable(CAP_MKNOD));
4924+ /*FALLTHROUGH*/
4925+ case S_IFIFO:
4926+ case S_IFSOCK:
4927+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
4928+ break;
4929+ default:
4930+ AuIOErr("Unknown inode type 0%o\n", mode);
4931+ err = -EIO;
4932+ }
8cdd5066
JR
4933+ if (!err)
4934+ err = au_reset_acl(h_dir, &h_path, mode);
1facf9fc 4935+
4936+ mnt_flags = au_mntflags(sb);
4937+ if (!au_opt_test(mnt_flags, UDBA_NONE)
4938+ && !isdir
4939+ && au_opt_test(mnt_flags, XINO)
38d290e6
JR
4940+ && (h_inode->i_nlink == 1
4941+ || (h_inode->i_state & I_LINKABLE))
1facf9fc 4942+ /* todo: unnecessary? */
5527c038 4943+ /* && d_inode(cpg->dentry)->i_nlink == 1 */
c2b27bf2
AM
4944+ && cpg->bdst < cpg->bsrc
4945+ && !au_ftest_cpup(cpg->flags, KEEPLINO))
4946+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 4947+ /* ignore this error */
4948+
076b876e
AM
4949+ if (!err) {
4950+ force = 0;
4951+ if (isreg) {
4952+ force = !!cpg->len;
4953+ if (cpg->len == -1)
4954+ force = !!i_size_read(h_inode);
4955+ }
4956+ au_fhsm_wrote(sb, cpg->bdst, force);
4957+ }
4958+
1facf9fc 4959+ if (do_dt)
4960+ au_dtime_revert(&dt);
4961+ return err;
4962+}
4963+
392086de 4964+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
86dc4139
AM
4965+{
4966+ int err;
392086de 4967+ struct dentry *dentry, *h_dentry, *h_parent, *parent;
86dc4139 4968+ struct inode *h_dir;
392086de 4969+ aufs_bindex_t bdst;
86dc4139 4970+
392086de
AM
4971+ dentry = cpg->dentry;
4972+ bdst = cpg->bdst;
4973+ h_dentry = au_h_dptr(dentry, bdst);
4974+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
4975+ dget(h_dentry);
4976+ au_set_h_dptr(dentry, bdst, NULL);
4977+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
4978+ if (!err)
4979+ h_path->dentry = dget(au_h_dptr(dentry, bdst));
86dc4139 4980+ au_set_h_dptr(dentry, bdst, h_dentry);
392086de
AM
4981+ } else {
4982+ err = 0;
4983+ parent = dget_parent(dentry);
4984+ h_parent = au_h_dptr(parent, bdst);
4985+ dput(parent);
4986+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
4987+ if (IS_ERR(h_path->dentry))
4988+ err = PTR_ERR(h_path->dentry);
86dc4139 4989+ }
392086de
AM
4990+ if (unlikely(err))
4991+ goto out;
86dc4139 4992+
86dc4139 4993+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 4994+ h_dir = d_inode(h_parent);
86dc4139 4995+ IMustLock(h_dir);
523b37e3
AM
4996+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
4997+ /* no delegation since it is just created */
f2c43d5f
AM
4998+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL,
4999+ /*flags*/0);
86dc4139
AM
5000+ dput(h_path->dentry);
5001+
5002+out:
5003+ return err;
5004+}
5005+
1facf9fc 5006+/*
5007+ * copy up the @dentry from @bsrc to @bdst.
5008+ * the caller must set both of the lower dentries.
5009+ * @len is for truncating; when it is -1, copy up the entire file.
5010+ * in link/rename cases, @dst_parent may be different from the real one.
c2b27bf2 5011+ * cpg->bsrc can be larger than cpg->bdst.
f2c43d5f
AM
5012+ * aufs doesn't touch the credential so
5013+ * security_inode_copy_up{,_xattr}() are unnecessary.
1facf9fc 5014+ */
c2b27bf2 5015+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5016+{
5017+ int err, rerr;
5afbbe0d 5018+ aufs_bindex_t old_ibtop;
1facf9fc 5019+ unsigned char isdir, plink;
1facf9fc 5020+ struct dentry *h_src, *h_dst, *h_parent;
5527c038 5021+ struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
1facf9fc 5022+ struct super_block *sb;
86dc4139 5023+ struct au_branch *br;
c2b27bf2
AM
5024+ /* to reduce stack size */
5025+ struct {
5026+ struct au_dtime dt;
5027+ struct path h_path;
5028+ struct au_cpup_reg_attr h_src_attr;
5029+ } *a;
1facf9fc 5030+
c2b27bf2
AM
5031+ err = -ENOMEM;
5032+ a = kmalloc(sizeof(*a), GFP_NOFS);
5033+ if (unlikely(!a))
5034+ goto out;
5035+ a->h_src_attr.valid = 0;
1facf9fc 5036+
c2b27bf2
AM
5037+ sb = cpg->dentry->d_sb;
5038+ br = au_sbr(sb, cpg->bdst);
5039+ a->h_path.mnt = au_br_mnt(br);
5040+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
1facf9fc 5041+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5042+ h_dir = d_inode(h_parent);
1facf9fc 5043+ IMustLock(h_dir);
5044+
c2b27bf2 5045+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5046+ inode = d_inode(cpg->dentry);
1facf9fc 5047+
5048+ if (!dst_parent)
c2b27bf2 5049+ dst_parent = dget_parent(cpg->dentry);
1facf9fc 5050+ else
5051+ dget(dst_parent);
5052+
5053+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
c2b27bf2 5054+ dst_inode = au_h_iptr(inode, cpg->bdst);
1facf9fc 5055+ if (dst_inode) {
5056+ if (unlikely(!plink)) {
5057+ err = -EIO;
027c5e7a
AM
5058+ AuIOErr("hi%lu(i%lu) exists on b%d "
5059+ "but plink is disabled\n",
c2b27bf2
AM
5060+ dst_inode->i_ino, inode->i_ino, cpg->bdst);
5061+ goto out_parent;
1facf9fc 5062+ }
5063+
5064+ if (dst_inode->i_nlink) {
c2b27bf2 5065+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5066+
c2b27bf2 5067+ h_src = au_plink_lkup(inode, cpg->bdst);
1facf9fc 5068+ err = PTR_ERR(h_src);
5069+ if (IS_ERR(h_src))
c2b27bf2 5070+ goto out_parent;
5527c038 5071+ if (unlikely(d_is_negative(h_src))) {
1facf9fc 5072+ err = -EIO;
79b8bda9 5073+ AuIOErr("i%lu exists on b%d "
027c5e7a 5074+ "but not pseudo-linked\n",
79b8bda9 5075+ inode->i_ino, cpg->bdst);
1facf9fc 5076+ dput(h_src);
c2b27bf2 5077+ goto out_parent;
1facf9fc 5078+ }
5079+
5080+ if (do_dt) {
c2b27bf2
AM
5081+ a->h_path.dentry = h_parent;
5082+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
1facf9fc 5083+ }
86dc4139 5084+
c2b27bf2 5085+ a->h_path.dentry = h_dst;
523b37e3
AM
5086+ delegated = NULL;
5087+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
c2b27bf2 5088+ if (!err && au_ftest_cpup(cpg->flags, RENAME))
392086de 5089+ err = au_do_ren_after_cpup(cpg, &a->h_path);
1facf9fc 5090+ if (do_dt)
c2b27bf2 5091+ au_dtime_revert(&a->dt);
523b37e3
AM
5092+ if (unlikely(err == -EWOULDBLOCK)) {
5093+ pr_warn("cannot retry for NFSv4 delegation"
5094+ " for an internal link\n");
5095+ iput(delegated);
5096+ }
1facf9fc 5097+ dput(h_src);
c2b27bf2 5098+ goto out_parent;
1facf9fc 5099+ } else
5100+ /* todo: cpup_wh_file? */
5101+ /* udba work */
4a4d8108 5102+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 5103+ }
5104+
86dc4139 5105+ isdir = S_ISDIR(inode->i_mode);
5afbbe0d 5106+ old_ibtop = au_ibtop(inode);
c2b27bf2 5107+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
1facf9fc 5108+ if (unlikely(err))
86dc4139 5109+ goto out_rev;
5527c038 5110+ dst_inode = d_inode(h_dst);
febd17d6 5111+ inode_lock_nested(dst_inode, AuLsc_I_CHILD2);
86dc4139 5112+ /* todo: necessary? */
c2b27bf2 5113+ /* au_pin_hdir_unlock(cpg->pin); */
1facf9fc 5114+
c2b27bf2 5115+ err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
86dc4139
AM
5116+ if (unlikely(err)) {
5117+ /* todo: necessary? */
c2b27bf2 5118+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
febd17d6 5119+ inode_unlock(dst_inode);
86dc4139
AM
5120+ goto out_rev;
5121+ }
5122+
5afbbe0d 5123+ if (cpg->bdst < old_ibtop) {
86dc4139 5124+ if (S_ISREG(inode->i_mode)) {
c2b27bf2 5125+ err = au_dy_iaop(inode, cpg->bdst, dst_inode);
86dc4139 5126+ if (unlikely(err)) {
c2b27bf2
AM
5127+ /* ignore an error */
5128+ /* au_pin_hdir_relock(cpg->pin); */
febd17d6 5129+ inode_unlock(dst_inode);
86dc4139 5130+ goto out_rev;
4a4d8108 5131+ }
4a4d8108 5132+ }
5afbbe0d 5133+ au_set_ibtop(inode, cpg->bdst);
c2b27bf2 5134+ } else
5afbbe0d 5135+ au_set_ibbot(inode, cpg->bdst);
c2b27bf2 5136+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
86dc4139
AM
5137+ au_hi_flags(inode, isdir));
5138+
5139+ /* todo: necessary? */
c2b27bf2 5140+ /* err = au_pin_hdir_relock(cpg->pin); */
febd17d6 5141+ inode_unlock(dst_inode);
86dc4139
AM
5142+ if (unlikely(err))
5143+ goto out_rev;
5144+
5527c038 5145+ src_inode = d_inode(h_src);
86dc4139 5146+ if (!isdir
5527c038
JR
5147+ && (src_inode->i_nlink > 1
5148+ || src_inode->i_state & I_LINKABLE)
86dc4139 5149+ && plink)
c2b27bf2 5150+ au_plink_append(inode, cpg->bdst, h_dst);
86dc4139 5151+
c2b27bf2
AM
5152+ if (au_ftest_cpup(cpg->flags, RENAME)) {
5153+ a->h_path.dentry = h_dst;
392086de 5154+ err = au_do_ren_after_cpup(cpg, &a->h_path);
86dc4139
AM
5155+ }
5156+ if (!err)
c2b27bf2 5157+ goto out_parent; /* success */
1facf9fc 5158+
5159+ /* revert */
4a4d8108 5160+out_rev:
c2b27bf2
AM
5161+ a->h_path.dentry = h_parent;
5162+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5163+ a->h_path.dentry = h_dst;
86dc4139 5164+ rerr = 0;
5527c038 5165+ if (d_is_positive(h_dst)) {
523b37e3
AM
5166+ if (!isdir) {
5167+ /* no delegation since it is just created */
5168+ rerr = vfsub_unlink(h_dir, &a->h_path,
5169+ /*delegated*/NULL, /*force*/0);
5170+ } else
c2b27bf2 5171+ rerr = vfsub_rmdir(h_dir, &a->h_path);
86dc4139 5172+ }
c2b27bf2 5173+ au_dtime_revert(&a->dt);
1facf9fc 5174+ if (rerr) {
5175+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
5176+ err = -EIO;
5177+ }
c2b27bf2 5178+out_parent:
1facf9fc 5179+ dput(dst_parent);
ae9dfd79 5180+ kfree(a);
c2b27bf2 5181+out:
1facf9fc 5182+ return err;
5183+}
5184+
7e9cd9fe 5185+#if 0 /* reserved */
1facf9fc 5186+struct au_cpup_single_args {
5187+ int *errp;
c2b27bf2 5188+ struct au_cp_generic *cpg;
1facf9fc 5189+ struct dentry *dst_parent;
5190+};
5191+
5192+static void au_call_cpup_single(void *args)
5193+{
5194+ struct au_cpup_single_args *a = args;
86dc4139 5195+
c2b27bf2
AM
5196+ au_pin_hdir_acquire_nest(a->cpg->pin);
5197+ *a->errp = au_cpup_single(a->cpg, a->dst_parent);
5198+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5199+}
c2b27bf2 5200+#endif
1facf9fc 5201+
53392da6
AM
5202+/*
5203+ * prevent SIGXFSZ in copy-up.
5204+ * testing CAP_MKNOD is for generic fs,
5205+ * but CAP_FSETID is for xfs only, currently.
5206+ */
86dc4139 5207+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
53392da6
AM
5208+{
5209+ int do_sio;
86dc4139
AM
5210+ struct super_block *sb;
5211+ struct inode *h_dir;
53392da6
AM
5212+
5213+ do_sio = 0;
86dc4139 5214+ sb = au_pinned_parent(pin)->d_sb;
53392da6
AM
5215+ if (!au_wkq_test()
5216+ && (!au_sbi(sb)->si_plink_maint_pid
5217+ || au_plink_maint(sb, AuLock_NOPLM))) {
5218+ switch (mode & S_IFMT) {
5219+ case S_IFREG:
5220+ /* no condition about RLIMIT_FSIZE and the file size */
5221+ do_sio = 1;
5222+ break;
5223+ case S_IFCHR:
5224+ case S_IFBLK:
5225+ do_sio = !capable(CAP_MKNOD);
5226+ break;
5227+ }
5228+ if (!do_sio)
5229+ do_sio = ((mode & (S_ISUID | S_ISGID))
5230+ && !capable(CAP_FSETID));
86dc4139
AM
5231+ /* this workaround may be removed in the future */
5232+ if (!do_sio) {
5233+ h_dir = au_pinned_h_dir(pin);
5234+ do_sio = h_dir->i_mode & S_ISVTX;
5235+ }
53392da6
AM
5236+ }
5237+
5238+ return do_sio;
5239+}
5240+
7e9cd9fe 5241+#if 0 /* reserved */
c2b27bf2 5242+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5243+{
5244+ int err, wkq_err;
1facf9fc 5245+ struct dentry *h_dentry;
5246+
c2b27bf2 5247+ h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5248+ if (!au_cpup_sio_test(pin, d_inode(h_dentry)->i_mode))
c2b27bf2 5249+ err = au_cpup_single(cpg, dst_parent);
1facf9fc 5250+ else {
5251+ struct au_cpup_single_args args = {
5252+ .errp = &err,
c2b27bf2
AM
5253+ .cpg = cpg,
5254+ .dst_parent = dst_parent
1facf9fc 5255+ };
5256+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
5257+ if (unlikely(wkq_err))
5258+ err = wkq_err;
5259+ }
5260+
5261+ return err;
5262+}
c2b27bf2 5263+#endif
1facf9fc 5264+
5265+/*
5266+ * copyup the @dentry from the first active lower branch to @bdst,
5267+ * using au_cpup_single().
5268+ */
c2b27bf2 5269+static int au_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5270+{
5271+ int err;
c2b27bf2
AM
5272+ unsigned int flags_orig;
5273+ struct dentry *dentry;
5274+
5275+ AuDebugOn(cpg->bsrc < 0);
1facf9fc 5276+
c2b27bf2 5277+ dentry = cpg->dentry;
86dc4139 5278+ DiMustWriteLock(dentry);
1facf9fc 5279+
c2b27bf2 5280+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
1facf9fc 5281+ if (!err) {
c2b27bf2
AM
5282+ flags_orig = cpg->flags;
5283+ au_fset_cpup(cpg->flags, RENAME);
5284+ err = au_cpup_single(cpg, NULL);
5285+ cpg->flags = flags_orig;
1facf9fc 5286+ if (!err)
5287+ return 0; /* success */
5288+
5289+ /* revert */
c2b27bf2 5290+ au_set_h_dptr(dentry, cpg->bdst, NULL);
5afbbe0d 5291+ au_set_dbtop(dentry, cpg->bsrc);
1facf9fc 5292+ }
5293+
5294+ return err;
5295+}
5296+
5297+struct au_cpup_simple_args {
5298+ int *errp;
c2b27bf2 5299+ struct au_cp_generic *cpg;
1facf9fc 5300+};
5301+
5302+static void au_call_cpup_simple(void *args)
5303+{
5304+ struct au_cpup_simple_args *a = args;
86dc4139 5305+
c2b27bf2
AM
5306+ au_pin_hdir_acquire_nest(a->cpg->pin);
5307+ *a->errp = au_cpup_simple(a->cpg);
5308+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5309+}
5310+
c2b27bf2 5311+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5312+{
5313+ int err, wkq_err;
c2b27bf2
AM
5314+ struct dentry *dentry, *parent;
5315+ struct file *h_file;
1facf9fc 5316+ struct inode *h_dir;
5317+
c2b27bf2
AM
5318+ dentry = cpg->dentry;
5319+ h_file = NULL;
5320+ if (au_ftest_cpup(cpg->flags, HOPEN)) {
5321+ AuDebugOn(cpg->bsrc < 0);
392086de 5322+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
c2b27bf2
AM
5323+ err = PTR_ERR(h_file);
5324+ if (IS_ERR(h_file))
5325+ goto out;
5326+ }
5327+
1facf9fc 5328+ parent = dget_parent(dentry);
5527c038 5329+ h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
53392da6 5330+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
5527c038 5331+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5332+ err = au_cpup_simple(cpg);
1facf9fc 5333+ else {
5334+ struct au_cpup_simple_args args = {
5335+ .errp = &err,
c2b27bf2 5336+ .cpg = cpg
1facf9fc 5337+ };
5338+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
5339+ if (unlikely(wkq_err))
5340+ err = wkq_err;
5341+ }
5342+
5343+ dput(parent);
c2b27bf2
AM
5344+ if (h_file)
5345+ au_h_open_post(dentry, cpg->bsrc, h_file);
5346+
5347+out:
1facf9fc 5348+ return err;
5349+}
5350+
c2b27bf2 5351+int au_sio_cpup_simple(struct au_cp_generic *cpg)
367653fa 5352+{
5afbbe0d 5353+ aufs_bindex_t bsrc, bbot;
c2b27bf2 5354+ struct dentry *dentry, *h_dentry;
367653fa 5355+
c2b27bf2
AM
5356+ if (cpg->bsrc < 0) {
5357+ dentry = cpg->dentry;
5afbbe0d
AM
5358+ bbot = au_dbbot(dentry);
5359+ for (bsrc = cpg->bdst + 1; bsrc <= bbot; bsrc++) {
c2b27bf2
AM
5360+ h_dentry = au_h_dptr(dentry, bsrc);
5361+ if (h_dentry) {
5527c038 5362+ AuDebugOn(d_is_negative(h_dentry));
c2b27bf2
AM
5363+ break;
5364+ }
5365+ }
5afbbe0d 5366+ AuDebugOn(bsrc > bbot);
c2b27bf2 5367+ cpg->bsrc = bsrc;
367653fa 5368+ }
c2b27bf2
AM
5369+ AuDebugOn(cpg->bsrc <= cpg->bdst);
5370+ return au_do_sio_cpup_simple(cpg);
5371+}
367653fa 5372+
c2b27bf2
AM
5373+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
5374+{
5375+ AuDebugOn(cpg->bdst <= cpg->bsrc);
5376+ return au_do_sio_cpup_simple(cpg);
367653fa
AM
5377+}
5378+
1facf9fc 5379+/* ---------------------------------------------------------------------- */
5380+
5381+/*
5382+ * copyup the deleted file for writing.
5383+ */
c2b27bf2
AM
5384+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
5385+ struct file *file)
1facf9fc 5386+{
5387+ int err;
c2b27bf2
AM
5388+ unsigned int flags_orig;
5389+ aufs_bindex_t bsrc_orig;
c2b27bf2 5390+ struct au_dinfo *dinfo;
5afbbe0d
AM
5391+ struct {
5392+ struct au_hdentry *hd;
5393+ struct dentry *h_dentry;
5394+ } hdst, hsrc;
1facf9fc 5395+
c2b27bf2 5396+ dinfo = au_di(cpg->dentry);
1308ab2a 5397+ AuRwMustWriteLock(&dinfo->di_rwsem);
5398+
c2b27bf2 5399+ bsrc_orig = cpg->bsrc;
5afbbe0d
AM
5400+ cpg->bsrc = dinfo->di_btop;
5401+ hdst.hd = au_hdentry(dinfo, cpg->bdst);
5402+ hdst.h_dentry = hdst.hd->hd_dentry;
5403+ hdst.hd->hd_dentry = wh_dentry;
5404+ dinfo->di_btop = cpg->bdst;
5405+
5406+ hsrc.h_dentry = NULL;
027c5e7a 5407+ if (file) {
5afbbe0d
AM
5408+ hsrc.hd = au_hdentry(dinfo, cpg->bsrc);
5409+ hsrc.h_dentry = hsrc.hd->hd_dentry;
5410+ hsrc.hd->hd_dentry = au_hf_top(file)->f_path.dentry;
027c5e7a 5411+ }
c2b27bf2
AM
5412+ flags_orig = cpg->flags;
5413+ cpg->flags = !AuCpup_DTIME;
5414+ err = au_cpup_single(cpg, /*h_parent*/NULL);
5415+ cpg->flags = flags_orig;
027c5e7a
AM
5416+ if (file) {
5417+ if (!err)
5418+ err = au_reopen_nondir(file);
5afbbe0d 5419+ hsrc.hd->hd_dentry = hsrc.h_dentry;
1facf9fc 5420+ }
5afbbe0d
AM
5421+ hdst.hd->hd_dentry = hdst.h_dentry;
5422+ dinfo->di_btop = cpg->bsrc;
c2b27bf2 5423+ cpg->bsrc = bsrc_orig;
1facf9fc 5424+
5425+ return err;
5426+}
5427+
c2b27bf2 5428+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5429+{
5430+ int err;
c2b27bf2 5431+ aufs_bindex_t bdst;
1facf9fc 5432+ struct au_dtime dt;
c2b27bf2 5433+ struct dentry *dentry, *parent, *h_parent, *wh_dentry;
1facf9fc 5434+ struct au_branch *br;
5435+ struct path h_path;
5436+
c2b27bf2
AM
5437+ dentry = cpg->dentry;
5438+ bdst = cpg->bdst;
1facf9fc 5439+ br = au_sbr(dentry->d_sb, bdst);
5440+ parent = dget_parent(dentry);
5441+ h_parent = au_h_dptr(parent, bdst);
5442+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
5443+ err = PTR_ERR(wh_dentry);
5444+ if (IS_ERR(wh_dentry))
5445+ goto out;
5446+
5447+ h_path.dentry = h_parent;
86dc4139 5448+ h_path.mnt = au_br_mnt(br);
1facf9fc 5449+ au_dtime_store(&dt, parent, &h_path);
c2b27bf2 5450+ err = au_do_cpup_wh(cpg, wh_dentry, file);
1facf9fc 5451+ if (unlikely(err))
5452+ goto out_wh;
5453+
5454+ dget(wh_dentry);
5455+ h_path.dentry = wh_dentry;
2000de60 5456+ if (!d_is_dir(wh_dentry)) {
523b37e3 5457+ /* no delegation since it is just created */
5527c038 5458+ err = vfsub_unlink(d_inode(h_parent), &h_path,
523b37e3
AM
5459+ /*delegated*/NULL, /*force*/0);
5460+ } else
5527c038 5461+ err = vfsub_rmdir(d_inode(h_parent), &h_path);
1facf9fc 5462+ if (unlikely(err)) {
523b37e3
AM
5463+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
5464+ wh_dentry, err);
1facf9fc 5465+ err = -EIO;
5466+ }
5467+ au_dtime_revert(&dt);
5527c038 5468+ au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
1facf9fc 5469+
4f0767ce 5470+out_wh:
1facf9fc 5471+ dput(wh_dentry);
4f0767ce 5472+out:
1facf9fc 5473+ dput(parent);
5474+ return err;
5475+}
5476+
5477+struct au_cpup_wh_args {
5478+ int *errp;
c2b27bf2 5479+ struct au_cp_generic *cpg;
1facf9fc 5480+ struct file *file;
5481+};
5482+
5483+static void au_call_cpup_wh(void *args)
5484+{
5485+ struct au_cpup_wh_args *a = args;
86dc4139 5486+
c2b27bf2
AM
5487+ au_pin_hdir_acquire_nest(a->cpg->pin);
5488+ *a->errp = au_cpup_wh(a->cpg, a->file);
5489+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5490+}
5491+
c2b27bf2 5492+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5493+{
5494+ int err, wkq_err;
c2b27bf2 5495+ aufs_bindex_t bdst;
c1595e42 5496+ struct dentry *dentry, *parent, *h_orph, *h_parent;
86dc4139 5497+ struct inode *dir, *h_dir, *h_tmpdir;
1facf9fc 5498+ struct au_wbr *wbr;
c2b27bf2 5499+ struct au_pin wh_pin, *pin_orig;
1facf9fc 5500+
c2b27bf2
AM
5501+ dentry = cpg->dentry;
5502+ bdst = cpg->bdst;
1facf9fc 5503+ parent = dget_parent(dentry);
5527c038 5504+ dir = d_inode(parent);
1facf9fc 5505+ h_orph = NULL;
5506+ h_parent = NULL;
5507+ h_dir = au_igrab(au_h_iptr(dir, bdst));
5508+ h_tmpdir = h_dir;
c2b27bf2 5509+ pin_orig = NULL;
1facf9fc 5510+ if (!h_dir->i_nlink) {
5511+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
5512+ h_orph = wbr->wbr_orph;
5513+
5514+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 5515+ au_set_h_dptr(parent, bdst, dget(h_orph));
5527c038 5516+ h_tmpdir = d_inode(h_orph);
1facf9fc 5517+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
5518+
febd17d6 5519+ inode_lock_nested(h_tmpdir, AuLsc_I_PARENT3);
4a4d8108 5520+ /* todo: au_h_open_pre()? */
86dc4139 5521+
c2b27bf2 5522+ pin_orig = cpg->pin;
86dc4139 5523+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
c2b27bf2
AM
5524+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
5525+ cpg->pin = &wh_pin;
1facf9fc 5526+ }
5527+
53392da6 5528+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
5527c038 5529+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5530+ err = au_cpup_wh(cpg, file);
1facf9fc 5531+ else {
5532+ struct au_cpup_wh_args args = {
5533+ .errp = &err,
c2b27bf2
AM
5534+ .cpg = cpg,
5535+ .file = file
1facf9fc 5536+ };
5537+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
5538+ if (unlikely(wkq_err))
5539+ err = wkq_err;
5540+ }
5541+
5542+ if (h_orph) {
febd17d6 5543+ inode_unlock(h_tmpdir);
4a4d8108 5544+ /* todo: au_h_open_post()? */
1facf9fc 5545+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 5546+ au_set_h_dptr(parent, bdst, h_parent);
c2b27bf2
AM
5547+ AuDebugOn(!pin_orig);
5548+ cpg->pin = pin_orig;
1facf9fc 5549+ }
5550+ iput(h_dir);
5551+ dput(parent);
5552+
5553+ return err;
5554+}
5555+
5556+/* ---------------------------------------------------------------------- */
5557+
5558+/*
5559+ * generic routine for both copy-up and copy-down.
5560+ */
5561+/* cf. revalidate function in file.c */
5562+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5563+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5564+ struct au_pin *pin,
1facf9fc 5565+ struct dentry *h_parent, void *arg),
5566+ void *arg)
5567+{
5568+ int err;
5569+ struct au_pin pin;
5527c038 5570+ struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
1facf9fc 5571+
5572+ err = 0;
5573+ parent = dget_parent(dentry);
5574+ if (IS_ROOT(parent))
5575+ goto out;
5576+
5577+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
5578+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
5579+
5580+ /* do not use au_dpage */
5581+ real_parent = parent;
5582+ while (1) {
5583+ dput(parent);
5584+ parent = dget_parent(dentry);
5585+ h_parent = au_h_dptr(parent, bdst);
5586+ if (h_parent)
5587+ goto out; /* success */
5588+
5589+ /* find top dir which is necessary to cpup */
5590+ do {
5591+ d = parent;
5592+ dput(parent);
5593+ parent = dget_parent(d);
5594+ di_read_lock_parent3(parent, !AuLock_IR);
5595+ h_parent = au_h_dptr(parent, bdst);
5596+ di_read_unlock(parent, !AuLock_IR);
5597+ } while (!h_parent);
5598+
5599+ if (d != real_parent)
5600+ di_write_lock_child3(d);
5601+
5602+ /* somebody else might create while we were sleeping */
5527c038
JR
5603+ h_dentry = au_h_dptr(d, bdst);
5604+ if (!h_dentry || d_is_negative(h_dentry)) {
5605+ if (h_dentry)
5afbbe0d 5606+ au_update_dbtop(d);
1facf9fc 5607+
5608+ au_pin_set_dentry(&pin, d);
5609+ err = au_do_pin(&pin);
5610+ if (!err) {
86dc4139 5611+ err = cp(d, bdst, &pin, h_parent, arg);
1facf9fc 5612+ au_unpin(&pin);
5613+ }
5614+ }
5615+
5616+ if (d != real_parent)
5617+ di_write_unlock(d);
5618+ if (unlikely(err))
5619+ break;
5620+ }
5621+
4f0767ce 5622+out:
1facf9fc 5623+ dput(parent);
5624+ return err;
5625+}
5626+
5627+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5628+ struct au_pin *pin,
2000de60 5629+ struct dentry *h_parent __maybe_unused,
1facf9fc 5630+ void *arg __maybe_unused)
5631+{
c2b27bf2
AM
5632+ struct au_cp_generic cpg = {
5633+ .dentry = dentry,
5634+ .bdst = bdst,
5635+ .bsrc = -1,
5636+ .len = 0,
5637+ .pin = pin,
5638+ .flags = AuCpup_DTIME
5639+ };
5640+ return au_sio_cpup_simple(&cpg);
1facf9fc 5641+}
5642+
5643+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5644+{
5645+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
5646+}
5647+
5648+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5649+{
5650+ int err;
5651+ struct dentry *parent;
5652+ struct inode *dir;
5653+
5654+ parent = dget_parent(dentry);
5527c038 5655+ dir = d_inode(parent);
1facf9fc 5656+ err = 0;
5657+ if (au_h_iptr(dir, bdst))
5658+ goto out;
5659+
5660+ di_read_unlock(parent, AuLock_IR);
5661+ di_write_lock_parent(parent);
5662+ /* someone else might change our inode while we were sleeping */
5663+ if (!au_h_iptr(dir, bdst))
5664+ err = au_cpup_dirs(dentry, bdst);
5665+ di_downgrade_lock(parent, AuLock_IR);
5666+
4f0767ce 5667+out:
1facf9fc 5668+ dput(parent);
5669+ return err;
5670+}
e8791d4f
AM
5671diff -urNp -x '*.orig' linux-4.9/fs/aufs/cpup.h linux-4.9/fs/aufs/cpup.h
5672--- linux-4.9/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
5673+++ linux-4.9/fs/aufs/cpup.h 2021-02-24 16:15:09.521573529 +0100
ae9dfd79 5674@@ -0,0 +1,99 @@
1facf9fc 5675+/*
ae9dfd79 5676+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 5677+ *
5678+ * This program, aufs is free software; you can redistribute it and/or modify
5679+ * it under the terms of the GNU General Public License as published by
5680+ * the Free Software Foundation; either version 2 of the License, or
5681+ * (at your option) any later version.
dece6358
AM
5682+ *
5683+ * This program is distributed in the hope that it will be useful,
5684+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5685+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5686+ * GNU General Public License for more details.
5687+ *
5688+ * You should have received a copy of the GNU General Public License
523b37e3 5689+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 5690+ */
5691+
5692+/*
5693+ * copy-up/down functions
5694+ */
5695+
5696+#ifndef __AUFS_CPUP_H__
5697+#define __AUFS_CPUP_H__
5698+
5699+#ifdef __KERNEL__
5700+
dece6358 5701+#include <linux/path.h>
1facf9fc 5702+
dece6358
AM
5703+struct inode;
5704+struct file;
86dc4139 5705+struct au_pin;
dece6358 5706+
86dc4139 5707+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
1facf9fc 5708+void au_cpup_attr_timesizes(struct inode *inode);
5709+void au_cpup_attr_nlink(struct inode *inode, int force);
5710+void au_cpup_attr_changeable(struct inode *inode);
5711+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
5712+void au_cpup_attr_all(struct inode *inode, int force);
5713+
5714+/* ---------------------------------------------------------------------- */
5715+
c2b27bf2
AM
5716+struct au_cp_generic {
5717+ struct dentry *dentry;
5718+ aufs_bindex_t bdst, bsrc;
5719+ loff_t len;
5720+ struct au_pin *pin;
5721+ unsigned int flags;
5722+};
5723+
1facf9fc 5724+/* cpup flags */
392086de
AM
5725+#define AuCpup_DTIME 1 /* do dtime_store/revert */
5726+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
5727+ for link(2) */
5728+#define AuCpup_RENAME (1 << 2) /* rename after cpup */
5729+#define AuCpup_HOPEN (1 << 3) /* call h_open_pre/post() in
5730+ cpup */
5731+#define AuCpup_OVERWRITE (1 << 4) /* allow overwriting the
5732+ existing entry */
5733+#define AuCpup_RWDST (1 << 5) /* force write target even if
5734+ the branch is marked as RO */
c2b27bf2 5735+
ae9dfd79
AM
5736+#ifndef CONFIG_AUFS_BR_HFSPLUS
5737+#undef AuCpup_HOPEN
5738+#define AuCpup_HOPEN 0
5739+#endif
5740+
1facf9fc 5741+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
7f207e10
AM
5742+#define au_fset_cpup(flags, name) \
5743+ do { (flags) |= AuCpup_##name; } while (0)
5744+#define au_fclr_cpup(flags, name) \
5745+ do { (flags) &= ~AuCpup_##name; } while (0)
1facf9fc 5746+
5747+int au_copy_file(struct file *dst, struct file *src, loff_t len);
c2b27bf2
AM
5748+int au_sio_cpup_simple(struct au_cp_generic *cpg);
5749+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
5750+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
1facf9fc 5751+
5752+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5753+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5754+ struct au_pin *pin,
1facf9fc 5755+ struct dentry *h_parent, void *arg),
5756+ void *arg);
5757+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
5758+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
5759+
5760+/* ---------------------------------------------------------------------- */
5761+
5762+/* timestamps kept across a copy-up; presumably filled by au_dtime_store() and used by au_dtime_revert() */
5763+struct au_dtime {
5764+ struct dentry *dt_dentry;
5765+ struct path dt_h_path;
5766+ struct timespec dt_atime, dt_mtime;
5767+};
5768+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
5769+ struct path *h_path);
5770+void au_dtime_revert(struct au_dtime *dt);
5771+
5772+#endif /* __KERNEL__ */
5773+#endif /* __AUFS_CPUP_H__ */
e8791d4f
AM
5774diff -urNp -x '*.orig' linux-4.9/fs/aufs/dbgaufs.c linux-4.9/fs/aufs/dbgaufs.c
5775--- linux-4.9/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
5776+++ linux-4.9/fs/aufs/dbgaufs.c 2021-02-24 16:15:09.524906971 +0100
ae9dfd79 5777@@ -0,0 +1,437 @@
1facf9fc 5778+/*
ae9dfd79 5779+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 5780+ *
5781+ * This program, aufs is free software; you can redistribute it and/or modify
5782+ * it under the terms of the GNU General Public License as published by
5783+ * the Free Software Foundation; either version 2 of the License, or
5784+ * (at your option) any later version.
dece6358
AM
5785+ *
5786+ * This program is distributed in the hope that it will be useful,
5787+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5788+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5789+ * GNU General Public License for more details.
5790+ *
5791+ * You should have received a copy of the GNU General Public License
523b37e3 5792+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 5793+ */
5794+
5795+/*
5796+ * debugfs interface
5797+ */
5798+
5799+#include <linux/debugfs.h>
5800+#include "aufs.h"
5801+
5802+#ifndef CONFIG_SYSFS
5803+#error DEBUG_FS depends upon SYSFS
5804+#endif
5805+
5806+static struct dentry *dbgaufs;
5807+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
5808+
5809+/* 20 is the max number of decimal digits of a 64-bit ulong; room for four such numbers */
5810+struct dbgaufs_arg {
5811+ int n; /* length of the text in a[] */
5812+ char a[20 * 4];
5813+};
5814+
5815+/*
5816+ * common functions for all XINO files (xib, xino entries and xigen)
5817+ */
5818+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
5819+ struct file *file)
5820+{
ae9dfd79 5821+ kfree(file->private_data); /* the buffer allocated by dbgaufs_xi_open() */
1facf9fc 5822+ return 0;
5823+}
5824+
5825+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
5826+{ /* builds the text later returned by dbgaufs_xi_read(); do_fcnt prepends file_count(xf) */
5827+ int err;
5828+ struct kstat st;
5829+ struct dbgaufs_arg *p;
5830+
5831+ err = -ENOMEM;
5832+ p = kmalloc(sizeof(*p), GFP_NOFS);
5833+ if (unlikely(!p))
5834+ goto out;
5835+
5836+ err = 0;
5837+ p->n = 0;
5838+ file->private_data = p;
5839+ if (!xf)
5840+ goto out; /* no XINO file: the entry reads as empty */
5841+
c06a8ce3 5842+ err = vfs_getattr(&xf->f_path, &st);
1facf9fc 5843+ if (!err) {
5844+ if (do_fcnt)
5845+ p->n = snprintf
5846+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
5847+ (long)file_count(xf), st.blocks, st.blksize,
5848+ (long long)st.size);
5849+ else
5850+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
5851+ st.blocks, st.blksize,
5852+ (long long)st.size);
5853+ AuDebugOn(p->n >= sizeof(p->a));
5854+ } else {
5855+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
5856+ err = 0; /* the failure is reported in the text, open itself succeeds */
5857+ }
5858+
4f0767ce 5859+out:
1facf9fc 5860+ return err;
5861+
5862+}
5863+
5864+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
5865+ size_t count, loff_t *ppos)
5866+{ /* copies out the text that dbgaufs_xi_open() stored in private_data */
5867+ struct dbgaufs_arg *p;
5868+
5869+ p = file->private_data;
5870+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
5871+}
5872+
5873+/* ---------------------------------------------------------------------- */
5874+
86dc4139
AM
5875+struct dbgaufs_plink_arg {
5876+ int n; /* length of the text in a[] */
5877+ char a[]; /* the rest of the page allocated in dbgaufs_plink_open() */
5878+};
5879+
5880+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
5881+ struct file *file)
5882+{ /* private_data is the page obtained in dbgaufs_plink_open() */
ae9dfd79 5883+ free_page((unsigned long)file->private_data);
86dc4139
AM
5884+ return 0;
5885+}
5886+
5887+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
5888+{ /* open of "plink": formats the pseudo-link statistics into one zeroed page; 0 or -errno */
5889+ int err, i, limit;
5890+ unsigned long n, sum;
5891+ struct dbgaufs_plink_arg *p;
5892+ struct au_sbinfo *sbinfo;
5893+ struct super_block *sb;
ae9dfd79 5894+ struct hlist_bl_head *hbl;
86dc4139
AM
5895+
5896+ err = -ENOMEM;
5897+ p = (void *)get_zeroed_page(GFP_NOFS);
5898+ if (unlikely(!p))
5899+ goto out;
5900+
5901+ err = -EFBIG; /* returned when the text does not fit in the page */
5902+ sbinfo = inode->i_private;
5903+ sb = sbinfo->si_sb;
5904+ si_noflush_read_lock(sb);
5905+ if (au_opt_test(au_mntflags(sb), PLINK)) {
5906+ limit = PAGE_SIZE - sizeof(p->n);
5907+
5908+ /* the number of buckets */
5909+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
5910+ p->n += n;
5911+ limit -= n;
5912+
5913+ sum = 0;
ae9dfd79
AM
5914+ for (i = 0, hbl = sbinfo->si_plink; i < AuPlink_NHASH;
5915+ i++, hbl++) {
5916+ n = au_hbl_count(hbl);
86dc4139
AM
5917+ sum += n;
5918+
5919+ n = snprintf(p->a + p->n, limit, "%lu ", n);
5920+ p->n += n;
5921+ limit -= n;
5922+ if (unlikely(limit <= 0))
5923+ goto out_free;
5924+ }
5925+ p->a[p->n - 1] = '\n'; /* replace the trailing blank with a newline */
5926+
5927+ /* the sum of plinks */
5928+ n = snprintf(p->a + p->n, limit, "%lu\n", sum);
5929+ p->n += n;
5930+ limit -= n;
5931+ if (unlikely(limit <= 0))
5932+ goto out_free;
5933+ } else {
5934+#define str "1\n0\n0\n"
5935+ p->n = sizeof(str) - 1;
5936+ strcpy(p->a, str);
5937+#undef str
5938+ }
5939+ si_read_unlock(sb);
5940+ err = 0;
5941+ file->private_data = p;
5942+ goto out; /* success */
5943+
5944+out_free:
5945+ si_read_unlock(sb); /* both jumps to out_free happen with the read lock held */
ae9dfd79 5946+ free_page((unsigned long)p);
86dc4139
AM
5947+out:
5948+ return err;
5949+}
5950+
5951+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
5952+ size_t count, loff_t *ppos)
5953+{ /* copies out the text that dbgaufs_plink_open() stored in private_data */
5954+ struct dbgaufs_plink_arg *p;
5955+
5956+ p = file->private_data;
5957+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
5958+}
5959+
5960+static const struct file_operations dbgaufs_plink_fop = { /* ops of the "plink" debugfs file */
5961+ .owner = THIS_MODULE,
5962+ .open = dbgaufs_plink_open,
5963+ .release = dbgaufs_plink_release,
5964+ .read = dbgaufs_plink_read
5965+};
5966+
5967+/* ---------------------------------------------------------------------- */
5968+
1facf9fc 5969+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
5970+{ /* open of "xib": reports sbinfo->si_xib via dbgaufs_xi_open(), without the file count */
5971+ int err;
5972+ struct au_sbinfo *sbinfo;
5973+ struct super_block *sb;
5974+
5975+ sbinfo = inode->i_private;
5976+ sb = sbinfo->si_sb;
5977+ si_noflush_read_lock(sb); /* si_xib is read under the read lock */
5978+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
5979+ si_read_unlock(sb);
5980+ return err;
5981+}
5982+
5983+static const struct file_operations dbgaufs_xib_fop = { /* ops of the "xib" debugfs file */
4a4d8108 5984+ .owner = THIS_MODULE,
1facf9fc 5985+ .open = dbgaufs_xib_open,
5986+ .release = dbgaufs_xi_release,
5987+ .read = dbgaufs_xi_read
5988+};
5989+
5990+/* ---------------------------------------------------------------------- */
5991+
5992+#define DbgaufsXi_PREFIX "xi" /* per-branch entries are named with this prefix followed by the branch index */
5993+
5994+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
5995+{ /* open of a per-branch entry: the branch index is parsed from the file name */
5996+ int err;
5997+ long l;
5998+ struct au_sbinfo *sbinfo;
5999+ struct super_block *sb;
6000+ struct file *xf;
6001+ struct qstr *name;
6002+
6003+ err = -ENOENT;
6004+ xf = NULL;
2000de60 6005+ name = &file->f_path.dentry->d_name;
1facf9fc 6006+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
6007+ || memcmp(name->name, DbgaufsXi_PREFIX,
6008+ sizeof(DbgaufsXi_PREFIX) - 1)))
6009+ goto out; /* the name must be the prefix plus at least one more character */
9dbd164d 6010+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
1facf9fc 6011+ if (unlikely(err))
6012+ goto out;
6013+
6014+ sbinfo = inode->i_private;
6015+ sb = sbinfo->si_sb;
6016+ si_noflush_read_lock(sb);
5afbbe0d 6017+ if (l <= au_sbbot(sb)) { /* NOTE(review): a negative l is not rejected here; presumably names only come from dbgaufs_brs_add() -- confirm */
1facf9fc 6018+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
6019+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
6020+ } else
6021+ err = -ENOENT;
6022+ si_read_unlock(sb);
6023+
4f0767ce 6024+out:
1facf9fc 6025+ return err;
6026+}
6027+
6028+static const struct file_operations dbgaufs_xino_fop = { /* ops of the per-branch entries made by dbgaufs_brs_add() */
4a4d8108 6029+ .owner = THIS_MODULE,
1facf9fc 6030+ .open = dbgaufs_xino_open,
6031+ .release = dbgaufs_xi_release,
6032+ .read = dbgaufs_xi_read
6033+};
6034+
6035+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
6036+{ /* removes the debugfs entries of the branches from bindex to the bottom one */
5afbbe0d 6037+ aufs_bindex_t bbot;
1facf9fc 6038+ struct au_branch *br;
6039+ struct au_xino_file *xi;
6040+
6041+ if (!au_sbi(sb)->si_dbgaufs)
6042+ return; /* no per-mount debugfs directory, nothing to remove */
6043+
5afbbe0d
AM
6044+ bbot = au_sbbot(sb);
6045+ for (; bindex <= bbot; bindex++) {
1facf9fc 6046+ br = au_sbr(sb, bindex);
6047+ xi = &br->br_xino;
e2f27e51
AM
6048+ /* debugfs acquires the parent i_mutex, so lockdep is switched off around the call */
6049+ lockdep_off();
c06a8ce3 6050+ debugfs_remove(xi->xi_dbgaufs);
e2f27e51 6051+ lockdep_on();
c06a8ce3 6052+ xi->xi_dbgaufs = NULL;
1facf9fc 6053+ }
6054+}
6055+
6056+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
6057+{ /* creates one debugfs entry per branch, from bindex to the bottom one */
6058+ struct au_sbinfo *sbinfo;
6059+ struct dentry *parent;
6060+ struct au_branch *br;
6061+ struct au_xino_file *xi;
5afbbe0d 6062+ aufs_bindex_t bbot;
1facf9fc 6063+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" + up to 5 characters of bindex + NUL */
6064+
6065+ sbinfo = au_sbi(sb);
6066+ parent = sbinfo->si_dbgaufs;
6067+ if (!parent)
6068+ return;
6069+
5afbbe0d
AM
6070+ bbot = au_sbbot(sb);
6071+ for (; bindex <= bbot; bindex++) {
1facf9fc 6072+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
6073+ br = au_sbr(sb, bindex);
6074+ xi = &br->br_xino;
6075+ AuDebugOn(xi->xi_dbgaufs);
f0c0a007
AM
6076+ /* debugfs acquires the parent i_mutex, so lockdep is switched off around the call */
6077+ lockdep_off();
1facf9fc 6078+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
6079+ sbinfo, &dbgaufs_xino_fop);
f0c0a007 6080+ lockdep_on();
1facf9fc 6081+ /* ignore an error: only warn and continue with the next branch */
6082+ if (unlikely(!xi->xi_dbgaufs))
6083+ AuWarn1("failed %s under debugfs\n", name);
6084+ }
6085+}
6086+
6087+/* ---------------------------------------------------------------------- */
6088+
6089+#ifdef CONFIG_AUFS_EXPORT
6090+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
6091+{ /* open of "xigen": reports sbinfo->si_xigen via dbgaufs_xi_open(), without the file count */
6092+ int err;
6093+ struct au_sbinfo *sbinfo;
6094+ struct super_block *sb;
6095+
6096+ sbinfo = inode->i_private;
6097+ sb = sbinfo->si_sb;
6098+ si_noflush_read_lock(sb);
6099+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
6100+ si_read_unlock(sb);
6101+ return err;
6102+}
6103+
6104+static const struct file_operations dbgaufs_xigen_fop = { /* ops of the "xigen" debugfs file */
4a4d8108 6105+ .owner = THIS_MODULE,
1facf9fc 6106+ .open = dbgaufs_xigen_open,
6107+ .release = dbgaufs_xi_release,
6108+ .read = dbgaufs_xi_read
6109+};
6110+
6111+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6112+{ /* creates "xigen" under the per-mount directory; 0 on success, -EIO otherwise */
6113+ int err;
6114+
dece6358 6115+ /*
c1595e42 6116+ * This function is a dynamic '__init' function actually,
dece6358
AM
6117+ * so the tiny check for si_rwsem is unnecessary.
6118+ */
6119+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6120+
1facf9fc 6121+ err = -EIO;
6122+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
6123+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6124+ &dbgaufs_xigen_fop);
6125+ if (sbinfo->si_dbgaufs_xigen)
6126+ err = 0;
6127+
6128+ return err;
6129+}
6130+#else
6131+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6132+{ /* variant built when CONFIG_AUFS_EXPORT is not set: nothing to create */
6133+ return 0;
6134+}
6135+#endif /* CONFIG_AUFS_EXPORT */
6136+
6137+/* ---------------------------------------------------------------------- */
6138+
6139+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
6140+{ /* removes the per-mount debugfs directory with everything below it */
dece6358 6141+ /*
7e9cd9fe 6142+ * This function is a dynamic '__fin' function actually,
dece6358
AM
6143+ * so the tiny check for si_rwsem is unnecessary.
6144+ */
6145+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6146+
1facf9fc 6147+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
6148+ sbinfo->si_dbgaufs = NULL;
6149+ kobject_put(&sbinfo->si_kobj); /* pairs with the kobject_get() in dbgaufs_si_init() */
6150+}
6151+
6152+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
6153+{ /* creates the per-mount directory with "xib", "plink" and (optionally) "xigen"; 0 or -errno */
6154+ int err;
6155+ char name[SysaufsSiNameLen];
6156+
dece6358 6157+ /*
c1595e42 6158+ * This function is a dynamic '__init' function actually,
dece6358
AM
6159+ * so the tiny check for si_rwsem is unnecessary.
6160+ */
6161+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6162+
1facf9fc 6163+ err = -ENOENT;
6164+ if (!dbgaufs) {
6165+ AuErr1("/debug/aufs is uninitialized\n");
6166+ goto out;
6167+ }
6168+
6169+ err = -EIO;
6170+ sysaufs_name(sbinfo, name);
6171+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
6172+ if (unlikely(!sbinfo->si_dbgaufs))
6173+ goto out; /* no reference taken yet, so dbgaufs_si_fin() is not called */
6174+ kobject_get(&sbinfo->si_kobj); /* dropped again in dbgaufs_si_fin() */
6175+
6176+ sbinfo->si_dbgaufs_xib = debugfs_create_file
6177+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6178+ &dbgaufs_xib_fop);
6179+ if (unlikely(!sbinfo->si_dbgaufs_xib))
6180+ goto out_dir;
6181+
86dc4139
AM
6182+ sbinfo->si_dbgaufs_plink = debugfs_create_file
6183+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6184+ &dbgaufs_plink_fop);
6185+ if (unlikely(!sbinfo->si_dbgaufs_plink))
6186+ goto out_dir;
6187+
1facf9fc 6188+ err = dbgaufs_xigen_init(sbinfo);
6189+ if (!err)
6190+ goto out; /* success */
6191+
4f0767ce 6192+out_dir:
1facf9fc 6193+ dbgaufs_si_fin(sbinfo); /* removes whatever was created and drops the kobject reference */
4f0767ce 6194+out:
1facf9fc 6195+ return err;
6196+}
6197+
6198+/* ---------------------------------------------------------------------- */
6199+
6200+void dbgaufs_fin(void)
6201+{ /* removes the top-level directory created by dbgaufs_init() */
6202+ debugfs_remove(dbgaufs);
6203+}
6204+
6205+int __init dbgaufs_init(void)
6206+{ /* creates the top-level AUFS_NAME directory in debugfs; 0 on success, -EIO otherwise */
6207+ int err;
6208+
6209+ err = -EIO;
6210+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
6211+ if (dbgaufs)
6212+ err = 0;
6213+ return err;
6214+}
e8791d4f
AM
6215diff -urNp -x '*.orig' linux-4.9/fs/aufs/dbgaufs.h linux-4.9/fs/aufs/dbgaufs.h
6216--- linux-4.9/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
6217+++ linux-4.9/fs/aufs/dbgaufs.h 2021-02-24 16:15:09.524906971 +0100
523b37e3 6218@@ -0,0 +1,48 @@
1facf9fc 6219+/*
ae9dfd79 6220+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 6221+ *
6222+ * This program, aufs is free software; you can redistribute it and/or modify
6223+ * it under the terms of the GNU General Public License as published by
6224+ * the Free Software Foundation; either version 2 of the License, or
6225+ * (at your option) any later version.
dece6358
AM
6226+ *
6227+ * This program is distributed in the hope that it will be useful,
6228+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6229+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6230+ * GNU General Public License for more details.
6231+ *
6232+ * You should have received a copy of the GNU General Public License
523b37e3 6233+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6234+ */
6235+
6236+/*
6237+ * debugfs interface
6238+ */
6239+
6240+#ifndef __DBGAUFS_H__
6241+#define __DBGAUFS_H__
6242+
6243+#ifdef __KERNEL__
6244+
dece6358 6245+struct super_block;
1facf9fc 6246+struct au_sbinfo;
dece6358 6247+
1facf9fc 6248+#ifdef CONFIG_DEBUG_FS
6249+/* dbgaufs.c */
6250+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
6251+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
6252+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
6253+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
6254+void dbgaufs_fin(void);
6255+int __init dbgaufs_init(void);
1facf9fc 6256+#else
4a4d8108
AM
6257+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
6258+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
6259+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
6260+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
6261+AuStubVoid(dbgaufs_fin, void)
6262+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 6263+#endif /* CONFIG_DEBUG_FS */
6264+
6265+#endif /* __KERNEL__ */
6266+#endif /* __DBGAUFS_H__ */
e8791d4f
AM
6267diff -urNp -x '*.orig' linux-4.9/fs/aufs/dcsub.c linux-4.9/fs/aufs/dcsub.c
6268--- linux-4.9/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
6269+++ linux-4.9/fs/aufs/dcsub.c 2021-02-24 16:15:09.524906971 +0100
e2f27e51 6270@@ -0,0 +1,225 @@
1facf9fc 6271+/*
ae9dfd79 6272+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 6273+ *
6274+ * This program, aufs is free software; you can redistribute it and/or modify
6275+ * it under the terms of the GNU General Public License as published by
6276+ * the Free Software Foundation; either version 2 of the License, or
6277+ * (at your option) any later version.
dece6358
AM
6278+ *
6279+ * This program is distributed in the hope that it will be useful,
6280+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6281+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6282+ * GNU General Public License for more details.
6283+ *
6284+ * You should have received a copy of the GNU General Public License
523b37e3 6285+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6286+ */
6287+
6288+/*
6289+ * sub-routines for dentry cache
6290+ */
6291+
6292+#include "aufs.h"
6293+
6294+static void au_dpage_free(struct au_dpage *dpage)
6295+{ /* puts every stored dentry, then frees the page that held the pointers */
6296+ int i;
6297+ struct dentry **p;
6298+
6299+ p = dpage->dentries;
6300+ for (i = 0; i < dpage->ndentry; i++)
6301+ dput(*p++); /* drops the reference taken in au_dpages_append() */
ae9dfd79 6302+ free_page((unsigned long)dpage->dentries);
1facf9fc 6303+}
6304+
6305+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
6306+{ /* starts with one au_dpage backed by one empty page; 0 or -ENOMEM */
6307+ int err;
6308+ void *p;
6309+
6310+ err = -ENOMEM;
6311+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
6312+ if (unlikely(!dpages->dpages))
6313+ goto out;
6314+
6315+ p = (void *)__get_free_page(gfp);
6316+ if (unlikely(!p))
6317+ goto out_dpages;
6318+
6319+ dpages->dpages[0].ndentry = 0;
6320+ dpages->dpages[0].dentries = p;
6321+ dpages->ndpage = 1;
6322+ return 0; /* success */
6323+
4f0767ce 6324+out_dpages:
ae9dfd79 6325+ kfree(dpages->dpages);
4f0767ce 6326+out:
1facf9fc 6327+ return err;
6328+}
6329+
6330+void au_dpages_free(struct au_dcsub_pages *dpages)
6331+{ /* releases every au_dpage and then the array itself */
6332+ int i;
6333+ struct au_dpage *p;
6334+
6335+ p = dpages->dpages;
6336+ for (i = 0; i < dpages->ndpage; i++)
6337+ au_dpage_free(p++);
ae9dfd79 6338+ kfree(dpages->dpages);
1facf9fc 6339+}
6340+
6341+static int au_dpages_append(struct au_dcsub_pages *dpages,
6342+ struct dentry *dentry, gfp_t gfp)
6343+{ /* stores a new reference to dentry in the last page, adding a page when it is full */
6344+ int err, sz;
6345+ struct au_dpage *dpage;
6346+ void *p;
6347+
6348+ dpage = dpages->dpages + dpages->ndpage - 1;
6349+ sz = PAGE_SIZE / sizeof(dentry); /* number of dentry pointers per page */
6350+ if (unlikely(dpage->ndentry >= sz)) {
6351+ AuLabel(new dpage);
6352+ err = -ENOMEM;
6353+ sz = dpages->ndpage * sizeof(*dpages->dpages); /* sz is reused: current array size in bytes */
6354+ p = au_kzrealloc(dpages->dpages, sz,
e2f27e51
AM
6355+ sz + sizeof(*dpages->dpages), gfp,
6356+ /*may_shrink*/0);
1facf9fc 6357+ if (unlikely(!p))
6358+ goto out;
6359+
6360+ dpages->dpages = p;
6361+ dpage = dpages->dpages + dpages->ndpage;
6362+ p = (void *)__get_free_page(gfp);
6363+ if (unlikely(!p))
6364+ goto out; /* ndpage is unchanged; the enlarged array stays in place */
6365+
6366+ dpage->ndentry = 0;
6367+ dpage->dentries = p;
6368+ dpages->ndpage++;
6369+ }
6370+
c1595e42 6371+ AuDebugOn(au_dcount(dentry) <= 0);
027c5e7a 6372+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry); /* presumably the caller holds dentry->d_lock -- confirm */
1facf9fc 6373+ return 0; /* success */
6374+
4f0767ce 6375+out:
1facf9fc 6376+ return err;
6377+}
6378+
c1595e42
JR
6379+/* todo: BAD approach */
6380+/* copied from linux/fs/dcache.c; NOTE(review): presumably has to stay in sync with that file -- confirm */
6381+enum d_walk_ret {
6382+ D_WALK_CONTINUE,
6383+ D_WALK_QUIT,
6384+ D_WALK_NORETRY,
6385+ D_WALK_SKIP,
6386+};
6387+
6388+extern void d_walk(struct dentry *parent, void *data,
6389+ enum d_walk_ret (*enter)(void *, struct dentry *),
6390+ void (*finish)(void *));
6391+
6392+struct ac_dpages_arg { /* context handed to au_call_dpages_append() through d_walk() */
1facf9fc 6393+ int err; /* result of the last au_dpages_append() */
c1595e42
JR
6394+ struct au_dcsub_pages *dpages;
6395+ struct super_block *sb; /* only dentries of this super_block are collected */
6396+ au_dpages_test test; /* optional filter, may be NULL */
6397+ void *arg; /* passed to test */
6398+};
1facf9fc 6399+
c1595e42
JR
6400+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
6401+{ /* d_walk() callback: appends the dentries that pass every check below */
6402+ enum d_walk_ret ret;
6403+ struct ac_dpages_arg *arg = _arg;
1facf9fc 6404+
c1595e42
JR
6405+ ret = D_WALK_CONTINUE;
6406+ if (dentry->d_sb == arg->sb
6407+ && !IS_ROOT(dentry)
6408+ && au_dcount(dentry) > 0
6409+ && au_di(dentry)
6410+ && (!arg->test || arg->test(dentry, arg->arg))) {
6411+ arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
6412+ if (unlikely(arg->err))
6413+ ret = D_WALK_QUIT; /* stop the walk; the error is left in arg->err */
1facf9fc 6414+ }
6415+
c1595e42
JR
6416+ return ret;
6417+}
027c5e7a 6418+
c1595e42
JR
6419+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6420+ au_dpages_test test, void *arg)
6421+{ /* walks below root with d_walk() and collects matching dentries into dpages */
6422+ struct ac_dpages_arg args = {
6423+ .err = 0,
6424+ .dpages = dpages,
6425+ .sb = root->d_sb,
6426+ .test = test,
6427+ .arg = arg
6428+ };
027c5e7a 6429+
c1595e42
JR
6430+ d_walk(root, &args, au_call_dpages_append, NULL);
6431+
6432+ return args.err; /* 0, or the error from au_dpages_append() that stopped the walk */
1facf9fc 6433+}
6434+
6435+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6436+ int do_include, au_dpages_test test, void *arg)
6437+{ /* collects dentry itself (if do_include) and then each ancestor up to the root */
6438+ int err;
6439+
6440+ err = 0;
027c5e7a
AM
6441+ write_seqlock(&rename_lock);
6442+ spin_lock(&dentry->d_lock);
6443+ if (do_include
c1595e42 6444+ && au_dcount(dentry) > 0
027c5e7a 6445+ && (!test || test(dentry, arg)))
1facf9fc 6446+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); /* GFP_ATOMIC: called with d_lock held */
027c5e7a
AM
6447+ spin_unlock(&dentry->d_lock);
6448+ if (unlikely(err))
6449+ goto out;
6450+
6451+ /*
523b37e3 6452+ * RCU for vfsmount is unnecessary since this is a traverse in a single
027c5e7a
AM
6453+ * mount
6454+ */
1facf9fc 6455+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
6456+ dentry = dentry->d_parent; /* rename_lock is locked */
6457+ spin_lock(&dentry->d_lock);
c1595e42 6458+ if (au_dcount(dentry) > 0
027c5e7a 6459+ && (!test || test(dentry, arg)))
1facf9fc 6460+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6461+ spin_unlock(&dentry->d_lock);
6462+ if (unlikely(err))
6463+ break;
1facf9fc 6464+ }
6465+
4f0767ce 6466+out:
027c5e7a 6467+ write_sequnlock(&rename_lock);
1facf9fc 6468+ return err;
6469+}
6470+
027c5e7a
AM
6471+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
6472+{ /* au_dpages_test: true when au_di(dentry) is set and dentry belongs to the super_block passed as arg */
6473+ return au_di(dentry) && dentry->d_sb == arg;
6474+}
6475+
6476+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6477+ struct dentry *dentry, int do_include)
6478+{ /* au_dcsub_pages_rev() filtered by au_dcsub_dpages_aufs() on dentry's own super_block */
6479+ return au_dcsub_pages_rev(dpages, dentry, do_include,
6480+ au_dcsub_dpages_aufs, dentry->d_sb);
6481+}
6482+
4a4d8108 6483+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 6484+{ /* returns path_is_under() for d1 against d2 */
4a4d8108
AM
6485+ struct path path[2] = { /* only .dentry is set; the other members are zero-initialized */
6486+ {
6487+ .dentry = d1
6488+ },
6489+ {
6490+ .dentry = d2
6491+ }
6492+ };
1facf9fc 6493+
4a4d8108 6494+ return path_is_under(path + 0, path + 1);
1facf9fc 6495+}
e8791d4f
AM
6496diff -urNp -x '*.orig' linux-4.9/fs/aufs/dcsub.h linux-4.9/fs/aufs/dcsub.h
6497--- linux-4.9/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
6498+++ linux-4.9/fs/aufs/dcsub.h 2021-02-24 16:15:09.524906971 +0100
5527c038 6499@@ -0,0 +1,136 @@
1facf9fc 6500+/*
ae9dfd79 6501+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 6502+ *
6503+ * This program, aufs is free software; you can redistribute it and/or modify
6504+ * it under the terms of the GNU General Public License as published by
6505+ * the Free Software Foundation; either version 2 of the License, or
6506+ * (at your option) any later version.
dece6358
AM
6507+ *
6508+ * This program is distributed in the hope that it will be useful,
6509+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6510+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6511+ * GNU General Public License for more details.
6512+ *
6513+ * You should have received a copy of the GNU General Public License
523b37e3 6514+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6515+ */
6516+
6517+/*
6518+ * sub-routines for dentry cache
6519+ */
6520+
6521+#ifndef __AUFS_DCSUB_H__
6522+#define __AUFS_DCSUB_H__
6523+
6524+#ifdef __KERNEL__
6525+
7f207e10 6526+#include <linux/dcache.h>
027c5e7a 6527+#include <linux/fs.h>
dece6358 6528+
1facf9fc 6529+struct au_dpage { /* one page worth of collected dentry pointers */
6530+ int ndentry; /* number of entries used in dentries[] */
6531+ struct dentry **dentries;
6532+};
6533+
6534+struct au_dcsub_pages { /* growable array of au_dpage, see au_dpages_init()/au_dpages_free() */
6535+ int ndpage; /* number of elements in dpages[] */
6536+ struct au_dpage *dpages;
6537+};
6538+
6539+/* ---------------------------------------------------------------------- */
6540+
7f207e10 6541+/* dcsub.c */
1facf9fc 6542+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
6543+void au_dpages_free(struct au_dcsub_pages *dpages);
6544+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
6545+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6546+ au_dpages_test test, void *arg);
6547+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6548+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
6549+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6550+ struct dentry *dentry, int do_include);
4a4d8108 6551+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 6552+
7f207e10
AM
6553+/* ---------------------------------------------------------------------- */
6554+
523b37e3
AM
6555+/*
6556+ * todo: in linux-3.13, several similar (but faster) helpers are added to
6557+ * include/linux/dcache.h. Try them (in the future).
6558+ */
6559+
027c5e7a
AM
6560+static inline int au_d_hashed_positive(struct dentry *d)
6561+{ /* 0 when d is hashed, positive and its inode has a non-zero link count; -ENOENT otherwise */
6562+ int err;
5527c038 6563+ struct inode *inode = d_inode(d);
076b876e 6564+
027c5e7a 6565+ err = 0;
5527c038
JR
6566+ if (unlikely(d_unhashed(d)
6567+ || d_is_negative(d)
6568+ || !inode->i_nlink)) /* inode is dereferenced only after the negative check */
027c5e7a
AM
6569+ err = -ENOENT;
6570+ return err;
6571+}
6572+
38d290e6
JR
6573+static inline int au_d_linkable(struct dentry *d)
6574+{ /* like au_d_hashed_positive(), but a positive dentry whose inode has I_LINKABLE set also passes */
6575+ int err;
5527c038 6576+ struct inode *inode = d_inode(d);
076b876e 6577+
38d290e6
JR
6578+ err = au_d_hashed_positive(d);
6579+ if (err
5527c038 6580+ && d_is_positive(d)
38d290e6
JR
6581+ && (inode->i_state & I_LINKABLE))
6582+ err = 0;
6583+ return err;
6584+}
6585+
027c5e7a
AM
6586+static inline int au_d_alive(struct dentry *d)
6587+{ /* 0 when d is still usable, -ENOENT otherwise */
6588+ int err;
6589+ struct inode *inode;
076b876e 6590+
027c5e7a
AM
6591+ err = 0;
6592+ if (!IS_ROOT(d))
6593+ err = au_d_hashed_positive(d);
6594+ else { /* for a root dentry d_unlinked() is tested instead of d_unhashed() */
5527c038
JR
6595+ inode = d_inode(d);
6596+ if (unlikely(d_unlinked(d)
6597+ || d_is_negative(d)
6598+ || !inode->i_nlink))
027c5e7a
AM
6599+ err = -ENOENT;
6600+ }
6601+ return err;
6602+}
6603+
6604+static inline int au_alive_dir(struct dentry *d)
7f207e10 6605+{ /* au_d_alive() plus a check that the inode is not IS_DEADDIR(); 0 or -ENOENT */
027c5e7a 6606+ int err;
076b876e 6607+
027c5e7a 6608+ err = au_d_alive(d);
5527c038 6609+ if (unlikely(err || IS_DEADDIR(d_inode(d))))
027c5e7a
AM
6610+ err = -ENOENT;
6611+ return err;
7f207e10
AM
6612+}
6613+
38d290e6
JR
6614+static inline int au_qstreq(struct qstr *a, struct qstr *b)
6615+{ /* non-zero when both names have the same length and the same bytes */
6616+ return a->len == b->len
6617+ && !memcmp(a->name, b->name, a->len);
6618+}
6619+
7e9cd9fe
AM
6620+/*
6621+ * by the commit
6622+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
6623+ * taking d_lock
6624+ * the type of d_lockref.count became int, but the inlined function d_count()
6625+ * still returns unsigned int.
6626+ * I don't know why. Maybe it is for every d_count() users?
6627+ * Anyway au_dcount() lives on: it returns d_count() cast to int.
6628+ */
c1595e42
JR
6629+static inline int au_dcount(struct dentry *d)
6630+{
6631+ return (int)d_count(d);
6632+}
6633+
1facf9fc 6634+#endif /* __KERNEL__ */
6635+#endif /* __AUFS_DCSUB_H__ */
e8791d4f
AM
6636diff -urNp -x '*.orig' linux-4.9/fs/aufs/debug.c linux-4.9/fs/aufs/debug.c
6637--- linux-4.9/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
6638+++ linux-4.9/fs/aufs/debug.c 2021-02-24 16:15:09.524906971 +0100
f0c0a007 6639@@ -0,0 +1,440 @@
1facf9fc 6640+/*
ae9dfd79 6641+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 6642+ *
6643+ * This program, aufs is free software; you can redistribute it and/or modify
6644+ * it under the terms of the GNU General Public License as published by
6645+ * the Free Software Foundation; either version 2 of the License, or
6646+ * (at your option) any later version.
dece6358
AM
6647+ *
6648+ * This program is distributed in the hope that it will be useful,
6649+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6650+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6651+ * GNU General Public License for more details.
6652+ *
6653+ * You should have received a copy of the GNU General Public License
523b37e3 6654+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6655+ */
6656+
6657+/*
6658+ * debug print functions
6659+ */
6660+
6661+#include "aufs.h"
6662+
392086de
AM
6663+/* Returns 0, or -errno. arg is in kp->arg. */
6664+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
6665+{
6666+ int err, n;
6667+
6668+ err = kstrtoint(val, 0, &n);
6669+ if (!err) {
6670+ if (n > 0) /* any positive value switches debugging on, zero or negative off */
6671+ au_debug_on();
6672+ else
6673+ au_debug_off();
6674+ }
6675+ return err;
6676+}
6677+
6678+/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
6679+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
6680+{
6681+ atomic_t *a;
6682+
6683+ a = kp->arg; /* presumably &aufs_debug, registered by module_param_named() -- confirm */
6684+ return sprintf(buffer, "%d", atomic_read(a));
6685+}
6686+
6687+static struct kernel_param_ops param_ops_atomic_t = {
6688+ .set = param_atomic_t_set,
6689+ .get = param_atomic_t_get
6690+ /* .free, i.e. void (*free)(void *arg), is not set */
6691+};
6692+
6693+atomic_t aufs_debug = ATOMIC_INIT(0); /* exported below as the module parameter "debug" */
1facf9fc 6694+MODULE_PARM_DESC(debug, "debug print");
392086de 6695+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
1facf9fc 6696+
c1595e42 6697+DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
1facf9fc 6698+char *au_plevel = KERN_DEBUG; /* printk level prefix used by dpri() */
e49829fe
JR
6699+#define dpri(fmt, ...) do { \
6700+ if ((au_plevel \
6701+ && strcmp(au_plevel, KERN_DEBUG)) \
6702+ || au_debug_test()) \
6703+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 6704+} while (0) /* prints when au_plevel is set to something other than KERN_DEBUG, or when au_debug_test() is true */
6705+
6706+/* ---------------------------------------------------------------------- */
6707+
6708+void au_dpri_whlist(struct au_nhash *whlist)
6709+{ /* prints branch index, name and name length of every entry in every hash bucket */
6710+ unsigned long ul, n;
6711+ struct hlist_head *head;
c06a8ce3 6712+ struct au_vdir_wh *pos;
1facf9fc 6713+
6714+ n = whlist->nh_num;
6715+ head = whlist->nh_head;
6716+ for (ul = 0; ul < n; ul++) {
c06a8ce3 6717+ hlist_for_each_entry(pos, head, wh_hash)
1facf9fc 6718+ dpri("b%d, %.*s, %d\n",
c06a8ce3
AM
6719+ pos->wh_bindex,
6720+ pos->wh_str.len, pos->wh_str.name,
6721+ pos->wh_str.len);
1facf9fc 6722+ head++;
6723+ }
6724+}
6725+
6726+void au_dpri_vdir(struct au_vdir *vdir)
6727+{ /* prints the summary fields of vdir and the address of each of its blocks */
6728+ unsigned long ul;
6729+ union au_vdir_deblk_p p;
6730+ unsigned char *o;
6731+
6732+ if (!vdir || IS_ERR(vdir)) {
6733+ dpri("err %ld\n", PTR_ERR(vdir)); /* a NULL vdir is printed as "err 0" */
6734+ return;
6735+ }
6736+
6737+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
6738+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
6739+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
6740+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
6741+ p.deblk = vdir->vd_deblk[ul];
6742+ o = p.deblk;
6743+ dpri("[%lu]: %p\n", ul, o);
6744+ }
6745+}
6746+
53392da6 6747+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 6748+ struct dentry *wh)
6749+{ /* prints one line describing inode; returns -1 for a NULL/error pointer, 0 otherwise */
6750+ char *n = NULL;
6751+ int l = 0;
6752+
6753+ if (!inode || IS_ERR(inode)) {
6754+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
6755+ return -1;
6756+ }
6757+
c2b27bf2 6758+ /* the type of i_blocks depends upon CONFIG_LBDAF */
1facf9fc 6759+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
6760+ && sizeof(inode->i_blocks) != sizeof(u64));
6761+ if (wh) { /* the name of wh is appended to the line as ", wh <name>" */
6762+ n = (void *)wh->d_name.name;
6763+ l = wh->d_name.len;
6764+ }
6765+
53392da6
AM
6766+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
6767+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
6768+ bindex, inode,
1facf9fc 6769+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
6770+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
6771+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 6772+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff, /* only the low 16 bits of ctime in ns */
1facf9fc 6773+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
6774+ inode->i_state, inode->i_flags, inode->i_version,
6775+ inode->i_generation,
1facf9fc 6776+ l ? ", wh " : "", l, n);
6777+ return 0;
6778+}
6779+
6780+void au_dpri_inode(struct inode *inode)
6781+{
6782+ struct au_iinfo *iinfo;
5afbbe0d 6783+ struct au_hinode *hi;
1facf9fc 6784+ aufs_bindex_t bindex;
53392da6 6785+ int err, hn;
1facf9fc 6786+
53392da6 6787+ err = do_pri_inode(-1, inode, -1, NULL);
5afbbe0d 6788+ if (err || !au_test_aufs(inode->i_sb) || au_is_bad_inode(inode))
1facf9fc 6789+ return;
6790+
6791+ iinfo = au_ii(inode);
5afbbe0d
AM
6792+ dpri("i-1: btop %d, bbot %d, gen %d\n",
6793+ iinfo->ii_btop, iinfo->ii_bbot, au_iigen(inode, NULL));
6794+ if (iinfo->ii_btop < 0)
1facf9fc 6795+ return;
53392da6 6796+ hn = 0;
5afbbe0d
AM
6797+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; bindex++) {
6798+ hi = au_hinode(iinfo, bindex);
6799+ hn = !!au_hn(hi);
6800+ do_pri_inode(bindex, hi->hi_inode, hn, hi->hi_whdentry);
53392da6 6801+ }
1facf9fc 6802+}
6803+
2cbb1c4b
JR
6804+void au_dpri_dalias(struct inode *inode)
6805+{
6806+ struct dentry *d;
6807+
6808+ spin_lock(&inode->i_lock);
c1595e42 6809+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
2cbb1c4b
JR
6810+ au_dpri_dentry(d);
6811+ spin_unlock(&inode->i_lock);
6812+}
6813+
1facf9fc 6814+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
6815+{
6816+ struct dentry *wh = NULL;
53392da6 6817+ int hn;
5afbbe0d 6818+ struct inode *inode;
076b876e 6819+ struct au_iinfo *iinfo;
5afbbe0d 6820+ struct au_hinode *hi;
1facf9fc 6821+
6822+ if (!dentry || IS_ERR(dentry)) {
6823+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
6824+ return -1;
6825+ }
6826+ /* do not call dget_parent() here */
027c5e7a 6827+ /* note: access d_xxx without d_lock */
523b37e3
AM
6828+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
6829+ bindex, dentry, dentry,
1facf9fc 6830+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
c1595e42 6831+ au_dcount(dentry), dentry->d_flags,
523b37e3 6832+ d_unhashed(dentry) ? "un" : "");
53392da6 6833+ hn = -1;
5afbbe0d
AM
6834+ inode = NULL;
6835+ if (d_is_positive(dentry))
6836+ inode = d_inode(dentry);
6837+ if (inode
6838+ && au_test_aufs(dentry->d_sb)
6839+ && bindex >= 0
6840+ && !au_is_bad_inode(inode)) {
6841+ iinfo = au_ii(inode);
6842+ hi = au_hinode(iinfo, bindex);
6843+ hn = !!au_hn(hi);
6844+ wh = hi->hi_whdentry;
6845+ }
6846+ do_pri_inode(bindex, inode, hn, wh);
1facf9fc 6847+ return 0;
6848+}
6849+
6850+void au_dpri_dentry(struct dentry *dentry)
6851+{
6852+ struct au_dinfo *dinfo;
6853+ aufs_bindex_t bindex;
6854+ int err;
6855+
6856+ err = do_pri_dentry(-1, dentry);
6857+ if (err || !au_test_aufs(dentry->d_sb))
6858+ return;
6859+
6860+ dinfo = au_di(dentry);
6861+ if (!dinfo)
6862+ return;
5afbbe0d
AM
6863+ dpri("d-1: btop %d, bbot %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
6864+ dinfo->di_btop, dinfo->di_bbot,
38d290e6
JR
6865+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
6866+ dinfo->di_tmpfile);
5afbbe0d 6867+ if (dinfo->di_btop < 0)
1facf9fc 6868+ return;
5afbbe0d
AM
6869+ for (bindex = dinfo->di_btop; bindex <= dinfo->di_bbot; bindex++)
6870+ do_pri_dentry(bindex, au_hdentry(dinfo, bindex)->hd_dentry);
1facf9fc 6871+}
6872+
6873+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
6874+{
6875+ char a[32];
6876+
6877+ if (!file || IS_ERR(file)) {
6878+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
6879+ return -1;
6880+ }
6881+ a[0] = 0;
6882+ if (bindex < 0
b912730e 6883+ && !IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 6884+ && au_test_aufs(file->f_path.dentry->d_sb)
1facf9fc 6885+ && au_fi(file))
e49829fe 6886+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 6887+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 6888+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 6889+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 6890+ file->f_version, file->f_pos, a);
b912730e 6891+ if (!IS_ERR_OR_NULL(file->f_path.dentry))
2000de60 6892+ do_pri_dentry(bindex, file->f_path.dentry);
1facf9fc 6893+ return 0;
6894+}
6895+
6896+void au_dpri_file(struct file *file)
6897+{
6898+ struct au_finfo *finfo;
4a4d8108
AM
6899+ struct au_fidir *fidir;
6900+ struct au_hfile *hfile;
1facf9fc 6901+ aufs_bindex_t bindex;
6902+ int err;
6903+
6904+ err = do_pri_file(-1, file);
2000de60 6905+ if (err
b912730e 6906+ || IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 6907+ || !au_test_aufs(file->f_path.dentry->d_sb))
1facf9fc 6908+ return;
6909+
6910+ finfo = au_fi(file);
6911+ if (!finfo)
6912+ return;
4a4d8108 6913+ if (finfo->fi_btop < 0)
1facf9fc 6914+ return;
4a4d8108
AM
6915+ fidir = finfo->fi_hdir;
6916+ if (!fidir)
6917+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
6918+ else
e49829fe
JR
6919+ for (bindex = finfo->fi_btop;
6920+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
6921+ bindex++) {
6922+ hfile = fidir->fd_hfile + bindex;
6923+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
6924+ }
1facf9fc 6925+}
6926+
6927+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
6928+{
6929+ struct vfsmount *mnt;
6930+ struct super_block *sb;
6931+
6932+ if (!br || IS_ERR(br))
6933+ goto out;
86dc4139 6934+ mnt = au_br_mnt(br);
1facf9fc 6935+ if (!mnt || IS_ERR(mnt))
6936+ goto out;
6937+ sb = mnt->mnt_sb;
6938+ if (!sb || IS_ERR(sb))
6939+ goto out;
6940+
5afbbe0d 6941+ dpri("s%d: {perm 0x%x, id %d, cnt %lld, wbr %p}, "
b752ccd1 6942+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 6943+ "xino %d\n",
5afbbe0d 6944+ bindex, br->br_perm, br->br_id, au_br_count(br),
1e00d052 6945+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 6946+ sb->s_flags, sb->s_count,
1facf9fc 6947+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
6948+ return 0;
6949+
4f0767ce 6950+out:
1facf9fc 6951+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
6952+ return -1;
6953+}
6954+
6955+void au_dpri_sb(struct super_block *sb)
6956+{
6957+ struct au_sbinfo *sbinfo;
6958+ aufs_bindex_t bindex;
6959+ int err;
6960+ /* to reduce stack size */
6961+ struct {
6962+ struct vfsmount mnt;
6963+ struct au_branch fake;
6964+ } *a;
6965+
6966+ /* this function can be called from magic sysrq */
6967+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
6968+ if (unlikely(!a)) {
6969+ dpri("no memory\n");
6970+ return;
6971+ }
6972+
6973+ a->mnt.mnt_sb = sb;
86dc4139 6974+ a->fake.br_path.mnt = &a->mnt;
5afbbe0d 6975+ au_br_count_init(&a->fake);
1facf9fc 6976+ err = do_pri_br(-1, &a->fake);
5afbbe0d 6977+ au_br_count_fin(&a->fake);
ae9dfd79 6978+ kfree(a);
1facf9fc 6979+ dpri("dev 0x%x\n", sb->s_dev);
6980+ if (err || !au_test_aufs(sb))
6981+ return;
6982+
6983+ sbinfo = au_sbi(sb);
6984+ if (!sbinfo)
6985+ return;
f0c0a007
AM
6986+ dpri("nw %d, gen %u, kobj %d\n",
6987+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
1facf9fc 6988+ atomic_read(&sbinfo->si_kobj.kref.refcount));
5afbbe0d 6989+ for (bindex = 0; bindex <= sbinfo->si_bbot; bindex++)
1facf9fc 6990+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
6991+}
6992+
6993+/* ---------------------------------------------------------------------- */
6994+
027c5e7a
AM
6995+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
6996+{
5527c038 6997+ struct inode *h_inode, *inode = d_inode(dentry);
027c5e7a 6998+ struct dentry *h_dentry;
5afbbe0d 6999+ aufs_bindex_t bindex, bbot, bi;
027c5e7a
AM
7000+
7001+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
7002+ return;
7003+
5afbbe0d
AM
7004+ bbot = au_dbbot(dentry);
7005+ bi = au_ibbot(inode);
7006+ if (bi < bbot)
7007+ bbot = bi;
7008+ bindex = au_dbtop(dentry);
7009+ bi = au_ibtop(inode);
027c5e7a
AM
7010+ if (bi > bindex)
7011+ bindex = bi;
7012+
5afbbe0d 7013+ for (; bindex <= bbot; bindex++) {
027c5e7a
AM
7014+ h_dentry = au_h_dptr(dentry, bindex);
7015+ if (!h_dentry)
7016+ continue;
7017+ h_inode = au_h_iptr(inode, bindex);
5527c038 7018+ if (unlikely(h_inode != d_inode(h_dentry))) {
392086de 7019+ au_debug_on();
027c5e7a
AM
7020+ AuDbg("b%d, %s:%d\n", bindex, func, line);
7021+ AuDbgDentry(dentry);
7022+ AuDbgInode(inode);
392086de 7023+ au_debug_off();
027c5e7a
AM
7024+ BUG();
7025+ }
7026+ }
7027+}
7028+
1facf9fc 7029+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
7030+{
7031+ int err, i, j;
7032+ struct au_dcsub_pages dpages;
7033+ struct au_dpage *dpage;
7034+ struct dentry **dentries;
7035+
7036+ err = au_dpages_init(&dpages, GFP_NOFS);
7037+ AuDebugOn(err);
027c5e7a 7038+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 7039+ AuDebugOn(err);
7040+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
7041+ dpage = dpages.dpages + i;
7042+ dentries = dpage->dentries;
7043+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 7044+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 7045+ }
7046+ au_dpages_free(&dpages);
7047+}
7048+
1facf9fc 7049+void au_dbg_verify_kthread(void)
7050+{
53392da6 7051+ if (au_wkq_test()) {
1facf9fc 7052+ au_dbg_blocked();
1e00d052
AM
7053+ /*
7054+ * It may be recursive, but udba=notify between two aufs mounts,
7055+ * where a single ro branch is shared, is not a problem.
7056+ */
7057+ /* WARN_ON(1); */
1facf9fc 7058+ }
7059+}
7060+
7061+/* ---------------------------------------------------------------------- */
7062+
1facf9fc 7063+int __init au_debug_init(void)
7064+{
7065+ aufs_bindex_t bindex;
7066+ struct au_vdir_destr destr;
7067+
7068+ bindex = -1;
7069+ AuDebugOn(bindex >= 0);
7070+
7071+ destr.len = -1;
7072+ AuDebugOn(destr.len < NAME_MAX);
7073+
7074+#ifdef CONFIG_4KSTACKS
0c3ec466 7075+ pr_warn("CONFIG_4KSTACKS is defined.\n");
1facf9fc 7076+#endif
7077+
1facf9fc 7078+ return 0;
7079+}
e8791d4f
AM
7080diff -urNp -x '*.orig' linux-4.9/fs/aufs/debug.h linux-4.9/fs/aufs/debug.h
7081--- linux-4.9/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
7082+++ linux-4.9/fs/aufs/debug.h 2021-02-24 16:15:09.524906971 +0100
5527c038 7083@@ -0,0 +1,225 @@
1facf9fc 7084+/*
ae9dfd79 7085+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 7086+ *
7087+ * This program, aufs is free software; you can redistribute it and/or modify
7088+ * it under the terms of the GNU General Public License as published by
7089+ * the Free Software Foundation; either version 2 of the License, or
7090+ * (at your option) any later version.
dece6358
AM
7091+ *
7092+ * This program is distributed in the hope that it will be useful,
7093+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7094+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7095+ * GNU General Public License for more details.
7096+ *
7097+ * You should have received a copy of the GNU General Public License
523b37e3 7098+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7099+ */
7100+
7101+/*
7102+ * debug print functions
7103+ */
7104+
7105+#ifndef __AUFS_DEBUG_H__
7106+#define __AUFS_DEBUG_H__
7107+
7108+#ifdef __KERNEL__
7109+
392086de 7110+#include <linux/atomic.h>
4a4d8108
AM
7111+#include <linux/module.h>
7112+#include <linux/kallsyms.h>
1facf9fc 7113+#include <linux/sysrq.h>
4a4d8108 7114+
1facf9fc 7115+#ifdef CONFIG_AUFS_DEBUG
7116+#define AuDebugOn(a) BUG_ON(a)
7117+
7118+/* module parameter */
392086de
AM
7119+extern atomic_t aufs_debug;
7120+static inline void au_debug_on(void)
1facf9fc 7121+{
392086de
AM
7122+ atomic_inc(&aufs_debug);
7123+}
7124+static inline void au_debug_off(void)
7125+{
7126+ atomic_dec_if_positive(&aufs_debug);
1facf9fc 7127+}
7128+
7129+static inline int au_debug_test(void)
7130+{
392086de 7131+ return atomic_read(&aufs_debug) > 0;
1facf9fc 7132+}
7133+#else
7134+#define AuDebugOn(a) do {} while (0)
392086de
AM
7135+AuStubVoid(au_debug_on, void)
7136+AuStubVoid(au_debug_off, void)
4a4d8108 7137+AuStubInt0(au_debug_test, void)
1facf9fc 7138+#endif /* CONFIG_AUFS_DEBUG */
7139+
392086de
AM
7140+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
7141+
1facf9fc 7142+/* ---------------------------------------------------------------------- */
7143+
7144+/* debug print */
7145+
4a4d8108 7146+#define AuDbg(fmt, ...) do { \
1facf9fc 7147+ if (au_debug_test()) \
4a4d8108 7148+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
1facf9fc 7149+} while (0)
4a4d8108
AM
7150+#define AuLabel(l) AuDbg(#l "\n")
7151+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
7152+#define AuWarn1(fmt, ...) do { \
1facf9fc 7153+ static unsigned char _c; \
7154+ if (!_c++) \
0c3ec466 7155+ pr_warn(fmt, ##__VA_ARGS__); \
1facf9fc 7156+} while (0)
7157+
4a4d8108 7158+#define AuErr1(fmt, ...) do { \
1facf9fc 7159+ static unsigned char _c; \
7160+ if (!_c++) \
4a4d8108 7161+ pr_err(fmt, ##__VA_ARGS__); \
1facf9fc 7162+} while (0)
7163+
4a4d8108 7164+#define AuIOErr1(fmt, ...) do { \
1facf9fc 7165+ static unsigned char _c; \
7166+ if (!_c++) \
4a4d8108 7167+ AuIOErr(fmt, ##__VA_ARGS__); \
1facf9fc 7168+} while (0)
7169+
7170+#define AuUnsupportMsg "This operation is not supported." \
7171+ " Please report this application to aufs-users ML."
4a4d8108
AM
7172+#define AuUnsupport(fmt, ...) do { \
7173+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
1facf9fc 7174+ dump_stack(); \
7175+} while (0)
7176+
7177+#define AuTraceErr(e) do { \
7178+ if (unlikely((e) < 0)) \
7179+ AuDbg("err %d\n", (int)(e)); \
7180+} while (0)
7181+
7182+#define AuTraceErrPtr(p) do { \
7183+ if (IS_ERR(p)) \
7184+ AuDbg("err %ld\n", PTR_ERR(p)); \
7185+} while (0)
7186+
7187+/* dirty macros for debug print, use with "%.*s" and caution */
7188+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
1facf9fc 7189+
7190+/* ---------------------------------------------------------------------- */
7191+
dece6358 7192+struct dentry;
1facf9fc 7193+#ifdef CONFIG_AUFS_DEBUG
c1595e42 7194+extern struct mutex au_dbg_mtx;
1facf9fc 7195+extern char *au_plevel;
7196+struct au_nhash;
7197+void au_dpri_whlist(struct au_nhash *whlist);
7198+struct au_vdir;
7199+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 7200+struct inode;
1facf9fc 7201+void au_dpri_inode(struct inode *inode);
2cbb1c4b 7202+void au_dpri_dalias(struct inode *inode);
1facf9fc 7203+void au_dpri_dentry(struct dentry *dentry);
dece6358 7204+struct file;
1facf9fc 7205+void au_dpri_file(struct file *filp);
dece6358 7206+struct super_block;
1facf9fc 7207+void au_dpri_sb(struct super_block *sb);
7208+
027c5e7a
AM
7209+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
7210+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 7211+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 7212+void au_dbg_verify_kthread(void);
7213+
7214+int __init au_debug_init(void);
7e9cd9fe 7215+
1facf9fc 7216+#define AuDbgWhlist(w) do { \
c1595e42 7217+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7218+ AuDbg(#w "\n"); \
7219+ au_dpri_whlist(w); \
c1595e42 7220+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7221+} while (0)
7222+
7223+#define AuDbgVdir(v) do { \
c1595e42 7224+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7225+ AuDbg(#v "\n"); \
7226+ au_dpri_vdir(v); \
c1595e42 7227+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7228+} while (0)
7229+
7230+#define AuDbgInode(i) do { \
c1595e42 7231+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7232+ AuDbg(#i "\n"); \
7233+ au_dpri_inode(i); \
c1595e42 7234+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7235+} while (0)
7236+
2cbb1c4b 7237+#define AuDbgDAlias(i) do { \
c1595e42 7238+ mutex_lock(&au_dbg_mtx); \
2cbb1c4b
JR
7239+ AuDbg(#i "\n"); \
7240+ au_dpri_dalias(i); \
c1595e42 7241+ mutex_unlock(&au_dbg_mtx); \
2cbb1c4b
JR
7242+} while (0)
7243+
1facf9fc 7244+#define AuDbgDentry(d) do { \
c1595e42 7245+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7246+ AuDbg(#d "\n"); \
7247+ au_dpri_dentry(d); \
c1595e42 7248+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7249+} while (0)
7250+
7251+#define AuDbgFile(f) do { \
c1595e42 7252+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7253+ AuDbg(#f "\n"); \
7254+ au_dpri_file(f); \
c1595e42 7255+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7256+} while (0)
7257+
7258+#define AuDbgSb(sb) do { \
c1595e42 7259+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7260+ AuDbg(#sb "\n"); \
7261+ au_dpri_sb(sb); \
c1595e42 7262+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7263+} while (0)
7264+
4a4d8108
AM
7265+#define AuDbgSym(addr) do { \
7266+ char sym[KSYM_SYMBOL_LEN]; \
7267+ sprint_symbol(sym, (unsigned long)addr); \
7268+ AuDbg("%s\n", sym); \
7269+} while (0)
1facf9fc 7270+#else
027c5e7a 7271+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
7272+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
7273+AuStubVoid(au_dbg_verify_kthread, void)
7274+AuStubInt0(__init au_debug_init, void)
1facf9fc 7275+
1facf9fc 7276+#define AuDbgWhlist(w) do {} while (0)
7277+#define AuDbgVdir(v) do {} while (0)
7278+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 7279+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 7280+#define AuDbgDentry(d) do {} while (0)
7281+#define AuDbgFile(f) do {} while (0)
7282+#define AuDbgSb(sb) do {} while (0)
4a4d8108 7283+#define AuDbgSym(addr) do {} while (0)
1facf9fc 7284+#endif /* CONFIG_AUFS_DEBUG */
7285+
7286+/* ---------------------------------------------------------------------- */
7287+
7288+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
7289+int __init au_sysrq_init(void);
7290+void au_sysrq_fin(void);
7291+
7292+#ifdef CONFIG_HW_CONSOLE
7293+#define au_dbg_blocked() do { \
7294+ WARN_ON(1); \
0c5527e5 7295+ handle_sysrq('w'); \
1facf9fc 7296+} while (0)
7297+#else
4a4d8108 7298+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7299+#endif
7300+
7301+#else
4a4d8108
AM
7302+AuStubInt0(__init au_sysrq_init, void)
7303+AuStubVoid(au_sysrq_fin, void)
7304+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7305+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
7306+
7307+#endif /* __KERNEL__ */
7308+#endif /* __AUFS_DEBUG_H__ */
e8791d4f
AM
7309diff -urNp -x '*.orig' linux-4.9/fs/aufs/dentry.c linux-4.9/fs/aufs/dentry.c
7310--- linux-4.9/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
7311+++ linux-4.9/fs/aufs/dentry.c 2021-02-24 16:15:09.524906971 +0100
ae9dfd79 7312@@ -0,0 +1,1152 @@
1facf9fc 7313+/*
ae9dfd79 7314+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 7315+ *
7316+ * This program, aufs is free software; you can redistribute it and/or modify
7317+ * it under the terms of the GNU General Public License as published by
7318+ * the Free Software Foundation; either version 2 of the License, or
7319+ * (at your option) any later version.
dece6358
AM
7320+ *
7321+ * This program is distributed in the hope that it will be useful,
7322+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7323+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7324+ * GNU General Public License for more details.
7325+ *
7326+ * You should have received a copy of the GNU General Public License
523b37e3 7327+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7328+ */
7329+
7330+/*
7331+ * lookup and dentry operations
7332+ */
7333+
dece6358 7334+#include <linux/namei.h>
1facf9fc 7335+#include "aufs.h"
7336+
1facf9fc 7337+/*
7338+ * returns positive/negative dentry, NULL or an error.
7339+ * NULL means whiteout-ed or not-found.
7340+ */
7341+static struct dentry*
7342+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
ae9dfd79 7343+ aufs_bindex_t bindex, struct au_do_lookup_args *args)
1facf9fc 7344+{
7345+ struct dentry *h_dentry;
2000de60 7346+ struct inode *h_inode;
1facf9fc 7347+ struct au_branch *br;
7348+ int wh_found, opq;
7349+ unsigned char wh_able;
7350+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
076b876e
AM
7351+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
7352+ IGNORE_PERM);
1facf9fc 7353+
1facf9fc 7354+ wh_found = 0;
7355+ br = au_sbr(dentry->d_sb, bindex);
7356+ wh_able = !!au_br_whable(br->br_perm);
7357+ if (wh_able)
ae9dfd79 7358+ wh_found = au_wh_test(h_parent, &args->whname, ignore_perm);
1facf9fc 7359+ h_dentry = ERR_PTR(wh_found);
7360+ if (!wh_found)
7361+ goto real_lookup;
7362+ if (unlikely(wh_found < 0))
7363+ goto out;
7364+
7365+ /* We found a whiteout */
5afbbe0d 7366+ /* au_set_dbbot(dentry, bindex); */
1facf9fc 7367+ au_set_dbwh(dentry, bindex);
7368+ if (!allow_neg)
7369+ return NULL; /* success */
7370+
4f0767ce 7371+real_lookup:
076b876e 7372+ if (!ignore_perm)
ae9dfd79 7373+ h_dentry = vfsub_lkup_one(args->name, h_parent);
076b876e 7374+ else
ae9dfd79 7375+ h_dentry = au_sio_lkup_one(args->name, h_parent);
2000de60
JR
7376+ if (IS_ERR(h_dentry)) {
7377+ if (PTR_ERR(h_dentry) == -ENAMETOOLONG
7378+ && !allow_neg)
7379+ h_dentry = NULL;
1facf9fc 7380+ goto out;
2000de60 7381+ }
1facf9fc 7382+
5527c038
JR
7383+ h_inode = d_inode(h_dentry);
7384+ if (d_is_negative(h_dentry)) {
1facf9fc 7385+ if (!allow_neg)
7386+ goto out_neg;
7387+ } else if (wh_found
7388+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
7389+ goto out_neg;
ae9dfd79
AM
7390+ else if (au_ftest_lkup(args->flags, DIRREN)
7391+ /* && h_inode */
7392+ && !au_dr_lkup_h_ino(args, bindex, h_inode->i_ino)) {
7393+ AuDbg("b%d %pd ignored hi%llu\n", bindex, h_dentry,
7394+ (unsigned long long)h_inode->i_ino);
7395+ goto out_neg;
7396+ }
1facf9fc 7397+
5afbbe0d
AM
7398+ if (au_dbbot(dentry) <= bindex)
7399+ au_set_dbbot(dentry, bindex);
7400+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
7401+ au_set_dbtop(dentry, bindex);
1facf9fc 7402+ au_set_h_dptr(dentry, bindex, h_dentry);
7403+
2000de60
JR
7404+ if (!d_is_dir(h_dentry)
7405+ || !wh_able
5527c038 7406+ || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
1facf9fc 7407+ goto out; /* success */
7408+
ae9dfd79 7409+ vfsub_inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
076b876e 7410+ opq = au_diropq_test(h_dentry);
ae9dfd79 7411+ inode_unlock_shared(h_inode);
1facf9fc 7412+ if (opq > 0)
7413+ au_set_dbdiropq(dentry, bindex);
7414+ else if (unlikely(opq < 0)) {
7415+ au_set_h_dptr(dentry, bindex, NULL);
7416+ h_dentry = ERR_PTR(opq);
7417+ }
7418+ goto out;
7419+
4f0767ce 7420+out_neg:
1facf9fc 7421+ dput(h_dentry);
7422+ h_dentry = NULL;
4f0767ce 7423+out:
1facf9fc 7424+ return h_dentry;
7425+}
7426+
dece6358
AM
7427+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
7428+{
7429+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
7430+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
7431+ return -EPERM;
7432+ return 0;
7433+}
7434+
1facf9fc 7435+/*
7436+ * returns the number of lower positive dentries,
7437+ * otherwise an error.
7438+ * can be called at unlinking, when @type is zero.
7439+ */
5afbbe0d
AM
7440+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
7441+ unsigned int flags)
1facf9fc 7442+{
7443+ int npositive, err;
7444+ aufs_bindex_t bindex, btail, bdiropq;
ae9dfd79 7445+ unsigned char isdir, dirperm1, dirren;
1facf9fc 7446+ struct au_do_lookup_args args = {
ae9dfd79
AM
7447+ .flags = flags,
7448+ .name = &dentry->d_name
1facf9fc 7449+ };
1facf9fc 7450+ struct dentry *parent;
076b876e 7451+ struct super_block *sb;
1facf9fc 7452+
076b876e 7453+ sb = dentry->d_sb;
ae9dfd79 7454+ err = au_test_shwh(sb, args.name);
dece6358 7455+ if (unlikely(err))
1facf9fc 7456+ goto out;
7457+
ae9dfd79 7458+ err = au_wh_name_alloc(&args.whname, args.name);
1facf9fc 7459+ if (unlikely(err))
7460+ goto out;
7461+
2000de60 7462+ isdir = !!d_is_dir(dentry);
076b876e 7463+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
ae9dfd79
AM
7464+ dirren = !!au_opt_test(au_mntflags(sb), DIRREN);
7465+ if (dirren)
7466+ au_fset_lkup(args.flags, DIRREN);
1facf9fc 7467+
7468+ npositive = 0;
4a4d8108 7469+ parent = dget_parent(dentry);
1facf9fc 7470+ btail = au_dbtaildir(parent);
5afbbe0d 7471+ for (bindex = btop; bindex <= btail; bindex++) {
1facf9fc 7472+ struct dentry *h_parent, *h_dentry;
7473+ struct inode *h_inode, *h_dir;
ae9dfd79 7474+ struct au_branch *br;
1facf9fc 7475+
7476+ h_dentry = au_h_dptr(dentry, bindex);
7477+ if (h_dentry) {
5527c038 7478+ if (d_is_positive(h_dentry))
1facf9fc 7479+ npositive++;
5afbbe0d 7480+ break;
1facf9fc 7481+ }
7482+ h_parent = au_h_dptr(parent, bindex);
2000de60 7483+ if (!h_parent || !d_is_dir(h_parent))
1facf9fc 7484+ continue;
7485+
ae9dfd79
AM
7486+ if (dirren) {
7487+ /* if the inum matches, then use the prepared name */
7488+ err = au_dr_lkup_name(&args, bindex);
7489+ if (unlikely(err))
7490+ goto out_parent;
7491+ }
7492+
5527c038 7493+ h_dir = d_inode(h_parent);
ae9dfd79
AM
7494+ vfsub_inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
7495+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &args);
7496+ inode_unlock_shared(h_dir);
1facf9fc 7497+ err = PTR_ERR(h_dentry);
7498+ if (IS_ERR(h_dentry))
4a4d8108 7499+ goto out_parent;
2000de60
JR
7500+ if (h_dentry)
7501+ au_fclr_lkup(args.flags, ALLOW_NEG);
076b876e
AM
7502+ if (dirperm1)
7503+ au_fset_lkup(args.flags, IGNORE_PERM);
1facf9fc 7504+
79b8bda9 7505+ if (au_dbwh(dentry) == bindex)
1facf9fc 7506+ break;
7507+ if (!h_dentry)
7508+ continue;
5527c038 7509+ if (d_is_negative(h_dentry))
1facf9fc 7510+ continue;
5527c038 7511+ h_inode = d_inode(h_dentry);
1facf9fc 7512+ npositive++;
7513+ if (!args.type)
7514+ args.type = h_inode->i_mode & S_IFMT;
7515+ if (args.type != S_IFDIR)
7516+ break;
7517+ else if (isdir) {
7518+ /* the type of lower may be different */
7519+ bdiropq = au_dbdiropq(dentry);
7520+ if (bdiropq >= 0 && bdiropq <= bindex)
7521+ break;
7522+ }
ae9dfd79
AM
7523+ br = au_sbr(sb, bindex);
7524+ if (dirren
7525+ && au_dr_hino_test_add(&br->br_dirren, h_inode->i_ino,
7526+ /*add_ent*/NULL)) {
7527+ /* prepare next name to lookup */
7528+ err = au_dr_lkup(&args, dentry, bindex);
7529+ if (unlikely(err))
7530+ goto out_parent;
7531+ }
1facf9fc 7532+ }
7533+
7534+ if (npositive) {
7535+ AuLabel(positive);
5afbbe0d 7536+ au_update_dbtop(dentry);
1facf9fc 7537+ }
7538+ err = npositive;
076b876e 7539+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
5afbbe0d 7540+ && au_dbtop(dentry) < 0)) {
1facf9fc 7541+ err = -EIO;
523b37e3
AM
7542+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
7543+ dentry, err);
027c5e7a 7544+ }
1facf9fc 7545+
4f0767ce 7546+out_parent:
4a4d8108 7547+ dput(parent);
ae9dfd79
AM
7548+ kfree(args.whname.name);
7549+ if (dirren)
7550+ au_dr_lkup_fin(&args);
4f0767ce 7551+out:
1facf9fc 7552+ return err;
7553+}
7554+
076b876e 7555+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
1facf9fc 7556+{
7557+ struct dentry *dentry;
7558+ int wkq_err;
7559+
5527c038 7560+ if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
b4510431 7561+ dentry = vfsub_lkup_one(name, parent);
1facf9fc 7562+ else {
b4510431
AM
7563+ struct vfsub_lkup_one_args args = {
7564+ .errp = &dentry,
7565+ .name = name,
7566+ .parent = parent
1facf9fc 7567+ };
7568+
b4510431 7569+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
1facf9fc 7570+ if (unlikely(wkq_err))
7571+ dentry = ERR_PTR(wkq_err);
7572+ }
7573+
7574+ return dentry;
7575+}
7576+
7577+/*
7578+ * lookup @dentry on @bindex which should be negative.
7579+ */
86dc4139 7580+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
1facf9fc 7581+{
7582+ int err;
7583+ struct dentry *parent, *h_parent, *h_dentry;
86dc4139 7584+ struct au_branch *br;
1facf9fc 7585+
1facf9fc 7586+ parent = dget_parent(dentry);
7587+ h_parent = au_h_dptr(parent, bindex);
86dc4139
AM
7588+ br = au_sbr(dentry->d_sb, bindex);
7589+ if (wh)
7590+ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
7591+ else
076b876e 7592+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
1facf9fc 7593+ err = PTR_ERR(h_dentry);
7594+ if (IS_ERR(h_dentry))
7595+ goto out;
5527c038 7596+ if (unlikely(d_is_positive(h_dentry))) {
1facf9fc 7597+ err = -EIO;
523b37e3 7598+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
1facf9fc 7599+ dput(h_dentry);
7600+ goto out;
7601+ }
7602+
4a4d8108 7603+ err = 0;
5afbbe0d
AM
7604+ if (bindex < au_dbtop(dentry))
7605+ au_set_dbtop(dentry, bindex);
7606+ if (au_dbbot(dentry) < bindex)
7607+ au_set_dbbot(dentry, bindex);
1facf9fc 7608+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 7609+
4f0767ce 7610+out:
1facf9fc 7611+ dput(parent);
7612+ return err;
7613+}
7614+
7615+/* ---------------------------------------------------------------------- */
7616+
7617+/* subset of struct inode */
7618+struct au_iattr {
7619+ unsigned long i_ino;
7620+ /* unsigned int i_nlink; */
0c3ec466
AM
7621+ kuid_t i_uid;
7622+ kgid_t i_gid;
1facf9fc 7623+ u64 i_version;
7624+/*
7625+ loff_t i_size;
7626+ blkcnt_t i_blocks;
7627+*/
7628+ umode_t i_mode;
7629+};
7630+
7631+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
7632+{
7633+ ia->i_ino = h_inode->i_ino;
7634+ /* ia->i_nlink = h_inode->i_nlink; */
7635+ ia->i_uid = h_inode->i_uid;
7636+ ia->i_gid = h_inode->i_gid;
7637+ ia->i_version = h_inode->i_version;
7638+/*
7639+ ia->i_size = h_inode->i_size;
7640+ ia->i_blocks = h_inode->i_blocks;
7641+*/
7642+ ia->i_mode = (h_inode->i_mode & S_IFMT);
7643+}
7644+
7645+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
7646+{
7647+ return ia->i_ino != h_inode->i_ino
7648+ /* || ia->i_nlink != h_inode->i_nlink */
0c3ec466 7649+ || !uid_eq(ia->i_uid, h_inode->i_uid)
2dfbb274 7650+ || !gid_eq(ia->i_gid, h_inode->i_gid)
1facf9fc 7651+ || ia->i_version != h_inode->i_version
7652+/*
7653+ || ia->i_size != h_inode->i_size
7654+ || ia->i_blocks != h_inode->i_blocks
7655+*/
7656+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
7657+}
7658+
7659+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
7660+ struct au_branch *br)
7661+{
7662+ int err;
7663+ struct au_iattr ia;
7664+ struct inode *h_inode;
7665+ struct dentry *h_d;
7666+ struct super_block *h_sb;
7667+
7668+ err = 0;
7669+ memset(&ia, -1, sizeof(ia));
7670+ h_sb = h_dentry->d_sb;
5527c038
JR
7671+ h_inode = NULL;
7672+ if (d_is_positive(h_dentry)) {
7673+ h_inode = d_inode(h_dentry);
1facf9fc 7674+ au_iattr_save(&ia, h_inode);
5527c038 7675+ } else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
1facf9fc 7676+ /* nfs d_revalidate may return 0 for negative dentry */
7677+ /* fuse d_revalidate always returns 0 for negative dentry */
7678+ goto out;
7679+
7680+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
b4510431 7681+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
1facf9fc 7682+ err = PTR_ERR(h_d);
7683+ if (IS_ERR(h_d))
7684+ goto out;
7685+
7686+ err = 0;
7687+ if (unlikely(h_d != h_dentry
5527c038 7688+ || d_inode(h_d) != h_inode
1facf9fc 7689+ || (h_inode && au_iattr_test(&ia, h_inode))))
7690+ err = au_busy_or_stale();
7691+ dput(h_d);
7692+
4f0767ce 7693+out:
1facf9fc 7694+ AuTraceErr(err);
7695+ return err;
7696+}
7697+
7698+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
7699+ struct dentry *h_parent, struct au_branch *br)
7700+{
7701+ int err;
7702+
7703+ err = 0;
027c5e7a
AM
7704+ if (udba == AuOpt_UDBA_REVAL
7705+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 7706+ IMustLock(h_dir);
5527c038 7707+ err = (d_inode(h_dentry->d_parent) != h_dir);
027c5e7a 7708+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 7709+ err = au_h_verify_dentry(h_dentry, h_parent, br);
7710+
7711+ return err;
7712+}
7713+
7714+/* ---------------------------------------------------------------------- */
7715+
027c5e7a 7716+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 7717+{
027c5e7a 7718+ int err;
5afbbe0d 7719+ aufs_bindex_t new_bindex, bindex, bbot, bwh, bdiropq;
027c5e7a
AM
7720+ struct au_hdentry tmp, *p, *q;
7721+ struct au_dinfo *dinfo;
7722+ struct super_block *sb;
1facf9fc 7723+
027c5e7a 7724+ DiMustWriteLock(dentry);
1308ab2a 7725+
027c5e7a
AM
7726+ sb = dentry->d_sb;
7727+ dinfo = au_di(dentry);
5afbbe0d 7728+ bbot = dinfo->di_bbot;
1facf9fc 7729+ bwh = dinfo->di_bwh;
7730+ bdiropq = dinfo->di_bdiropq;
5afbbe0d
AM
7731+ bindex = dinfo->di_btop;
7732+ p = au_hdentry(dinfo, bindex);
7733+ for (; bindex <= bbot; bindex++, p++) {
027c5e7a 7734+ if (!p->hd_dentry)
1facf9fc 7735+ continue;
7736+
027c5e7a
AM
7737+ new_bindex = au_br_index(sb, p->hd_id);
7738+ if (new_bindex == bindex)
1facf9fc 7739+ continue;
1facf9fc 7740+
1facf9fc 7741+ if (dinfo->di_bwh == bindex)
7742+ bwh = new_bindex;
7743+ if (dinfo->di_bdiropq == bindex)
7744+ bdiropq = new_bindex;
7745+ if (new_bindex < 0) {
7746+ au_hdput(p);
7747+ p->hd_dentry = NULL;
7748+ continue;
7749+ }
7750+
7751+ /* swap two lower dentries, and loop again */
5afbbe0d 7752+ q = au_hdentry(dinfo, new_bindex);
1facf9fc 7753+ tmp = *q;
7754+ *q = *p;
7755+ *p = tmp;
7756+ if (tmp.hd_dentry) {
7757+ bindex--;
7758+ p--;
7759+ }
7760+ }
7761+
1facf9fc 7762+ dinfo->di_bwh = -1;
5afbbe0d 7763+ if (bwh >= 0 && bwh <= au_sbbot(sb) && au_sbr_whable(sb, bwh))
1facf9fc 7764+ dinfo->di_bwh = bwh;
7765+
7766+ dinfo->di_bdiropq = -1;
7767+ if (bdiropq >= 0
5afbbe0d 7768+ && bdiropq <= au_sbbot(sb)
1facf9fc 7769+ && au_sbr_whable(sb, bdiropq))
7770+ dinfo->di_bdiropq = bdiropq;
7771+
027c5e7a 7772+ err = -EIO;
5afbbe0d
AM
7773+ dinfo->di_btop = -1;
7774+ dinfo->di_bbot = -1;
7775+ bbot = au_dbbot(parent);
7776+ bindex = 0;
7777+ p = au_hdentry(dinfo, bindex);
7778+ for (; bindex <= bbot; bindex++, p++)
1facf9fc 7779+ if (p->hd_dentry) {
5afbbe0d 7780+ dinfo->di_btop = bindex;
1facf9fc 7781+ break;
7782+ }
7783+
5afbbe0d
AM
7784+ if (dinfo->di_btop >= 0) {
7785+ bindex = bbot;
7786+ p = au_hdentry(dinfo, bindex);
7787+ for (; bindex >= 0; bindex--, p--)
027c5e7a 7788+ if (p->hd_dentry) {
5afbbe0d 7789+ dinfo->di_bbot = bindex;
027c5e7a
AM
7790+ err = 0;
7791+ break;
7792+ }
7793+ }
7794+
7795+ return err;
1facf9fc 7796+}
7797+
027c5e7a 7798+static void au_do_hide(struct dentry *dentry)
1facf9fc 7799+{
027c5e7a 7800+ struct inode *inode;
1facf9fc 7801+
5527c038
JR
7802+ if (d_really_is_positive(dentry)) {
7803+ inode = d_inode(dentry);
7804+ if (!d_is_dir(dentry)) {
027c5e7a
AM
7805+ if (inode->i_nlink && !d_unhashed(dentry))
7806+ drop_nlink(inode);
7807+ } else {
7808+ clear_nlink(inode);
7809+ /* stop next lookup */
7810+ inode->i_flags |= S_DEAD;
7811+ }
7812+ smp_mb(); /* necessary? */
7813+ }
7814+ d_drop(dentry);
7815+}
1308ab2a 7816+
027c5e7a
AM
7817+static int au_hide_children(struct dentry *parent)
7818+{
7819+ int err, i, j, ndentry;
7820+ struct au_dcsub_pages dpages;
7821+ struct au_dpage *dpage;
7822+ struct dentry *dentry;
1facf9fc 7823+
027c5e7a 7824+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 7825+ if (unlikely(err))
7826+ goto out;
027c5e7a
AM
7827+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
7828+ if (unlikely(err))
7829+ goto out_dpages;
1facf9fc 7830+
027c5e7a
AM
7831+ /* in reverse order */
7832+ for (i = dpages.ndpage - 1; i >= 0; i--) {
7833+ dpage = dpages.dpages + i;
7834+ ndentry = dpage->ndentry;
7835+ for (j = ndentry - 1; j >= 0; j--) {
7836+ dentry = dpage->dentries[j];
7837+ if (dentry != parent)
7838+ au_do_hide(dentry);
7839+ }
7840+ }
1facf9fc 7841+
027c5e7a
AM
7842+out_dpages:
7843+ au_dpages_free(&dpages);
4f0767ce 7844+out:
027c5e7a 7845+ return err;
1facf9fc 7846+}
7847+
/*
 * Hide @dentry. For a directory its cached children are hidden first;
 * a failure there is reported and otherwise ignored.
 */
static void au_hide(struct dentry *dentry)
{
	AuDbgDentry(dentry);
	if (d_is_dir(dentry)) {
		int err;

		/* shrink_dcache_parent(dentry); */
		err = au_hide_children(dentry);
		if (unlikely(err))
			AuIOErr("%pd, failed hiding children, ignored %d\n",
				dentry, err);
	}
	au_do_hide(dentry);
}
1facf9fc 7862+
027c5e7a
AM
7863+/*
7864+ * By adding a dirty branch, a cached dentry may be affected in various ways.
7865+ *
7866+ * a dirty branch is added
7867+ * - on the top of layers
7868+ * - in the middle of layers
7869+ * - to the bottom of layers
7870+ *
7871+ * on the added branch there exists
7872+ * - a whiteout
7873+ * - a diropq
7874+ * - a same named entry
7875+ * + exist
7876+ * * negative --> positive
7877+ * * positive --> positive
7878+ * - type is unchanged
7879+ * - type is changed
7880+ * + doesn't exist
7881+ * * negative --> negative
7882+ * * positive --> negative (rejected by au_br_del() for non-dir case)
7883+ * - none
7884+ */
7885+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
7886+ struct au_dinfo *tmp)
7887+{
7888+ int err;
5afbbe0d 7889+ aufs_bindex_t bindex, bbot;
027c5e7a
AM
7890+ struct {
7891+ struct dentry *dentry;
7892+ struct inode *inode;
7893+ mode_t mode;
be52b249
AM
7894+ } orig_h, tmp_h = {
7895+ .dentry = NULL
7896+ };
027c5e7a
AM
7897+ struct au_hdentry *hd;
7898+ struct inode *inode, *h_inode;
7899+ struct dentry *h_dentry;
7900+
7901+ err = 0;
5afbbe0d 7902+ AuDebugOn(dinfo->di_btop < 0);
027c5e7a 7903+ orig_h.mode = 0;
5afbbe0d 7904+ orig_h.dentry = au_hdentry(dinfo, dinfo->di_btop)->hd_dentry;
5527c038
JR
7905+ orig_h.inode = NULL;
7906+ if (d_is_positive(orig_h.dentry)) {
7907+ orig_h.inode = d_inode(orig_h.dentry);
027c5e7a 7908+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
5527c038 7909+ }
5afbbe0d
AM
7910+ if (tmp->di_btop >= 0) {
7911+ tmp_h.dentry = au_hdentry(tmp, tmp->di_btop)->hd_dentry;
5527c038
JR
7912+ if (d_is_positive(tmp_h.dentry)) {
7913+ tmp_h.inode = d_inode(tmp_h.dentry);
027c5e7a 7914+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
5527c038 7915+ }
027c5e7a
AM
7916+ }
7917+
5527c038
JR
7918+ inode = NULL;
7919+ if (d_really_is_positive(dentry))
7920+ inode = d_inode(dentry);
027c5e7a
AM
7921+ if (!orig_h.inode) {
7922+ AuDbg("nagative originally\n");
7923+ if (inode) {
7924+ au_hide(dentry);
7925+ goto out;
7926+ }
7927+ AuDebugOn(inode);
5afbbe0d 7928+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
027c5e7a
AM
7929+ AuDebugOn(dinfo->di_bdiropq != -1);
7930+
7931+ if (!tmp_h.inode) {
7932+ AuDbg("negative --> negative\n");
7933+ /* should have only one negative lower */
5afbbe0d
AM
7934+ if (tmp->di_btop >= 0
7935+ && tmp->di_btop < dinfo->di_btop) {
7936+ AuDebugOn(tmp->di_btop != tmp->di_bbot);
7937+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
7938+ au_set_h_dptr(dentry, dinfo->di_btop, NULL);
027c5e7a 7939+ au_di_cp(dinfo, tmp);
5afbbe0d
AM
7940+ hd = au_hdentry(tmp, tmp->di_btop);
7941+ au_set_h_dptr(dentry, tmp->di_btop,
027c5e7a
AM
7942+ dget(hd->hd_dentry));
7943+ }
7944+ au_dbg_verify_dinode(dentry);
7945+ } else {
7946+ AuDbg("negative --> positive\n");
7947+ /*
7948+ * similar to the behaviour of creating with bypassing
7949+ * aufs.
7950+ * unhash it in order to force an error in the
7951+ * succeeding create operation.
7952+ * we should not set S_DEAD here.
7953+ */
7954+ d_drop(dentry);
7955+ /* au_di_swap(tmp, dinfo); */
7956+ au_dbg_verify_dinode(dentry);
7957+ }
7958+ } else {
7959+ AuDbg("positive originally\n");
7960+ /* inode may be NULL */
7961+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
7962+ if (!tmp_h.inode) {
7963+ AuDbg("positive --> negative\n");
7964+ /* or bypassing aufs */
7965+ au_hide(dentry);
5afbbe0d 7966+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_btop)
027c5e7a
AM
7967+ dinfo->di_bwh = tmp->di_bwh;
7968+ if (inode)
7969+ err = au_refresh_hinode_self(inode);
7970+ au_dbg_verify_dinode(dentry);
7971+ } else if (orig_h.mode == tmp_h.mode) {
7972+ AuDbg("positive --> positive, same type\n");
7973+ if (!S_ISDIR(orig_h.mode)
5afbbe0d 7974+ && dinfo->di_btop > tmp->di_btop) {
027c5e7a
AM
7975+ /*
7976+ * similar to the behaviour of removing and
7977+ * creating.
7978+ */
7979+ au_hide(dentry);
7980+ if (inode)
7981+ err = au_refresh_hinode_self(inode);
7982+ au_dbg_verify_dinode(dentry);
7983+ } else {
7984+ /* fill empty slots */
5afbbe0d
AM
7985+ if (dinfo->di_btop > tmp->di_btop)
7986+ dinfo->di_btop = tmp->di_btop;
7987+ if (dinfo->di_bbot < tmp->di_bbot)
7988+ dinfo->di_bbot = tmp->di_bbot;
027c5e7a
AM
7989+ dinfo->di_bwh = tmp->di_bwh;
7990+ dinfo->di_bdiropq = tmp->di_bdiropq;
5afbbe0d
AM
7991+ bbot = dinfo->di_bbot;
7992+ bindex = tmp->di_btop;
7993+ hd = au_hdentry(tmp, bindex);
7994+ for (; bindex <= bbot; bindex++, hd++) {
027c5e7a
AM
7995+ if (au_h_dptr(dentry, bindex))
7996+ continue;
5afbbe0d 7997+ h_dentry = hd->hd_dentry;
027c5e7a
AM
7998+ if (!h_dentry)
7999+ continue;
5527c038
JR
8000+ AuDebugOn(d_is_negative(h_dentry));
8001+ h_inode = d_inode(h_dentry);
027c5e7a
AM
8002+ AuDebugOn(orig_h.mode
8003+ != (h_inode->i_mode
8004+ & S_IFMT));
8005+ au_set_h_dptr(dentry, bindex,
8006+ dget(h_dentry));
8007+ }
5afbbe0d
AM
8008+ if (inode)
8009+ err = au_refresh_hinode(inode, dentry);
027c5e7a
AM
8010+ au_dbg_verify_dinode(dentry);
8011+ }
8012+ } else {
8013+ AuDbg("positive --> positive, different type\n");
8014+ /* similar to the behaviour of removing and creating */
8015+ au_hide(dentry);
8016+ if (inode)
8017+ err = au_refresh_hinode_self(inode);
8018+ au_dbg_verify_dinode(dentry);
8019+ }
8020+ }
8021+
8022+out:
8023+ return err;
8024+}
8025+
79b8bda9
AM
8026+void au_refresh_dop(struct dentry *dentry, int force_reval)
8027+{
8028+ const struct dentry_operations *dop
8029+ = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
8030+ static const unsigned int mask
8031+ = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
8032+
8033+ BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
8034+
8035+ if (dentry->d_op == dop)
8036+ return;
8037+
8038+ AuDbg("%pd\n", dentry);
8039+ spin_lock(&dentry->d_lock);
8040+ if (dop == &aufs_dop)
8041+ dentry->d_flags |= mask;
8042+ else
8043+ dentry->d_flags &= ~mask;
8044+ dentry->d_op = dop;
8045+ spin_unlock(&dentry->d_lock);
8046+}
8047+
027c5e7a
AM
8048+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
8049+{
e2f27e51 8050+ int err, ebrange, nbr;
027c5e7a
AM
8051+ unsigned int sigen;
8052+ struct au_dinfo *dinfo, *tmp;
8053+ struct super_block *sb;
8054+ struct inode *inode;
8055+
8056+ DiMustWriteLock(dentry);
8057+ AuDebugOn(IS_ROOT(dentry));
5527c038 8058+ AuDebugOn(d_really_is_negative(parent));
027c5e7a
AM
8059+
8060+ sb = dentry->d_sb;
027c5e7a
AM
8061+ sigen = au_sigen(sb);
8062+ err = au_digen_test(parent, sigen);
8063+ if (unlikely(err))
8064+ goto out;
8065+
e2f27e51 8066+ nbr = au_sbbot(sb) + 1;
027c5e7a 8067+ dinfo = au_di(dentry);
e2f27e51 8068+ err = au_di_realloc(dinfo, nbr, /*may_shrink*/0);
027c5e7a
AM
8069+ if (unlikely(err))
8070+ goto out;
8071+ ebrange = au_dbrange_test(dentry);
8072+ if (!ebrange)
8073+ ebrange = au_do_refresh_hdentry(dentry, parent);
8074+
38d290e6 8075+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
5afbbe0d 8076+ AuDebugOn(au_dbtop(dentry) < 0 && au_dbbot(dentry) >= 0);
5527c038
JR
8077+ if (d_really_is_positive(dentry)) {
8078+ inode = d_inode(dentry);
027c5e7a 8079+ err = au_refresh_hinode_self(inode);
5527c038 8080+ }
027c5e7a
AM
8081+ au_dbg_verify_dinode(dentry);
8082+ if (!err)
8083+ goto out_dgen; /* success */
8084+ goto out;
8085+ }
8086+
8087+ /* temporary dinfo */
8088+ AuDbgDentry(dentry);
8089+ err = -ENOMEM;
8090+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
8091+ if (unlikely(!tmp))
8092+ goto out;
8093+ au_di_swap(tmp, dinfo);
8094+ /* returns the number of positive dentries */
8095+ /*
8096+ * if current working dir is removed, it returns an error.
8097+ * but the dentry is legal.
8098+ */
5afbbe0d 8099+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
027c5e7a
AM
8100+ AuDbgDentry(dentry);
8101+ au_di_swap(tmp, dinfo);
8102+ if (err == -ENOENT)
8103+ err = 0;
8104+ if (err >= 0) {
8105+ /* compare/refresh by dinfo */
8106+ AuDbgDentry(dentry);
8107+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
8108+ au_dbg_verify_dinode(dentry);
8109+ AuTraceErr(err);
8110+ }
e2f27e51 8111+ au_di_realloc(dinfo, nbr, /*may_shrink*/1); /* harmless if err */
027c5e7a
AM
8112+ au_rw_write_unlock(&tmp->di_rwsem);
8113+ au_di_free(tmp);
8114+ if (unlikely(err))
8115+ goto out;
8116+
8117+out_dgen:
8118+ au_update_digen(dentry);
8119+out:
8120+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
523b37e3 8121+ AuIOErr("failed refreshing %pd, %d\n", dentry, err);
027c5e7a
AM
8122+ AuDbgDentry(dentry);
8123+ }
8124+ AuTraceErr(err);
8125+ return err;
8126+}
8127+
b4510431
AM
8128+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
8129+ struct dentry *dentry, aufs_bindex_t bindex)
027c5e7a
AM
8130+{
8131+ int err, valid;
027c5e7a
AM
8132+
8133+ err = 0;
8134+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
8135+ goto out;
027c5e7a
AM
8136+
8137+ AuDbg("b%d\n", bindex);
b4510431
AM
8138+ /*
8139+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
8140+ * due to whiteout and branch permission.
8141+ */
8142+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
8143+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
8144+ /* it may return tri-state */
8145+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
1facf9fc 8146+
8147+ if (unlikely(valid < 0))
8148+ err = valid;
8149+ else if (!valid)
8150+ err = -EINVAL;
8151+
4f0767ce 8152+out:
1facf9fc 8153+ AuTraceErr(err);
8154+ return err;
8155+}
8156+
8157+/* todo: remove this */
8158+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
ae9dfd79 8159+ unsigned int flags, int do_udba, int dirren)
1facf9fc 8160+{
8161+ int err;
8162+ umode_t mode, h_mode;
5afbbe0d 8163+ aufs_bindex_t bindex, btail, btop, ibs, ibe;
38d290e6 8164+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
4a4d8108 8165+ struct inode *h_inode, *h_cached_inode;
1facf9fc 8166+ struct dentry *h_dentry;
8167+ struct qstr *name, *h_name;
8168+
8169+ err = 0;
8170+ plus = 0;
8171+ mode = 0;
1facf9fc 8172+ ibs = -1;
8173+ ibe = -1;
8174+ unhashed = !!d_unhashed(dentry);
8175+ is_root = !!IS_ROOT(dentry);
8176+ name = &dentry->d_name;
38d290e6 8177+ tmpfile = au_di(dentry)->di_tmpfile;
1facf9fc 8178+
8179+ /*
7f207e10
AM
8180+ * Theoretically, REVAL test should be unnecessary in case of
8181+ * {FS,I}NOTIFY.
8182+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 8183+ * IN_ATTRIB for atime/nlink/pageio
1facf9fc 8184+ * Let's do REVAL test too.
8185+ */
8186+ if (do_udba && inode) {
8187+ mode = (inode->i_mode & S_IFMT);
8188+ plus = (inode->i_nlink > 0);
5afbbe0d
AM
8189+ ibs = au_ibtop(inode);
8190+ ibe = au_ibbot(inode);
1facf9fc 8191+ }
8192+
5afbbe0d
AM
8193+ btop = au_dbtop(dentry);
8194+ btail = btop;
1facf9fc 8195+ if (inode && S_ISDIR(inode->i_mode))
8196+ btail = au_dbtaildir(dentry);
5afbbe0d 8197+ for (bindex = btop; bindex <= btail; bindex++) {
1facf9fc 8198+ h_dentry = au_h_dptr(dentry, bindex);
8199+ if (!h_dentry)
8200+ continue;
8201+
523b37e3
AM
8202+ AuDbg("b%d, %pd\n", bindex, h_dentry);
8203+ h_nfs = !!au_test_nfs(h_dentry->d_sb);
027c5e7a 8204+ spin_lock(&h_dentry->d_lock);
1facf9fc 8205+ h_name = &h_dentry->d_name;
8206+ if (unlikely(do_udba
8207+ && !is_root
523b37e3
AM
8208+ && ((!h_nfs
8209+ && (unhashed != !!d_unhashed(h_dentry)
ae9dfd79 8210+ || (!tmpfile && !dirren
38d290e6
JR
8211+ && !au_qstreq(name, h_name))
8212+ ))
523b37e3
AM
8213+ || (h_nfs
8214+ && !(flags & LOOKUP_OPEN)
8215+ && (h_dentry->d_flags
8216+ & DCACHE_NFSFS_RENAMED)))
1facf9fc 8217+ )) {
38d290e6
JR
8218+ int h_unhashed;
8219+
8220+ h_unhashed = d_unhashed(h_dentry);
027c5e7a 8221+ spin_unlock(&h_dentry->d_lock);
38d290e6
JR
8222+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
8223+ unhashed, h_unhashed, dentry, h_dentry);
1facf9fc 8224+ goto err;
8225+ }
027c5e7a 8226+ spin_unlock(&h_dentry->d_lock);
1facf9fc 8227+
b4510431 8228+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
1facf9fc 8229+ if (unlikely(err))
8230+ /* do not goto err, to keep the errno */
8231+ break;
8232+
8233+ /* todo: plink too? */
8234+ if (!do_udba)
8235+ continue;
8236+
8237+ /* UDBA tests */
5527c038 8238+ if (unlikely(!!inode != d_is_positive(h_dentry)))
1facf9fc 8239+ goto err;
8240+
5527c038
JR
8241+ h_inode = NULL;
8242+ if (d_is_positive(h_dentry))
8243+ h_inode = d_inode(h_dentry);
1facf9fc 8244+ h_plus = plus;
8245+ h_mode = mode;
8246+ h_cached_inode = h_inode;
8247+ if (h_inode) {
8248+ h_mode = (h_inode->i_mode & S_IFMT);
8249+ h_plus = (h_inode->i_nlink > 0);
8250+ }
8251+ if (inode && ibs <= bindex && bindex <= ibe)
8252+ h_cached_inode = au_h_iptr(inode, bindex);
8253+
523b37e3 8254+ if (!h_nfs) {
38d290e6 8255+ if (unlikely(plus != h_plus && !tmpfile))
523b37e3
AM
8256+ goto err;
8257+ } else {
8258+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
8259+ && !is_root
8260+ && !IS_ROOT(h_dentry)
8261+ && unhashed != d_unhashed(h_dentry)))
8262+ goto err;
8263+ }
8264+ if (unlikely(mode != h_mode
1facf9fc 8265+ || h_cached_inode != h_inode))
8266+ goto err;
8267+ continue;
8268+
f6b6e03d 8269+err:
1facf9fc 8270+ err = -EINVAL;
8271+ break;
8272+ }
8273+
523b37e3 8274+ AuTraceErr(err);
1facf9fc 8275+ return err;
8276+}
8277+
027c5e7a 8278+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 8279+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
8280+{
8281+ int err;
8282+ struct dentry *parent;
1facf9fc 8283+
027c5e7a 8284+ if (!au_digen_test(dentry, sigen))
1facf9fc 8285+ return 0;
8286+
8287+ parent = dget_parent(dentry);
8288+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8289+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 8290+ au_dbg_verify_gen(parent, sigen);
027c5e7a 8291+ err = au_refresh_dentry(dentry, parent);
1facf9fc 8292+ di_read_unlock(parent, AuLock_IR);
8293+ dput(parent);
027c5e7a 8294+ AuTraceErr(err);
1facf9fc 8295+ return err;
8296+}
8297+
8298+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
8299+{
8300+ int err;
8301+ struct dentry *d, *parent;
1facf9fc 8302+
027c5e7a 8303+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 8304+ return simple_reval_dpath(dentry, sigen);
8305+
8306+ /* slow loop, keep it simple and stupid */
8307+ /* cf: au_cpup_dirs() */
8308+ err = 0;
8309+ parent = NULL;
027c5e7a 8310+ while (au_digen_test(dentry, sigen)) {
1facf9fc 8311+ d = dentry;
8312+ while (1) {
8313+ dput(parent);
8314+ parent = dget_parent(d);
027c5e7a 8315+ if (!au_digen_test(parent, sigen))
1facf9fc 8316+ break;
8317+ d = parent;
8318+ }
8319+
1facf9fc 8320+ if (d != dentry)
027c5e7a 8321+ di_write_lock_child2(d);
1facf9fc 8322+
8323+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
8324+ if (au_digen_test(d, sigen)) {
8325+ /*
8326+ * todo: consolidate with simple_reval_dpath(),
8327+ * do_refresh() and au_reval_for_attr().
8328+ */
1facf9fc 8329+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8330+ err = au_refresh_dentry(d, parent);
1facf9fc 8331+ di_read_unlock(parent, AuLock_IR);
8332+ }
8333+
8334+ if (d != dentry)
8335+ di_write_unlock(d);
8336+ dput(parent);
8337+ if (unlikely(err))
8338+ break;
8339+ }
8340+
8341+ return err;
8342+}
8343+
8344+/*
8345+ * if valid returns 1, otherwise 0.
8346+ */
b4510431 8347+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
1facf9fc 8348+{
8349+ int valid, err;
8350+ unsigned int sigen;
ae9dfd79 8351+ unsigned char do_udba, dirren;
1facf9fc 8352+ struct super_block *sb;
8353+ struct inode *inode;
8354+
027c5e7a 8355+ /* todo: support rcu-walk? */
b4510431 8356+ if (flags & LOOKUP_RCU)
027c5e7a
AM
8357+ return -ECHILD;
8358+
8359+ valid = 0;
8360+ if (unlikely(!au_di(dentry)))
8361+ goto out;
8362+
e49829fe 8363+ valid = 1;
1facf9fc 8364+ sb = dentry->d_sb;
e49829fe
JR
8365+ /*
8366+ * todo: very ugly
8367+ * i_mutex of parent dir may be held,
8368+ * but we should not return 'invalid' due to busy.
8369+ */
8370+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
8371+ if (unlikely(err)) {
8372+ valid = err;
027c5e7a 8373+ AuTraceErr(err);
e49829fe
JR
8374+ goto out;
8375+ }
5527c038
JR
8376+ inode = NULL;
8377+ if (d_really_is_positive(dentry))
8378+ inode = d_inode(dentry);
5afbbe0d 8379+ if (unlikely(inode && au_is_bad_inode(inode))) {
c1595e42
JR
8380+ err = -EINVAL;
8381+ AuTraceErr(err);
8382+ goto out_dgrade;
8383+ }
027c5e7a
AM
8384+ if (unlikely(au_dbrange_test(dentry))) {
8385+ err = -EINVAL;
8386+ AuTraceErr(err);
8387+ goto out_dgrade;
1facf9fc 8388+ }
027c5e7a
AM
8389+
8390+ sigen = au_sigen(sb);
8391+ if (au_digen_test(dentry, sigen)) {
1facf9fc 8392+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
8393+ err = au_reval_dpath(dentry, sigen);
8394+ if (unlikely(err)) {
8395+ AuTraceErr(err);
1facf9fc 8396+ goto out_dgrade;
027c5e7a 8397+ }
1facf9fc 8398+ }
8399+ di_downgrade_lock(dentry, AuLock_IR);
8400+
1facf9fc 8401+ err = -EINVAL;
c1595e42 8402+ if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
523b37e3 8403+ && inode
38d290e6 8404+ && !(inode->i_state && I_LINKABLE)
79b8bda9
AM
8405+ && (IS_DEADDIR(inode) || !inode->i_nlink)) {
8406+ AuTraceErr(err);
027c5e7a 8407+ goto out_inval;
79b8bda9 8408+ }
027c5e7a 8409+
1facf9fc 8410+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
8411+ if (do_udba && inode) {
5afbbe0d 8412+ aufs_bindex_t btop = au_ibtop(inode);
027c5e7a 8413+ struct inode *h_inode;
1facf9fc 8414+
5afbbe0d
AM
8415+ if (btop >= 0) {
8416+ h_inode = au_h_iptr(inode, btop);
79b8bda9
AM
8417+ if (h_inode && au_test_higen(inode, h_inode)) {
8418+ AuTraceErr(err);
027c5e7a 8419+ goto out_inval;
79b8bda9 8420+ }
027c5e7a 8421+ }
1facf9fc 8422+ }
8423+
ae9dfd79
AM
8424+ dirren = !!au_opt_test(au_mntflags(sb), DIRREN);
8425+ err = h_d_revalidate(dentry, inode, flags, do_udba, dirren);
5afbbe0d 8426+ if (unlikely(!err && do_udba && au_dbtop(dentry) < 0)) {
1facf9fc 8427+ err = -EIO;
523b37e3
AM
8428+ AuDbg("both of real entry and whiteout found, %p, err %d\n",
8429+ dentry, err);
027c5e7a 8430+ }
e49829fe 8431+ goto out_inval;
1facf9fc 8432+
4f0767ce 8433+out_dgrade:
1facf9fc 8434+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 8435+out_inval:
1facf9fc 8436+ aufs_read_unlock(dentry, AuLock_IR);
8437+ AuTraceErr(err);
8438+ valid = !err;
e49829fe 8439+out:
027c5e7a 8440+ if (!valid) {
523b37e3 8441+ AuDbg("%pd invalid, %d\n", dentry, valid);
027c5e7a
AM
8442+ d_drop(dentry);
8443+ }
1facf9fc 8444+ return valid;
8445+}
8446+
/* ->d_release(): finalize the dinfo, if the dentry has one */
static void aufs_d_release(struct dentry *dentry)
{
	if (!au_di(dentry))
		return;

	au_di_fin(dentry);
	au_hn_di_reinit(dentry);
}
8454+
4a4d8108 8455+const struct dentry_operations aufs_dop = {
c06a8ce3
AM
8456+ .d_revalidate = aufs_d_revalidate,
8457+ .d_weak_revalidate = aufs_d_revalidate,
8458+ .d_release = aufs_d_release
1facf9fc 8459+};
79b8bda9
AM
8460+
8461+/* aufs_dop without d_revalidate */
8462+const struct dentry_operations aufs_dop_noreval = {
8463+ .d_release = aufs_d_release
8464+};
e8791d4f
AM
8465diff -urNp -x '*.orig' linux-4.9/fs/aufs/dentry.h linux-4.9/fs/aufs/dentry.h
8466--- linux-4.9/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
8467+++ linux-4.9/fs/aufs/dentry.h 2021-02-24 16:15:09.524906971 +0100
ae9dfd79 8468@@ -0,0 +1,266 @@
1facf9fc 8469+/*
ae9dfd79 8470+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 8471+ *
8472+ * This program, aufs is free software; you can redistribute it and/or modify
8473+ * it under the terms of the GNU General Public License as published by
8474+ * the Free Software Foundation; either version 2 of the License, or
8475+ * (at your option) any later version.
dece6358
AM
8476+ *
8477+ * This program is distributed in the hope that it will be useful,
8478+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8479+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8480+ * GNU General Public License for more details.
8481+ *
8482+ * You should have received a copy of the GNU General Public License
523b37e3 8483+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8484+ */
8485+
8486+/*
8487+ * lookup and dentry operations
8488+ */
8489+
8490+#ifndef __AUFS_DENTRY_H__
8491+#define __AUFS_DENTRY_H__
8492+
8493+#ifdef __KERNEL__
8494+
dece6358 8495+#include <linux/dcache.h>
ae9dfd79 8496+#include "dirren.h"
1facf9fc 8497+#include "rwsem.h"
8498+
1facf9fc 8499+struct au_hdentry {
8500+ struct dentry *hd_dentry;
027c5e7a 8501+ aufs_bindex_t hd_id;
1facf9fc 8502+};
8503+
8504+struct au_dinfo {
8505+ atomic_t di_generation;
8506+
dece6358 8507+ struct au_rwsem di_rwsem;
5afbbe0d 8508+ aufs_bindex_t di_btop, di_bbot, di_bwh, di_bdiropq;
38d290e6 8509+ unsigned char di_tmpfile; /* to allow the different name */
ae9dfd79 8510+ struct au_hdentry *di_hdentry;
4a4d8108 8511+} ____cacheline_aligned_in_smp;
1facf9fc 8512+
8513+/* ---------------------------------------------------------------------- */
8514+
5afbbe0d
AM
8515+/* flags for au_lkup_dentry() */
8516+#define AuLkup_ALLOW_NEG 1
8517+#define AuLkup_IGNORE_PERM (1 << 1)
ae9dfd79 8518+#define AuLkup_DIRREN (1 << 2)
5afbbe0d
AM
8519+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
8520+#define au_fset_lkup(flags, name) \
8521+ do { (flags) |= AuLkup_##name; } while (0)
8522+#define au_fclr_lkup(flags, name) \
8523+ do { (flags) &= ~AuLkup_##name; } while (0)
8524+
ae9dfd79
AM
8525+#ifndef CONFIG_AUFS_DIRREN
8526+#undef AuLkup_DIRREN
8527+#define AuLkup_DIRREN 0
8528+#endif
8529+
8530+struct au_do_lookup_args {
8531+ unsigned int flags;
8532+ mode_t type;
8533+ struct qstr whname, *name;
8534+ struct au_dr_lookup dirren;
8535+};
8536+
5afbbe0d
AM
8537+/* ---------------------------------------------------------------------- */
8538+
1facf9fc 8539+/* dentry.c */
79b8bda9 8540+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
1facf9fc 8541+struct au_branch;
076b876e 8542+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
1facf9fc 8543+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8544+ struct dentry *h_parent, struct au_branch *br);
8545+
5afbbe0d
AM
8546+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
8547+ unsigned int flags);
86dc4139 8548+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
027c5e7a 8549+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 8550+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
79b8bda9 8551+void au_refresh_dop(struct dentry *dentry, int force_reval);
1facf9fc 8552+
8553+/* dinfo.c */
4a4d8108 8554+void au_di_init_once(void *_di);
027c5e7a
AM
8555+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
8556+void au_di_free(struct au_dinfo *dinfo);
8557+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
8558+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
8559+int au_di_init(struct dentry *dentry);
8560+void au_di_fin(struct dentry *dentry);
e2f27e51 8561+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink);
1facf9fc 8562+
8563+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
8564+void di_read_unlock(struct dentry *d, int flags);
8565+void di_downgrade_lock(struct dentry *d, int flags);
8566+void di_write_lock(struct dentry *d, unsigned int lsc);
8567+void di_write_unlock(struct dentry *d);
8568+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
8569+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
8570+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
8571+
8572+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 8573+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 8574+aufs_bindex_t au_dbtail(struct dentry *dentry);
8575+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
8576+
8577+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
8578+ struct dentry *h_dentry);
027c5e7a
AM
8579+int au_digen_test(struct dentry *dentry, unsigned int sigen);
8580+int au_dbrange_test(struct dentry *dentry);
1facf9fc 8581+void au_update_digen(struct dentry *dentry);
8582+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
5afbbe0d
AM
8583+void au_update_dbtop(struct dentry *dentry);
8584+void au_update_dbbot(struct dentry *dentry);
1facf9fc 8585+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
8586+
8587+/* ---------------------------------------------------------------------- */
8588+
8589+static inline struct au_dinfo *au_di(struct dentry *dentry)
8590+{
8591+ return dentry->d_fsdata;
8592+}
8593+
8594+/* ---------------------------------------------------------------------- */
8595+
/* lock subclass for dinfo */
enum {
	AuLsc_DI_CHILD = 0,	/* child first */
	AuLsc_DI_CHILD2,	/* rename(2), link(2), and cpup at hnotify */
	AuLsc_DI_CHILD3,	/* copyup dirs */
	AuLsc_DI_PARENT,
	AuLsc_DI_PARENT2,
	AuLsc_DI_PARENT3,
	AuLsc_DI_TMP		/* temp for replacing dinfo */
};
8606+
8607+/*
8608+ * di_read_lock_child, di_write_lock_child,
8609+ * di_read_lock_child2, di_write_lock_child2,
8610+ * di_read_lock_child3, di_write_lock_child3,
8611+ * di_read_lock_parent, di_write_lock_parent,
8612+ * di_read_lock_parent2, di_write_lock_parent2,
8613+ * di_read_lock_parent3, di_write_lock_parent3,
8614+ */
8615+#define AuReadLockFunc(name, lsc) \
8616+static inline void di_read_lock_##name(struct dentry *d, int flags) \
8617+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
8618+
8619+#define AuWriteLockFunc(name, lsc) \
8620+static inline void di_write_lock_##name(struct dentry *d) \
8621+{ di_write_lock(d, AuLsc_DI_##lsc); }
8622+
8623+#define AuRWLockFuncs(name, lsc) \
8624+ AuReadLockFunc(name, lsc) \
8625+ AuWriteLockFunc(name, lsc)
8626+
8627+AuRWLockFuncs(child, CHILD);
8628+AuRWLockFuncs(child2, CHILD2);
8629+AuRWLockFuncs(child3, CHILD3);
8630+AuRWLockFuncs(parent, PARENT);
8631+AuRWLockFuncs(parent2, PARENT2);
8632+AuRWLockFuncs(parent3, PARENT3);
8633+
8634+#undef AuReadLockFunc
8635+#undef AuWriteLockFunc
8636+#undef AuRWLockFuncs
8637+
/* assertions on the rwsem of the dinfo, forwarded to the AuRwMust*() ones */
#define DiMustNoWaiters(d) \
	AuRwMustNoWaiters(&au_di(d)->di_rwsem)
#define DiMustAnyLock(d) \
	AuRwMustAnyLock(&au_di(d)->di_rwsem)
#define DiMustWriteLock(d) \
	AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 8641+
8642+/* ---------------------------------------------------------------------- */
8643+
8644+/* todo: memory barrier? */
8645+static inline unsigned int au_digen(struct dentry *d)
8646+{
8647+ return atomic_read(&au_di(d)->di_generation);
8648+}
8649+
8650+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
8651+{
8652+ hdentry->hd_dentry = NULL;
8653+}
8654+
5afbbe0d
AM
8655+static inline struct au_hdentry *au_hdentry(struct au_dinfo *di,
8656+ aufs_bindex_t bindex)
8657+{
8658+ return di->di_hdentry + bindex;
8659+}
8660+
1facf9fc 8661+static inline void au_hdput(struct au_hdentry *hd)
8662+{
4a4d8108
AM
8663+ if (hd)
8664+ dput(hd->hd_dentry);
1facf9fc 8665+}
8666+
5afbbe0d 8667+static inline aufs_bindex_t au_dbtop(struct dentry *dentry)
1facf9fc 8668+{
1308ab2a 8669+ DiMustAnyLock(dentry);
5afbbe0d 8670+ return au_di(dentry)->di_btop;
1facf9fc 8671+}
8672+
5afbbe0d 8673+static inline aufs_bindex_t au_dbbot(struct dentry *dentry)
1facf9fc 8674+{
1308ab2a 8675+ DiMustAnyLock(dentry);
5afbbe0d 8676+ return au_di(dentry)->di_bbot;
1facf9fc 8677+}
8678+
8679+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
8680+{
1308ab2a 8681+ DiMustAnyLock(dentry);
1facf9fc 8682+ return au_di(dentry)->di_bwh;
8683+}
8684+
8685+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
8686+{
1308ab2a 8687+ DiMustAnyLock(dentry);
1facf9fc 8688+ return au_di(dentry)->di_bdiropq;
8689+}
8690+
8691+/* todo: hard/soft set? */
5afbbe0d 8692+static inline void au_set_dbtop(struct dentry *dentry, aufs_bindex_t bindex)
1facf9fc 8693+{
1308ab2a 8694+ DiMustWriteLock(dentry);
5afbbe0d 8695+ au_di(dentry)->di_btop = bindex;
1facf9fc 8696+}
8697+
5afbbe0d 8698+static inline void au_set_dbbot(struct dentry *dentry, aufs_bindex_t bindex)
1facf9fc 8699+{
1308ab2a 8700+ DiMustWriteLock(dentry);
5afbbe0d 8701+ au_di(dentry)->di_bbot = bindex;
1facf9fc 8702+}
8703+
8704+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
8705+{
1308ab2a 8706+ DiMustWriteLock(dentry);
5afbbe0d 8707+ /* dbwh can be outside of btop - bbot range */
1facf9fc 8708+ au_di(dentry)->di_bwh = bindex;
8709+}
8710+
8711+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
8712+{
1308ab2a 8713+ DiMustWriteLock(dentry);
1facf9fc 8714+ au_di(dentry)->di_bdiropq = bindex;
8715+}
8716+
8717+/* ---------------------------------------------------------------------- */
8718+
4a4d8108 8719+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 8720+static inline void au_digen_dec(struct dentry *d)
8721+{
e49829fe 8722+ atomic_dec(&au_di(d)->di_generation);
1facf9fc 8723+}
8724+
4a4d8108 8725+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 8726+{
8727+ dentry->d_fsdata = NULL;
8728+}
8729+#else
4a4d8108
AM
8730+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
8731+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 8732+
8733+#endif /* __KERNEL__ */
8734+#endif /* __AUFS_DENTRY_H__ */
e8791d4f
AM
8735diff -urNp -x '*.orig' linux-4.9/fs/aufs/dinfo.c linux-4.9/fs/aufs/dinfo.c
8736--- linux-4.9/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
8737+++ linux-4.9/fs/aufs/dinfo.c 2021-02-24 16:15:09.524906971 +0100
e2f27e51 8738@@ -0,0 +1,553 @@
1facf9fc 8739+/*
ae9dfd79 8740+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 8741+ *
8742+ * This program, aufs is free software; you can redistribute it and/or modify
8743+ * it under the terms of the GNU General Public License as published by
8744+ * the Free Software Foundation; either version 2 of the License, or
8745+ * (at your option) any later version.
dece6358
AM
8746+ *
8747+ * This program is distributed in the hope that it will be useful,
8748+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8749+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8750+ * GNU General Public License for more details.
8751+ *
8752+ * You should have received a copy of the GNU General Public License
523b37e3 8753+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8754+ */
8755+
8756+/*
8757+ * dentry private data
8758+ */
8759+
8760+#include "aufs.h"
8761+
e49829fe 8762+void au_di_init_once(void *_dinfo)
4a4d8108 8763+{
e49829fe 8764+ struct au_dinfo *dinfo = _dinfo;
4a4d8108 8765+
e49829fe 8766+ au_rw_init(&dinfo->di_rwsem);
4a4d8108
AM
8767+}
8768+
027c5e7a 8769+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 8770+{
8771+ struct au_dinfo *dinfo;
027c5e7a 8772+ int nbr, i;
1facf9fc 8773+
8774+ dinfo = au_cache_alloc_dinfo();
8775+ if (unlikely(!dinfo))
8776+ goto out;
8777+
5afbbe0d 8778+ nbr = au_sbbot(sb) + 1;
1facf9fc 8779+ if (nbr <= 0)
8780+ nbr = 1;
8781+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
8782+ if (dinfo->di_hdentry) {
8783+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
5afbbe0d
AM
8784+ dinfo->di_btop = -1;
8785+ dinfo->di_bbot = -1;
027c5e7a
AM
8786+ dinfo->di_bwh = -1;
8787+ dinfo->di_bdiropq = -1;
38d290e6 8788+ dinfo->di_tmpfile = 0;
027c5e7a
AM
8789+ for (i = 0; i < nbr; i++)
8790+ dinfo->di_hdentry[i].hd_id = -1;
8791+ goto out;
8792+ }
1facf9fc 8793+
ae9dfd79 8794+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
8795+ dinfo = NULL;
8796+
4f0767ce 8797+out:
027c5e7a 8798+ return dinfo;
1facf9fc 8799+}
8800+
027c5e7a 8801+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 8802+{
4a4d8108 8803+ struct au_hdentry *p;
5afbbe0d 8804+ aufs_bindex_t bbot, bindex;
4a4d8108
AM
8805+
8806+ /* dentry may not be revalidated */
5afbbe0d 8807+ bindex = dinfo->di_btop;
4a4d8108 8808+ if (bindex >= 0) {
5afbbe0d
AM
8809+ bbot = dinfo->di_bbot;
8810+ p = au_hdentry(dinfo, bindex);
8811+ while (bindex++ <= bbot)
4a4d8108
AM
8812+ au_hdput(p++);
8813+ }
ae9dfd79
AM
8814+ kfree(dinfo->di_hdentry);
8815+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
8816+}
8817+
8818+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
8819+{
8820+ struct au_hdentry *p;
8821+ aufs_bindex_t bi;
8822+
8823+ AuRwMustWriteLock(&a->di_rwsem);
8824+ AuRwMustWriteLock(&b->di_rwsem);
8825+
8826+#define DiSwap(v, name) \
8827+ do { \
8828+ v = a->di_##name; \
8829+ a->di_##name = b->di_##name; \
8830+ b->di_##name = v; \
8831+ } while (0)
8832+
8833+ DiSwap(p, hdentry);
5afbbe0d
AM
8834+ DiSwap(bi, btop);
8835+ DiSwap(bi, bbot);
027c5e7a
AM
8836+ DiSwap(bi, bwh);
8837+ DiSwap(bi, bdiropq);
8838+ /* smp_mb(); */
8839+
8840+#undef DiSwap
8841+}
8842+
8843+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
8844+{
8845+ AuRwMustWriteLock(&dst->di_rwsem);
8846+ AuRwMustWriteLock(&src->di_rwsem);
8847+
5afbbe0d
AM
8848+ dst->di_btop = src->di_btop;
8849+ dst->di_bbot = src->di_bbot;
027c5e7a
AM
8850+ dst->di_bwh = src->di_bwh;
8851+ dst->di_bdiropq = src->di_bdiropq;
8852+ /* smp_mb(); */
8853+}
8854+
8855+int au_di_init(struct dentry *dentry)
8856+{
8857+ int err;
8858+ struct super_block *sb;
8859+ struct au_dinfo *dinfo;
8860+
8861+ err = 0;
8862+ sb = dentry->d_sb;
8863+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
8864+ if (dinfo) {
8865+ atomic_set(&dinfo->di_generation, au_sigen(sb));
8866+ /* smp_mb(); */ /* atomic_set */
8867+ dentry->d_fsdata = dinfo;
8868+ } else
8869+ err = -ENOMEM;
8870+
8871+ return err;
8872+}
8873+
8874+void au_di_fin(struct dentry *dentry)
8875+{
8876+ struct au_dinfo *dinfo;
8877+
8878+ dinfo = au_di(dentry);
8879+ AuRwDestroy(&dinfo->di_rwsem);
8880+ au_di_free(dinfo);
4a4d8108
AM
8881+}
8882+
e2f27e51 8883+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink)
1facf9fc 8884+{
8885+ int err, sz;
8886+ struct au_hdentry *hdp;
8887+
1308ab2a 8888+ AuRwMustWriteLock(&dinfo->di_rwsem);
8889+
1facf9fc 8890+ err = -ENOMEM;
5afbbe0d 8891+ sz = sizeof(*hdp) * (dinfo->di_bbot + 1);
1facf9fc 8892+ if (!sz)
8893+ sz = sizeof(*hdp);
e2f27e51
AM
8894+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS,
8895+ may_shrink);
1facf9fc 8896+ if (hdp) {
8897+ dinfo->di_hdentry = hdp;
8898+ err = 0;
8899+ }
8900+
8901+ return err;
8902+}
8903+
8904+/* ---------------------------------------------------------------------- */
8905+
8906+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
8907+{
8908+ switch (lsc) {
8909+ case AuLsc_DI_CHILD:
8910+ ii_write_lock_child(inode);
8911+ break;
8912+ case AuLsc_DI_CHILD2:
8913+ ii_write_lock_child2(inode);
8914+ break;
8915+ case AuLsc_DI_CHILD3:
8916+ ii_write_lock_child3(inode);
8917+ break;
8918+ case AuLsc_DI_PARENT:
8919+ ii_write_lock_parent(inode);
8920+ break;
8921+ case AuLsc_DI_PARENT2:
8922+ ii_write_lock_parent2(inode);
8923+ break;
8924+ case AuLsc_DI_PARENT3:
8925+ ii_write_lock_parent3(inode);
8926+ break;
8927+ default:
8928+ BUG();
8929+ }
8930+}
8931+
8932+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
8933+{
8934+ switch (lsc) {
8935+ case AuLsc_DI_CHILD:
8936+ ii_read_lock_child(inode);
8937+ break;
8938+ case AuLsc_DI_CHILD2:
8939+ ii_read_lock_child2(inode);
8940+ break;
8941+ case AuLsc_DI_CHILD3:
8942+ ii_read_lock_child3(inode);
8943+ break;
8944+ case AuLsc_DI_PARENT:
8945+ ii_read_lock_parent(inode);
8946+ break;
8947+ case AuLsc_DI_PARENT2:
8948+ ii_read_lock_parent2(inode);
8949+ break;
8950+ case AuLsc_DI_PARENT3:
8951+ ii_read_lock_parent3(inode);
8952+ break;
8953+ default:
8954+ BUG();
8955+ }
8956+}
8957+
8958+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
8959+{
5527c038
JR
8960+ struct inode *inode;
8961+
dece6358 8962+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
8963+ if (d_really_is_positive(d)) {
8964+ inode = d_inode(d);
1facf9fc 8965+ if (au_ftest_lock(flags, IW))
5527c038 8966+ do_ii_write_lock(inode, lsc);
1facf9fc 8967+ else if (au_ftest_lock(flags, IR))
5527c038 8968+ do_ii_read_lock(inode, lsc);
1facf9fc 8969+ }
8970+}
8971+
8972+void di_read_unlock(struct dentry *d, int flags)
8973+{
5527c038
JR
8974+ struct inode *inode;
8975+
8976+ if (d_really_is_positive(d)) {
8977+ inode = d_inode(d);
027c5e7a
AM
8978+ if (au_ftest_lock(flags, IW)) {
8979+ au_dbg_verify_dinode(d);
5527c038 8980+ ii_write_unlock(inode);
027c5e7a
AM
8981+ } else if (au_ftest_lock(flags, IR)) {
8982+ au_dbg_verify_dinode(d);
5527c038 8983+ ii_read_unlock(inode);
027c5e7a 8984+ }
1facf9fc 8985+ }
dece6358 8986+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 8987+}
8988+
8989+void di_downgrade_lock(struct dentry *d, int flags)
8990+{
5527c038
JR
8991+ if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
8992+ ii_downgrade_lock(d_inode(d));
dece6358 8993+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 8994+}
8995+
8996+void di_write_lock(struct dentry *d, unsigned int lsc)
8997+{
dece6358 8998+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
8999+ if (d_really_is_positive(d))
9000+ do_ii_write_lock(d_inode(d), lsc);
1facf9fc 9001+}
9002+
9003+void di_write_unlock(struct dentry *d)
9004+{
027c5e7a 9005+ au_dbg_verify_dinode(d);
5527c038
JR
9006+ if (d_really_is_positive(d))
9007+ ii_write_unlock(d_inode(d));
dece6358 9008+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 9009+}
9010+
9011+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
9012+{
9013+ AuDebugOn(d1 == d2
5527c038 9014+ || d_inode(d1) == d_inode(d2)
1facf9fc 9015+ || d1->d_sb != d2->d_sb);
9016+
ae9dfd79
AM
9017+ if ((isdir && au_test_subdir(d1, d2))
9018+ || d1 < d2) {
1facf9fc 9019+ di_write_lock_child(d1);
9020+ di_write_lock_child2(d2);
9021+ } else {
1facf9fc 9022+ di_write_lock_child(d2);
9023+ di_write_lock_child2(d1);
9024+ }
9025+}
9026+
9027+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
9028+{
9029+ AuDebugOn(d1 == d2
5527c038 9030+ || d_inode(d1) == d_inode(d2)
1facf9fc 9031+ || d1->d_sb != d2->d_sb);
9032+
ae9dfd79
AM
9033+ if ((isdir && au_test_subdir(d1, d2))
9034+ || d1 < d2) {
1facf9fc 9035+ di_write_lock_parent(d1);
9036+ di_write_lock_parent2(d2);
9037+ } else {
1facf9fc 9038+ di_write_lock_parent(d2);
9039+ di_write_lock_parent2(d1);
9040+ }
9041+}
9042+
9043+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
9044+{
9045+ di_write_unlock(d1);
5527c038 9046+ if (d_inode(d1) == d_inode(d2))
dece6358 9047+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 9048+ else
9049+ di_write_unlock(d2);
9050+}
9051+
9052+/* ---------------------------------------------------------------------- */
9053+
9054+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
9055+{
9056+ struct dentry *d;
9057+
1308ab2a 9058+ DiMustAnyLock(dentry);
9059+
5afbbe0d 9060+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
1facf9fc 9061+ return NULL;
9062+ AuDebugOn(bindex < 0);
5afbbe0d 9063+ d = au_hdentry(au_di(dentry), bindex)->hd_dentry;
c1595e42 9064+ AuDebugOn(d && au_dcount(d) <= 0);
1facf9fc 9065+ return d;
9066+}
9067+
2cbb1c4b
JR
9068+/*
9069+ * extended version of au_h_dptr().
38d290e6
JR
9070+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
9071+ * error.
2cbb1c4b
JR
9072+ */
9073+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
9074+{
9075+ struct dentry *h_dentry;
9076+ struct inode *inode, *h_inode;
9077+
5527c038 9078+ AuDebugOn(d_really_is_negative(dentry));
2cbb1c4b
JR
9079+
9080+ h_dentry = NULL;
5afbbe0d
AM
9081+ if (au_dbtop(dentry) <= bindex
9082+ && bindex <= au_dbbot(dentry))
2cbb1c4b 9083+ h_dentry = au_h_dptr(dentry, bindex);
38d290e6 9084+ if (h_dentry && !au_d_linkable(h_dentry)) {
2cbb1c4b
JR
9085+ dget(h_dentry);
9086+ goto out; /* success */
9087+ }
9088+
5527c038 9089+ inode = d_inode(dentry);
5afbbe0d
AM
9090+ AuDebugOn(bindex < au_ibtop(inode));
9091+ AuDebugOn(au_ibbot(inode) < bindex);
2cbb1c4b
JR
9092+ h_inode = au_h_iptr(inode, bindex);
9093+ h_dentry = d_find_alias(h_inode);
9094+ if (h_dentry) {
9095+ if (!IS_ERR(h_dentry)) {
38d290e6 9096+ if (!au_d_linkable(h_dentry))
2cbb1c4b
JR
9097+ goto out; /* success */
9098+ dput(h_dentry);
9099+ } else
9100+ goto out;
9101+ }
9102+
9103+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
9104+ h_dentry = au_plink_lkup(inode, bindex);
9105+ AuDebugOn(!h_dentry);
9106+ if (!IS_ERR(h_dentry)) {
9107+ if (!au_d_hashed_positive(h_dentry))
9108+ goto out; /* success */
9109+ dput(h_dentry);
9110+ h_dentry = NULL;
9111+ }
9112+ }
9113+
9114+out:
9115+ AuDbgDentry(h_dentry);
9116+ return h_dentry;
9117+}
9118+
1facf9fc 9119+aufs_bindex_t au_dbtail(struct dentry *dentry)
9120+{
5afbbe0d 9121+ aufs_bindex_t bbot, bwh;
1facf9fc 9122+
5afbbe0d
AM
9123+ bbot = au_dbbot(dentry);
9124+ if (0 <= bbot) {
1facf9fc 9125+ bwh = au_dbwh(dentry);
9126+ if (!bwh)
9127+ return bwh;
5afbbe0d 9128+ if (0 < bwh && bwh < bbot)
1facf9fc 9129+ return bwh - 1;
9130+ }
5afbbe0d 9131+ return bbot;
1facf9fc 9132+}
9133+
9134+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
9135+{
5afbbe0d 9136+ aufs_bindex_t bbot, bopq;
1facf9fc 9137+
5afbbe0d
AM
9138+ bbot = au_dbtail(dentry);
9139+ if (0 <= bbot) {
1facf9fc 9140+ bopq = au_dbdiropq(dentry);
5afbbe0d
AM
9141+ if (0 <= bopq && bopq < bbot)
9142+ bbot = bopq;
1facf9fc 9143+ }
5afbbe0d 9144+ return bbot;
1facf9fc 9145+}
9146+
9147+/* ---------------------------------------------------------------------- */
9148+
9149+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9150+ struct dentry *h_dentry)
9151+{
5afbbe0d
AM
9152+ struct au_dinfo *dinfo;
9153+ struct au_hdentry *hd;
027c5e7a 9154+ struct au_branch *br;
1facf9fc 9155+
1308ab2a 9156+ DiMustWriteLock(dentry);
9157+
5afbbe0d
AM
9158+ dinfo = au_di(dentry);
9159+ hd = au_hdentry(dinfo, bindex);
4a4d8108 9160+ au_hdput(hd);
1facf9fc 9161+ hd->hd_dentry = h_dentry;
027c5e7a
AM
9162+ if (h_dentry) {
9163+ br = au_sbr(dentry->d_sb, bindex);
9164+ hd->hd_id = br->br_id;
9165+ }
9166+}
9167+
9168+int au_dbrange_test(struct dentry *dentry)
9169+{
9170+ int err;
5afbbe0d 9171+ aufs_bindex_t btop, bbot;
027c5e7a
AM
9172+
9173+ err = 0;
5afbbe0d
AM
9174+ btop = au_dbtop(dentry);
9175+ bbot = au_dbbot(dentry);
9176+ if (btop >= 0)
9177+ AuDebugOn(bbot < 0 && btop > bbot);
027c5e7a
AM
9178+ else {
9179+ err = -EIO;
5afbbe0d 9180+ AuDebugOn(bbot >= 0);
027c5e7a
AM
9181+ }
9182+
9183+ return err;
9184+}
9185+
9186+int au_digen_test(struct dentry *dentry, unsigned int sigen)
9187+{
9188+ int err;
9189+
9190+ err = 0;
9191+ if (unlikely(au_digen(dentry) != sigen
5527c038 9192+ || au_iigen_test(d_inode(dentry), sigen)))
027c5e7a
AM
9193+ err = -EIO;
9194+
9195+ return err;
1facf9fc 9196+}
9197+
9198+void au_update_digen(struct dentry *dentry)
9199+{
9200+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
9201+ /* smp_mb(); */ /* atomic_set */
9202+}
9203+
9204+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
9205+{
9206+ struct au_dinfo *dinfo;
9207+ struct dentry *h_d;
4a4d8108 9208+ struct au_hdentry *hdp;
5afbbe0d 9209+ aufs_bindex_t bindex, bbot;
1facf9fc 9210+
1308ab2a 9211+ DiMustWriteLock(dentry);
9212+
1facf9fc 9213+ dinfo = au_di(dentry);
5afbbe0d 9214+ if (!dinfo || dinfo->di_btop < 0)
1facf9fc 9215+ return;
9216+
9217+ if (do_put_zero) {
5afbbe0d
AM
9218+ bbot = dinfo->di_bbot;
9219+ bindex = dinfo->di_btop;
9220+ hdp = au_hdentry(dinfo, bindex);
9221+ for (; bindex <= bbot; bindex++, hdp++) {
9222+ h_d = hdp->hd_dentry;
5527c038 9223+ if (h_d && d_is_negative(h_d))
1facf9fc 9224+ au_set_h_dptr(dentry, bindex, NULL);
9225+ }
9226+ }
9227+
5afbbe0d
AM
9228+ dinfo->di_btop = 0;
9229+ hdp = au_hdentry(dinfo, dinfo->di_btop);
9230+ for (; dinfo->di_btop <= dinfo->di_bbot; dinfo->di_btop++, hdp++)
9231+ if (hdp->hd_dentry)
1facf9fc 9232+ break;
5afbbe0d
AM
9233+ if (dinfo->di_btop > dinfo->di_bbot) {
9234+ dinfo->di_btop = -1;
9235+ dinfo->di_bbot = -1;
1facf9fc 9236+ return;
9237+ }
9238+
5afbbe0d
AM
9239+ hdp = au_hdentry(dinfo, dinfo->di_bbot);
9240+ for (; dinfo->di_bbot >= 0; dinfo->di_bbot--, hdp--)
9241+ if (hdp->hd_dentry)
1facf9fc 9242+ break;
5afbbe0d 9243+ AuDebugOn(dinfo->di_btop > dinfo->di_bbot || dinfo->di_bbot < 0);
1facf9fc 9244+}
9245+
5afbbe0d 9246+void au_update_dbtop(struct dentry *dentry)
1facf9fc 9247+{
5afbbe0d 9248+ aufs_bindex_t bindex, bbot;
1facf9fc 9249+ struct dentry *h_dentry;
9250+
5afbbe0d
AM
9251+ bbot = au_dbbot(dentry);
9252+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
1facf9fc 9253+ h_dentry = au_h_dptr(dentry, bindex);
9254+ if (!h_dentry)
9255+ continue;
5527c038 9256+ if (d_is_positive(h_dentry)) {
5afbbe0d 9257+ au_set_dbtop(dentry, bindex);
1facf9fc 9258+ return;
9259+ }
9260+ au_set_h_dptr(dentry, bindex, NULL);
9261+ }
9262+}
9263+
5afbbe0d 9264+void au_update_dbbot(struct dentry *dentry)
1facf9fc 9265+{
5afbbe0d 9266+ aufs_bindex_t bindex, btop;
1facf9fc 9267+ struct dentry *h_dentry;
9268+
5afbbe0d
AM
9269+ btop = au_dbtop(dentry);
9270+ for (bindex = au_dbbot(dentry); bindex >= btop; bindex--) {
1facf9fc 9271+ h_dentry = au_h_dptr(dentry, bindex);
9272+ if (!h_dentry)
9273+ continue;
5527c038 9274+ if (d_is_positive(h_dentry)) {
5afbbe0d 9275+ au_set_dbbot(dentry, bindex);
1facf9fc 9276+ return;
9277+ }
9278+ au_set_h_dptr(dentry, bindex, NULL);
9279+ }
9280+}
9281+
9282+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
9283+{
5afbbe0d 9284+ aufs_bindex_t bindex, bbot;
1facf9fc 9285+
5afbbe0d
AM
9286+ bbot = au_dbbot(dentry);
9287+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++)
1facf9fc 9288+ if (au_h_dptr(dentry, bindex) == h_dentry)
9289+ return bindex;
9290+ return -1;
9291+}
e8791d4f
AM
9292diff -urNp -x '*.orig' linux-4.9/fs/aufs/dir.c linux-4.9/fs/aufs/dir.c
9293--- linux-4.9/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
9294+++ linux-4.9/fs/aufs/dir.c 2021-02-24 16:15:09.524906971 +0100
ae9dfd79 9295@@ -0,0 +1,759 @@
1facf9fc 9296+/*
ae9dfd79 9297+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 9298+ *
9299+ * This program, aufs is free software; you can redistribute it and/or modify
9300+ * it under the terms of the GNU General Public License as published by
9301+ * the Free Software Foundation; either version 2 of the License, or
9302+ * (at your option) any later version.
dece6358
AM
9303+ *
9304+ * This program is distributed in the hope that it will be useful,
9305+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9306+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9307+ * GNU General Public License for more details.
9308+ *
9309+ * You should have received a copy of the GNU General Public License
523b37e3 9310+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 9311+ */
9312+
9313+/*
9314+ * directory operations
9315+ */
9316+
9317+#include <linux/fs_stack.h>
9318+#include "aufs.h"
9319+
9320+void au_add_nlink(struct inode *dir, struct inode *h_dir)
9321+{
9dbd164d
AM
9322+ unsigned int nlink;
9323+
1facf9fc 9324+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9325+
9dbd164d
AM
9326+ nlink = dir->i_nlink;
9327+ nlink += h_dir->i_nlink - 2;
1facf9fc 9328+ if (h_dir->i_nlink < 2)
9dbd164d 9329+ nlink += 2;
f6b6e03d 9330+ smp_mb(); /* for i_nlink */
7eafdf33 9331+ /* 0 can happen in revalidating */
92d182d2 9332+ set_nlink(dir, nlink);
1facf9fc 9333+}
9334+
9335+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
9336+{
9dbd164d
AM
9337+ unsigned int nlink;
9338+
1facf9fc 9339+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9340+
9dbd164d
AM
9341+ nlink = dir->i_nlink;
9342+ nlink -= h_dir->i_nlink - 2;
1facf9fc 9343+ if (h_dir->i_nlink < 2)
9dbd164d 9344+ nlink -= 2;
f6b6e03d 9345+ smp_mb(); /* for i_nlink */
92d182d2 9346+ /* nlink == 0 means the branch-fs is broken */
9dbd164d 9347+ set_nlink(dir, nlink);
1facf9fc 9348+}
9349+
1308ab2a 9350+loff_t au_dir_size(struct file *file, struct dentry *dentry)
9351+{
9352+ loff_t sz;
5afbbe0d 9353+ aufs_bindex_t bindex, bbot;
1308ab2a 9354+ struct file *h_file;
9355+ struct dentry *h_dentry;
9356+
9357+ sz = 0;
9358+ if (file) {
2000de60 9359+ AuDebugOn(!d_is_dir(file->f_path.dentry));
1308ab2a 9360+
5afbbe0d
AM
9361+ bbot = au_fbbot_dir(file);
9362+ for (bindex = au_fbtop(file);
9363+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
1308ab2a 9364+ bindex++) {
4a4d8108 9365+ h_file = au_hf_dir(file, bindex);
c06a8ce3
AM
9366+ if (h_file && file_inode(h_file))
9367+ sz += vfsub_f_size_read(h_file);
1308ab2a 9368+ }
9369+ } else {
9370+ AuDebugOn(!dentry);
2000de60 9371+ AuDebugOn(!d_is_dir(dentry));
1308ab2a 9372+
5afbbe0d
AM
9373+ bbot = au_dbtaildir(dentry);
9374+ for (bindex = au_dbtop(dentry);
9375+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
1308ab2a 9376+ bindex++) {
9377+ h_dentry = au_h_dptr(dentry, bindex);
5527c038
JR
9378+ if (h_dentry && d_is_positive(h_dentry))
9379+ sz += i_size_read(d_inode(h_dentry));
1308ab2a 9380+ }
9381+ }
9382+ if (sz < KMALLOC_MAX_SIZE)
9383+ sz = roundup_pow_of_two(sz);
9384+ if (sz > KMALLOC_MAX_SIZE)
9385+ sz = KMALLOC_MAX_SIZE;
9386+ else if (sz < NAME_MAX) {
9387+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
9388+ sz = AUFS_RDBLK_DEF;
9389+ }
9390+ return sz;
9391+}
9392+
b912730e
AM
9393+struct au_dir_ts_arg {
9394+ struct dentry *dentry;
9395+ aufs_bindex_t brid;
9396+};
9397+
9398+static void au_do_dir_ts(void *arg)
9399+{
9400+ struct au_dir_ts_arg *a = arg;
9401+ struct au_dtime dt;
9402+ struct path h_path;
9403+ struct inode *dir, *h_dir;
9404+ struct super_block *sb;
9405+ struct au_branch *br;
9406+ struct au_hinode *hdir;
9407+ int err;
5afbbe0d 9408+ aufs_bindex_t btop, bindex;
b912730e
AM
9409+
9410+ sb = a->dentry->d_sb;
5527c038 9411+ if (d_really_is_negative(a->dentry))
b912730e 9412+ goto out;
5527c038 9413+ /* no dir->i_mutex lock */
b95c5147
AM
9414+ aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
9415+
5527c038 9416+ dir = d_inode(a->dentry);
5afbbe0d 9417+ btop = au_ibtop(dir);
b912730e 9418+ bindex = au_br_index(sb, a->brid);
5afbbe0d 9419+ if (bindex < btop)
b912730e
AM
9420+ goto out_unlock;
9421+
9422+ br = au_sbr(sb, bindex);
9423+ h_path.dentry = au_h_dptr(a->dentry, bindex);
9424+ if (!h_path.dentry)
9425+ goto out_unlock;
9426+ h_path.mnt = au_br_mnt(br);
9427+ au_dtime_store(&dt, a->dentry, &h_path);
9428+
5afbbe0d 9429+ br = au_sbr(sb, btop);
b912730e
AM
9430+ if (!au_br_writable(br->br_perm))
9431+ goto out_unlock;
5afbbe0d 9432+ h_path.dentry = au_h_dptr(a->dentry, btop);
b912730e
AM
9433+ h_path.mnt = au_br_mnt(br);
9434+ err = vfsub_mnt_want_write(h_path.mnt);
9435+ if (err)
9436+ goto out_unlock;
5afbbe0d
AM
9437+ hdir = au_hi(dir, btop);
9438+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
9439+ h_dir = au_h_iptr(dir, btop);
b912730e
AM
9440+ if (h_dir->i_nlink
9441+ && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
9442+ dt.dt_h_path = h_path;
9443+ au_dtime_revert(&dt);
9444+ }
5afbbe0d 9445+ au_hn_inode_unlock(hdir);
b912730e
AM
9446+ vfsub_mnt_drop_write(h_path.mnt);
9447+ au_cpup_attr_timesizes(dir);
9448+
9449+out_unlock:
9450+ aufs_read_unlock(a->dentry, AuLock_DW);
9451+out:
9452+ dput(a->dentry);
9453+ au_nwt_done(&au_sbi(sb)->si_nowait);
ae9dfd79 9454+ kfree(arg);
b912730e
AM
9455+}
9456+
9457+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
9458+{
9459+ int perm, wkq_err;
5afbbe0d 9460+ aufs_bindex_t btop;
b912730e
AM
9461+ struct au_dir_ts_arg *arg;
9462+ struct dentry *dentry;
9463+ struct super_block *sb;
9464+
9465+ IMustLock(dir);
9466+
9467+ dentry = d_find_any_alias(dir);
9468+ AuDebugOn(!dentry);
9469+ sb = dentry->d_sb;
5afbbe0d
AM
9470+ btop = au_ibtop(dir);
9471+ if (btop == bindex) {
b912730e
AM
9472+ au_cpup_attr_timesizes(dir);
9473+ goto out;
9474+ }
9475+
5afbbe0d 9476+ perm = au_sbr_perm(sb, btop);
b912730e
AM
9477+ if (!au_br_writable(perm))
9478+ goto out;
9479+
9480+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
9481+ if (!arg)
9482+ goto out;
9483+
9484+ arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
9485+ arg->brid = au_sbr_id(sb, bindex);
9486+ wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
9487+ if (unlikely(wkq_err)) {
9488+ pr_err("wkq %d\n", wkq_err);
9489+ dput(dentry);
ae9dfd79 9490+ kfree(arg);
b912730e
AM
9491+ }
9492+
9493+out:
9494+ dput(dentry);
9495+}
9496+
1facf9fc 9497+/* ---------------------------------------------------------------------- */
9498+
9499+static int reopen_dir(struct file *file)
9500+{
9501+ int err;
9502+ unsigned int flags;
5afbbe0d 9503+ aufs_bindex_t bindex, btail, btop;
1facf9fc 9504+ struct dentry *dentry, *h_dentry;
9505+ struct file *h_file;
9506+
9507+ /* open all lower dirs */
2000de60 9508+ dentry = file->f_path.dentry;
5afbbe0d
AM
9509+ btop = au_dbtop(dentry);
9510+ for (bindex = au_fbtop(file); bindex < btop; bindex++)
1facf9fc 9511+ au_set_h_fptr(file, bindex, NULL);
5afbbe0d 9512+ au_set_fbtop(file, btop);
1facf9fc 9513+
9514+ btail = au_dbtaildir(dentry);
5afbbe0d 9515+ for (bindex = au_fbbot_dir(file); btail < bindex; bindex--)
1facf9fc 9516+ au_set_h_fptr(file, bindex, NULL);
5afbbe0d 9517+ au_set_fbbot_dir(file, btail);
1facf9fc 9518+
4a4d8108 9519+ flags = vfsub_file_flags(file);
5afbbe0d 9520+ for (bindex = btop; bindex <= btail; bindex++) {
1facf9fc 9521+ h_dentry = au_h_dptr(dentry, bindex);
9522+ if (!h_dentry)
9523+ continue;
4a4d8108 9524+ h_file = au_hf_dir(file, bindex);
1facf9fc 9525+ if (h_file)
9526+ continue;
9527+
392086de 9528+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9529+ err = PTR_ERR(h_file);
9530+ if (IS_ERR(h_file))
9531+ goto out; /* close all? */
9532+ au_set_h_fptr(file, bindex, h_file);
9533+ }
9534+ au_update_figen(file);
9535+ /* todo: necessary? */
9536+ /* file->f_ra = h_file->f_ra; */
9537+ err = 0;
9538+
4f0767ce 9539+out:
1facf9fc 9540+ return err;
9541+}
9542+
b912730e 9543+static int do_open_dir(struct file *file, int flags, struct file *h_file)
1facf9fc 9544+{
9545+ int err;
9546+ aufs_bindex_t bindex, btail;
9547+ struct dentry *dentry, *h_dentry;
8cdd5066 9548+ struct vfsmount *mnt;
1facf9fc 9549+
1308ab2a 9550+ FiMustWriteLock(file);
b912730e 9551+ AuDebugOn(h_file);
1308ab2a 9552+
523b37e3 9553+ err = 0;
8cdd5066 9554+ mnt = file->f_path.mnt;
2000de60 9555+ dentry = file->f_path.dentry;
5527c038 9556+ file->f_version = d_inode(dentry)->i_version;
5afbbe0d
AM
9557+ bindex = au_dbtop(dentry);
9558+ au_set_fbtop(file, bindex);
1facf9fc 9559+ btail = au_dbtaildir(dentry);
5afbbe0d 9560+ au_set_fbbot_dir(file, btail);
1facf9fc 9561+ for (; !err && bindex <= btail; bindex++) {
9562+ h_dentry = au_h_dptr(dentry, bindex);
9563+ if (!h_dentry)
9564+ continue;
9565+
8cdd5066
JR
9566+ err = vfsub_test_mntns(mnt, h_dentry->d_sb);
9567+ if (unlikely(err))
9568+ break;
392086de 9569+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9570+ if (IS_ERR(h_file)) {
9571+ err = PTR_ERR(h_file);
9572+ break;
9573+ }
9574+ au_set_h_fptr(file, bindex, h_file);
9575+ }
9576+ au_update_figen(file);
9577+ /* todo: necessary? */
9578+ /* file->f_ra = h_file->f_ra; */
9579+ if (!err)
9580+ return 0; /* success */
9581+
9582+ /* close all */
5afbbe0d 9583+ for (bindex = au_fbtop(file); bindex <= btail; bindex++)
1facf9fc 9584+ au_set_h_fptr(file, bindex, NULL);
5afbbe0d
AM
9585+ au_set_fbtop(file, -1);
9586+ au_set_fbbot_dir(file, -1);
4a4d8108 9587+
1facf9fc 9588+ return err;
9589+}
9590+
9591+static int aufs_open_dir(struct inode *inode __maybe_unused,
9592+ struct file *file)
9593+{
4a4d8108
AM
9594+ int err;
9595+ struct super_block *sb;
9596+ struct au_fidir *fidir;
9597+
9598+ err = -ENOMEM;
2000de60 9599+ sb = file->f_path.dentry->d_sb;
4a4d8108 9600+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 9601+ fidir = au_fidir_alloc(sb);
4a4d8108 9602+ if (fidir) {
b912730e
AM
9603+ struct au_do_open_args args = {
9604+ .open = do_open_dir,
9605+ .fidir = fidir
9606+ };
9607+ err = au_do_open(file, &args);
4a4d8108 9608+ if (unlikely(err))
ae9dfd79 9609+ kfree(fidir);
4a4d8108
AM
9610+ }
9611+ si_read_unlock(sb);
9612+ return err;
1facf9fc 9613+}
9614+
9615+static int aufs_release_dir(struct inode *inode __maybe_unused,
9616+ struct file *file)
9617+{
9618+ struct au_vdir *vdir_cache;
4a4d8108
AM
9619+ struct au_finfo *finfo;
9620+ struct au_fidir *fidir;
f0c0a007 9621+ struct au_hfile *hf;
5afbbe0d 9622+ aufs_bindex_t bindex, bbot;
1facf9fc 9623+
4a4d8108
AM
9624+ finfo = au_fi(file);
9625+ fidir = finfo->fi_hdir;
9626+ if (fidir) {
ae9dfd79
AM
9627+ au_hbl_del(&finfo->fi_hlist,
9628+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108
AM
9629+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
9630+ if (vdir_cache)
ae9dfd79 9631+ au_vdir_free(vdir_cache);
4a4d8108
AM
9632+
9633+ bindex = finfo->fi_btop;
9634+ if (bindex >= 0) {
f0c0a007 9635+ hf = fidir->fd_hfile + bindex;
4a4d8108
AM
9636+ /*
9637+ * calls fput() instead of filp_close(),
9638+ * since no dnotify or lock for the lower file.
9639+ */
5afbbe0d 9640+ bbot = fidir->fd_bbot;
f0c0a007
AM
9641+ for (; bindex <= bbot; bindex++, hf++)
9642+ if (hf->hf_file)
ae9dfd79 9643+ au_hfput(hf, /*execed*/0);
4a4d8108 9644+ }
ae9dfd79 9645+ kfree(fidir);
4a4d8108 9646+ finfo->fi_hdir = NULL;
1facf9fc 9647+ }
ae9dfd79 9648+ au_finfo_fin(file);
1facf9fc 9649+ return 0;
9650+}
9651+
9652+/* ---------------------------------------------------------------------- */
9653+
4a4d8108
AM
9654+static int au_do_flush_dir(struct file *file, fl_owner_t id)
9655+{
9656+ int err;
5afbbe0d 9657+ aufs_bindex_t bindex, bbot;
4a4d8108
AM
9658+ struct file *h_file;
9659+
9660+ err = 0;
5afbbe0d
AM
9661+ bbot = au_fbbot_dir(file);
9662+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
4a4d8108
AM
9663+ h_file = au_hf_dir(file, bindex);
9664+ if (h_file)
9665+ err = vfsub_flush(h_file, id);
9666+ }
9667+ return err;
9668+}
9669+
9670+static int aufs_flush_dir(struct file *file, fl_owner_t id)
9671+{
9672+ return au_do_flush(file, id, au_do_flush_dir);
9673+}
9674+
9675+/* ---------------------------------------------------------------------- */
9676+
1facf9fc 9677+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
9678+{
9679+ int err;
5afbbe0d 9680+ aufs_bindex_t bbot, bindex;
1facf9fc 9681+ struct inode *inode;
9682+ struct super_block *sb;
9683+
9684+ err = 0;
9685+ sb = dentry->d_sb;
5527c038 9686+ inode = d_inode(dentry);
1facf9fc 9687+ IMustLock(inode);
5afbbe0d
AM
9688+ bbot = au_dbbot(dentry);
9689+ for (bindex = au_dbtop(dentry); !err && bindex <= bbot; bindex++) {
1facf9fc 9690+ struct path h_path;
1facf9fc 9691+
9692+ if (au_test_ro(sb, bindex, inode))
9693+ continue;
9694+ h_path.dentry = au_h_dptr(dentry, bindex);
9695+ if (!h_path.dentry)
9696+ continue;
1facf9fc 9697+
1facf9fc 9698+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 9699+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 9700+ }
9701+
9702+ return err;
9703+}
9704+
9705+static int au_do_fsync_dir(struct file *file, int datasync)
9706+{
9707+ int err;
5afbbe0d 9708+ aufs_bindex_t bbot, bindex;
1facf9fc 9709+ struct file *h_file;
9710+ struct super_block *sb;
9711+ struct inode *inode;
1facf9fc 9712+
ae9dfd79 9713+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1, /*fi_lsc*/0);
1facf9fc 9714+ if (unlikely(err))
9715+ goto out;
9716+
c06a8ce3 9717+ inode = file_inode(file);
b912730e 9718+ sb = inode->i_sb;
5afbbe0d
AM
9719+ bbot = au_fbbot_dir(file);
9720+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
4a4d8108 9721+ h_file = au_hf_dir(file, bindex);
1facf9fc 9722+ if (!h_file || au_test_ro(sb, bindex, inode))
9723+ continue;
9724+
53392da6 9725+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 9726+ }
9727+
4f0767ce 9728+out:
1facf9fc 9729+ return err;
9730+}
9731+
9732+/*
9733+ * @file may be NULL (review: but file->f_path.dentry is read unconditionally)
9734+ */
1e00d052
AM
9735+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
9736+ int datasync)
1facf9fc 9737+{
9738+ int err;
b752ccd1 9739+ struct dentry *dentry;
5527c038 9740+ struct inode *inode;
1facf9fc 9741+ struct super_block *sb;
1facf9fc 9742+
9743+ err = 0;
2000de60 9744+ dentry = file->f_path.dentry;
5527c038 9745+ inode = d_inode(dentry);
febd17d6 9746+ inode_lock(inode);
1facf9fc 9747+ sb = dentry->d_sb;
9748+ si_noflush_read_lock(sb);
9749+ if (file)
9750+ err = au_do_fsync_dir(file, datasync);
9751+ else {
9752+ di_write_lock_child(dentry);
9753+ err = au_do_fsync_dir_no_file(dentry, datasync);
9754+ }
5527c038 9755+ au_cpup_attr_timesizes(inode);
1facf9fc 9756+ di_write_unlock(dentry);
9757+ if (file)
9758+ fi_write_unlock(file);
9759+
9760+ si_read_unlock(sb);
febd17d6 9761+ inode_unlock(inode);
1facf9fc 9762+ return err;
9763+}
9764+
9765+/* ---------------------------------------------------------------------- */
9766+
5afbbe0d 9767+static int aufs_iterate_shared(struct file *file, struct dir_context *ctx)
1facf9fc 9768+{
9769+ int err;
9770+ struct dentry *dentry;
9dbd164d 9771+ struct inode *inode, *h_inode;
1facf9fc 9772+ struct super_block *sb;
9773+
523b37e3 9774+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 9775+
2000de60 9776+ dentry = file->f_path.dentry;
5527c038 9777+ inode = d_inode(dentry);
1facf9fc 9778+ IMustLock(inode);
9779+
9780+ sb = dentry->d_sb;
9781+ si_read_lock(sb, AuLock_FLUSH);
ae9dfd79 9782+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1, /*fi_lsc*/0);
1facf9fc 9783+ if (unlikely(err))
9784+ goto out;
027c5e7a
AM
9785+ err = au_alive_dir(dentry);
9786+ if (!err)
9787+ err = au_vdir_init(file);
1facf9fc 9788+ di_downgrade_lock(dentry, AuLock_IR);
9789+ if (unlikely(err))
9790+ goto out_unlock;
9791+
5afbbe0d 9792+ h_inode = au_h_iptr(inode, au_ibtop(inode));
b752ccd1 9793+ if (!au_test_nfsd()) {
392086de 9794+ err = au_vdir_fill_de(file, ctx);
9dbd164d 9795+ fsstack_copy_attr_atime(inode, h_inode);
1facf9fc 9796+ } else {
9797+ /*
9798+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
9799+ * encode_fh() and others.
9800+ */
9dbd164d 9801+ atomic_inc(&h_inode->i_count);
1facf9fc 9802+ di_read_unlock(dentry, AuLock_IR);
9803+ si_read_unlock(sb);
392086de 9804+ err = au_vdir_fill_de(file, ctx);
1facf9fc 9805+ fsstack_copy_attr_atime(inode, h_inode);
9806+ fi_write_unlock(file);
9dbd164d 9807+ iput(h_inode);
1facf9fc 9808+
9809+ AuTraceErr(err);
9810+ return err;
9811+ }
9812+
4f0767ce 9813+out_unlock:
1facf9fc 9814+ di_read_unlock(dentry, AuLock_IR);
9815+ fi_write_unlock(file);
4f0767ce 9816+out:
1facf9fc 9817+ si_read_unlock(sb);
9818+ return err;
9819+}
9820+
9821+/* ---------------------------------------------------------------------- */
9822+
/*
 * flag bits kept in test_empty_arg.flags.
 * SHWH is defined as zero unless CONFIG_AUFS_SHWH is enabled, which turns
 * every test/set/clear of it into a no-op.
 */
#define AuTestEmpty_WHONLY	(1 << 0)
#define AuTestEmpty_CALLED	(1 << 1)
#ifdef CONFIG_AUFS_SHWH
#define AuTestEmpty_SHWH	(1 << 2)
#else
#define AuTestEmpty_SHWH	0
#endif

/* test, set and clear a flag by its short name */
#define au_ftest_testempty(flags, name)	((flags) & AuTestEmpty_##name)
#define au_fset_testempty(flags, name) \
	do { (flags) |= AuTestEmpty_##name; } while (0)
#define au_fclr_testempty(flags, name) \
	do { (flags) &= ~AuTestEmpty_##name; } while (0)
9836+
1facf9fc 9837+struct test_empty_arg {
392086de 9838+ struct dir_context ctx;
1308ab2a 9839+ struct au_nhash *whlist;
1facf9fc 9840+ unsigned int flags;
9841+ int err;
9842+ aufs_bindex_t bindex;
9843+};
9844+
392086de
AM
9845+static int test_empty_cb(struct dir_context *ctx, const char *__name,
9846+ int namelen, loff_t offset __maybe_unused, u64 ino,
dece6358 9847+ unsigned int d_type)
1facf9fc 9848+{
392086de
AM
9849+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
9850+ ctx);
1facf9fc 9851+ char *name = (void *)__name;
9852+
9853+ arg->err = 0;
9854+ au_fset_testempty(arg->flags, CALLED);
9855+ /* smp_mb(); */
9856+ if (name[0] == '.'
9857+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
9858+ goto out; /* success */
9859+
9860+ if (namelen <= AUFS_WH_PFX_LEN
9861+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
9862+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 9863+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 9864+ arg->err = -ENOTEMPTY;
9865+ goto out;
9866+ }
9867+
9868+ name += AUFS_WH_PFX_LEN;
9869+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 9870+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 9871+ arg->err = au_nhash_append_wh
1308ab2a 9872+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 9873+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 9874+
4f0767ce 9875+out:
1facf9fc 9876+ /* smp_mb(); */
9877+ AuTraceErr(arg->err);
9878+ return arg->err;
9879+}
9880+
9881+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
9882+{
9883+ int err;
9884+ struct file *h_file;
9885+
9886+ h_file = au_h_open(dentry, arg->bindex,
9887+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
392086de 9888+ /*file*/NULL, /*force_wr*/0);
1facf9fc 9889+ err = PTR_ERR(h_file);
9890+ if (IS_ERR(h_file))
9891+ goto out;
9892+
9893+ err = 0;
9894+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
c06a8ce3 9895+ && !file_inode(h_file)->i_nlink)
1facf9fc 9896+ goto out_put;
9897+
9898+ do {
9899+ arg->err = 0;
9900+ au_fclr_testempty(arg->flags, CALLED);
9901+ /* smp_mb(); */
392086de 9902+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1facf9fc 9903+ if (err >= 0)
9904+ err = arg->err;
9905+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
9906+
4f0767ce 9907+out_put:
1facf9fc 9908+ fput(h_file);
9909+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 9910+out:
1facf9fc 9911+ return err;
9912+}
9913+
/* the arguments of do_test_empty() packed for au_wkq_wait() */
struct do_test_empty_args {
	int			*errp;	/* where the result is stored */
	struct dentry		*dentry;
	struct test_empty_arg	*arg;
};

/* the workqueue entry, unpack @args and run do_test_empty() */
static void call_do_test_empty(void *args)
{
	struct do_test_empty_args *packed = args;

	*packed->errp = do_test_empty(packed->dentry, packed->arg);
}
9925+
9926+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
9927+{
9928+ int err, wkq_err;
9929+ struct dentry *h_dentry;
9930+ struct inode *h_inode;
9931+
9932+ h_dentry = au_h_dptr(dentry, arg->bindex);
5527c038 9933+ h_inode = d_inode(h_dentry);
53392da6 9934+ /* todo: i_mode changes anytime? */
ae9dfd79 9935+ vfsub_inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
1facf9fc 9936+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
ae9dfd79 9937+ inode_unlock_shared(h_inode);
1facf9fc 9938+ if (!err)
9939+ err = do_test_empty(dentry, arg);
9940+ else {
9941+ struct do_test_empty_args args = {
9942+ .errp = &err,
9943+ .dentry = dentry,
9944+ .arg = arg
9945+ };
9946+ unsigned int flags = arg->flags;
9947+
9948+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
9949+ if (unlikely(wkq_err))
9950+ err = wkq_err;
9951+ arg->flags = flags;
9952+ }
9953+
9954+ return err;
9955+}
9956+
9957+int au_test_empty_lower(struct dentry *dentry)
9958+{
9959+ int err;
1308ab2a 9960+ unsigned int rdhash;
5afbbe0d 9961+ aufs_bindex_t bindex, btop, btail;
1308ab2a 9962+ struct au_nhash whlist;
392086de
AM
9963+ struct test_empty_arg arg = {
9964+ .ctx = {
2000de60 9965+ .actor = test_empty_cb
392086de
AM
9966+ }
9967+ };
076b876e 9968+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
1facf9fc 9969+
dece6358
AM
9970+ SiMustAnyLock(dentry->d_sb);
9971+
1308ab2a 9972+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
9973+ if (!rdhash)
9974+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
9975+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 9976+ if (unlikely(err))
1facf9fc 9977+ goto out;
9978+
1facf9fc 9979+ arg.flags = 0;
1308ab2a 9980+ arg.whlist = &whlist;
5afbbe0d 9981+ btop = au_dbtop(dentry);
dece6358
AM
9982+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
9983+ au_fset_testempty(arg.flags, SHWH);
076b876e
AM
9984+ test_empty = do_test_empty;
9985+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
9986+ test_empty = sio_test_empty;
5afbbe0d 9987+ arg.bindex = btop;
076b876e 9988+ err = test_empty(dentry, &arg);
1facf9fc 9989+ if (unlikely(err))
9990+ goto out_whlist;
9991+
9992+ au_fset_testempty(arg.flags, WHONLY);
9993+ btail = au_dbtaildir(dentry);
5afbbe0d 9994+ for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
1facf9fc 9995+ struct dentry *h_dentry;
9996+
9997+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 9998+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 9999+ arg.bindex = bindex;
076b876e 10000+ err = test_empty(dentry, &arg);
1facf9fc 10001+ }
10002+ }
10003+
4f0767ce 10004+out_whlist:
1308ab2a 10005+ au_nhash_wh_free(&whlist);
4f0767ce 10006+out:
1facf9fc 10007+ return err;
10008+}
10009+
10010+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
10011+{
10012+ int err;
392086de
AM
10013+ struct test_empty_arg arg = {
10014+ .ctx = {
2000de60 10015+ .actor = test_empty_cb
392086de
AM
10016+ }
10017+ };
1facf9fc 10018+ aufs_bindex_t bindex, btail;
10019+
10020+ err = 0;
1308ab2a 10021+ arg.whlist = whlist;
1facf9fc 10022+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
10023+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10024+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 10025+ btail = au_dbtaildir(dentry);
5afbbe0d 10026+ for (bindex = au_dbtop(dentry); !err && bindex <= btail; bindex++) {
1facf9fc 10027+ struct dentry *h_dentry;
10028+
10029+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10030+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10031+ arg.bindex = bindex;
10032+ err = sio_test_empty(dentry, &arg);
10033+ }
10034+ }
10035+
10036+ return err;
10037+}
10038+
10039+/* ---------------------------------------------------------------------- */
10040+
10041+const struct file_operations aufs_dir_fop = {
4a4d8108 10042+ .owner = THIS_MODULE,
027c5e7a 10043+ .llseek = default_llseek,
1facf9fc 10044+ .read = generic_read_dir,
5afbbe0d 10045+ .iterate_shared = aufs_iterate_shared,
1facf9fc 10046+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
10047+#ifdef CONFIG_COMPAT
10048+ .compat_ioctl = aufs_compat_ioctl_dir,
10049+#endif
1facf9fc 10050+ .open = aufs_open_dir,
10051+ .release = aufs_release_dir,
4a4d8108 10052+ .flush = aufs_flush_dir,
1facf9fc 10053+ .fsync = aufs_fsync_dir
10054+};
e8791d4f
AM
10055diff -urNp -x '*.orig' linux-4.9/fs/aufs/dir.h linux-4.9/fs/aufs/dir.h
10056--- linux-4.9/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
10057+++ linux-4.9/fs/aufs/dir.h 2021-02-24 16:15:09.524906971 +0100
ae9dfd79 10058@@ -0,0 +1,131 @@
1facf9fc 10059+/*
ae9dfd79 10060+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 10061+ *
10062+ * This program, aufs is free software; you can redistribute it and/or modify
10063+ * it under the terms of the GNU General Public License as published by
10064+ * the Free Software Foundation; either version 2 of the License, or
10065+ * (at your option) any later version.
dece6358
AM
10066+ *
10067+ * This program is distributed in the hope that it will be useful,
10068+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10069+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10070+ * GNU General Public License for more details.
10071+ *
10072+ * You should have received a copy of the GNU General Public License
523b37e3 10073+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10074+ */
10075+
10076+/*
10077+ * directory operations
10078+ */
10079+
10080+#ifndef __AUFS_DIR_H__
10081+#define __AUFS_DIR_H__
10082+
10083+#ifdef __KERNEL__
10084+
10085+#include <linux/fs.h>
1facf9fc 10086+
10087+/* ---------------------------------------------------------------------- */
10088+
10089+/* need to be faster and smaller */
10090+
10091+struct au_nhash {
dece6358
AM
10092+ unsigned int nh_num;
10093+ struct hlist_head *nh_head;
1facf9fc 10094+};
10095+
10096+struct au_vdir_destr {
10097+ unsigned char len;
10098+ unsigned char name[0];
10099+} __packed;
10100+
10101+struct au_vdir_dehstr {
10102+ struct hlist_node hash;
ae9dfd79 10103+ struct au_vdir_destr *str;
4a4d8108 10104+} ____cacheline_aligned_in_smp;
1facf9fc 10105+
10106+struct au_vdir_de {
10107+ ino_t de_ino;
10108+ unsigned char de_type;
10109+ /* caution: packed */
10110+ struct au_vdir_destr de_str;
10111+} __packed;
10112+
10113+struct au_vdir_wh {
10114+ struct hlist_node wh_hash;
dece6358
AM
10115+#ifdef CONFIG_AUFS_SHWH
10116+ ino_t wh_ino;
1facf9fc 10117+ aufs_bindex_t wh_bindex;
dece6358
AM
10118+ unsigned char wh_type;
10119+#else
10120+ aufs_bindex_t wh_bindex;
10121+#endif
10122+ /* caution: packed */
1facf9fc 10123+ struct au_vdir_destr wh_str;
10124+} __packed;
10125+
10126+union au_vdir_deblk_p {
10127+ unsigned char *deblk;
10128+ struct au_vdir_de *de;
10129+};
10130+
10131+struct au_vdir {
10132+ unsigned char **vd_deblk;
10133+ unsigned long vd_nblk;
1facf9fc 10134+ struct {
10135+ unsigned long ul;
10136+ union au_vdir_deblk_p p;
10137+ } vd_last;
10138+
10139+ unsigned long vd_version;
dece6358 10140+ unsigned int vd_deblk_sz;
ae9dfd79 10141+ unsigned long vd_jiffy;
4a4d8108 10142+} ____cacheline_aligned_in_smp;
1facf9fc 10143+
10144+/* ---------------------------------------------------------------------- */
10145+
10146+/* dir.c */
10147+extern const struct file_operations aufs_dir_fop;
10148+void au_add_nlink(struct inode *dir, struct inode *h_dir);
10149+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 10150+loff_t au_dir_size(struct file *file, struct dentry *dentry);
b912730e 10151+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
1facf9fc 10152+int au_test_empty_lower(struct dentry *dentry);
10153+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
10154+
10155+/* vdir.c */
1308ab2a 10156+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
10157+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
10158+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 10159+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
10160+ int limit);
dece6358
AM
10161+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
10162+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
10163+ unsigned int d_type, aufs_bindex_t bindex,
10164+ unsigned char shwh);
ae9dfd79 10165+void au_vdir_free(struct au_vdir *vdir);
1facf9fc 10166+int au_vdir_init(struct file *file);
392086de 10167+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
1facf9fc 10168+
10169+/* ioctl.c */
10170+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
10171+
1308ab2a 10172+#ifdef CONFIG_AUFS_RDU
10173+/* rdu.c */
10174+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10175+#ifdef CONFIG_COMPAT
10176+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
10177+ unsigned long arg);
10178+#endif
1308ab2a 10179+#else
c1595e42
JR
10180+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
10181+ unsigned int cmd, unsigned long arg)
b752ccd1 10182+#ifdef CONFIG_COMPAT
c1595e42
JR
10183+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
10184+ unsigned int cmd, unsigned long arg)
b752ccd1 10185+#endif
1308ab2a 10186+#endif
10187+
1facf9fc 10188+#endif /* __KERNEL__ */
10189+#endif /* __AUFS_DIR_H__ */
e8791d4f
AM
10190diff -urNp -x '*.orig' linux-4.9/fs/aufs/dirren.c linux-4.9/fs/aufs/dirren.c
10191--- linux-4.9/fs/aufs/dirren.c 1970-01-01 01:00:00.000000000 +0100
10192+++ linux-4.9/fs/aufs/dirren.c 2021-02-24 16:15:09.524906971 +0100
ae9dfd79
AM
10193@@ -0,0 +1,1314 @@
10194+/*
10195+ * Copyright (C) 2017-2018 Junjiro R. Okajima
10196+ *
10197+ * This program, aufs is free software; you can redistribute it and/or modify
10198+ * it under the terms of the GNU General Public License as published by
10199+ * the Free Software Foundation; either version 2 of the License, or
10200+ * (at your option) any later version.
10201+ *
10202+ * This program is distributed in the hope that it will be useful,
10203+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10204+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10205+ * GNU General Public License for more details.
10206+ *
10207+ * You should have received a copy of the GNU General Public License
10208+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
10209+ */
10210+
10211+/*
10212+ * special handling in renaming a directory
10213+ * in order to support looking-up the before-renamed name on the lower readonly
10214+ * branches
10215+ */
10216+
10217+#include <linux/byteorder/generic.h>
10218+#include "aufs.h"
10219+
10220+static void au_dr_hino_del(struct au_dr_br *dr, struct au_dr_hino *ent)
10221+{
10222+ int idx;
10223+
10224+ idx = au_dr_ihash(ent->dr_h_ino);
10225+ au_hbl_del(&ent->dr_hnode, dr->dr_h_ino + idx);
10226+}
10227+
10228+static int au_dr_hino_test_empty(struct au_dr_br *dr)
10229+{
10230+ int ret, i;
10231+ struct hlist_bl_head *hbl;
10232+
10233+ ret = 1;
10234+ for (i = 0; ret && i < AuDirren_NHASH; i++) {
10235+ hbl = dr->dr_h_ino + i;
10236+ hlist_bl_lock(hbl);
10237+ ret &= hlist_bl_empty(hbl);
10238+ hlist_bl_unlock(hbl);
10239+ }
10240+
10241+ return ret;
10242+}
10243+
10244+static struct au_dr_hino *au_dr_hino_find(struct au_dr_br *dr, ino_t ino)
10245+{
10246+ struct au_dr_hino *found, *ent;
10247+ struct hlist_bl_head *hbl;
10248+ struct hlist_bl_node *pos;
10249+ int idx;
10250+
10251+ found = NULL;
10252+ idx = au_dr_ihash(ino);
10253+ hbl = dr->dr_h_ino + idx;
10254+ hlist_bl_lock(hbl);
10255+ hlist_bl_for_each_entry(ent, pos, hbl, dr_hnode)
10256+ if (ent->dr_h_ino == ino) {
10257+ found = ent;
10258+ break;
10259+ }
10260+ hlist_bl_unlock(hbl);
10261+
10262+ return found;
10263+}
10264+
10265+int au_dr_hino_test_add(struct au_dr_br *dr, ino_t ino,
10266+ struct au_dr_hino *add_ent)
10267+{
10268+ int found, idx;
10269+ struct hlist_bl_head *hbl;
10270+ struct hlist_bl_node *pos;
10271+ struct au_dr_hino *ent;
10272+
10273+ found = 0;
10274+ idx = au_dr_ihash(ino);
10275+ hbl = dr->dr_h_ino + idx;
10276+#if 0
10277+ {
10278+ struct hlist_bl_node *tmp;
10279+
10280+ hlist_bl_for_each_entry_safe(ent, pos, tmp, hbl, dr_hnode)
10281+ AuDbg("hi%llu\n", (unsigned long long)ent->dr_h_ino);
10282+ }
10283+#endif
10284+ hlist_bl_lock(hbl);
10285+ hlist_bl_for_each_entry(ent, pos, hbl, dr_hnode)
10286+ if (ent->dr_h_ino == ino) {
10287+ found = 1;
10288+ break;
10289+ }
10290+ if (!found && add_ent)
10291+ hlist_bl_add_head(&add_ent->dr_hnode, hbl);
10292+ hlist_bl_unlock(hbl);
10293+
10294+ if (!found && add_ent)
10295+ AuDbg("i%llu added\n", (unsigned long long)add_ent->dr_h_ino);
10296+
10297+ return found;
10298+}
10299+
10300+void au_dr_hino_free(struct au_dr_br *dr)
10301+{
10302+ int i;
10303+ struct hlist_bl_head *hbl;
10304+ struct hlist_bl_node *pos, *tmp;
10305+ struct au_dr_hino *ent;
10306+
10307+ /* SiMustWriteLock(sb); */
10308+
10309+ for (i = 0; i < AuDirren_NHASH; i++) {
10310+ hbl = dr->dr_h_ino + i;
10311+ /* no spinlock since sbinfo must be write-locked */
10312+ hlist_bl_for_each_entry_safe(ent, pos, tmp, hbl, dr_hnode)
10313+ kfree(ent);
10314+ INIT_HLIST_BL_HEAD(hbl);
10315+ }
10316+}
10317+
10318+/* returns the number of inodes or an error */
10319+static int au_dr_hino_store(struct super_block *sb, struct au_branch *br,
10320+ struct file *hinofile)
10321+{
10322+ int err, i;
10323+ ssize_t ssz;
10324+ loff_t pos, oldsize;
10325+ uint64_t u64;
10326+ struct inode *hinoinode;
10327+ struct hlist_bl_head *hbl;
10328+ struct hlist_bl_node *n1, *n2;
10329+ struct au_dr_hino *ent;
10330+
10331+ SiMustWriteLock(sb);
10332+ AuDebugOn(!au_br_writable(br->br_perm));
10333+
10334+ hinoinode = file_inode(hinofile);
10335+ oldsize = i_size_read(hinoinode);
10336+
10337+ err = 0;
10338+ pos = 0;
10339+ hbl = br->br_dirren.dr_h_ino;
10340+ for (i = 0; !err && i < AuDirren_NHASH; i++, hbl++) {
10341+ /* no bit-lock since sbinfo must be write-locked */
10342+ hlist_bl_for_each_entry_safe(ent, n1, n2, hbl, dr_hnode) {
10343+ AuDbg("hi%llu, %pD2\n",
10344+ (unsigned long long)ent->dr_h_ino, hinofile);
10345+ u64 = cpu_to_be64(ent->dr_h_ino);
10346+ ssz = vfsub_write_k(hinofile, &u64, sizeof(u64), &pos);
10347+ if (ssz == sizeof(u64))
10348+ continue;
10349+
10350+ /* write error */
10351+ pr_err("ssz %zd, %pD2\n", ssz, hinofile);
10352+ err = -ENOSPC;
10353+ if (ssz < 0)
10354+ err = ssz;
10355+ break;
10356+ }
10357+ }
10358+ /* regardless the error */
10359+ if (pos < oldsize) {
10360+ err = vfsub_trunc(&hinofile->f_path, pos, /*attr*/0, hinofile);
10361+ AuTraceErr(err);
10362+ }
10363+
10364+ AuTraceErr(err);
10365+ return err;
10366+}
10367+
10368+static int au_dr_hino_load(struct au_dr_br *dr, struct file *hinofile)
10369+{
10370+ int err, hidx;
10371+ ssize_t ssz;
10372+ size_t sz, n;
10373+ loff_t pos;
10374+ uint64_t u64;
10375+ struct au_dr_hino *ent;
10376+ struct inode *hinoinode;
10377+ struct hlist_bl_head *hbl;
10378+
10379+ err = 0;
10380+ pos = 0;
10381+ hbl = dr->dr_h_ino;
10382+ hinoinode = file_inode(hinofile);
10383+ sz = i_size_read(hinoinode);
10384+ AuDebugOn(sz % sizeof(u64));
10385+ n = sz / sizeof(u64);
10386+ while (n--) {
10387+ ssz = vfsub_read_k(hinofile, &u64, sizeof(u64), &pos);
10388+ if (unlikely(ssz != sizeof(u64))) {
10389+ pr_err("ssz %zd, %pD2\n", ssz, hinofile);
10390+ err = -EINVAL;
10391+ if (ssz < 0)
10392+ err = ssz;
10393+ goto out_free;
10394+ }
10395+
10396+ ent = kmalloc(sizeof(*ent), GFP_NOFS);
10397+ if (!ent) {
10398+ err = -ENOMEM;
10399+ AuTraceErr(err);
10400+ goto out_free;
10401+ }
10402+ ent->dr_h_ino = be64_to_cpu(u64);
10403+ AuDbg("hi%llu, %pD2\n",
10404+ (unsigned long long)ent->dr_h_ino, hinofile);
10405+ hidx = au_dr_ihash(ent->dr_h_ino);
10406+ au_hbl_add(&ent->dr_hnode, hbl + hidx);
10407+ }
10408+ goto out; /* success */
10409+
10410+out_free:
10411+ au_dr_hino_free(dr);
10412+out:
10413+ AuTraceErr(err);
10414+ return err;
10415+}
10416+
10417+/*
10418+ * @bindex/@br is a switch to distinguish whether suspending hnotify or not.
10419+ * @path is a switch to distinguish load and store.
10420+ */
10421+static int au_dr_hino(struct super_block *sb, aufs_bindex_t bindex,
10422+ struct au_branch *br, const struct path *path)
10423+{
10424+ int err, flags;
10425+ unsigned char load, suspend;
10426+ struct file *hinofile;
10427+ struct au_hinode *hdir;
10428+ struct inode *dir, *delegated;
10429+ struct path hinopath;
10430+ struct qstr hinoname = QSTR_INIT(AUFS_WH_DR_BRHINO,
10431+ sizeof(AUFS_WH_DR_BRHINO) - 1);
10432+
10433+ AuDebugOn(bindex < 0 && !br);
10434+ AuDebugOn(bindex >= 0 && br);
10435+
10436+ err = -EINVAL;
10437+ suspend = !br;
10438+ if (suspend)
10439+ br = au_sbr(sb, bindex);
10440+ load = !!path;
10441+ if (!load) {
10442+ path = &br->br_path;
10443+ AuDebugOn(!au_br_writable(br->br_perm));
10444+ if (unlikely(!au_br_writable(br->br_perm)))
10445+ goto out;
10446+ }
10447+
10448+ hdir = NULL;
10449+ if (suspend) {
10450+ dir = d_inode(sb->s_root);
10451+ hdir = au_hinode(au_ii(dir), bindex);
10452+ dir = hdir->hi_inode;
10453+ au_hn_inode_lock_nested(hdir, AuLsc_I_CHILD);
10454+ } else {
10455+ dir = d_inode(path->dentry);
10456+ inode_lock_nested(dir, AuLsc_I_CHILD);
10457+ }
10458+ hinopath.dentry = vfsub_lkup_one(&hinoname, path->dentry);
10459+ err = PTR_ERR(hinopath.dentry);
10460+ if (IS_ERR(hinopath.dentry))
10461+ goto out_unlock;
10462+
10463+ err = 0;
10464+ flags = O_RDONLY;
10465+ if (load) {
10466+ if (d_is_negative(hinopath.dentry))
10467+ goto out_dput; /* success */
10468+ } else {
10469+ if (au_dr_hino_test_empty(&br->br_dirren)) {
10470+ if (d_is_positive(hinopath.dentry)) {
10471+ delegated = NULL;
10472+ err = vfsub_unlink(dir, &hinopath, &delegated,
10473+ /*force*/0);
10474+ AuTraceErr(err);
10475+ if (unlikely(err))
10476+ pr_err("ignored err %d, %pd2\n",
10477+ err, hinopath.dentry);
10478+ if (unlikely(err == -EWOULDBLOCK))
10479+ iput(delegated);
10480+ err = 0;
10481+ }
10482+ goto out_dput;
10483+ } else if (!d_is_positive(hinopath.dentry)) {
10484+ err = vfsub_create(dir, &hinopath, 0600,
10485+ /*want_excl*/false);
10486+ AuTraceErr(err);
10487+ if (unlikely(err))
10488+ goto out_dput;
10489+ }
10490+ flags = O_WRONLY;
10491+ }
10492+ hinopath.mnt = path->mnt;
10493+ hinofile = vfsub_dentry_open(&hinopath, flags);
10494+ if (suspend)
10495+ au_hn_inode_unlock(hdir);
10496+ else
10497+ inode_unlock(dir);
10498+ dput(hinopath.dentry);
10499+ AuTraceErrPtr(hinofile);
10500+ if (IS_ERR(hinofile)) {
10501+ err = PTR_ERR(hinofile);
10502+ goto out;
10503+ }
10504+
10505+ if (load)
10506+ err = au_dr_hino_load(&br->br_dirren, hinofile);
10507+ else
10508+ err = au_dr_hino_store(sb, br, hinofile);
10509+ fput(hinofile);
10510+ goto out;
10511+
10512+out_dput:
10513+ dput(hinopath.dentry);
10514+out_unlock:
10515+ if (suspend)
10516+ au_hn_inode_unlock(hdir);
10517+ else
10518+ inode_unlock(dir);
10519+out:
10520+ AuTraceErr(err);
10521+ return err;
10522+}
10523+
10524+/* ---------------------------------------------------------------------- */
10525+
10526+static int au_dr_brid_init(struct au_dr_brid *brid, const struct path *path)
10527+{
10528+ int err;
10529+ struct kstatfs kstfs;
10530+ dev_t dev;
10531+ struct dentry *dentry;
10532+ struct super_block *sb;
10533+
10534+ err = vfs_statfs((void *)path, &kstfs);
10535+ AuTraceErr(err);
10536+ if (unlikely(err))
10537+ goto out;
10538+
10539+ /* todo: support for UUID */
10540+
10541+ if (kstfs.f_fsid.val[0] || kstfs.f_fsid.val[1]) {
10542+ brid->type = AuBrid_FSID;
10543+ brid->fsid = kstfs.f_fsid;
10544+ } else {
10545+ dentry = path->dentry;
10546+ sb = dentry->d_sb;
10547+ dev = sb->s_dev;
10548+ if (dev) {
10549+ brid->type = AuBrid_DEV;
10550+ brid->dev = dev;
10551+ }
10552+ }
10553+
10554+out:
10555+ return err;
10556+}
10557+
10558+int au_dr_br_init(struct super_block *sb, struct au_branch *br,
10559+ const struct path *path)
10560+{
10561+ int err, i;
10562+ struct au_dr_br *dr;
10563+ struct hlist_bl_head *hbl;
10564+
10565+ dr = &br->br_dirren;
10566+ hbl = dr->dr_h_ino;
10567+ for (i = 0; i < AuDirren_NHASH; i++, hbl++)
10568+ INIT_HLIST_BL_HEAD(hbl);
10569+
10570+ err = au_dr_brid_init(&dr->dr_brid, path);
10571+ if (unlikely(err))
10572+ goto out;
10573+
10574+ if (au_opt_test(au_mntflags(sb), DIRREN))
10575+ err = au_dr_hino(sb, /*bindex*/-1, br, path);
10576+
10577+out:
10578+ AuTraceErr(err);
10579+ return err;
10580+}
10581+
10582+int au_dr_br_fin(struct super_block *sb, struct au_branch *br)
10583+{
10584+ int err;
10585+
10586+ err = 0;
10587+ if (au_br_writable(br->br_perm))
10588+ err = au_dr_hino(sb, /*bindex*/-1, br, /*path*/NULL);
10589+ if (!err)
10590+ au_dr_hino_free(&br->br_dirren);
10591+
10592+ return err;
10593+}
10594+
10595+/* ---------------------------------------------------------------------- */
10596+
10597+static int au_brid_str(struct au_dr_brid *brid, struct inode *h_inode,
10598+ char *buf, size_t sz)
10599+{
10600+ int err;
10601+ unsigned int major, minor;
10602+ char *p;
10603+
10604+ p = buf;
10605+ err = snprintf(p, sz, "%d_", brid->type);
10606+ AuDebugOn(err > sz);
10607+ p += err;
10608+ sz -= err;
10609+ switch (brid->type) {
10610+ case AuBrid_Unset:
10611+ return -EINVAL;
10612+ case AuBrid_UUID:
10613+ err = snprintf(p, sz, "%pU", brid->uuid.__u_bits);
10614+ break;
10615+ case AuBrid_FSID:
10616+ err = snprintf(p, sz, "%08x-%08x",
10617+ brid->fsid.val[0], brid->fsid.val[1]);
10618+ break;
10619+ case AuBrid_DEV:
10620+ major = MAJOR(brid->dev);
10621+ minor = MINOR(brid->dev);
10622+ if (major <= 0xff && minor <= 0xff)
10623+ err = snprintf(p, sz, "%02x%02x", major, minor);
10624+ else
10625+ err = snprintf(p, sz, "%03x:%05x", major, minor);
10626+ break;
10627+ }
10628+ AuDebugOn(err > sz);
10629+ p += err;
10630+ sz -= err;
10631+ err = snprintf(p, sz, "_%llu", (unsigned long long)h_inode->i_ino);
10632+ AuDebugOn(err > sz);
10633+ p += err;
10634+ sz -= err;
10635+
10636+ return p - buf;
10637+}
10638+
10639+static int au_drinfo_name(struct au_branch *br, char *name, int len)
10640+{
10641+ int rlen;
10642+ struct dentry *br_dentry;
10643+ struct inode *br_inode;
10644+
10645+ br_dentry = au_br_dentry(br);
10646+ br_inode = d_inode(br_dentry);
10647+ rlen = au_brid_str(&br->br_dirren.dr_brid, br_inode, name, len);
10648+ AuDebugOn(rlen >= AUFS_DIRREN_ENV_VAL_SZ);
10649+ AuDebugOn(rlen > len);
10650+
10651+ return rlen;
10652+}
10653+
10654+/* ---------------------------------------------------------------------- */
10655+
10656+/*
10657+ * from the given @h_dentry, construct drinfo at @*fdata.
10658+ * when the size of @*fdata is not enough, reallocate and return new @fdata and
10659+ * @allocated.
10660+ */
10661+static int au_drinfo_construct(struct au_drinfo_fdata **fdata,
10662+ struct dentry *h_dentry,
10663+ unsigned char *allocated)
10664+{
10665+ int err, v;
10666+ struct au_drinfo_fdata *f, *p;
10667+ struct au_drinfo *drinfo;
10668+ struct inode *h_inode;
10669+ struct qstr *qname;
10670+
10671+ err = 0;
10672+ f = *fdata;
10673+ h_inode = d_inode(h_dentry);
10674+ qname = &h_dentry->d_name;
10675+ drinfo = &f->drinfo;
10676+ drinfo->ino = cpu_to_be64(h_inode->i_ino);
10677+ drinfo->oldnamelen = qname->len;
10678+ if (*allocated < sizeof(*f) + qname->len) {
10679+ v = roundup_pow_of_two(*allocated + qname->len);
10680+ p = au_krealloc(f, v, GFP_NOFS, /*may_shrink*/0);
10681+ if (unlikely(!p)) {
10682+ err = -ENOMEM;
10683+ AuTraceErr(err);
10684+ goto out;
10685+ }
10686+ f = p;
10687+ *fdata = f;
10688+ *allocated = v;
10689+ drinfo = &f->drinfo;
10690+ }
10691+ memcpy(drinfo->oldname, qname->name, qname->len);
10692+ AuDbg("i%llu, %.*s\n",
10693+ be64_to_cpu(drinfo->ino), drinfo->oldnamelen, drinfo->oldname);
10694+
10695+out:
10696+ AuTraceErr(err);
10697+ return err;
10698+}
10699+
10700+/* callers have to free the return value */
10701+static struct au_drinfo *au_drinfo_read_k(struct file *file, ino_t h_ino)
10702+{
10703+ struct au_drinfo *ret, *drinfo;
10704+ struct au_drinfo_fdata fdata;
10705+ int len;
10706+ loff_t pos;
10707+ ssize_t ssz;
10708+
10709+ ret = ERR_PTR(-EIO);
10710+ pos = 0;
10711+ ssz = vfsub_read_k(file, &fdata, sizeof(fdata), &pos);
10712+ if (unlikely(ssz != sizeof(fdata))) {
10713+ AuIOErr("ssz %zd, %u, %pD2\n",
10714+ ssz, (unsigned int)sizeof(fdata), file);
10715+ goto out;
10716+ }
10717+
10718+ fdata.magic = ntohl(fdata.magic);
10719+ switch (fdata.magic) {
10720+ case AUFS_DRINFO_MAGIC_V1:
10721+ break;
10722+ default:
10723+ AuIOErr("magic-num 0x%x, 0x%x, %pD2\n",
10724+ fdata.magic, AUFS_DRINFO_MAGIC_V1, file);
10725+ goto out;
10726+ }
10727+
10728+ drinfo = &fdata.drinfo;
10729+ len = drinfo->oldnamelen;
10730+ if (!len) {
10731+ AuIOErr("broken drinfo %pD2\n", file);
10732+ goto out;
10733+ }
10734+
10735+ ret = NULL;
10736+ drinfo->ino = be64_to_cpu(drinfo->ino);
10737+ if (unlikely(h_ino && drinfo->ino != h_ino)) {
10738+ AuDbg("ignored i%llu, i%llu, %pD2\n",
10739+ (unsigned long long)drinfo->ino,
10740+ (unsigned long long)h_ino, file);
10741+ goto out; /* success */
10742+ }
10743+
10744+ ret = kmalloc(sizeof(*ret) + len, GFP_NOFS);
10745+ if (unlikely(!ret)) {
10746+ ret = ERR_PTR(-ENOMEM);
10747+ AuTraceErrPtr(ret);
10748+ goto out;
10749+ }
10750+
10751+ *ret = *drinfo;
10752+ ssz = vfsub_read_k(file, (void *)ret->oldname, len, &pos);
10753+ if (unlikely(ssz != len)) {
10754+ kfree(ret);
10755+ ret = ERR_PTR(-EIO);
10756+ AuIOErr("ssz %zd, %u, %pD2\n", ssz, len, file);
10757+ goto out;
10758+ }
10759+
10760+ AuDbg("oldname %.*s\n", ret->oldnamelen, ret->oldname);
10761+
10762+out:
10763+ return ret;
10764+}
10765+
10766+/* ---------------------------------------------------------------------- */
10767+
10768+/* in order to be revertible */
10769+struct au_drinfo_rev_elm {
10770+ int created;
10771+ struct dentry *info_dentry;
10772+ struct au_drinfo *info_last;
10773+};
10774+
10775+struct au_drinfo_rev {
10776+ unsigned char already;
10777+ aufs_bindex_t nelm;
10778+ struct au_drinfo_rev_elm elm[0];
10779+};
10780+
10781+/* todo: isn't it too large? */
10782+struct au_drinfo_store {
10783+ struct path h_ppath; /* dir on branch btgt under which info files are looked up */
10784+ struct dentry *h_dentry;
10785+ struct au_drinfo_fdata *fdata;
10786+ char *infoname; /* inside of whname, just after PFX */
10787+ char whname[sizeof(AUFS_WH_DR_INFO_PFX) + AUFS_DIRREN_ENV_VAL_SZ];
10788+ aufs_bindex_t btgt, btail;
10789+ unsigned char no_sio, /* set when the current fsuid is root */
10790+ allocated, /* current size of *fdata */
10791+ infonamelen, /* room size for infoname */
10792+ whnamelen, /* length of the generated name */
10793+ renameback; /* renamed back */
10794+};
10795+
10796+/* on rename(2) error, the caller should revert it using @elm */
10797+static int au_drinfo_do_store(struct au_drinfo_store *w,
10798+ struct au_drinfo_rev_elm *elm)
10799+{
10800+ int err, len;
10801+ ssize_t ssz;
10802+ loff_t pos;
10803+ struct path infopath = {
10804+ .mnt = w->h_ppath.mnt
10805+ };
10806+ struct inode *h_dir, *h_inode, *delegated;
10807+ struct file *infofile;
10808+ struct qstr *qname;
10809+
10810+ AuDebugOn(elm
10811+ && memcmp(elm, page_address(ZERO_PAGE(0)), sizeof(*elm)));
10812+
10813+ infopath.dentry = vfsub_lookup_one_len(w->whname, w->h_ppath.dentry,
10814+ w->whnamelen);
10815+ AuTraceErrPtr(infopath.dentry);
10816+ if (IS_ERR(infopath.dentry)) {
10817+ err = PTR_ERR(infopath.dentry);
10818+ goto out;
10819+ }
10820+
10821+ err = 0;
10822+ h_dir = d_inode(w->h_ppath.dentry);
10823+ if (elm && d_is_negative(infopath.dentry)) {
10824+ err = vfsub_create(h_dir, &infopath, 0600, /*want_excl*/true);
10825+ AuTraceErr(err);
10826+ if (unlikely(err))
10827+ goto out_dput;
10828+ elm->created = 1;
10829+ elm->info_dentry = dget(infopath.dentry);
10830+ }
10831+
10832+ infofile = vfsub_dentry_open(&infopath, O_RDWR);
10833+ AuTraceErrPtr(infofile);
10834+ if (IS_ERR(infofile)) {
10835+ err = PTR_ERR(infofile);
10836+ goto out_dput;
10837+ }
10838+
10839+ h_inode = d_inode(infopath.dentry);
10840+ if (elm && i_size_read(h_inode)) {
10841+ h_inode = d_inode(w->h_dentry);
10842+ elm->info_last = au_drinfo_read_k(infofile, h_inode->i_ino);
10843+ AuTraceErrPtr(elm->info_last);
10844+ if (IS_ERR(elm->info_last)) {
10845+ err = PTR_ERR(elm->info_last);
10846+ elm->info_last = NULL;
10847+ AuDebugOn(elm->info_dentry);
10848+ goto out_fput;
10849+ }
10850+ }
10851+
10852+ if (elm && w->renameback) {
10853+ delegated = NULL;
10854+ err = vfsub_unlink(h_dir, &infopath, &delegated, /*force*/0);
10855+ AuTraceErr(err);
10856+ if (unlikely(err == -EWOULDBLOCK))
10857+ iput(delegated);
10858+ goto out_fput;
10859+ }
10860+
10861+ pos = 0;
10862+ qname = &w->h_dentry->d_name;
10863+ len = sizeof(*w->fdata) + qname->len;
10864+ if (!elm)
10865+ len = sizeof(*w->fdata) + w->fdata->drinfo.oldnamelen;
10866+ ssz = vfsub_write_k(infofile, w->fdata, len, &pos);
10867+ if (ssz == len) {
10868+ AuDbg("hi%llu, %.*s\n", w->fdata->drinfo.ino,
10869+ w->fdata->drinfo.oldnamelen, w->fdata->drinfo.oldname);
10870+ goto out_fput; /* success */
10871+ } else {
10872+ err = -EIO;
10873+ if (ssz < 0)
10874+ err = ssz;
10875+ /* the caller should revert it using @elm */
10876+ }
10877+
10878+out_fput:
10879+ fput(infofile);
10880+out_dput:
10881+ dput(infopath.dentry);
10882+out:
10883+ AuTraceErr(err);
10884+ return err;
10885+}
10886+
10887+struct au_call_drinfo_do_store_args {
10888+ int *errp;
10889+ struct au_drinfo_store *w;
10890+ struct au_drinfo_rev_elm *elm;
10891+};
10892+
10893+static void au_call_drinfo_do_store(void *args)
10894+{
10895+ struct au_call_drinfo_do_store_args *call = args;
10896+ /* run the store and hand its result back through errp */
10897+ *call->errp = au_drinfo_do_store(call->w, call->elm);
10898+}
10899+
10900+static int au_drinfo_store_sio(struct au_drinfo_store *w,
10901+ struct au_drinfo_rev_elm *elm)
10902+{
10903+ int err, wkq_err;
10904+
10905+ if (!w->no_sio) {
10906+ struct au_call_drinfo_do_store_args call = {
10907+ .errp = &err,
10908+ .w = w,
10909+ .elm = elm
10910+ };
10911+ wkq_err = au_wkq_wait(au_call_drinfo_do_store, &call);
10912+ if (unlikely(wkq_err))
10913+ err = wkq_err;
10914+ } else {
10915+ err = au_drinfo_do_store(w, elm);
10916+ }
10917+ AuTraceErr(err);
10918+
10919+ return err;
10920+}
10921+
10922+static int au_drinfo_store_work_init(struct au_drinfo_store *w,
10923+ aufs_bindex_t btgt)
10924+{
10925+ int err;
10926+
10927+ memset(w, 0, sizeof(*w));
10928+ w->allocated = roundup_pow_of_two(sizeof(*w->fdata) + 40); /* initial buffer size */
10929+ strcpy(w->whname, AUFS_WH_DR_INFO_PFX);
10930+ w->infoname = w->whname + sizeof(AUFS_WH_DR_INFO_PFX) - 1; /* just past the prefix */
10931+ w->infonamelen = sizeof(w->whname) - sizeof(AUFS_WH_DR_INFO_PFX);
10932+ w->btgt = btgt;
10933+ w->no_sio = !!uid_eq(current_fsuid(), GLOBAL_ROOT_UID); /* root needs no workqueue */
10934+
10935+ err = -ENOMEM;
10936+ w->fdata = kcalloc(1, w->allocated, GFP_NOFS); /* freed by au_drinfo_store_work_fin() */
10937+ if (unlikely(!w->fdata)) {
10938+ AuTraceErr(err);
10939+ goto out;
10940+ }
10941+ w->fdata->magic = htonl(AUFS_DRINFO_MAGIC_V1); /* kept in network byte order */
10942+ err = 0;
10943+
10944+out:
10945+ return err;
10946+}
10947+
10948+static void au_drinfo_store_work_fin(struct au_drinfo_store *w)
10949+{
10950+ kfree(w->fdata);
10951+}
10952+
10953+static void au_drinfo_store_rev(struct au_drinfo_rev *rev,
10954+ struct au_drinfo_store *w)
10955+{
10956+ struct au_drinfo_rev_elm *elm;
10957+ struct inode *h_dir, *delegated;
10958+ int err, nelm;
10959+ struct path infopath = {
10960+ .mnt = w->h_ppath.mnt
10961+ };
10962+
10963+ h_dir = d_inode(w->h_ppath.dentry);
10964+ IMustLock(h_dir);
10965+
10966+ err = 0;
10967+ elm = rev->elm;
10968+ for (nelm = rev->nelm; nelm > 0; nelm--, elm++) {
10969+ AuDebugOn(elm->created && elm->info_last);
10970+ if (elm->created) {
10971+ AuDbg("here\n");
10972+ delegated = NULL;
10973+ infopath.dentry = elm->info_dentry;
10974+ err = vfsub_unlink(h_dir, &infopath, &delegated,
10975+ !w->no_sio);
10976+ AuTraceErr(err);
10977+ if (unlikely(err == -EWOULDBLOCK))
10978+ iput(delegated);
10979+ dput(elm->info_dentry);
10980+ } else if (elm->info_last) {
10981+ AuDbg("here\n");
10982+ w->fdata->drinfo = *elm->info_last;
10983+ memcpy(w->fdata->drinfo.oldname,
10984+ elm->info_last->oldname,
10985+ elm->info_last->oldnamelen);
10986+ err = au_drinfo_store_sio(w, /*elm*/NULL);
10987+ kfree(elm->info_last);
10988+ }
10989+ if (unlikely(err))
10990+ AuIOErr("%d, %s\n", err, w->whname);
10991+ /* go on even if err */
10992+ }
10993+}
10994+
10995+/* caller has to call au_dr_rename_fin() later */
10996+static int au_drinfo_store(struct dentry *dentry, aufs_bindex_t btgt,
10997+ struct qstr *dst_name, void *_rev)
10998+{
10999+ int err, sz, nelm;
11000+ aufs_bindex_t bindex, btail;
11001+ struct au_drinfo_store work;
11002+ struct au_drinfo_rev *rev, **p;
11003+ struct au_drinfo_rev_elm *elm;
11004+ struct super_block *sb;
11005+ struct au_branch *br;
11006+ struct au_hinode *hdir;
11007+
11008+ err = au_drinfo_store_work_init(&work, btgt);
11009+ AuTraceErr(err);
11010+ if (unlikely(err))
11011+ goto out;
11012+
11013+ err = -ENOMEM;
11014+ btail = au_dbtaildir(dentry);
11015+ nelm = btail - btgt;
11016+ sz = sizeof(*rev) + sizeof(*elm) * nelm;
11017+ rev = kcalloc(1, sz, GFP_NOFS);
11018+ if (unlikely(!rev)) {
11019+ AuTraceErr(err);
11020+ goto out_args;
11021+ }
11022+ rev->nelm = nelm;
11023+ elm = rev->elm;
11024+ p = _rev;
11025+ *p = rev;
11026+
11027+ err = 0;
11028+ sb = dentry->d_sb;
11029+ work.h_ppath.dentry = au_h_dptr(dentry, btgt);
11030+ work.h_ppath.mnt = au_sbr_mnt(sb, btgt);
11031+ hdir = au_hi(d_inode(dentry), btgt);
11032+ au_hn_inode_lock_nested(hdir, AuLsc_I_CHILD);
11033+ for (bindex = btgt + 1; bindex <= btail; bindex++, elm++) {
11034+ work.h_dentry = au_h_dptr(dentry, bindex);
11035+ if (!work.h_dentry)
11036+ continue;
11037+
11038+ err = au_drinfo_construct(&work.fdata, work.h_dentry,
11039+ &work.allocated);
11040+ AuTraceErr(err);
11041+ if (unlikely(err))
11042+ break;
11043+
11044+ work.renameback = au_qstreq(&work.h_dentry->d_name, dst_name);
11045+ br = au_sbr(sb, bindex);
11046+ work.whnamelen = sizeof(AUFS_WH_DR_INFO_PFX) - 1;
11047+ work.whnamelen += au_drinfo_name(br, work.infoname,
11048+ work.infonamelen);
11049+ AuDbg("whname %.*s, i%llu, %.*s\n",
11050+ work.whnamelen, work.whname,
11051+ be64_to_cpu(work.fdata->drinfo.ino),
11052+ work.fdata->drinfo.oldnamelen,
11053+ work.fdata->drinfo.oldname);
11054+
11055+ err = au_drinfo_store_sio(&work, elm);
11056+ AuTraceErr(err);
11057+ if (unlikely(err))
11058+ break;
11059+ }
11060+ if (unlikely(err)) {
11061+ /* revert all drinfo */
11062+ au_drinfo_store_rev(rev, &work);
11063+ kfree(rev);
11064+ *p = NULL;
11065+ }
11066+ au_hn_inode_unlock(hdir);
11067+
11068+out_args:
11069+ au_drinfo_store_work_fin(&work);
11070+out:
11071+ return err;
11072+}
11073+
11074+/* ---------------------------------------------------------------------- */
11075+
11076+int au_dr_rename(struct dentry *src, aufs_bindex_t bindex,
11077+ struct qstr *dst_name, void *_rev)
11078+{
11079+ int err, already;
11080+ ino_t ino;
11081+ struct super_block *sb;
11082+ struct au_branch *br;
11083+ struct au_dr_br *dr;
11084+ struct dentry *h_dentry;
11085+ struct inode *h_inode;
11086+ struct au_dr_hino *ent;
11087+ struct au_drinfo_rev *rev, **p;
11088+
11089+ AuDbg("bindex %d\n", bindex);
11090+
11091+ err = -ENOMEM;
11092+ ent = kmalloc(sizeof(*ent), GFP_NOFS);
11093+ if (unlikely(!ent))
11094+ goto out;
11095+
11096+ sb = src->d_sb;
11097+ br = au_sbr(sb, bindex);
11098+ dr = &br->br_dirren;
11099+ h_dentry = au_h_dptr(src, bindex);
11100+ h_inode = d_inode(h_dentry);
11101+ ino = h_inode->i_ino;
11102+ ent->dr_h_ino = ino;
11103+ already = au_dr_hino_test_add(dr, ino, ent);
11104+ AuDbg("b%d, hi%llu, already %d\n",
11105+ bindex, (unsigned long long)ino, already);
11106+
11107+ err = au_drinfo_store(src, bindex, dst_name, _rev);
11108+ AuTraceErr(err);
11109+ if (!err) {
11110+ p = _rev;
11111+ rev = *p;
11112+ rev->already = already;
11113+ goto out; /* success */
11114+ }
11115+
11116+ /* revert */
11117+ if (!already)
11118+ au_dr_hino_del(dr, ent);
11119+ kfree(ent);
11120+
11121+out:
11122+ AuTraceErr(err);
11123+ return err;
11124+}
11125+
11126+void au_dr_rename_fin(struct dentry *src, aufs_bindex_t btgt, void *_rev)
11127+{
11128+ struct au_drinfo_rev *rev = _rev;
11129+ struct au_drinfo_rev_elm *elm;
11130+ int i;
11131+
11132+ /* release what each au_drinfo_do_store() call kept for a revert */
11133+ elm = rev->elm;
11134+ for (i = 0; i < rev->nelm; i++, elm++) {
11135+ dput(elm->info_dentry);
11136+ kfree(elm->info_last);
11137+ }
11138+ kfree(rev);
11139+}
11140+
11141+void au_dr_rename_rev(struct dentry *src, aufs_bindex_t btgt, void *_rev)
11142+{
11143+ int err;
11144+ struct au_drinfo_store work;
11145+ struct au_drinfo_rev *rev = _rev;
11146+ struct super_block *sb;
11147+ struct au_branch *br;
11148+ struct inode *h_inode;
11149+ struct au_dr_br *dr;
11150+ struct au_dr_hino *ent;
11151+
11152+ err = au_drinfo_store_work_init(&work, btgt);
11153+ if (unlikely(err))
11154+ goto out;
11155+
11156+ sb = src->d_sb;
11157+ br = au_sbr(sb, btgt);
11158+ work.h_ppath.dentry = au_h_dptr(src, btgt);
11159+ work.h_ppath.mnt = au_br_mnt(br);
11160+ au_drinfo_store_rev(rev, &work);
11161+ au_drinfo_store_work_fin(&work);
11162+ if (rev->already)
11163+ goto out;
11164+
11165+ dr = &br->br_dirren;
11166+ h_inode = d_inode(work.h_ppath.dentry);
11167+ ent = au_dr_hino_find(dr, h_inode->i_ino);
11168+ BUG_ON(!ent);
11169+ au_dr_hino_del(dr, ent);
11170+ kfree(ent);
11171+
11172+out:
11173+ kfree(rev);
11174+ if (unlikely(err))
11175+ pr_err("failed to remove dirren info\n");
11176+}
11177+
11178+/* ---------------------------------------------------------------------- */
11179+
11180+static struct au_drinfo *au_drinfo_do_load(struct path *h_ppath,
11181+ char *whname, int whnamelen,
11182+ struct dentry **info_dentry)
11183+{
11184+ struct au_drinfo *drinfo;
11185+ struct file *f;
11186+ struct inode *h_dir;
11187+ struct path infopath;
11188+ int unlocked;
11189+
11190+ AuDbg("%pd/%.*s\n", h_ppath->dentry, whnamelen, whname);
11191+
11192+ *info_dentry = NULL;
11193+ drinfo = NULL;
11194+ unlocked = 0;
11195+ h_dir = d_inode(h_ppath->dentry);
11196+ vfsub_inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
11197+ infopath.dentry = vfsub_lookup_one_len(whname, h_ppath->dentry,
11198+ whnamelen);
11199+ if (IS_ERR(infopath.dentry)) {
11200+ drinfo = (void *)infopath.dentry;
11201+ goto out;
11202+ }
11203+
11204+ if (d_is_negative(infopath.dentry))
11205+ goto out_dput; /* success */
11206+
11207+ infopath.mnt = h_ppath->mnt;
11208+ f = vfsub_dentry_open(&infopath, O_RDONLY);
11209+ inode_unlock_shared(h_dir);
11210+ unlocked = 1;
11211+ if (IS_ERR(f)) {
11212+ drinfo = (void *)f;
11213+ goto out_dput;
11214+ }
11215+
11216+ drinfo = au_drinfo_read_k(f, /*h_ino*/0);
11217+ if (IS_ERR_OR_NULL(drinfo))
11218+ goto out_fput;
11219+
11220+ AuDbg("oldname %.*s\n", drinfo->oldnamelen, drinfo->oldname);
11221+ *info_dentry = dget(infopath.dentry); /* keep it alive */
11222+
11223+out_fput:
11224+ fput(f);
11225+out_dput:
11226+ dput(infopath.dentry);
11227+out:
11228+ if (!unlocked)
11229+ inode_unlock_shared(h_dir);
11230+ AuTraceErrPtr(drinfo);
11231+ return drinfo;
11232+}
11233+
11234+struct au_drinfo_do_load_args {
11235+ struct au_drinfo **drinfop;
11236+ struct path *h_ppath;
11237+ char *whname;
11238+ int whnamelen;
11239+ struct dentry **info_dentry;
11240+};
11241+
11242+static void au_call_drinfo_do_load(void *args)
11243+{
11244+ struct au_drinfo_do_load_args *a = args;
11245+
11246+ *a->drinfop = au_drinfo_do_load(a->h_ppath, a->whname, a->whnamelen,
11247+ a->info_dentry);
11248+}
11249+
11250+struct au_drinfo_load {
11251+ struct path h_ppath;
11252+ struct qstr *qname;
11253+ unsigned char no_sio;
11254+
11255+ aufs_bindex_t ninfo;
11256+ struct au_drinfo **drinfo;
11257+};
11258+
11259+static int au_drinfo_load(struct au_drinfo_load *w, aufs_bindex_t bindex,
11260+ struct au_branch *br)
11261+{
11262+ int err, wkq_err, whnamelen, e;
11263+ char whname[sizeof(AUFS_WH_DR_INFO_PFX) + AUFS_DIRREN_ENV_VAL_SZ]
11264+ = AUFS_WH_DR_INFO_PFX;
11265+ struct au_drinfo *drinfo;
11266+ struct qstr oldname;
11267+ struct inode *h_dir, *delegated;
11268+ struct dentry *info_dentry;
11269+ struct path infopath;
11270+ /* info file name: the prefix followed by what au_drinfo_name() makes for @br */
11271+ whnamelen = sizeof(AUFS_WH_DR_INFO_PFX) - 1;
11272+ whnamelen += au_drinfo_name(br, whname + whnamelen,
11273+ sizeof(whname) - whnamelen);
11274+ if (w->no_sio)
11275+ drinfo = au_drinfo_do_load(&w->h_ppath, whname, whnamelen,
11276+ &info_dentry);
11277+ else {
11278+ struct au_drinfo_do_load_args args = {
11279+ .drinfop = &drinfo,
11280+ .h_ppath = &w->h_ppath,
11281+ .whname = whname,
11282+ .whnamelen = whnamelen,
11283+ .info_dentry = &info_dentry
11284+ };
11285+ wkq_err = au_wkq_wait(au_call_drinfo_do_load, &args);
11286+ if (unlikely(wkq_err))
11287+ drinfo = ERR_PTR(wkq_err);
11288+ }
11289+ err = PTR_ERR(drinfo); /* 0 when drinfo is NULL, i.e. nothing was loaded */
11290+ if (IS_ERR_OR_NULL(drinfo))
11291+ goto out;
11292+
11293+ err = 0;
11294+ oldname.len = drinfo->oldnamelen;
11295+ oldname.name = drinfo->oldname;
11296+ if (au_qstreq(w->qname, &oldname)) {
11297+ /* the name is renamed back */
11298+ kfree(drinfo);
11299+ drinfo = NULL;
11300+
11301+ infopath.dentry = info_dentry;
11302+ infopath.mnt = w->h_ppath.mnt;
11303+ h_dir = d_inode(w->h_ppath.dentry);
11304+ delegated = NULL;
11305+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
11306+ e = vfsub_unlink(h_dir, &infopath, &delegated, !w->no_sio);
11307+ inode_unlock(h_dir);
11308+ if (unlikely(e)) /* best effort: the failure is only reported */
11309+ AuIOErr("ignored %d, %pd2\n", e, infopath.dentry);
11310+ if (unlikely(e == -EWOULDBLOCK))
11311+ iput(delegated);
11312+ }
11313+ kfree(w->drinfo[bindex]); /* drop any info stored earlier for this branch */
11314+ w->drinfo[bindex] = drinfo;
11315+ dput(info_dentry);
11316+
11317+out:
11318+ AuTraceErr(err);
11319+ return err;
11320+}
11321+
11322+/* ---------------------------------------------------------------------- */
11323+
11324+static void au_dr_lkup_free(struct au_drinfo **drinfo, int n)
11325+{
11326+ int i;
11327+
11328+ for (i = 0; i < n; i++)
11329+ kfree(drinfo[i]);
11330+ kfree(drinfo);
11331+}
11332+
11333+int au_dr_lkup(struct au_do_lookup_args *lkup, struct dentry *dentry,
11334+ aufs_bindex_t btgt)
11335+{
11336+ int err = 0, ninfo; /* err must be defined even if the loop below never runs */
11337+ struct au_drinfo_load w;
11338+ aufs_bindex_t bindex, bbot;
11339+ struct au_branch *br;
11340+ struct inode *h_dir;
11341+ struct au_dr_hino *ent;
11342+ struct super_block *sb;
11343+
11344+ AuDbg("%.*s, name %.*s, whname %.*s, b%d\n",
11345+ AuLNPair(&dentry->d_name), AuLNPair(&lkup->dirren.dr_name),
11346+ AuLNPair(&lkup->whname), btgt);
11347+
11348+ sb = dentry->d_sb;
11349+ bbot = au_sbbot(sb);
11350+ w.ninfo = bbot + 1;
11351+ if (!lkup->dirren.drinfo) { /* first call: one slot per branch index */
11352+ lkup->dirren.drinfo = kcalloc(w.ninfo,
11353+ sizeof(*lkup->dirren.drinfo),
11354+ GFP_NOFS);
11355+ if (unlikely(!lkup->dirren.drinfo)) {
11356+ err = -ENOMEM;
11357+ goto out;
11358+ }
11359+ lkup->dirren.ninfo = w.ninfo;
11360+ }
11361+ w.drinfo = lkup->dirren.drinfo;
11362+ w.no_sio = !!uid_eq(current_fsuid(), GLOBAL_ROOT_UID);
11363+ w.h_ppath.dentry = au_h_dptr(dentry, btgt);
11364+ AuDebugOn(!w.h_ppath.dentry);
11365+ w.h_ppath.mnt = au_sbr_mnt(sb, btgt);
11366+ w.qname = &dentry->d_name;
11367+
11368+ ninfo = 0; /* counts the branches below btgt that have info */
11369+ for (bindex = btgt + 1; bindex <= bbot; bindex++) {
11370+ br = au_sbr(sb, bindex);
11371+ err = au_drinfo_load(&w, bindex, br);
11372+ if (unlikely(err))
11373+ goto out_free;
11374+ if (w.drinfo[bindex])
11375+ ninfo++;
11376+ }
11377+ if (!ninfo) { /* no info is left, so drop the hashed inode entry too */
11378+ br = au_sbr(sb, btgt);
11379+ h_dir = d_inode(w.h_ppath.dentry);
11380+ ent = au_dr_hino_find(&br->br_dirren, h_dir->i_ino);
11381+ AuDebugOn(!ent);
11382+ au_dr_hino_del(&br->br_dirren, ent);
11383+ kfree(ent);
11384+ }
11385+ goto out; /* success */
11386+
11387+out_free:
11388+ au_dr_lkup_free(lkup->dirren.drinfo, lkup->dirren.ninfo);
11389+ lkup->dirren.ninfo = 0;
11390+ lkup->dirren.drinfo = NULL;
11391+out:
11392+ AuTraceErr(err);
11393+ return err;
11394+}
11395+
11396+void au_dr_lkup_fin(struct au_do_lookup_args *lkup)
11397+{
11398+ au_dr_lkup_free(lkup->dirren.drinfo, lkup->dirren.ninfo);
11399+}
11400+
11401+int au_dr_lkup_name(struct au_do_lookup_args *lkup, aufs_bindex_t btgt)
11402+{
11403+ int err;
11404+ struct au_drinfo *drinfo;
11405+
11406+ err = 0;
11407+ if (!lkup->dirren.drinfo) /* nothing was loaded by au_dr_lkup() */
11408+ goto out;
11409+ AuDebugOn(lkup->dirren.ninfo < btgt + 1); /* NOTE(review): slot btgt + 1 is read next, ninfo == btgt + 1 passes here; confirm */
11410+ drinfo = lkup->dirren.drinfo[btgt + 1];
11411+ if (!drinfo)
11412+ goto out;
11413+ /* look up by the recorded old name and rebuild the whiteout name from it */
11414+ kfree(lkup->whname.name);
11415+ lkup->whname.name = NULL;
11416+ lkup->dirren.dr_name.len = drinfo->oldnamelen;
11417+ lkup->dirren.dr_name.name = drinfo->oldname; /* points into drinfo, not a copy */
11418+ lkup->name = &lkup->dirren.dr_name;
11419+ err = au_wh_name_alloc(&lkup->whname, lkup->name);
11420+ if (!err)
11421+ AuDbg("name %.*s, whname %.*s, b%d\n",
11422+ AuLNPair(lkup->name), AuLNPair(&lkup->whname),
11423+ btgt);
11424+
11425+out:
11426+ AuTraceErr(err);
11427+ return err;
11428+}
11429+
11430+int au_dr_lkup_h_ino(struct au_do_lookup_args *lkup, aufs_bindex_t bindex,
11431+ ino_t h_ino)
11432+{
11433+ int match;
11434+ struct au_drinfo *drinfo;
11435+
11436+ match = 1; /* without recorded info, every inode number is accepted */
11437+ if (!lkup->dirren.drinfo)
11438+ goto out;
11439+ AuDebugOn(lkup->dirren.ninfo < bindex + 1); /* NOTE(review): slot bindex + 1 is read next, ninfo == bindex + 1 passes here; confirm */
11440+ drinfo = lkup->dirren.drinfo[bindex + 1];
11441+ if (!drinfo)
11442+ goto out;
11443+
11444+ match = (drinfo->ino == h_ino); /* compare with the recorded inode number */
11445+ AuDbg("match %d\n", match);
11446+
11447+out:
11448+ return match;
11449+}
11450+
11451+/* ---------------------------------------------------------------------- */
11452+
11453+int au_dr_opt_set(struct super_block *sb)
11454+{
11455+ int err;
11456+ aufs_bindex_t bindex, bbot;
11457+ struct au_branch *br;
11458+
11459+ err = 0;
11460+ bbot = au_sbbot(sb);
11461+ for (bindex = 0; !err && bindex <= bbot; bindex++) {
11462+ br = au_sbr(sb, bindex);
11463+ err = au_dr_hino(sb, bindex, /*br*/NULL, &br->br_path);
11464+ }
11465+
11466+ return err;
11467+}
11468+
e8791d4f
AM
11469+int au_dr_opt_flush(struct super_block *sb)
11470+{
11471+ int err;
11472+ aufs_bindex_t bindex, bbot;
11473+ struct au_branch *br;
11474+
11475+ err = 0;
11476+ bbot = au_sbbot(sb);
11477+ for (bindex = 0; !err && bindex <= bbot; bindex++) {
11478+ br = au_sbr(sb, bindex);
11479+ if (au_br_writable(br->br_perm))
11480+ err = au_dr_hino(sb, bindex, /*br*/NULL, /*path*/NULL);
11481+ }
11482+
11483+ return err;
11484+}
11485+
11486+int au_dr_opt_clr(struct super_block *sb, int no_flush)
11487+{
11488+ int err;
11489+ aufs_bindex_t bindex, bbot;
11490+ struct au_branch *br;
11491+
11492+ err = 0;
11493+ if (!no_flush) {
11494+ err = au_dr_opt_flush(sb);
11495+ if (unlikely(err))
11496+ goto out;
11497+ }
11498+
11499+ bbot = au_sbbot(sb);
11500+ for (bindex = 0; bindex <= bbot; bindex++) {
11501+ br = au_sbr(sb, bindex);
11502+ au_dr_hino_free(&br->br_dirren);
11503+ }
11504+
11505+out:
11506+ return err;
11507+}
11508diff -urNp -x '*.orig' linux-4.9/fs/aufs/dirren.h linux-4.9/fs/aufs/dirren.h
11509--- linux-4.9/fs/aufs/dirren.h 1970-01-01 01:00:00.000000000 +0100
11510+++ linux-4.9/fs/aufs/dirren.h 2021-02-24 16:15:09.524906971 +0100
11511@@ -0,0 +1,145 @@
11512+/*
11513+ * Copyright (C) 2017-2018 Junjiro R. Okajima
11514+ *
11515+ * This program, aufs is free software; you can redistribute it and/or modify
11516+ * it under the terms of the GNU General Public License as published by
11517+ * the Free Software Foundation; either version 2 of the License, or
11518+ * (at your option) any later version.
11519+ *
11520+ * This program is distributed in the hope that it will be useful,
11521+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11522+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11523+ * GNU General Public License for more details.
11524+ *
11525+ * You should have received a copy of the GNU General Public License
11526+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
11527+ */
11528+
11529+/*
11530+ * renamed dir info
11531+ */
11532+
11533+#ifndef __AUFS_DIRREN_H__
11534+#define __AUFS_DIRREN_H__
11535+
11536+#ifdef __KERNEL__
11537+
11538+#include <linux/dcache.h>
11539+#include <linux/statfs.h>
11540+#include "hbl.h"
11541+
11542+#define AuDirren_NHASH 100
11543+
11544+#ifdef CONFIG_AUFS_DIRREN
11545+/* copied from linux/fs/xfs/uuid.h */
11546+typedef struct {
11547+ unsigned char __u_bits[16];
11548+} uuid_t;
11549+
11550+#define __UUID_TMPLT "01234567-0123-4567-0123-456701234567"
11551+
11552+enum au_brid_type {
11553+ AuBrid_Unset,
11554+ AuBrid_UUID,
11555+ AuBrid_FSID,
11556+ AuBrid_DEV
11557+};
11558+
11559+struct au_dr_brid {
11560+ enum au_brid_type type;
11561+ union {
11562+ uuid_t uuid; /* unimplemented yet */
11563+ fsid_t fsid;
11564+ dev_t dev;
11565+ };
11566+};
11567+
11568+/* 20 is the max digits length of ulong 64 */
11569+/* brid-type "_" uuid "_" inum */
11570+#define AUFS_DIRREN_FNAME_SZ (1 + 1 + sizeof(__UUID_TMPLT) + 20)
11571+#define AUFS_DIRREN_ENV_VAL_SZ (AUFS_DIRREN_FNAME_SZ + 1 + 20)
11572+
11573+struct au_dr_hino {
11574+ struct hlist_bl_node dr_hnode;
11575+ ino_t dr_h_ino;
11576+};
11577+
11578+struct au_dr_br {
11579+ struct hlist_bl_head dr_h_ino[AuDirren_NHASH];
11580+ struct au_dr_brid dr_brid;
11581+};
11582+
11583+struct au_dr_lookup {
11584+ /* dr_name is pointed by struct au_do_lookup_args.name */
11585+ struct qstr dr_name; /* subset of dr_info */
11586+ aufs_bindex_t ninfo;
11587+ struct au_drinfo **drinfo;
11588+};
11589+#else
11590+struct au_dr_hino;
11591+/* empty */
11592+struct au_dr_br { };
11593+struct au_dr_lookup { };
11594+#endif
11595+
11596+/* ---------------------------------------------------------------------- */
11597+
11598+struct au_branch;
11599+struct au_do_lookup_args;
11600+struct au_hinode;
11601+#ifdef CONFIG_AUFS_DIRREN
11602+int au_dr_hino_test_add(struct au_dr_br *dr, ino_t h_ino,
11603+ struct au_dr_hino *add_ent);
11604+void au_dr_hino_free(struct au_dr_br *dr);
11605+int au_dr_br_init(struct super_block *sb, struct au_branch *br,
11606+ const struct path *path);
11607+int au_dr_br_fin(struct super_block *sb, struct au_branch *br);
11608+int au_dr_rename(struct dentry *src, aufs_bindex_t bindex,
11609+ struct qstr *dst_name, void *_rev);
11610+void au_dr_rename_fin(struct dentry *src, aufs_bindex_t btgt, void *rev);
11611+void au_dr_rename_rev(struct dentry *src, aufs_bindex_t bindex, void *rev);
11612+int au_dr_lkup(struct au_do_lookup_args *lkup, struct dentry *dentry,
11613+ aufs_bindex_t bindex);
11614+int au_dr_lkup_name(struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
11615+int au_dr_lkup_h_ino(struct au_do_lookup_args *lkup, aufs_bindex_t bindex,
11616+ ino_t h_ino);
11617+void au_dr_lkup_fin(struct au_do_lookup_args *lkup);
11618+int au_dr_opt_set(struct super_block *sb);
11619+int au_dr_opt_flush(struct super_block *sb);
11620+int au_dr_opt_clr(struct super_block *sb, int no_flush);
11621+#else
11622+AuStubInt0(au_dr_hino_test_add, struct au_dr_br *dr, ino_t h_ino,
11623+ struct au_dr_hino *add_ent);
11624+AuStubVoid(au_dr_hino_free, struct au_dr_br *dr);
11625+AuStubInt0(au_dr_br_init, struct super_block *sb, struct au_branch *br,
11626+ const struct path *path);
11627+AuStubInt0(au_dr_br_fin, struct super_block *sb, struct au_branch *br);
11628+AuStubInt0(au_dr_rename, struct dentry *src, aufs_bindex_t bindex,
11629+ struct qstr *dst_name, void *_rev);
11630+AuStubVoid(au_dr_rename_fin, struct dentry *src, aufs_bindex_t btgt, void *rev);
11631+AuStubVoid(au_dr_rename_rev, struct dentry *src, aufs_bindex_t bindex,
11632+ void *rev);
11633+AuStubInt0(au_dr_lkup, struct au_do_lookup_args *lkup, struct dentry *dentry,
11634+ aufs_bindex_t bindex);
11635+AuStubInt0(au_dr_lkup_name, struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
11636+AuStubInt0(au_dr_lkup_h_ino, struct au_do_lookup_args *lkup,
11637+ aufs_bindex_t bindex, ino_t h_ino);
11638+AuStubVoid(au_dr_lkup_fin, struct au_do_lookup_args *lkup);
11639+AuStubInt0(au_dr_opt_set, struct super_block *sb);
11640+AuStubInt0(au_dr_opt_flush, struct super_block *sb);
11641+AuStubInt0(au_dr_opt_clr, struct super_block *sb, int no_flush);
11642+#endif
11643+
11644+/* ---------------------------------------------------------------------- */
11645+
11646+#ifdef CONFIG_AUFS_DIRREN
11647+static inline int au_dr_ihash(ino_t h_ino)
11648+{
11649+ return h_ino % AuDirren_NHASH;
11650+}
11651+#else
11652+AuStubInt0(au_dr_ihash, ino_t h_ino);
11653+#endif
11654+
11655+#endif /* __KERNEL__ */
11656+#endif /* __AUFS_DIRREN_H__ */
11657diff -urNp -x '*.orig' linux-4.9/fs/aufs/dynop.c linux-4.9/fs/aufs/dynop.c
11658--- linux-4.9/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
11659+++ linux-4.9/fs/aufs/dynop.c 2021-02-24 16:15:09.524906971 +0100
11660@@ -0,0 +1,369 @@
11661+/*
11662+ * Copyright (C) 2010-2018 Junjiro R. Okajima
11663+ *
11664+ * This program, aufs is free software; you can redistribute it and/or modify
11665+ * it under the terms of the GNU General Public License as published by
11666+ * the Free Software Foundation; either version 2 of the License, or
11667+ * (at your option) any later version.
11668+ *
11669+ * This program is distributed in the hope that it will be useful,
11670+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11671+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11672+ * GNU General Public License for more details.
11673+ *
11674+ * You should have received a copy of the GNU General Public License
11675+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
11676+ */
11677+
11678+/*
11679+ * dynamically customizable operations for regular files
11680+ */
11681+
11682+#include "aufs.h"
11683+
11684+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
11685+
11686+/*
11687+ * How large will these lists be?
11688+ * Usually just a few elements, 20-30 at most for each, I guess.
11689+ */
11690+static struct hlist_bl_head dynop[AuDyLast];
11691+
11692+static struct au_dykey *dy_gfind_get(struct hlist_bl_head *hbl,
11693+ const void *h_op)
11694+{
11695+ struct au_dykey *found, *cur;
11696+ struct hlist_bl_node *pos;
11697+
11698+ found = NULL;
11699+ hlist_bl_lock(hbl);
11700+ hlist_bl_for_each_entry(cur, pos, hbl, dk_hnode)
11701+ if (cur->dk_op.dy_hop == h_op) {
11702+ found = cur;
11703+ kref_get(&found->dk_kref);
11704+ break;
11705+ }
11706+ hlist_bl_unlock(hbl);
11707+
11708+ return found;
11709+}
11710+
11711+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
11712+{
11713+ struct au_dykey **k, *found;
11714+ const void *h_op = key->dk_op.dy_hop;
11715+ int i;
11716+
11717+ found = NULL;
11718+ k = br->br_dykey;
11719+ for (i = 0; i < AuBrDynOp; i++)
11720+ if (k[i]) {
11721+ if (k[i]->dk_op.dy_hop == h_op) {
11722+ found = k[i];
11723+ break;
11724+ }
11725+ } else
11726+ break;
11727+ if (!found) {
11728+ spin_lock(&br->br_dykey_lock);
11729+ for (; i < AuBrDynOp; i++)
11730+ if (k[i]) {
11731+ if (k[i]->dk_op.dy_hop == h_op) {
11732+ found = k[i];
11733+ break;
11734+ }
11735+ } else {
11736+ k[i] = key;
11737+ break;
11738+ }
11739+ spin_unlock(&br->br_dykey_lock);
11740+ BUG_ON(i == AuBrDynOp); /* expand the array */
11741+ }
11742+
11743+ return found;
11744+}
11745+
11746+/* kref_get() if @key is already added */
11747+static struct au_dykey *dy_gadd(struct hlist_bl_head *hbl, struct au_dykey *key)
11748+{
11749+ struct au_dykey *tmp, *found;
11750+ struct hlist_bl_node *pos;
11751+ const void *h_op = key->dk_op.dy_hop;
11752+
11753+ found = NULL;
11754+ hlist_bl_lock(hbl);
11755+ hlist_bl_for_each_entry(tmp, pos, hbl, dk_hnode)
11756+ if (tmp->dk_op.dy_hop == h_op) {
11757+ kref_get(&tmp->dk_kref);
11758+ found = tmp;
11759+ break;
11760+ }
11761+ if (!found)
11762+ hlist_bl_add_head(&key->dk_hnode, hbl);
11763+ hlist_bl_unlock(hbl);
11764+
11765+ if (!found)
11766+ DyPrSym(key);
11767+ return found;
11768+}
11769+
11770+static void dy_free_rcu(struct rcu_head *rcu)
11771+{
11772+ struct au_dykey *key;
11773+
11774+ key = container_of(rcu, struct au_dykey, dk_rcu);
11775+ DyPrSym(key);
11776+ kfree(key);
11777+}
11778+
11779+static void dy_free(struct kref *kref)
11780+{
11781+ struct au_dykey *key;
11782+ struct hlist_bl_head *hbl;
11783+ /* kref release callback used by au_dy_put() */
11784+ key = container_of(kref, struct au_dykey, dk_kref);
11785+ hbl = dynop + key->dk_op.dy_type; /* the list selected by the key's type */
11786+ au_hbl_del(&key->dk_hnode, hbl);
11787+ call_rcu(&key->dk_rcu, dy_free_rcu); /* the kfree() itself is left to dy_free_rcu() */
11788+}
11789+
11790+void au_dy_put(struct au_dykey *key)
11791+{
11792+ kref_put(&key->dk_kref, dy_free);
11793+}
11794+
11795+/* ---------------------------------------------------------------------- */
11796+
11797+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
11798+
11799+#ifdef CONFIG_AUFS_DEBUG
11800+#define DyDbgDeclare(cnt) unsigned int cnt = 0
11801+#define DyDbgInc(cnt) do { cnt++; } while (0)
11802+#else
11803+#define DyDbgDeclare(cnt) do {} while (0)
11804+#define DyDbgInc(cnt) do {} while (0)
11805+#endif
11806+
11807+#define DySet(func, dst, src, h_op, h_sb) do { \
11808+ DyDbgInc(cnt); \
11809+ if (h_op->func) { \
11810+ if (src.func) \
11811+ dst.func = src.func; \
11812+ else \
11813+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
11814+ } \
11815+} while (0)
11816+
11817+#define DySetForce(func, dst, src) do { \
11818+ AuDebugOn(!src.func); \
11819+ DyDbgInc(cnt); \
11820+ dst.func = src.func; \
11821+} while (0)
11822+
11823+#define DySetAop(func) \
11824+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
11825+#define DySetAopForce(func) \
11826+ DySetForce(func, dyaop->da_op, aufs_aop)
11827+
11828+static void dy_aop(struct au_dykey *key, const void *h_op,
11829+ struct super_block *h_sb __maybe_unused)
11830+{
11831+ struct au_dyaop *dyaop = (void *)key; /* da_key is the first member of au_dyaop */
11832+ const struct address_space_operations *h_aop = h_op;
11833+ DyDbgDeclare(cnt);
11834+
11835+ AuDbg("%s\n", au_sbtype(h_sb));
11836+
11837+ DySetAop(writepage); /* DySetAop copies the aufs op only if the lower a_ops has it */
11838+ DySetAopForce(readpage); /* force */
11839+ DySetAop(writepages);
11840+ DySetAop(set_page_dirty);
11841+ DySetAop(readpages);
11842+ DySetAop(write_begin);
11843+ DySetAop(write_end);
11844+ DySetAop(bmap);
11845+ DySetAop(invalidatepage);
11846+ DySetAop(releasepage);
11847+ DySetAop(freepage);
11848+ /* this one will be changed according to an aufs mount option */
11849+ DySetAop(direct_IO);
11850+ DySetAop(migratepage);
11851+ DySetAop(isolate_page);
11852+ DySetAop(putback_page);
11853+ DySetAop(launder_page);
11854+ DySetAop(is_partially_uptodate);
11855+ DySetAop(is_dirty_writeback);
11856+ DySetAop(error_remove_page);
11857+ DySetAop(swap_activate);
11858+ DySetAop(swap_deactivate);
11859+
11860+ DyDbgSize(cnt, *h_aop); /* debug check: one DySet per pointer-sized member of *h_aop */
11861+}
11862+
11863+/* ---------------------------------------------------------------------- */
11864+
11865+static void dy_bug(struct kref *kref)
11866+{
11867+ BUG(); /* kref release callback that must never run; used by dy_get() */
11868+}
11869+
11870+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
11871+{
11872+ struct au_dykey *key, *old;
11873+ struct hlist_bl_head *hbl;
11874+ struct op { /* per-type allocation size and initializer */
11875+ unsigned int sz;
11876+ void (*set)(struct au_dykey *key, const void *h_op,
11877+ struct super_block *h_sb __maybe_unused);
11878+ };
11879+ static const struct op a[] = {
11880+ [AuDy_AOP] = {
11881+ .sz = sizeof(struct au_dyaop),
11882+ .set = dy_aop
11883+ }
11884+ };
11885+ const struct op *p;
11886+
11887+ hbl = dynop + op->dy_type; /* list head for this op type */
11888+ key = dy_gfind_get(hbl, op->dy_hop);
11889+ if (key)
11890+ goto out_add; /* success */
11891+
11892+ p = a + op->dy_type;
11893+ key = kzalloc(p->sz, GFP_NOFS); /* zeroed: ops not set by p->set() stay NULL */
11894+ if (unlikely(!key)) {
11895+ key = ERR_PTR(-ENOMEM);
11896+ goto out;
11897+ }
11898+
11899+ key->dk_op.dy_hop = op->dy_hop;
11900+ kref_init(&key->dk_kref);
11901+ p->set(key, op->dy_hop, au_br_sb(br));
11902+ old = dy_gadd(hbl, key); /* NOTE(review): presumably returns an equal key already listed -- confirm */
11903+ if (old) {
11904+ kfree(key); /* discard the new key and use the one returned instead */
11905+ key = old;
11906+ }
11907+
11908+out_add:
11909+ old = dy_bradd(br, key); /* NOTE(review): non-NULL seems to mean br already had this key -- confirm */
11910+ if (old)
11911+ /* its ref-count should never be zero here */
11912+ kref_put(&key->dk_kref, dy_bug);
11913+out:
11914+ return key;
11915+}
11916+
11917+/* ---------------------------------------------------------------------- */
11918+/*
11919+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
11920+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
11921+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
11922+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
11923+ * See the aufs manual for details.
11924+ */
11925+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
11926+{
11927+ if (!do_dx)
11928+ dyaop->da_op.direct_IO = NULL;
11929+ else
11930+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
11931+}
11932+
11933+static struct au_dyaop *dy_aget(struct au_branch *br,
11934+ const struct address_space_operations *h_aop,
11935+ int do_dx)
11936+{
11937+ struct au_dyaop *dyaop;
11938+ struct au_dynop op;
11939+
11940+ op.dy_type = AuDy_AOP;
11941+ op.dy_haop = h_aop; /* shares storage with dy_hop (union in struct au_dynop) */
11942+ dyaop = (void *)dy_get(&op, br);
11943+ if (IS_ERR(dyaop))
11944+ goto out;
11945+ dy_adx(dyaop, do_dx); /* set or clear direct_IO on the key as requested */
11946+
11947+out:
11948+ return dyaop; /* the key, or the ERR_PTR from dy_get() */
11949+}
11950+
11951+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
11952+ struct inode *h_inode)
11953+{
11954+ int err, do_dx;
11955+ struct super_block *sb;
11956+ struct au_branch *br;
11957+ struct au_dyaop *dyaop;
11958+
11959+ AuDebugOn(!S_ISREG(h_inode->i_mode));
11960+ IiMustWriteLock(inode);
11961+
11962+ sb = inode->i_sb;
11963+ br = au_sbr(sb, bindex);
11964+ do_dx = !!au_opt_test(au_mntflags(sb), DIO); /* normalized to 0/1 from the DIO mount flag */
11965+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx); /* keyed by the lower inode's a_ops */
11966+ err = PTR_ERR(dyaop);
11967+ if (IS_ERR(dyaop))
11968+ /* unnecessary to call dy_fput() */
11969+ goto out;
11970+
11971+ err = 0;
11972+ inode->i_mapping->a_ops = &dyaop->da_op; /* install the customized a_ops on the aufs inode */
11973+
11974+out:
11975+ return err;
11976+}
11977+
11978+/*
11979+ * Is it safe to replace a_ops while the inode/file is in operation?
11980+ * Yes, I hope so.
11981+ */
11982+int au_dy_irefresh(struct inode *inode)
ae9dfd79
AM
11983+{
11984+ int err;
e8791d4f
AM
11985+ aufs_bindex_t btop;
11986+ struct inode *h_inode;
ae9dfd79
AM
11987+
11988+ err = 0;
e8791d4f
AM
11989+ if (S_ISREG(inode->i_mode)) { /* other file types are left alone and return 0 */
11990+ btop = au_ibtop(inode);
11991+ h_inode = au_h_iptr(inode, btop);
11992+ err = au_dy_iaop(inode, btop, h_inode);
ae9dfd79 11993+ }
ae9dfd79
AM
11994+ return err;
11995+}
11996+
e8791d4f 11997+void au_dy_arefresh(int do_dx)
ae9dfd79 11998+{
e8791d4f
AM
11999+ struct hlist_bl_head *hbl;
12000+ struct hlist_bl_node *pos;
12001+ struct au_dykey *key;
ae9dfd79 12002+
e8791d4f
AM
12003+ hbl = dynop + AuDy_AOP;
12004+ hlist_bl_lock(hbl);
12005+ hlist_bl_for_each_entry(key, pos, hbl, dk_hnode)
12006+ dy_adx((void *)key, do_dx); /* re-apply the direct_IO setting to every listed key */
12007+ hlist_bl_unlock(hbl);
12008+}
ae9dfd79 12009+
e8791d4f 12010+/* ---------------------------------------------------------------------- */
ae9dfd79 12011+
e8791d4f
AM
12012+void __init au_dy_init(void)
12013+{
12014+ int i;
12015+
12016+ /* make sure that 'struct au_dykey *' can be any type */
12017+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key)); /* da_key must sit at offset 0 */
12018+
12019+ for (i = 0; i < AuDyLast; i++)
12020+ INIT_HLIST_BL_HEAD(dynop + i); /* one list head per op type */
ae9dfd79 12021+}
e8791d4f
AM
12022+
12023+void au_dy_fin(void)
12024+{
12025+ int i;
12026+
12027+ for (i = 0; i < AuDyLast; i++)
12028+ WARN_ON(!hlist_bl_empty(dynop + i)); /* warn if any key is still listed */
12029+}
12030diff -urNp -x '*.orig' linux-4.9/fs/aufs/dynop.h linux-4.9/fs/aufs/dynop.h
12031--- linux-4.9/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
12032+++ linux-4.9/fs/aufs/dynop.h 2021-02-24 16:15:09.524906971 +0100
12033@@ -0,0 +1,74 @@
ae9dfd79 12034+/*
e8791d4f 12035+ * Copyright (C) 2010-2018 Junjiro R. Okajima
ae9dfd79
AM
12036+ *
12037+ * This program, aufs is free software; you can redistribute it and/or modify
12038+ * it under the terms of the GNU General Public License as published by
12039+ * the Free Software Foundation; either version 2 of the License, or
12040+ * (at your option) any later version.
12041+ *
12042+ * This program is distributed in the hope that it will be useful,
12043+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12044+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12045+ * GNU General Public License for more details.
12046+ *
12047+ * You should have received a copy of the GNU General Public License
12048+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
12049+ */
12050+
12051+/*
e8791d4f 12052+ * dynamically customizable operations (for regular files only)
ae9dfd79
AM
12053+ */
12054+
e8791d4f
AM
12055+#ifndef __AUFS_DYNOP_H__
12056+#define __AUFS_DYNOP_H__
ae9dfd79
AM
12057+
12058+#ifdef __KERNEL__
12059+
e8791d4f
AM
12060+#include <linux/fs.h>
12061+#include <linux/kref.h>
ae9dfd79 12062+
e8791d4f 12063+enum {AuDy_AOP, AuDyLast};
ae9dfd79 12064+
e8791d4f
AM
12065+struct au_dynop {
12066+ int dy_type;
ae9dfd79 12067+ union {
e8791d4f
AM
12068+ const void *dy_hop;
12069+ const struct address_space_operations *dy_haop;
ae9dfd79
AM
12070+ };
12071+};
12072+
e8791d4f
AM
12073+struct au_dykey {
12074+ union {
12075+ struct hlist_bl_node dk_hnode;
12076+ struct rcu_head dk_rcu;
12077+ };
12078+ struct au_dynop dk_op;
ae9dfd79 12079+
e8791d4f
AM
12080+ /*
12081+ * while this key is in a branch's local array, a kref is held. when
12082+ * the branch is removed, the kref is put.
12083+ */
12084+ struct kref dk_kref;
ae9dfd79
AM
12085+};
12086+
e8791d4f
AM
12087+/* stop unioning since their sizes are very different from each other */
12088+struct au_dyaop {
12089+ struct au_dykey da_key;
12090+ struct address_space_operations da_op; /* not const */
ae9dfd79 12091+};
ae9dfd79
AM
12092+
12093+/* ---------------------------------------------------------------------- */
12094+
e8791d4f 12095+/* dynop.c */
ae9dfd79 12096+struct au_branch;
e8791d4f
AM
12097+void au_dy_put(struct au_dykey *key);
12098+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
12099+ struct inode *h_inode);
12100+int au_dy_irefresh(struct inode *inode);
12101+void au_dy_arefresh(int do_dio);
ae9dfd79 12102+
e8791d4f
AM
12103+void __init au_dy_init(void);
12104+void au_dy_fin(void);
ae9dfd79
AM
12105+
12106+#endif /* __KERNEL__ */
e8791d4f
AM
12107+#endif /* __AUFS_DYNOP_H__ */
12108diff -urNp -x '*.orig' linux-4.9/fs/aufs/export.c linux-4.9/fs/aufs/export.c
12109--- linux-4.9/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
12110+++ linux-4.9/fs/aufs/export.c 2021-02-24 16:15:09.528240413 +0100
12111@@ -0,0 +1,836 @@
1facf9fc 12112+/*
e8791d4f 12113+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 12114+ *
12115+ * This program, aufs is free software; you can redistribute it and/or modify
12116+ * it under the terms of the GNU General Public License as published by
12117+ * the Free Software Foundation; either version 2 of the License, or
12118+ * (at your option) any later version.
dece6358
AM
12119+ *
12120+ * This program is distributed in the hope that it will be useful,
12121+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12122+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12123+ * GNU General Public License for more details.
12124+ *
12125+ * You should have received a copy of the GNU General Public License
523b37e3 12126+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 12127+ */
12128+
e8791d4f
AM
12129+/*
12130+ * export via nfs
12131+ */
12132+
12133+#include <linux/exportfs.h>
12134+#include <linux/fs_struct.h>
12135+#include <linux/namei.h>
12136+#include <linux/nsproxy.h>
12137+#include <linux/random.h>
12138+#include <linux/writeback.h>
12139+#include "aufs.h"
12140+
12141+union conv {
12142+#ifdef CONFIG_AUFS_INO_T_64
12143+ __u32 a[2];
12144+#else
12145+ __u32 a[1];
12146+#endif
12147+ ino_t ino;
12148+};
12149+
12150+static ino_t decode_ino(__u32 *a)
12151+{
12152+ union conv u;
12153+
12154+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); /* the __u32 array must cover ino_t exactly */
12155+ u.a[0] = a[0];
12156+#ifdef CONFIG_AUFS_INO_T_64
12157+ u.a[1] = a[1];
12158+#endif
12159+ return u.ino; /* the words reinterpreted as ino_t through union conv */
12160+}
12161+
12162+static void encode_ino(__u32 *a, ino_t ino)
12163+{
12164+ union conv u;
12165+
12166+ u.ino = ino; /* inverse of decode_ino(): split ino into __u32 words via union conv */
12167+ a[0] = u.a[0];
12168+#ifdef CONFIG_AUFS_INO_T_64
12169+ a[1] = u.a[1];
12170+#endif
12171+}
12172+
12173+/* NFS file handle */
12174+enum {
12175+ Fh_br_id,
12176+ Fh_sigen,
12177+#ifdef CONFIG_AUFS_INO_T_64
12178+ /* support 64bit inode number */
12179+ Fh_ino1,
12180+ Fh_ino2,
12181+ Fh_dir_ino1,
12182+ Fh_dir_ino2,
12183+#else
12184+ Fh_ino1,
12185+ Fh_dir_ino1,
12186+#endif
12187+ Fh_igen,
12188+ Fh_h_type,
12189+ Fh_tail,
12190+
12191+ Fh_ino = Fh_ino1,
12192+ Fh_dir_ino = Fh_dir_ino1
12193+};
12194+
12195+static int au_test_anon(struct dentry *dentry)
12196+{
12197+ /* note: read d_flags without d_lock */
12198+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
12199+}
12200+
12201+int au_test_nfsd(void)
12202+{
12203+ int ret;
12204+ struct task_struct *tsk = current;
12205+ char comm[sizeof(tsk->comm)];
12206+
12207+ ret = 0;
12208+ if (tsk->flags & PF_KTHREAD) { /* only kernel threads are considered */
12209+ get_task_comm(comm, tsk);
12210+ ret = !strcmp(comm, "nfsd"); /* 1 when the current task is named exactly "nfsd" */
12211+ }
12212+
12213+ return ret;
12214+}
12215+
12216+/* ---------------------------------------------------------------------- */
12217+/* inode generation external table */
12218+
12219+void au_xigen_inc(struct inode *inode)
12220+{
12221+ loff_t pos;
12222+ ssize_t sz;
12223+ __u32 igen;
12224+ struct super_block *sb;
12225+ struct au_sbinfo *sbinfo;
12226+
12227+ sb = inode->i_sb;
12228+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
12229+
12230+ sbinfo = au_sbi(sb);
12231+ pos = inode->i_ino;
12232+ pos *= sizeof(igen); /* file offset of this inode's slot in the xigen file */
12233+ igen = inode->i_generation + 1; /* only the file is updated; inode->i_generation is not changed here */
12234+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
12235+ sizeof(igen), &pos);
12236+ if (sz == sizeof(igen))
12237+ return; /* success */
12238+
12239+ if (unlikely(sz >= 0)) /* short write; NOTE(review): a negative sz is not logged here */
12240+ AuIOErr("xigen error (%zd)\n", sz);
12241+}
12242+
12243+int au_xigen_new(struct inode *inode)
12244+{
12245+ int err;
12246+ loff_t pos;
12247+ ssize_t sz;
12248+ struct super_block *sb;
12249+ struct au_sbinfo *sbinfo;
12250+ struct file *file;
12251+
12252+ err = 0;
12253+ /* todo: dirty, at mount time */
12254+ if (inode->i_ino == AUFS_ROOT_INO)
12255+ goto out;
12256+ sb = inode->i_sb;
12257+ SiMustAnyLock(sb);
12258+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
12259+ goto out; /* without the XINO option nothing is done and 0 is returned */
12260+
12261+ err = -EFBIG;
12262+ pos = inode->i_ino;
12263+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
12264+ AuIOErr1("too large i%lld\n", pos);
12265+ goto out;
12266+ }
12267+ pos *= sizeof(inode->i_generation); /* file offset of this inode's slot */
12268+
12269+ err = 0;
12270+ sbinfo = au_sbi(sb);
12271+ file = sbinfo->si_xigen;
12272+ BUG_ON(!file);
12273+
12274+ if (vfsub_f_size_read(file)
12275+ < pos + sizeof(inode->i_generation)) { /* file too short: no slot for this inode yet */
12276+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
12277+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
12278+ sizeof(inode->i_generation), &pos);
12279+ } else /* slot exists: load the stored generation */
12280+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
12281+ sizeof(inode->i_generation), &pos);
12282+ if (sz == sizeof(inode->i_generation))
12283+ goto out; /* success */
12284+
12285+ err = sz; /* a negative sz is returned as the error */
12286+ if (unlikely(sz >= 0)) { /* short read/write is reported as -EIO */
12287+ err = -EIO;
12288+ AuIOErr("xigen error (%zd)\n", sz);
12289+ }
12290+
12291+out:
12292+ return err;
12293+}
12294+
12295+int au_xigen_set(struct super_block *sb, struct file *base)
12296+{
12297+ int err;
12298+ struct au_sbinfo *sbinfo;
12299+ struct file *file;
12300+
12301+ SiMustWriteLock(sb);
12302+
12303+ sbinfo = au_sbi(sb);
12304+ file = au_xino_create2(base, sbinfo->si_xigen);
12305+ err = PTR_ERR(file);
12306+ if (IS_ERR(file))
12307+ goto out;
12308+ err = 0;
12309+ if (sbinfo->si_xigen)
12310+ fput(sbinfo->si_xigen);
12311+ sbinfo->si_xigen = file;
12312+
12313+out:
12314+ return err;
12315+}
12316+
12317+void au_xigen_clr(struct super_block *sb)
12318+{
12319+ struct au_sbinfo *sbinfo;
12320+
12321+ SiMustWriteLock(sb);
12322+
12323+ sbinfo = au_sbi(sb);
12324+ if (sbinfo->si_xigen) {
12325+ fput(sbinfo->si_xigen);
12326+ sbinfo->si_xigen = NULL;
12327+ }
12328+}
12329+
12330+/* ---------------------------------------------------------------------- */
12331+
12332+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
12333+ ino_t dir_ino)
12334+{
12335+ struct dentry *dentry, *d;
12336+ struct inode *inode;
12337+ unsigned int sigen;
12338+
12339+ dentry = NULL;
12340+ inode = ilookup(sb, ino);
12341+ if (!inode)
12342+ goto out;
12343+
12344+ dentry = ERR_PTR(-ESTALE);
12345+ sigen = au_sigen(sb);
12346+ if (unlikely(au_is_bad_inode(inode)
12347+ || IS_DEADDIR(inode)
12348+ || sigen != au_iigen(inode, NULL)))
12349+ goto out_iput;
12350+
12351+ dentry = NULL;
12352+ if (!dir_ino || S_ISDIR(inode->i_mode))
12353+ dentry = d_find_alias(inode);
12354+ else {
12355+ spin_lock(&inode->i_lock);
12356+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
12357+ spin_lock(&d->d_lock);
12358+ if (!au_test_anon(d)
12359+ && d_inode(d->d_parent)->i_ino == dir_ino) {
12360+ dentry = dget_dlock(d);
12361+ spin_unlock(&d->d_lock);
12362+ break;
12363+ }
12364+ spin_unlock(&d->d_lock);
12365+ }
12366+ spin_unlock(&inode->i_lock);
12367+ }
12368+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
12369+ /* need to refresh */
12370+ dput(dentry);
12371+ dentry = NULL;
12372+ }
12373+
12374+out_iput:
12375+ iput(inode);
12376+out:
12377+ AuTraceErrPtr(dentry);
12378+ return dentry;
12379+}
12380+
12381+/* ---------------------------------------------------------------------- */
12382+
12383+/* todo: dirty? */
12384+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
12385+
12386+struct au_compare_mnt_args {
12387+ /* input */
12388+ struct super_block *sb;
12389+
12390+ /* output */
12391+ struct vfsmount *mnt;
12392+};
12393+
12394+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
12395+{
12396+ struct au_compare_mnt_args *a = arg;
12397+
12398+ if (mnt->mnt_sb != a->sb)
12399+ return 0;
12400+ a->mnt = mntget(mnt);
12401+ return 1;
12402+}
12403+
12404+static struct vfsmount *au_mnt_get(struct super_block *sb)
12405+{
12406+ int err;
12407+ struct path root;
12408+ struct au_compare_mnt_args args = {
12409+ .sb = sb
12410+ };
12411+
12412+ get_fs_root(current->fs, &root);
12413+ rcu_read_lock();
12414+ err = iterate_mounts(au_compare_mnt, &args, root.mnt);
12415+ rcu_read_unlock();
12416+ path_put(&root);
12417+ AuDebugOn(!err);
12418+ AuDebugOn(!args.mnt);
12419+ return args.mnt;
12420+}
12421+
12422+struct au_nfsd_si_lock {
12423+ unsigned int sigen;
12424+ aufs_bindex_t bindex, br_id;
12425+ unsigned char force_lock;
12426+};
12427+
12428+static int si_nfsd_read_lock(struct super_block *sb,
12429+ struct au_nfsd_si_lock *nsi_lock)
12430+{
12431+ int err;
12432+ aufs_bindex_t bindex;
1facf9fc 12433+
e8791d4f 12434+ si_read_lock(sb, AuLock_FLUSH);
1facf9fc 12435+
e8791d4f
AM
12436+ /* branch id may be wrapped around */
12437+ err = 0;
12438+ bindex = au_br_index(sb, nsi_lock->br_id);
12439+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
12440+ goto out; /* success */
1facf9fc 12441+
e8791d4f
AM
12442+ err = -ESTALE;
12443+ bindex = -1;
12444+ if (!nsi_lock->force_lock)
12445+ si_read_unlock(sb); /* on failure the lock is kept only when force_lock is set */
4a4d8108 12446+
e8791d4f
AM
12447+out:
12448+ nsi_lock->bindex = bindex; /* branch index on success, -1 on -ESTALE */
12449+ return err;
12450+}
12451+
12452+struct find_name_by_ino {
12453+ struct dir_context ctx;
12454+ int called, found;
12455+ ino_t ino;
12456+ char *name;
12457+ int namelen;
12458+};
12459+
12460+static int
12461+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
12462+ loff_t offset, u64 ino, unsigned int d_type)
1facf9fc 12463+{
e8791d4f
AM
12464+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
12465+ ctx);
1facf9fc 12466+
e8791d4f
AM
12467+ a->called++; /* lets the caller detect a pass that delivered no entries */
12468+ if (a->ino != ino)
12469+ return 0;
4a4d8108 12470+
e8791d4f
AM
12471+ memcpy(a->name, name, namelen); /* exactly namelen bytes; no terminator is added */
12472+ a->namelen = namelen;
12473+ a->found = 1;
12474+ return 1;
1facf9fc 12475+}
12476+
e8791d4f
AM
12477+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
12478+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 12479+{
e8791d4f
AM
12480+ struct dentry *dentry, *parent;
12481+ struct file *file;
12482+ struct inode *dir;
12483+ struct find_name_by_ino arg = {
12484+ .ctx = {
12485+ .actor = find_name_by_ino
12486+ }
12487+ };
12488+ int err;
1facf9fc 12489+
e8791d4f
AM
12490+ parent = path->dentry;
12491+ if (nsi_lock)
12492+ si_read_unlock(parent->d_sb);
12493+ file = vfsub_dentry_open(path, au_dir_roflags);
12494+ dentry = (void *)file;
12495+ if (IS_ERR(file))
12496+ goto out;
12497+
12498+ dentry = ERR_PTR(-ENOMEM);
12499+ arg.name = (void *)__get_free_page(GFP_NOFS);
12500+ if (unlikely(!arg.name))
12501+ goto out_file;
12502+ arg.ino = ino;
12503+ arg.found = 0;
12504+ do {
12505+ arg.called = 0;
12506+ /* smp_mb(); */
12507+ err = vfsub_iterate_dir(file, &arg.ctx);
12508+ } while (!err && !arg.found && arg.called);
12509+ dentry = ERR_PTR(err);
12510+ if (unlikely(err))
12511+ goto out_name;
12512+ /* instead of ENOENT */
12513+ dentry = ERR_PTR(-ESTALE);
12514+ if (!arg.found)
12515+ goto out_name;
12516+
12517+ /* do not call vfsub_lkup_one() */
12518+ dir = d_inode(parent);
12519+ dentry = vfsub_lookup_one_len_unlocked(arg.name, parent, arg.namelen);
12520+ AuTraceErrPtr(dentry);
12521+ if (IS_ERR(dentry))
12522+ goto out_name;
12523+ AuDebugOn(au_test_anon(dentry));
12524+ if (unlikely(d_really_is_negative(dentry))) {
12525+ dput(dentry);
12526+ dentry = ERR_PTR(-ENOENT);
4a4d8108
AM
12527+ }
12528+
e8791d4f
AM
12529+out_name:
12530+ free_page((unsigned long)arg.name);
12531+out_file:
12532+ fput(file);
12533+out:
12534+ if (unlikely(nsi_lock
12535+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
12536+ if (!IS_ERR(dentry)) {
12537+ dput(dentry);
12538+ dentry = ERR_PTR(-ESTALE);
12539+ }
12540+ AuTraceErrPtr(dentry);
12541+ return dentry;
1facf9fc 12542+}
12543+
e8791d4f
AM
12544+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
12545+ ino_t dir_ino,
12546+ struct au_nfsd_si_lock *nsi_lock)
4a4d8108 12547+{
e8791d4f
AM
12548+ struct dentry *dentry;
12549+ struct path path;
1facf9fc 12550+
e8791d4f
AM
12551+ if (dir_ino != AUFS_ROOT_INO) {
12552+ path.dentry = decode_by_ino(sb, dir_ino, 0);
12553+ dentry = path.dentry;
12554+ if (!path.dentry || IS_ERR(path.dentry))
12555+ goto out;
12556+ AuDebugOn(au_test_anon(path.dentry));
12557+ } else
12558+ path.dentry = dget(sb->s_root);
1facf9fc 12559+
e8791d4f
AM
12560+ path.mnt = au_mnt_get(sb);
12561+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
12562+ path_put(&path);
12563+
12564+out:
12565+ AuTraceErrPtr(dentry);
12566+ return dentry;
4a4d8108
AM
12567+}
12568+
e8791d4f 12569+/* ---------------------------------------------------------------------- */
4a4d8108 12570+
e8791d4f
AM
12571+static int h_acceptable(void *expv, struct dentry *dentry)
12572+{
12573+ return 1;
1facf9fc 12574+}
12575+
e8791d4f
AM
12576+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
12577+ char *buf, int len, struct super_block *sb)
4a4d8108 12578+{
e8791d4f
AM
12579+ char *p;
12580+ int n;
12581+ struct path path;
1facf9fc 12582+
e8791d4f
AM
12583+ p = d_path(h_rootpath, buf, len);
12584+ if (IS_ERR(p))
12585+ goto out;
12586+ n = strlen(p);
4a4d8108 12587+
e8791d4f
AM
12588+ path.mnt = h_rootpath->mnt;
12589+ path.dentry = h_parent;
12590+ p = d_path(&path, buf, len);
12591+ if (IS_ERR(p))
12592+ goto out;
12593+ if (n != 1)
12594+ p += n;
1facf9fc 12595+
e8791d4f
AM
12596+ path.mnt = au_mnt_get(sb);
12597+ path.dentry = sb->s_root;
12598+ p = d_path(&path, buf, len - strlen(p));
12599+ mntput(path.mnt);
12600+ if (IS_ERR(p))
12601+ goto out;
12602+ if (n != 1)
12603+ p[strlen(p)] = '/';
4a4d8108 12604+
e8791d4f
AM
12605+out:
12606+ AuTraceErrPtr(p);
12607+ return p;
12608+}
4a4d8108 12609+
e8791d4f
AM
12610+static
12611+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
12612+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
12613+{
12614+ struct dentry *dentry, *h_parent, *root;
12615+ struct super_block *h_sb;
12616+ char *pathname, *p;
12617+ struct vfsmount *h_mnt;
12618+ struct au_branch *br;
12619+ int err;
12620+ struct path path;
4a4d8108 12621+
e8791d4f
AM
12622+ br = au_sbr(sb, nsi_lock->bindex);
12623+ h_mnt = au_br_mnt(br);
12624+ h_sb = h_mnt->mnt_sb;
12625+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
12626+ lockdep_off();
12627+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
12628+ fh_len - Fh_tail, fh[Fh_h_type],
12629+ h_acceptable, /*context*/NULL);
12630+ lockdep_on();
12631+ dentry = h_parent;
12632+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
12633+ AuWarn1("%s decode_fh failed, %ld\n",
12634+ au_sbtype(h_sb), PTR_ERR(h_parent));
12635+ goto out;
12636+ }
12637+ dentry = NULL;
12638+ if (unlikely(au_test_anon(h_parent))) {
12639+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
12640+ au_sbtype(h_sb));
12641+ goto out_h_parent;
12642+ }
4a4d8108 12643+
e8791d4f
AM
12644+ dentry = ERR_PTR(-ENOMEM);
12645+ pathname = (void *)__get_free_page(GFP_NOFS);
12646+ if (unlikely(!pathname))
12647+ goto out_h_parent;
4a4d8108 12648+
e8791d4f
AM
12649+ root = sb->s_root;
12650+ path.mnt = h_mnt;
12651+ di_read_lock_parent(root, !AuLock_IR);
12652+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
12653+ di_read_unlock(root, !AuLock_IR);
12654+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
12655+ dentry = (void *)p;
12656+ if (IS_ERR(p))
12657+ goto out_pathname;
4a4d8108 12658+
e8791d4f
AM
12659+ si_read_unlock(sb);
12660+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
12661+ dentry = ERR_PTR(err);
12662+ if (unlikely(err))
12663+ goto out_relock;
4a4d8108 12664+
e8791d4f
AM
12665+ dentry = ERR_PTR(-ENOENT);
12666+ AuDebugOn(au_test_anon(path.dentry));
12667+ if (unlikely(d_really_is_negative(path.dentry)))
12668+ goto out_path;
4a4d8108 12669+
e8791d4f
AM
12670+ if (ino != d_inode(path.dentry)->i_ino)
12671+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
12672+ else
12673+ dentry = dget(path.dentry);
4a4d8108 12674+
e8791d4f
AM
12675+out_path:
12676+ path_put(&path);
12677+out_relock:
12678+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
12679+ if (!IS_ERR(dentry)) {
12680+ dput(dentry);
12681+ dentry = ERR_PTR(-ESTALE);
12682+ }
12683+out_pathname:
12684+ free_page((unsigned long)pathname);
12685+out_h_parent:
12686+ dput(h_parent);
12687+out:
12688+ AuTraceErrPtr(dentry);
12689+ return dentry;
4a4d8108
AM
12690+}
12691+
4a4d8108
AM
12692+/* ---------------------------------------------------------------------- */
12693+
e8791d4f
AM
12694+static struct dentry *
12695+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
12696+ int fh_type)
4a4d8108 12697+{
e8791d4f
AM
12698+ struct dentry *dentry;
12699+ __u32 *fh = fid->raw;
12700+ struct au_branch *br;
12701+ ino_t ino, dir_ino;
12702+ struct au_nfsd_si_lock nsi_lock = {
12703+ .force_lock = 0
b752ccd1 12704+ };
4a4d8108 12705+
e8791d4f
AM
12706+ dentry = ERR_PTR(-ESTALE);
12707+ /* it should never happen, but the file handle is unreliable */
12708+ if (unlikely(fh_len < Fh_tail))
12709+ goto out;
12710+ nsi_lock.sigen = fh[Fh_sigen];
12711+ nsi_lock.br_id = fh[Fh_br_id];
4a4d8108 12712+
e8791d4f
AM
12713+ /* branch id may be wrapped around */
12714+ br = NULL;
12715+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
4a4d8108 12716+ goto out;
e8791d4f 12717+ nsi_lock.force_lock = 1; /* later si_nfsd_read_lock() failures keep sb locked for out_unlock */
4a4d8108 12718+
e8791d4f
AM
12719+ /* is this inode still cached? */
12720+ ino = decode_ino(fh + Fh_ino);
12721+ /* it should never happen */
12722+ if (unlikely(ino == AUFS_ROOT_INO))
12723+ goto out_unlock;
4a4d8108 12724+
e8791d4f
AM
12725+ dir_ino = decode_ino(fh + Fh_dir_ino);
12726+ dentry = decode_by_ino(sb, ino, dir_ino);
12727+ if (IS_ERR(dentry))
12728+ goto out_unlock;
12729+ if (dentry)
12730+ goto accept;
4a4d8108 12731+
e8791d4f
AM
12732+ /* is the parent dir cached? */
12733+ br = au_sbr(sb, nsi_lock.bindex);
12734+ au_br_get(br); /* paired with au_br_put() at out_unlock */
12735+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
12736+ if (IS_ERR(dentry))
12737+ goto out_unlock;
12738+ if (dentry)
12739+ goto accept;
12740+
12741+ /* lookup path */
12742+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
12743+ if (IS_ERR(dentry))
12744+ goto out_unlock;
12745+ if (unlikely(!dentry))
12746+ /* todo?: make it ESTALE */
12747+ goto out_unlock;
12748+
12749+accept:
12750+ if (!au_digen_test(dentry, au_sigen(sb))
12751+ && d_inode(dentry)->i_generation == fh[Fh_igen]) /* i_generation must match the handle */
12752+ goto out_unlock; /* success */
12753+
12754+ dput(dentry);
12755+ dentry = ERR_PTR(-ESTALE);
12756+out_unlock:
12757+ if (br)
12758+ au_br_put(br);
12759+ si_read_unlock(sb);
12760+out:
12761+ AuTraceErrPtr(dentry);
12762+ return dentry;
4a4d8108
AM
12763+}
12764+
e8791d4f
AM
12765+#if 0 /* reserved for future use */
12766+/* support subtreecheck option */
12767+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
12768+ int fh_len, int fh_type)
4a4d8108 12769+{
e8791d4f
AM
12770+ struct dentry *parent;
12771+ __u32 *fh = fid->raw;
12772+ ino_t dir_ino;
4a4d8108 12773+
e8791d4f
AM
12774+ dir_ino = decode_ino(fh + Fh_dir_ino);
12775+ parent = decode_by_ino(sb, dir_ino, 0);
12776+ if (IS_ERR(parent))
4a4d8108 12777+ goto out;
e8791d4f
AM
12778+ if (!parent)
12779+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
12780+ dir_ino, fh, fh_len);
4a4d8108
AM
12781+
12782+out:
e8791d4f
AM
12783+ AuTraceErrPtr(parent);
12784+ return parent;
4a4d8108 12785+}
e8791d4f 12786+#endif
4a4d8108 12787+
e8791d4f
AM
12788+/* ---------------------------------------------------------------------- */
12789+
12790+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
12791+ struct inode *dir)
4a4d8108 12792+{
e8791d4f
AM
12793+ int err;
12794+ aufs_bindex_t bindex;
12795+ struct super_block *sb, *h_sb;
12796+ struct dentry *dentry, *parent, *h_parent;
12797+ struct inode *h_dir;
4a4d8108 12798+ struct au_branch *br;
4a4d8108 12799+
e8791d4f
AM
12800+ err = -ENOSPC;
12801+ if (unlikely(*max_len <= Fh_tail)) { /* need the Fh_tail aufs words plus room for the lower handle */
12802+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
12803+ goto out;
12804+ }
12805+
12806+ err = FILEID_ROOT;
12807+ if (inode->i_ino == AUFS_ROOT_INO) {
12808+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO); /* NOTE(review): can never be true inside this branch */
12809+ goto out;
12810+ }
4a4d8108 12811+
e8791d4f 12812+ h_parent = NULL;
4a4d8108 12813+ sb = inode->i_sb;
e8791d4f
AM
12814+ err = si_read_lock(sb, AuLock_FLUSH);
12815+ if (unlikely(err))
4a4d8108
AM
12816+ goto out;
12817+
e8791d4f
AM
12818+#ifdef CONFIG_AUFS_DEBUG
12819+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
12820+ AuWarn1("NFS-exporting requires xino\n");
12821+#endif
12822+ err = -EIO;
12823+ parent = NULL;
12824+ ii_read_lock_child(inode);
12825+ bindex = au_ibtop(inode);
12826+ if (!dir) { /* no parent dir given: derive it from any alias of inode */
12827+ dentry = d_find_any_alias(inode);
12828+ if (unlikely(!dentry))
12829+ goto out_unlock;
12830+ AuDebugOn(au_test_anon(dentry));
12831+ parent = dget_parent(dentry);
12832+ dput(dentry);
12833+ if (unlikely(!parent))
12834+ goto out_unlock;
12835+ if (d_really_is_positive(parent)) /* NOTE(review): dir stays NULL otherwise and is used below -- confirm unreachable */
12836+ dir = d_inode(parent);
12837+ }
12838+
12839+ ii_read_lock_parent(dir);
12840+ h_dir = au_h_iptr(dir, bindex);
12841+ ii_read_unlock(dir);
12842+ if (unlikely(!h_dir))
12843+ goto out_parent;
12844+ h_parent = d_find_any_alias(h_dir);
12845+ if (unlikely(!h_parent))
12846+ goto out_hparent;
12847+
12848+ err = -EPERM;
12849+ br = au_sbr(sb, bindex);
12850+ h_sb = au_br_sb(br);
12851+ if (unlikely(!h_sb->s_export_op)) {
12852+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
12853+ goto out_hparent;
12854+ }
12855+
12856+ fh[Fh_br_id] = br->br_id;
12857+ fh[Fh_sigen] = au_sigen(sb);
12858+ encode_ino(fh + Fh_ino, inode->i_ino);
12859+ encode_ino(fh + Fh_dir_ino, dir->i_ino);
12860+ fh[Fh_igen] = inode->i_generation;
12861+
12862+ *max_len -= Fh_tail; /* space left for the lower handle stored after the aufs words */
12863+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
12864+ max_len,
12865+ /*connectable or subtreecheck*/0);
12866+ err = fh[Fh_h_type];
12867+ *max_len += Fh_tail; /* report the total length including the aufs words */
12868+ /* todo: macros? */
12869+ if (err != FILEID_INVALID)
12870+ err = 99; /* NOTE(review): magic handle type; its meaning is not visible here */
12871+ else
12872+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
4a4d8108 12873+
e8791d4f
AM
12874+out_hparent:
12875+ dput(h_parent);
12876+out_parent:
12877+ dput(parent);
12878+out_unlock:
12879+ ii_read_unlock(inode);
12880+ si_read_unlock(sb);
4a4d8108 12881+out:
e8791d4f
AM
12882+ if (unlikely(err < 0)) /* negative errno values are mapped to FILEID_INVALID */
12883+ err = FILEID_INVALID;
4a4d8108
AM
12884+ return err;
12885+}
12886+
e8791d4f
AM
12887+/* ---------------------------------------------------------------------- */
12888+
12889+static int aufs_commit_metadata(struct inode *inode)
b752ccd1
AM
12890+{
12891+ int err;
e8791d4f
AM
12892+ aufs_bindex_t bindex;
12893+ struct super_block *sb;
b752ccd1 12894+ struct inode *h_inode;
e8791d4f 12895+ int (*f)(struct inode *inode);
b752ccd1 12896+
e8791d4f
AM
12897+ sb = inode->i_sb;
12898+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
12899+ ii_write_lock_child(inode);
12900+ bindex = au_ibtop(inode);
12901+ AuDebugOn(bindex < 0);
12902+ h_inode = au_h_iptr(inode, bindex);
b752ccd1 12903+
e8791d4f
AM
12904+ f = h_inode->i_sb->s_export_op->commit_metadata; /* NOTE(review): s_export_op is dereferenced unchecked -- confirm non-NULL */
12905+ if (f)
12906+ err = f(h_inode); /* delegate to the lower filesystem */
12907+ else { /* lower fs has no commit_metadata: sync the lower inode instead */
12908+ struct writeback_control wbc = {
12909+ .sync_mode = WB_SYNC_ALL,
12910+ .nr_to_write = 0 /* metadata only */
12911+ };
4a4d8108 12912+
e8791d4f
AM
12913+ err = sync_inode(h_inode, &wbc);
12914+ }
12915+
12916+ au_cpup_attr_timesizes(inode);
12917+ ii_write_unlock(inode);
12918+ si_read_unlock(sb);
12919+ return err;
4a4d8108
AM
12920+}
12921+
4a4d8108
AM
12922+/* ---------------------------------------------------------------------- */
12923+
e8791d4f
AM
12924+static struct export_operations aufs_export_op = {
12925+ .fh_to_dentry = aufs_fh_to_dentry,
12926+ /* .fh_to_parent = aufs_fh_to_parent, */
12927+ .encode_fh = aufs_encode_fh,
12928+ .commit_metadata = aufs_commit_metadata
12929+};
4a4d8108 12930+
e8791d4f 12931+void au_export_init(struct super_block *sb)
4a4d8108 12932+{
e8791d4f
AM
12933+ struct au_sbinfo *sbinfo;
12934+ __u32 u;
4a4d8108 12935+
e8791d4f
AM
12936+ BUILD_BUG_ON_MSG(IS_BUILTIN(CONFIG_AUFS_FS)
12937+ && IS_MODULE(CONFIG_EXPORTFS),
12938+ AUFS_NAME ": unsupported configuration "
12939+ "CONFIG_EXPORTFS=m and CONFIG_AUFS_FS=y");
12940+
12941+ sb->s_export_op = &aufs_export_op;
12942+ sbinfo = au_sbi(sb);
12943+ sbinfo->si_xigen = NULL;
12944+ get_random_bytes(&u, sizeof(u));
12945+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
12946+ atomic_set(&sbinfo->si_xigen_next, u);
4a4d8108 12947+}
e8791d4f
AM
12948diff -urNp -x '*.orig' linux-4.9/fs/aufs/f_op.c linux-4.9/fs/aufs/f_op.c
12949--- linux-4.9/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
12950+++ linux-4.9/fs/aufs/f_op.c 2021-02-24 16:15:09.541574180 +0100
12951@@ -0,0 +1,817 @@
4a4d8108 12952+/*
e8791d4f 12953+ * Copyright (C) 2005-2018 Junjiro R. Okajima
4a4d8108
AM
12954+ *
12955+ * This program, aufs is free software; you can redistribute it and/or modify
12956+ * it under the terms of the GNU General Public License as published by
12957+ * the Free Software Foundation; either version 2 of the License, or
12958+ * (at your option) any later version.
12959+ *
12960+ * This program is distributed in the hope that it will be useful,
12961+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12962+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12963+ * GNU General Public License for more details.
12964+ *
12965+ * You should have received a copy of the GNU General Public License
523b37e3 12966+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
12967+ */
12968+
12969+/*
e8791d4f 12970+ * file and vm operations
4a4d8108
AM
12971+ */
12972+
e8791d4f
AM
12973+#include <linux/aio.h>
12974+#include <linux/fs_stack.h>
12975+#include <linux/mman.h>
12976+#include <linux/security.h>
12977+#include "aufs.h"
4a4d8108 12978+
e8791d4f
AM
12979+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
12980+{
12981+ int err;
12982+ aufs_bindex_t bindex;
12983+ struct dentry *dentry, *h_dentry;
12984+ struct au_finfo *finfo;
12985+ struct inode *h_inode;
4a4d8108 12986+
e8791d4f 12987+ FiMustWriteLock(file);
4a4d8108 12988+
e8791d4f
AM
12989+ err = 0;
12990+ dentry = file->f_path.dentry;
12991+ AuDebugOn(IS_ERR_OR_NULL(dentry));
12992+ finfo = au_fi(file);
12993+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
12994+ atomic_set(&finfo->fi_mmapped, 0);
12995+ bindex = au_dbtop(dentry);
12996+ if (!h_file) {
12997+ h_dentry = au_h_dptr(dentry, bindex);
12998+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
12999+ if (unlikely(err))
13000+ goto out;
13001+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
13002+ } else {
13003+ h_dentry = h_file->f_path.dentry;
13004+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13005+ if (unlikely(err))
13006+ goto out;
13007+ get_file(h_file);
13008+ }
13009+ if (IS_ERR(h_file))
13010+ err = PTR_ERR(h_file);
13011+ else {
13012+ if ((flags & __O_TMPFILE)
13013+ && !(flags & O_EXCL)) {
13014+ h_inode = file_inode(h_file);
13015+ spin_lock(&h_inode->i_lock);
13016+ h_inode->i_state |= I_LINKABLE;
13017+ spin_unlock(&h_inode->i_lock);
13018+ }
13019+ au_set_fbtop(file, bindex);
13020+ au_set_h_fptr(file, bindex, h_file);
13021+ au_update_figen(file);
13022+ /* todo: necessary? */
13023+ /* file->f_ra = h_file->f_ra; */
13024+ }
4a4d8108 13025+
e8791d4f
AM
13026+out:
13027+ return err;
13028+}
4a4d8108 13029+
e8791d4f
AM
13030+static int aufs_open_nondir(struct inode *inode __maybe_unused,
13031+ struct file *file)
13032+{
13033+ int err;
13034+ struct super_block *sb;
13035+ struct au_do_open_args args = {
13036+ .open = au_do_open_nondir
4a4d8108 13037+ };
4a4d8108 13038+
e8791d4f
AM
13039+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
13040+ file, vfsub_file_flags(file), file->f_mode);
4a4d8108 13041+
e8791d4f
AM
13042+ sb = file->f_path.dentry->d_sb;
13043+ si_read_lock(sb, AuLock_FLUSH);
13044+ err = au_do_open(file, &args);
13045+ si_read_unlock(sb);
13046+ return err;
13047+}
13048+
13049+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
13050+{
13051+ struct au_finfo *finfo;
13052+ aufs_bindex_t bindex;
13053+
13054+ finfo = au_fi(file);
13055+ au_hbl_del(&finfo->fi_hlist,
13056+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
13057+ bindex = finfo->fi_btop;
13058+ if (bindex >= 0)
13059+ au_set_h_fptr(file, bindex, NULL);
13060+
13061+ au_finfo_fin(file);
13062+ return 0;
13063+}
4a4d8108 13064+
4a4d8108
AM
13065+/* ---------------------------------------------------------------------- */
13066+
e8791d4f
AM
13067+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
13068+{
13069+ int err;
13070+ struct file *h_file;
4a4d8108 13071+
e8791d4f
AM
13072+ err = 0;
13073+ h_file = au_hf_top(file);
13074+ if (h_file)
13075+ err = vfsub_flush(h_file, id);
13076+ return err;
13077+}
4a4d8108 13078+
e8791d4f
AM
13079+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
13080+{
13081+ return au_do_flush(file, id, au_do_flush_nondir);
13082+}
4a4d8108 13083+
e8791d4f 13084+/* ---------------------------------------------------------------------- */
4a4d8108 13085+/*
e8791d4f
AM
13086+ * read and write functions acquire [fdi]_rwsem once, but release before
13087+ * mmap_sem. This is to stop a race condition with mmap(2).
13088+ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping
13089+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
13090+ * read functions after [fdi]_rwsem are released, but it should be harmless.
4a4d8108
AM
13091+ */
13092+
e8791d4f
AM
13093+/* Callers should call au_read_post() or fput() in the end */
13094+struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc)
4a4d8108 13095+{
e8791d4f
AM
13096+ struct file *h_file;
13097+ int err;
4a4d8108 13098+
e8791d4f
AM
13099+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0, lsc);
13100+ if (!err) {
13101+ di_read_unlock(file->f_path.dentry, AuLock_IR);
13102+ h_file = au_hf_top(file);
13103+ get_file(h_file);
13104+ if (!keep_fi)
13105+ fi_read_unlock(file);
13106+ } else
13107+ h_file = ERR_PTR(err);
13108+
13109+ return h_file;
4a4d8108
AM
13110+}
13111+
e8791d4f 13112+static void au_read_post(struct inode *inode, struct file *h_file)
4a4d8108 13113+{
e8791d4f
AM
13114+ /* update without lock, I don't think it is a problem */
13115+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13116+ fput(h_file);
4a4d8108
AM
13117+}
13118+
e8791d4f
AM
13119+struct au_write_pre {
13120+ /* input */
13121+ unsigned int lsc;
4a4d8108 13122+
e8791d4f
AM
13123+ /* output */
13124+ blkcnt_t blks;
13125+ aufs_bindex_t btop;
4a4d8108
AM
13126+};
13127+
e8791d4f
AM
13128+/*
13129+ * returns with iinfo write-locked
13130+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
13131+ * end
13132+ */
13133+static struct file *au_write_pre(struct file *file, int do_ready,
13134+ struct au_write_pre *wpre)
4a4d8108 13135+{
e8791d4f
AM
13136+ struct file *h_file;
13137+ struct dentry *dentry;
13138+ int err;
13139+ unsigned int lsc;
13140+ struct au_pin pin;
4a4d8108 13141+
e8791d4f
AM
13142+ lsc = 0;
13143+ if (wpre)
13144+ lsc = wpre->lsc;
13145+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1, lsc);
13146+ h_file = ERR_PTR(err);
13147+ if (unlikely(err))
13148+ goto out;
a2a7ad62 13149+
e8791d4f
AM
13150+ dentry = file->f_path.dentry;
13151+ if (do_ready) {
13152+ err = au_ready_to_write(file, -1, &pin);
13153+ if (unlikely(err)) {
13154+ h_file = ERR_PTR(err);
13155+ di_write_unlock(dentry);
13156+ goto out_fi;
13157+ }
a2a7ad62
AM
13158+ }
13159+
e8791d4f
AM
13160+ di_downgrade_lock(dentry, /*flags*/0);
13161+ if (wpre)
13162+ wpre->btop = au_fbtop(file);
13163+ h_file = au_hf_top(file);
13164+ get_file(h_file);
13165+ if (wpre)
13166+ wpre->blks = file_inode(h_file)->i_blocks;
13167+ if (do_ready)
13168+ au_unpin(&pin);
13169+ di_read_unlock(dentry, /*flags*/0);
13170+
13171+out_fi:
13172+ fi_write_unlock(file);
13173+out:
13174+ return h_file;
a2a7ad62
AM
13175+}
13176+
e8791d4f
AM
13177+static void au_write_post(struct inode *inode, struct file *h_file,
13178+ struct au_write_pre *wpre, ssize_t written)
13179+{
13180+ struct inode *h_inode;
13181+
13182+ au_cpup_attr_timesizes(inode);
13183+ AuDebugOn(au_ibtop(inode) != wpre->btop);
13184+ h_inode = file_inode(h_file);
13185+ inode->i_mode = h_inode->i_mode;
13186+ ii_write_unlock(inode);
13187+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
13188+ if (written > 0)
13189+ au_fhsm_wrote(inode->i_sb, wpre->btop,
13190+ /*force*/h_inode->i_blocks > wpre->blks);
13191+ fput(h_file);
13192+}
4a4d8108 13193+
e8791d4f
AM
13194+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
13195+ loff_t *ppos)
4a4d8108 13196+{
e8791d4f
AM
13197+ ssize_t err;
13198+ struct inode *inode;
13199+ struct file *h_file;
4a4d8108 13200+ struct super_block *sb;
4a4d8108 13201+
e8791d4f 13202+ inode = file_inode(file);
4a4d8108 13203+ sb = inode->i_sb;
e8791d4f 13204+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 13205+
e8791d4f
AM
13206+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
13207+ err = PTR_ERR(h_file);
13208+ if (IS_ERR(h_file))
13209+ goto out;
1facf9fc 13210+
e8791d4f
AM
13211+ /* filedata may be obsoleted by concurrent copyup, but no problem */
13212+ err = vfsub_read_u(h_file, buf, count, ppos);
13213+ /* todo: necessary? */
13214+ /* file->f_ra = h_file->f_ra; */
13215+ au_read_post(inode, h_file);
13216+
13217+out:
13218+ si_read_unlock(sb);
13219+ return err;
1facf9fc 13220+}
13221+
e8791d4f
AM
13222+/*
13223+ * todo: very ugly
13224+ * it safely locks both i_mutex and si_rwsem (for read).
13225+ * if the plink maintenance mode continues forever (that is the problem),
13226+ * it may loop forever.
13227+ */
13228+static void au_mtx_and_read_lock(struct inode *inode)
1facf9fc 13229+{
13230+ int err;
e8791d4f 13231+ struct super_block *sb = inode->i_sb;
1facf9fc 13232+
e8791d4f
AM
13233+ while (1) {
13234+ inode_lock(inode);
13235+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
13236+ if (!err)
13237+ break;
13238+ inode_unlock(inode);
13239+ si_read_lock(sb, AuLock_NOPLMW);
13240+ si_read_unlock(sb);
1facf9fc 13241+ }
e8791d4f 13242+}
1facf9fc 13243+
e8791d4f
AM
13244+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
13245+ size_t count, loff_t *ppos)
13246+{
13247+ ssize_t err;
13248+ struct au_write_pre wpre;
13249+ struct inode *inode;
13250+ struct file *h_file;
13251+ char __user *buf = (char __user *)ubuf;
1facf9fc 13252+
e8791d4f
AM
13253+ inode = file_inode(file);
13254+ au_mtx_and_read_lock(inode);
1facf9fc 13255+
e8791d4f
AM
13256+ wpre.lsc = 0;
13257+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13258+ err = PTR_ERR(h_file);
13259+ if (IS_ERR(h_file))
13260+ goto out;
13261+
13262+ err = vfsub_write_u(h_file, buf, count, ppos);
13263+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13264+
4f0767ce 13265+out:
e8791d4f
AM
13266+ si_read_unlock(inode->i_sb);
13267+ inode_unlock(inode);
1facf9fc 13268+ return err;
13269+}
13270+
e8791d4f
AM
13271+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
13272+ struct iov_iter *iov_iter)
1facf9fc 13273+{
e8791d4f 13274+ ssize_t err;
1facf9fc 13275+ struct file *file;
e8791d4f 13276+ ssize_t (*iter)(struct kiocb *, struct iov_iter *);
1facf9fc 13277+
e8791d4f
AM
13278+ err = security_file_permission(h_file, rw);
13279+ if (unlikely(err))
1facf9fc 13280+ goto out;
e8791d4f
AM
13281+
13282+ err = -ENOSYS;
13283+ iter = NULL;
13284+ if (rw == MAY_READ)
13285+ iter = h_file->f_op->read_iter;
13286+ else if (rw == MAY_WRITE)
13287+ iter = h_file->f_op->write_iter;
13288+
13289+ file = kio->ki_filp;
13290+ kio->ki_filp = h_file;
13291+ if (iter) {
13292+ lockdep_off();
13293+ err = iter(kio, iov_iter);
13294+ lockdep_on();
13295+ } else
13296+ /* currently there is no such fs */
13297+ WARN_ON_ONCE(1);
13298+ kio->ki_filp = file;
1facf9fc 13299+
4f0767ce 13300+out:
1facf9fc 13301+ return err;
13302+}
13303+
e8791d4f 13304+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1facf9fc 13305+{
e8791d4f
AM
13306+ ssize_t err;
13307+ struct file *file, *h_file;
13308+ struct inode *inode;
13309+ struct super_block *sb;
1facf9fc 13310+
e8791d4f
AM
13311+ file = kio->ki_filp;
13312+ inode = file_inode(file);
13313+ sb = inode->i_sb;
13314+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
dece6358 13315+
e8791d4f
AM
13316+ h_file = au_read_pre(file, /*keep_fi*/1, /*lsc*/0);
13317+ err = PTR_ERR(h_file);
13318+ if (IS_ERR(h_file))
13319+ goto out;
13320+
13321+ if (0 && au_test_loopback_kthread()) {
13322+ au_warn_loopback(h_file->f_path.dentry->d_sb);
13323+ if (file->f_mapping != h_file->f_mapping) {
13324+ file->f_mapping = h_file->f_mapping;
13325+ smp_mb(); /* unnecessary? */
13326+ }
1facf9fc 13327+ }
e8791d4f 13328+ fi_read_unlock(file);
1facf9fc 13329+
e8791d4f
AM
13330+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
13331+ /* todo: necessary? */
13332+ /* file->f_ra = h_file->f_ra; */
13333+ au_read_post(inode, h_file);
1facf9fc 13334+
e8791d4f
AM
13335+out:
13336+ si_read_unlock(sb);
13337+ return err;
13338+}
13339+
13340+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1facf9fc 13341+{
e8791d4f
AM
13342+ ssize_t err;
13343+ struct au_write_pre wpre;
1facf9fc 13344+ struct inode *inode;
e8791d4f 13345+ struct file *file, *h_file;
1facf9fc 13346+
e8791d4f
AM
13347+ file = kio->ki_filp;
13348+ inode = file_inode(file);
13349+ au_mtx_and_read_lock(inode);
1facf9fc 13350+
e8791d4f
AM
13351+ wpre.lsc = 0;
13352+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13353+ err = PTR_ERR(h_file);
13354+ if (IS_ERR(h_file))
13355+ goto out;
1facf9fc 13356+
e8791d4f
AM
13357+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
13358+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13359+
4f0767ce 13360+out:
e8791d4f
AM
13361+ si_read_unlock(inode->i_sb);
13362+ inode_unlock(inode);
13363+ return err;
1facf9fc 13364+}
13365+
e8791d4f
AM
13366+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
13367+ struct pipe_inode_info *pipe, size_t len,
13368+ unsigned int flags)
13369+{
13370+ ssize_t err;
13371+ struct file *h_file;
13372+ struct inode *inode;
4a4d8108
AM
13373+ struct super_block *sb;
13374+
e8791d4f
AM
13375+ inode = file_inode(file);
13376+ sb = inode->i_sb;
13377+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 13378+
e8791d4f
AM
13379+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
13380+ err = PTR_ERR(h_file);
13381+ if (IS_ERR(h_file))
13382+ goto out;
4a4d8108 13383+
e8791d4f
AM
13384+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
13385+ /* todo: necessary? */
13386+ /* file->f_ra = h_file->f_ra; */
13387+ au_read_post(inode, h_file);
1facf9fc 13388+
e8791d4f
AM
13389+out:
13390+ si_read_unlock(sb);
13391+ return err;
1facf9fc 13392+}
13393+
e8791d4f
AM
13394+static ssize_t
13395+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
13396+ size_t len, unsigned int flags)
1facf9fc 13397+{
e8791d4f
AM
13398+ ssize_t err;
13399+ struct au_write_pre wpre;
13400+ struct inode *inode;
13401+ struct file *h_file;
1facf9fc 13402+
e8791d4f
AM
13403+ inode = file_inode(file);
13404+ au_mtx_and_read_lock(inode);
1facf9fc 13405+
e8791d4f
AM
13406+ wpre.lsc = 0;
13407+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13408+ err = PTR_ERR(h_file);
13409+ if (IS_ERR(h_file))
13410+ goto out;
1facf9fc 13411+
e8791d4f
AM
13412+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
13413+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13414+
4f0767ce 13415+out:
e8791d4f
AM
13416+ si_read_unlock(inode->i_sb);
13417+ inode_unlock(inode);
027c5e7a 13418+ return err;
1facf9fc 13419+}
13420+
e8791d4f
AM
13421+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
13422+ loff_t len)
1facf9fc 13423+{
e8791d4f
AM
13424+ long err;
13425+ struct au_write_pre wpre;
13426+ struct inode *inode;
13427+ struct file *h_file;
1facf9fc 13428+
e8791d4f
AM
13429+ inode = file_inode(file);
13430+ au_mtx_and_read_lock(inode);
1facf9fc 13431+
e8791d4f
AM
13432+ wpre.lsc = 0;
13433+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13434+ err = PTR_ERR(h_file);
13435+ if (IS_ERR(h_file))
13436+ goto out;
13437+
13438+ lockdep_off();
13439+ err = vfs_fallocate(h_file, mode, offset, len);
13440+ lockdep_on();
13441+ au_write_post(inode, h_file, &wpre, /*written*/1);
13442+
13443+out:
13444+ si_read_unlock(inode->i_sb);
13445+ inode_unlock(inode);
13446+ return err;
1facf9fc 13447+}
13448+
e8791d4f
AM
13449+static ssize_t aufs_copy_file_range(struct file *src, loff_t src_pos,
13450+ struct file *dst, loff_t dst_pos,
13451+ size_t len, unsigned int flags)
1facf9fc 13452+{
e8791d4f
AM
13453+ ssize_t err;
13454+ struct au_write_pre wpre;
13455+ enum { SRC, DST };
13456+ struct {
13457+ struct inode *inode;
13458+ struct file *h_file;
13459+ struct super_block *h_sb;
13460+ } a[2];
13461+#define a_src a[SRC]
13462+#define a_dst a[DST]
1facf9fc 13463+
e8791d4f
AM
13464+ err = -EINVAL;
13465+ a_src.inode = file_inode(src);
13466+ if (unlikely(!S_ISREG(a_src.inode->i_mode)))
13467+ goto out;
13468+ a_dst.inode = file_inode(dst);
13469+ if (unlikely(!S_ISREG(a_dst.inode->i_mode)))
1facf9fc 13470+ goto out;
13471+
e8791d4f
AM
13472+ au_mtx_and_read_lock(a_dst.inode);
13473+ /*
13474+ * in order to match the order in di_write_lock2_{child,parent}(),
13475+ * use f_path.dentry for this comparison.
13476+ */
13477+ if (src->f_path.dentry < dst->f_path.dentry) {
13478+ a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_1);
13479+ err = PTR_ERR(a_src.h_file);
13480+ if (IS_ERR(a_src.h_file))
13481+ goto out_si;
1facf9fc 13482+
e8791d4f
AM
13483+ wpre.lsc = AuLsc_FI_2;
13484+ a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
13485+ err = PTR_ERR(a_dst.h_file);
13486+ if (IS_ERR(a_dst.h_file)) {
13487+ au_read_post(a_src.inode, a_src.h_file);
13488+ goto out_si;
1facf9fc 13489+ }
e8791d4f
AM
13490+ } else {
13491+ wpre.lsc = AuLsc_FI_1;
13492+ a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
13493+ err = PTR_ERR(a_dst.h_file);
13494+ if (IS_ERR(a_dst.h_file))
13495+ goto out_si;
1facf9fc 13496+
e8791d4f
AM
13497+ a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_2);
13498+ err = PTR_ERR(a_src.h_file);
13499+ if (IS_ERR(a_src.h_file)) {
13500+ au_write_post(a_dst.inode, a_dst.h_file, &wpre,
13501+ /*written*/0);
13502+ goto out_si;
13503+ }
13504+ }
1facf9fc 13505+
e8791d4f
AM
13506+ err = -EXDEV;
13507+ a_src.h_sb = file_inode(a_src.h_file)->i_sb;
13508+ a_dst.h_sb = file_inode(a_dst.h_file)->i_sb;
13509+ if (unlikely(a_src.h_sb != a_dst.h_sb)) {
13510+ AuDbgFile(src);
13511+ AuDbgFile(dst);
13512+ goto out_file;
13513+ }
1facf9fc 13514+
e8791d4f
AM
13515+ err = vfsub_copy_file_range(a_src.h_file, src_pos, a_dst.h_file,
13516+ dst_pos, len, flags);
1facf9fc 13517+
e8791d4f
AM
13518+out_file:
13519+ au_write_post(a_dst.inode, a_dst.h_file, &wpre, err);
13520+ fi_read_unlock(src);
13521+ au_read_post(a_src.inode, a_src.h_file);
13522+out_si:
13523+ si_read_unlock(a_dst.inode->i_sb);
13524+ inode_unlock(a_dst.inode);
4f0767ce 13525+out:
e8791d4f
AM
13526+ return err;
13527+#undef a_src
13528+#undef a_dst
1facf9fc 13529+}
13530+
13531+/* ---------------------------------------------------------------------- */
13532+
e8791d4f
AM
13533+/*
13534+ * The locking order around current->mmap_sem.
13535+ * - in most and regular cases
13536+ * file I/O syscall -- aufs_read() or something
13537+ * -- si_rwsem for read -- mmap_sem
13538+ * (Note that [fdi]i_rwsem are released before mmap_sem).
13539+ * - in mmap case
13540+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
13541+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
13542+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in
13543+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
13544+ * It means that when aufs acquires si_rwsem for write, the process should never
13545+ * acquire mmap_sem.
13546+ *
13547+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
13548+ * problem either since no directory can be mmap-ed.
13549+ * A similar scenario applies to aufs_readlink() too.
13550+ */
13551+
13552+#if 0 /* stop calling security_file_mmap() */
13553+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
13554+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
13555+
13556+static unsigned long au_arch_prot_conv(unsigned long flags)
1facf9fc 13557+{
e8791d4f
AM
13558+ /* currently ppc64 only */
13559+#ifdef CONFIG_PPC64
13560+ /* cf. linux/arch/powerpc/include/asm/mman.h */
13561+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
13562+ return AuConv_VM_PROT(flags, SAO);
13563+#else
13564+ AuDebugOn(arch_calc_vm_prot_bits(-1));
13565+ return 0;
13566+#endif
1facf9fc 13567+}
13568+
e8791d4f 13569+static unsigned long au_prot_conv(unsigned long flags)
1facf9fc 13570+{
e8791d4f
AM
13571+ return AuConv_VM_PROT(flags, READ)
13572+ | AuConv_VM_PROT(flags, WRITE)
13573+ | AuConv_VM_PROT(flags, EXEC)
13574+ | au_arch_prot_conv(flags);
13575+}
1facf9fc 13576+
e8791d4f
AM
13577+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
13578+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
1facf9fc 13579+
e8791d4f
AM
13580+static unsigned long au_flag_conv(unsigned long flags)
13581+{
13582+ return AuConv_VM_MAP(flags, GROWSDOWN)
13583+ | AuConv_VM_MAP(flags, DENYWRITE)
13584+ | AuConv_VM_MAP(flags, LOCKED);
1facf9fc 13585+}
e8791d4f 13586+#endif
1facf9fc 13587+
e8791d4f 13588+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
1facf9fc 13589+{
1facf9fc 13590+ int err;
e8791d4f
AM
13591+ const unsigned char wlock
13592+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
13593+ struct super_block *sb;
13594+ struct file *h_file;
13595+ struct inode *inode;
1facf9fc 13596+
e8791d4f
AM
13597+ AuDbgVmRegion(file, vma);
13598+
13599+ inode = file_inode(file);
13600+ sb = inode->i_sb;
5afbbe0d 13601+ lockdep_off();
e8791d4f
AM
13602+ si_read_lock(sb, AuLock_NOPLMW);
13603+
13604+ h_file = au_write_pre(file, wlock, /*wpre*/NULL);
5afbbe0d 13605+ lockdep_on();
e8791d4f
AM
13606+ err = PTR_ERR(h_file);
13607+ if (IS_ERR(h_file))
1facf9fc 13608+ goto out;
1facf9fc 13609+
e8791d4f
AM
13610+ err = 0;
13611+ au_set_mmapped(file);
13612+ au_vm_file_reset(vma, h_file);
13613+ /*
13614+ * we cannot call security_mmap_file() here since it may acquire
13615+ * mmap_sem or i_mutex.
13616+ *
13617+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
13618+ * au_flag_conv(vma->vm_flags));
13619+ */
13620+ if (!err)
13621+ err = h_file->f_op->mmap(h_file, vma);
13622+ if (!err) {
13623+ au_vm_prfile_set(vma, file);
13624+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13625+ goto out_fput; /* success */
13626+ }
13627+ au_unset_mmapped(file);
13628+ au_vm_file_reset(vma, file);
1facf9fc 13629+
e8791d4f
AM
13630+out_fput:
13631+ lockdep_off();
13632+ ii_write_unlock(inode);
13633+ lockdep_on();
13634+ fput(h_file);
4f0767ce 13635+out:
e8791d4f
AM
13636+ lockdep_off();
13637+ si_read_unlock(sb);
13638+ lockdep_on();
13639+ AuTraceErr(err);
13640+ return err;
1facf9fc 13641+}
13642+
13643+/* ---------------------------------------------------------------------- */
13644+
e8791d4f
AM
13645+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
13646+ int datasync)
1facf9fc 13647+{
e8791d4f
AM
13648+ int err;
13649+ struct au_write_pre wpre;
13650+ struct inode *inode;
13651+ struct file *h_file;
4a4d8108 13652+
e8791d4f
AM
13653+ err = 0; /* -EBADF; */ /* posix? */
13654+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
1facf9fc 13655+ goto out;
1facf9fc 13656+
e8791d4f
AM
13657+ inode = file_inode(file);
13658+ au_mtx_and_read_lock(inode);
1facf9fc 13659+
e8791d4f
AM
13660+ wpre.lsc = 0;
13661+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13662+ err = PTR_ERR(h_file);
13663+ if (IS_ERR(h_file))
1facf9fc 13664+ goto out_unlock;
13665+
e8791d4f
AM
13666+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
13667+ au_write_post(inode, h_file, &wpre, /*written*/0);
1facf9fc 13668+
4f0767ce 13669+out_unlock:
e8791d4f
AM
13670+ si_read_unlock(inode->i_sb);
13671+ inode_unlock(inode);
4f0767ce 13672+out:
e8791d4f 13673+ return err;
1facf9fc 13674+}
1facf9fc 13675+
e8791d4f 13676+static int aufs_fasync(int fd, struct file *file, int flag)
1facf9fc 13677+{
13678+ int err;
e8791d4f
AM
13679+ struct file *h_file;
13680+ struct super_block *sb;
1facf9fc 13681+
e8791d4f
AM
13682+ sb = file->f_path.dentry->d_sb;
13683+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 13684+
e8791d4f
AM
13685+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
13686+ err = PTR_ERR(h_file);
13687+ if (IS_ERR(h_file))
027c5e7a
AM
13688+ goto out;
13689+
e8791d4f
AM
13690+ if (h_file->f_op->fasync)
13691+ err = h_file->f_op->fasync(fd, h_file, flag);
13692+ fput(h_file); /* instead of au_read_post() */
1facf9fc 13693+
4f0767ce 13694+out:
e8791d4f 13695+ si_read_unlock(sb);
1facf9fc 13696+ return err;
13697+}
13698+
e8791d4f 13699+static int aufs_setfl(struct file *file, unsigned long arg)
4a4d8108
AM
13700+{
13701+ int err;
e8791d4f 13702+ struct file *h_file;
4a4d8108 13703+ struct super_block *sb;
4a4d8108 13704+
e8791d4f 13705+ sb = file->f_path.dentry->d_sb;
e49829fe 13706+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 13707+
e8791d4f
AM
13708+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
13709+ err = PTR_ERR(h_file);
13710+ if (IS_ERR(h_file))
13711+ goto out;
4a4d8108 13712+
e8791d4f
AM
13713+ /* stop calling h_file->fasync */
13714+ arg |= vfsub_file_flags(file) & FASYNC;
13715+ err = setfl(/*unused fd*/-1, h_file, arg);
13716+ fput(h_file); /* instead of au_read_post() */
4a4d8108 13717+
e8791d4f 13718+out:
4a4d8108
AM
13719+ si_read_unlock(sb);
13720+ return err;
13721+}
13722+
13723+/* ---------------------------------------------------------------------- */
13724+
e8791d4f
AM
13725+/* no one supports this operation, currently */
13726+#if 0
13727+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
13728+ size_t len, loff_t *pos, int more)
1facf9fc 13729+{
e8791d4f
AM
13730+}
13731+#endif
1facf9fc 13732+
e8791d4f 13733+/* ---------------------------------------------------------------------- */
5afbbe0d 13734+
e8791d4f
AM
13735+const struct file_operations aufs_file_fop = {
13736+ .owner = THIS_MODULE,
13737+
13738+ .llseek = default_llseek,
13739+
13740+ .read = aufs_read,
13741+ .write = aufs_write,
13742+ .read_iter = aufs_read_iter,
13743+ .write_iter = aufs_write_iter,
13744+
13745+#ifdef CONFIG_AUFS_POLL
13746+ .poll = aufs_poll,
13747+#endif
13748+ .unlocked_ioctl = aufs_ioctl_nondir,
13749+#ifdef CONFIG_COMPAT
13750+ .compat_ioctl = aufs_compat_ioctl_nondir,
13751+#endif
13752+ .mmap = aufs_mmap,
13753+ .open = aufs_open_nondir,
13754+ .flush = aufs_flush_nondir,
13755+ .release = aufs_release_nondir,
13756+ .fsync = aufs_fsync_nondir,
13757+ .fasync = aufs_fasync,
13758+ /* .sendpage = aufs_sendpage, */
13759+ .setfl = aufs_setfl,
13760+ .splice_write = aufs_splice_write,
13761+ .splice_read = aufs_splice_read,
13762+#if 0
13763+ .aio_splice_write = aufs_aio_splice_write,
13764+ .aio_splice_read = aufs_aio_splice_read,
13765+#endif
13766+ .fallocate = aufs_fallocate,
13767+ .copy_file_range = aufs_copy_file_range
13768+};
13769diff -urNp -x '*.orig' linux-4.9/fs/aufs/fhsm.c linux-4.9/fs/aufs/fhsm.c
13770--- linux-4.9/fs/aufs/fhsm.c 1970-01-01 01:00:00.000000000 +0100
13771+++ linux-4.9/fs/aufs/fhsm.c 2021-02-24 16:15:09.528240413 +0100
ae9dfd79 13772@@ -0,0 +1,427 @@
076b876e 13773+/*
ae9dfd79 13774+ * Copyright (C) 2011-2018 Junjiro R. Okajima
076b876e
AM
13775+ *
13776+ * This program, aufs is free software; you can redistribute it and/or modify
13777+ * it under the terms of the GNU General Public License as published by
13778+ * the Free Software Foundation; either version 2 of the License, or
13779+ * (at your option) any later version.
13780+ *
13781+ * This program is distributed in the hope that it will be useful,
13782+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13783+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13784+ * GNU General Public License for more details.
13785+ *
13786+ * You should have received a copy of the GNU General Public License
13787+ * along with this program; if not, write to the Free Software
13788+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13789+ */
13790+
13791+/*
13792+ * File-based Hierarchy Storage Management
13793+ */
13794+
13795+#include <linux/anon_inodes.h>
13796+#include <linux/poll.h>
13797+#include <linux/seq_file.h>
13798+#include <linux/statfs.h>
13799+#include "aufs.h"
13800+
c1595e42
JR
13801+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
13802+{
13803+ struct au_sbinfo *sbinfo;
13804+ struct au_fhsm *fhsm;
13805+
13806+ SiMustAnyLock(sb);
13807+
13808+ sbinfo = au_sbi(sb);
13809+ fhsm = &sbinfo->si_fhsm;
13810+ AuDebugOn(!fhsm);
13811+ return fhsm->fhsm_bottom;
13812+}
13813+
13814+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
13815+{
13816+ struct au_sbinfo *sbinfo;
13817+ struct au_fhsm *fhsm;
13818+
13819+ SiMustWriteLock(sb);
13820+
13821+ sbinfo = au_sbi(sb);
13822+ fhsm = &sbinfo->si_fhsm;
13823+ AuDebugOn(!fhsm);
13824+ fhsm->fhsm_bottom = bindex;
13825+}
13826+
13827+/* ---------------------------------------------------------------------- */
13828+
076b876e
AM
13829+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
13830+{
13831+ struct au_br_fhsm *bf;
13832+
13833+ bf = br->br_fhsm;
13834+ MtxMustLock(&bf->bf_lock);
13835+
13836+ return !bf->bf_readable
13837+ || time_after(jiffies,
13838+ bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
13839+}
13840+
13841+/* ---------------------------------------------------------------------- */
13842+
13843+static void au_fhsm_notify(struct super_block *sb, int val)
13844+{
13845+ struct au_sbinfo *sbinfo;
13846+ struct au_fhsm *fhsm;
13847+
13848+ SiMustAnyLock(sb);
13849+
13850+ sbinfo = au_sbi(sb);
13851+ fhsm = &sbinfo->si_fhsm;
13852+ if (au_fhsm_pid(fhsm)
13853+ && atomic_read(&fhsm->fhsm_readable) != -1) {
13854+ atomic_set(&fhsm->fhsm_readable, val);
13855+ if (val)
13856+ wake_up(&fhsm->fhsm_wqh);
13857+ }
13858+}
13859+
13860+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
13861+ struct aufs_stfs *rstfs, int do_lock, int do_notify)
13862+{
13863+ int err;
13864+ struct au_branch *br;
13865+ struct au_br_fhsm *bf;
13866+
13867+ br = au_sbr(sb, bindex);
13868+ AuDebugOn(au_br_rdonly(br));
13869+ bf = br->br_fhsm;
13870+ AuDebugOn(!bf);
13871+
13872+ if (do_lock)
13873+ mutex_lock(&bf->bf_lock);
13874+ else
13875+ MtxMustLock(&bf->bf_lock);
13876+
13877+ /* sb->s_root for NFS is unreliable */
13878+ err = au_br_stfs(br, &bf->bf_stfs);
13879+ if (unlikely(err)) {
13880+ AuErr1("FHSM failed (%d), b%d, ignored.\n", bindex, err);
13881+ goto out;
13882+ }
13883+
13884+ bf->bf_jiffy = jiffies;
13885+ bf->bf_readable = 1;
13886+ if (do_notify)
13887+ au_fhsm_notify(sb, /*val*/1);
13888+ if (rstfs)
13889+ *rstfs = bf->bf_stfs;
13890+
13891+out:
13892+ if (do_lock)
13893+ mutex_unlock(&bf->bf_lock);
13894+ au_fhsm_notify(sb, /*val*/1);
13895+
13896+ return err;
13897+}
13898+
13899+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
13900+{
13901+ int err;
076b876e
AM
13902+ struct au_sbinfo *sbinfo;
13903+ struct au_fhsm *fhsm;
13904+ struct au_branch *br;
13905+ struct au_br_fhsm *bf;
13906+
13907+ AuDbg("b%d, force %d\n", bindex, force);
13908+ SiMustAnyLock(sb);
13909+
13910+ sbinfo = au_sbi(sb);
13911+ fhsm = &sbinfo->si_fhsm;
c1595e42
JR
13912+ if (!au_ftest_si(sbinfo, FHSM)
13913+ || fhsm->fhsm_bottom == bindex)
076b876e
AM
13914+ return;
13915+
13916+ br = au_sbr(sb, bindex);
13917+ bf = br->br_fhsm;
13918+ AuDebugOn(!bf);
13919+ mutex_lock(&bf->bf_lock);
13920+ if (force
13921+ || au_fhsm_pid(fhsm)
13922+ || au_fhsm_test_jiffy(sbinfo, br))
13923+ err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
13924+ /*do_notify*/1);
13925+ mutex_unlock(&bf->bf_lock);
13926+}
13927+
13928+void au_fhsm_wrote_all(struct super_block *sb, int force)
13929+{
5afbbe0d 13930+ aufs_bindex_t bindex, bbot;
076b876e
AM
13931+ struct au_branch *br;
13932+
13933+ /* exclude the bottom */
5afbbe0d
AM
13934+ bbot = au_fhsm_bottom(sb);
13935+ for (bindex = 0; bindex < bbot; bindex++) {
076b876e
AM
13936+ br = au_sbr(sb, bindex);
13937+ if (au_br_fhsm(br->br_perm))
13938+ au_fhsm_wrote(sb, bindex, force);
13939+ }
13940+}
13941+
13942+/* ---------------------------------------------------------------------- */
13943+
13944+static unsigned int au_fhsm_poll(struct file *file,
13945+ struct poll_table_struct *wait)
13946+{
13947+ unsigned int mask;
13948+ struct au_sbinfo *sbinfo;
13949+ struct au_fhsm *fhsm;
13950+
13951+ mask = 0;
13952+ sbinfo = file->private_data;
13953+ fhsm = &sbinfo->si_fhsm;
13954+ poll_wait(file, &fhsm->fhsm_wqh, wait);
13955+ if (atomic_read(&fhsm->fhsm_readable))
13956+ mask = POLLIN /* | POLLRDNORM */;
13957+
ae9dfd79
AM
13958+ if (!mask)
13959+ AuDbg("mask 0x%x\n", mask);
076b876e
AM
13960+ return mask;
13961+}
13962+
13963+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
13964+ struct aufs_stfs *stfs, __s16 brid)
13965+{
13966+ int err;
13967+
13968+ err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
13969+ if (!err)
13970+ err = __put_user(brid, &stbr->brid);
13971+ if (unlikely(err))
13972+ err = -EFAULT;
13973+
13974+ return err;
13975+}
13976+
13977+static ssize_t au_fhsm_do_read(struct super_block *sb,
13978+ struct aufs_stbr __user *stbr, size_t count)
13979+{
13980+ ssize_t err;
13981+ int nstbr;
5afbbe0d 13982+ aufs_bindex_t bindex, bbot;
076b876e
AM
13983+ struct au_branch *br;
13984+ struct au_br_fhsm *bf;
13985+
13986+ /* except the bottom branch */
13987+ err = 0;
13988+ nstbr = 0;
5afbbe0d
AM
13989+ bbot = au_fhsm_bottom(sb);
13990+ for (bindex = 0; !err && bindex < bbot; bindex++) {
076b876e
AM
13991+ br = au_sbr(sb, bindex);
13992+ if (!au_br_fhsm(br->br_perm))
13993+ continue;
13994+
13995+ bf = br->br_fhsm;
13996+ mutex_lock(&bf->bf_lock);
13997+ if (bf->bf_readable) {
13998+ err = -EFAULT;
13999+ if (count >= sizeof(*stbr))
14000+ err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
14001+ br->br_id);
14002+ if (!err) {
14003+ bf->bf_readable = 0;
14004+ count -= sizeof(*stbr);
14005+ nstbr++;
14006+ }
14007+ }
14008+ mutex_unlock(&bf->bf_lock);
14009+ }
14010+ if (!err)
14011+ err = sizeof(*stbr) * nstbr;
14012+
14013+ return err;
14014+}
14015+
14016+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
14017+ loff_t *pos)
14018+{
14019+ ssize_t err;
14020+ int readable;
5afbbe0d 14021+ aufs_bindex_t nfhsm, bindex, bbot;
076b876e
AM
14022+ struct au_sbinfo *sbinfo;
14023+ struct au_fhsm *fhsm;
14024+ struct au_branch *br;
14025+ struct super_block *sb;
14026+
14027+ err = 0;
14028+ sbinfo = file->private_data;
14029+ fhsm = &sbinfo->si_fhsm;
14030+need_data:
14031+ spin_lock_irq(&fhsm->fhsm_wqh.lock);
14032+ if (!atomic_read(&fhsm->fhsm_readable)) {
14033+ if (vfsub_file_flags(file) & O_NONBLOCK)
14034+ err = -EAGAIN;
14035+ else
14036+ err = wait_event_interruptible_locked_irq
14037+ (fhsm->fhsm_wqh,
14038+ atomic_read(&fhsm->fhsm_readable));
14039+ }
14040+ spin_unlock_irq(&fhsm->fhsm_wqh.lock);
14041+ if (unlikely(err))
14042+ goto out;
14043+
14044+ /* sb may already be dead */
14045+ au_rw_read_lock(&sbinfo->si_rwsem);
14046+ readable = atomic_read(&fhsm->fhsm_readable);
14047+ if (readable > 0) {
14048+ sb = sbinfo->si_sb;
14049+ AuDebugOn(!sb);
14050+ /* exclude the bottom branch */
14051+ nfhsm = 0;
5afbbe0d
AM
14052+ bbot = au_fhsm_bottom(sb);
14053+ for (bindex = 0; bindex < bbot; bindex++) {
076b876e
AM
14054+ br = au_sbr(sb, bindex);
14055+ if (au_br_fhsm(br->br_perm))
14056+ nfhsm++;
14057+ }
14058+ err = -EMSGSIZE;
14059+ if (nfhsm * sizeof(struct aufs_stbr) <= count) {
14060+ atomic_set(&fhsm->fhsm_readable, 0);
14061+ err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
14062+ count);
14063+ }
14064+ }
14065+ au_rw_read_unlock(&sbinfo->si_rwsem);
14066+ if (!readable)
14067+ goto need_data;
14068+
14069+out:
14070+ return err;
14071+}
14072+
14073+static int au_fhsm_release(struct inode *inode, struct file *file)
14074+{
14075+ struct au_sbinfo *sbinfo;
14076+ struct au_fhsm *fhsm;
14077+
14078+ /* sb may already be dead */
14079+ sbinfo = file->private_data;
14080+ fhsm = &sbinfo->si_fhsm;
14081+ spin_lock(&fhsm->fhsm_spin);
14082+ fhsm->fhsm_pid = 0;
14083+ spin_unlock(&fhsm->fhsm_spin);
14084+ kobject_put(&sbinfo->si_kobj);
14085+
14086+ return 0;
14087+}
14088+
14089+static const struct file_operations au_fhsm_fops = {
14090+ .owner = THIS_MODULE,
14091+ .llseek = noop_llseek,
14092+ .read = au_fhsm_read,
14093+ .poll = au_fhsm_poll,
14094+ .release = au_fhsm_release
14095+};
14096+
14097+int au_fhsm_fd(struct super_block *sb, int oflags)
14098+{
14099+ int err, fd;
14100+ struct au_sbinfo *sbinfo;
14101+ struct au_fhsm *fhsm;
14102+
14103+ err = -EPERM;
14104+ if (unlikely(!capable(CAP_SYS_ADMIN)))
14105+ goto out;
14106+
14107+ err = -EINVAL;
14108+ if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
14109+ goto out;
14110+
14111+ err = 0;
14112+ sbinfo = au_sbi(sb);
14113+ fhsm = &sbinfo->si_fhsm;
14114+ spin_lock(&fhsm->fhsm_spin);
14115+ if (!fhsm->fhsm_pid)
14116+ fhsm->fhsm_pid = current->pid;
14117+ else
14118+ err = -EBUSY;
14119+ spin_unlock(&fhsm->fhsm_spin);
14120+ if (unlikely(err))
14121+ goto out;
14122+
14123+ oflags |= O_RDONLY;
14124+ /* oflags |= FMODE_NONOTIFY; */
14125+ fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
14126+ err = fd;
14127+ if (unlikely(fd < 0))
14128+ goto out_pid;
14129+
14130+ /* succeed reglardless 'fhsm' status */
14131+ kobject_get(&sbinfo->si_kobj);
14132+ si_noflush_read_lock(sb);
14133+ if (au_ftest_si(sbinfo, FHSM))
14134+ au_fhsm_wrote_all(sb, /*force*/0);
14135+ si_read_unlock(sb);
14136+ goto out; /* success */
14137+
14138+out_pid:
14139+ spin_lock(&fhsm->fhsm_spin);
14140+ fhsm->fhsm_pid = 0;
14141+ spin_unlock(&fhsm->fhsm_spin);
14142+out:
14143+ AuTraceErr(err);
14144+ return err;
14145+}
14146+
14147+/* ---------------------------------------------------------------------- */
14148+
14149+int au_fhsm_br_alloc(struct au_branch *br)
14150+{
14151+ int err;
14152+
14153+ err = 0;
14154+ br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
14155+ if (br->br_fhsm)
14156+ au_br_fhsm_init(br->br_fhsm);
14157+ else
14158+ err = -ENOMEM;
14159+
14160+ return err;
14161+}
14162+
14163+/* ---------------------------------------------------------------------- */
14164+
14165+void au_fhsm_fin(struct super_block *sb)
14166+{
14167+ au_fhsm_notify(sb, /*val*/-1);
14168+}
14169+
14170+void au_fhsm_init(struct au_sbinfo *sbinfo)
14171+{
14172+ struct au_fhsm *fhsm;
14173+
14174+ fhsm = &sbinfo->si_fhsm;
14175+ spin_lock_init(&fhsm->fhsm_spin);
14176+ init_waitqueue_head(&fhsm->fhsm_wqh);
14177+ atomic_set(&fhsm->fhsm_readable, 0);
14178+ fhsm->fhsm_expire
14179+ = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
c1595e42 14180+ fhsm->fhsm_bottom = -1;
076b876e
AM
14181+}
14182+
14183+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
14184+{
14185+ sbinfo->si_fhsm.fhsm_expire
14186+ = msecs_to_jiffies(sec * MSEC_PER_SEC);
14187+}
14188+
14189+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
14190+{
14191+ unsigned int u;
14192+
14193+ if (!au_ftest_si(sbinfo, FHSM))
14194+ return;
14195+
14196+ u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
14197+ if (u != AUFS_FHSM_CACHE_DEF_SEC)
14198+ seq_printf(seq, ",fhsm_sec=%u", u);
14199+}
e8791d4f
AM
14200diff -urNp -x '*.orig' linux-4.9/fs/aufs/file.c linux-4.9/fs/aufs/file.c
14201--- linux-4.9/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
14202+++ linux-4.9/fs/aufs/file.c 2021-02-24 16:15:09.528240413 +0100
ae9dfd79 14203@@ -0,0 +1,848 @@
1facf9fc 14204+/*
ae9dfd79 14205+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 14206+ *
14207+ * This program, aufs is free software; you can redistribute it and/or modify
14208+ * it under the terms of the GNU General Public License as published by
14209+ * the Free Software Foundation; either version 2 of the License, or
14210+ * (at your option) any later version.
dece6358
AM
14211+ *
14212+ * This program is distributed in the hope that it will be useful,
14213+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14214+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14215+ * GNU General Public License for more details.
14216+ *
14217+ * You should have received a copy of the GNU General Public License
523b37e3 14218+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 14219+ */
14220+
14221+/*
4a4d8108 14222+ * handling file/dir, and address_space operation
1facf9fc 14223+ */
14224+
7eafdf33
AM
14225+#ifdef CONFIG_AUFS_DEBUG
14226+#include <linux/migrate.h>
14227+#endif
4a4d8108 14228+#include <linux/pagemap.h>
1facf9fc 14229+#include "aufs.h"
14230+
4a4d8108
AM
14231+/* drop flags for writing */
14232+unsigned int au_file_roflags(unsigned int flags)
14233+{
14234+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
14235+ flags |= O_RDONLY | O_NOATIME;
14236+ return flags;
14237+}
14238+
14239+/* common functions to regular file and dir */
14240+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 14241+ struct file *file, int force_wr)
1facf9fc 14242+{
1308ab2a 14243+ struct file *h_file;
4a4d8108
AM
14244+ struct dentry *h_dentry;
14245+ struct inode *h_inode;
14246+ struct super_block *sb;
14247+ struct au_branch *br;
14248+ struct path h_path;
b912730e 14249+ int err;
1facf9fc 14250+
4a4d8108
AM
14251+ /* a race condition can happen between open and unlink/rmdir */
14252+ h_file = ERR_PTR(-ENOENT);
14253+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 14254+ if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
4a4d8108 14255+ goto out;
5527c038 14256+ h_inode = d_inode(h_dentry);
027c5e7a
AM
14257+ spin_lock(&h_dentry->d_lock);
14258+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
5527c038 14259+ /* || !d_inode(dentry)->i_nlink */
027c5e7a
AM
14260+ ;
14261+ spin_unlock(&h_dentry->d_lock);
14262+ if (unlikely(err))
4a4d8108 14263+ goto out;
1facf9fc 14264+
4a4d8108
AM
14265+ sb = dentry->d_sb;
14266+ br = au_sbr(sb, bindex);
b912730e
AM
14267+ err = au_br_test_oflag(flags, br);
14268+ h_file = ERR_PTR(err);
14269+ if (unlikely(err))
027c5e7a 14270+ goto out;
1facf9fc 14271+
4a4d8108 14272+ /* drop flags for writing */
5527c038 14273+ if (au_test_ro(sb, bindex, d_inode(dentry))) {
392086de
AM
14274+ if (force_wr && !(flags & O_WRONLY))
14275+ force_wr = 0;
4a4d8108 14276+ flags = au_file_roflags(flags);
392086de
AM
14277+ if (force_wr) {
14278+ h_file = ERR_PTR(-EROFS);
14279+ flags = au_file_roflags(flags);
14280+ if (unlikely(vfsub_native_ro(h_inode)
14281+ || IS_APPEND(h_inode)))
14282+ goto out;
14283+ flags &= ~O_ACCMODE;
14284+ flags |= O_WRONLY;
14285+ }
14286+ }
4a4d8108 14287+ flags &= ~O_CREAT;
5afbbe0d 14288+ au_br_get(br);
4a4d8108 14289+ h_path.dentry = h_dentry;
86dc4139 14290+ h_path.mnt = au_br_mnt(br);
38d290e6 14291+ h_file = vfsub_dentry_open(&h_path, flags);
4a4d8108
AM
14292+ if (IS_ERR(h_file))
14293+ goto out_br;
dece6358 14294+
b912730e 14295+ if (flags & __FMODE_EXEC) {
4a4d8108
AM
14296+ err = deny_write_access(h_file);
14297+ if (unlikely(err)) {
14298+ fput(h_file);
14299+ h_file = ERR_PTR(err);
14300+ goto out_br;
14301+ }
14302+ }
953406b4 14303+ fsnotify_open(h_file);
4a4d8108 14304+ goto out; /* success */
1facf9fc 14305+
4f0767ce 14306+out_br:
5afbbe0d 14307+ au_br_put(br);
4f0767ce 14308+out:
4a4d8108
AM
14309+ return h_file;
14310+}
1308ab2a 14311+
076b876e
AM
14312+static int au_cmoo(struct dentry *dentry)
14313+{
ae9dfd79 14314+ int err, cmoo, matched;
076b876e
AM
14315+ unsigned int udba;
14316+ struct path h_path;
14317+ struct au_pin pin;
14318+ struct au_cp_generic cpg = {
14319+ .dentry = dentry,
14320+ .bdst = -1,
14321+ .bsrc = -1,
14322+ .len = -1,
14323+ .pin = &pin,
14324+ .flags = AuCpup_DTIME | AuCpup_HOPEN
14325+ };
7e9cd9fe 14326+ struct inode *delegated;
076b876e
AM
14327+ struct super_block *sb;
14328+ struct au_sbinfo *sbinfo;
14329+ struct au_fhsm *fhsm;
14330+ pid_t pid;
14331+ struct au_branch *br;
14332+ struct dentry *parent;
14333+ struct au_hinode *hdir;
14334+
14335+ DiMustWriteLock(dentry);
5527c038 14336+ IiMustWriteLock(d_inode(dentry));
076b876e
AM
14337+
14338+ err = 0;
14339+ if (IS_ROOT(dentry))
14340+ goto out;
5afbbe0d 14341+ cpg.bsrc = au_dbtop(dentry);
076b876e
AM
14342+ if (!cpg.bsrc)
14343+ goto out;
14344+
14345+ sb = dentry->d_sb;
14346+ sbinfo = au_sbi(sb);
14347+ fhsm = &sbinfo->si_fhsm;
14348+ pid = au_fhsm_pid(fhsm);
ae9dfd79
AM
14349+ rcu_read_lock();
14350+ matched = (pid
14351+ && (current->pid == pid
14352+ || rcu_dereference(current->real_parent)->pid == pid));
14353+ rcu_read_unlock();
14354+ if (matched)
076b876e
AM
14355+ goto out;
14356+
14357+ br = au_sbr(sb, cpg.bsrc);
14358+ cmoo = au_br_cmoo(br->br_perm);
14359+ if (!cmoo)
14360+ goto out;
7e9cd9fe 14361+ if (!d_is_reg(dentry))
076b876e
AM
14362+ cmoo &= AuBrAttr_COO_ALL;
14363+ if (!cmoo)
14364+ goto out;
14365+
14366+ parent = dget_parent(dentry);
14367+ di_write_lock_parent(parent);
14368+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
14369+ cpg.bdst = err;
14370+ if (unlikely(err < 0)) {
14371+ err = 0; /* there is no upper writable branch */
14372+ goto out_dgrade;
14373+ }
14374+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
14375+
14376+ /* do not respect the coo attrib for the target branch */
14377+ err = au_cpup_dirs(dentry, cpg.bdst);
14378+ if (unlikely(err))
14379+ goto out_dgrade;
14380+
14381+ di_downgrade_lock(parent, AuLock_IR);
14382+ udba = au_opt_udba(sb);
14383+ err = au_pin(&pin, dentry, cpg.bdst, udba,
14384+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
14385+ if (unlikely(err))
14386+ goto out_parent;
14387+
14388+ err = au_sio_cpup_simple(&cpg);
14389+ au_unpin(&pin);
14390+ if (unlikely(err))
14391+ goto out_parent;
14392+ if (!(cmoo & AuBrWAttr_MOO))
14393+ goto out_parent; /* success */
14394+
14395+ err = au_pin(&pin, dentry, cpg.bsrc, udba,
14396+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
14397+ if (unlikely(err))
14398+ goto out_parent;
14399+
14400+ h_path.mnt = au_br_mnt(br);
14401+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
5527c038 14402+ hdir = au_hi(d_inode(parent), cpg.bsrc);
076b876e
AM
14403+ delegated = NULL;
14404+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
14405+ au_unpin(&pin);
14406+ /* todo: keep h_dentry or not? */
14407+ if (unlikely(err == -EWOULDBLOCK)) {
14408+ pr_warn("cannot retry for NFSv4 delegation"
14409+ " for an internal unlink\n");
14410+ iput(delegated);
14411+ }
14412+ if (unlikely(err)) {
14413+ pr_err("unlink %pd after coo failed (%d), ignored\n",
14414+ dentry, err);
14415+ err = 0;
14416+ }
14417+ goto out_parent; /* success */
14418+
14419+out_dgrade:
14420+ di_downgrade_lock(parent, AuLock_IR);
14421+out_parent:
14422+ di_read_unlock(parent, AuLock_IR);
14423+ dput(parent);
14424+out:
14425+ AuTraceErr(err);
14426+ return err;
14427+}
14428+
b912730e 14429+int au_do_open(struct file *file, struct au_do_open_args *args)
1facf9fc 14430+{
ae9dfd79 14431+ int err, aopen = args->aopen;
1facf9fc 14432+ struct dentry *dentry;
076b876e 14433+ struct au_finfo *finfo;
1308ab2a 14434+
ae9dfd79 14435+ if (!aopen)
b912730e
AM
14436+ err = au_finfo_init(file, args->fidir);
14437+ else {
14438+ lockdep_off();
14439+ err = au_finfo_init(file, args->fidir);
14440+ lockdep_on();
14441+ }
4a4d8108
AM
14442+ if (unlikely(err))
14443+ goto out;
1facf9fc 14444+
2000de60 14445+ dentry = file->f_path.dentry;
b912730e 14446+ AuDebugOn(IS_ERR_OR_NULL(dentry));
ae9dfd79
AM
14447+ di_write_lock_child(dentry);
14448+ err = au_cmoo(dentry);
14449+ di_downgrade_lock(dentry, AuLock_IR);
14450+ if (!err)
14451+ err = args->open(file, vfsub_file_flags(file), NULL);
14452+ di_read_unlock(dentry, AuLock_IR);
1facf9fc 14453+
076b876e
AM
14454+ finfo = au_fi(file);
14455+ if (!err) {
14456+ finfo->fi_file = file;
ae9dfd79
AM
14457+ au_hbl_add(&finfo->fi_hlist,
14458+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
076b876e 14459+ }
ae9dfd79 14460+ if (!aopen)
b912730e
AM
14461+ fi_write_unlock(file);
14462+ else {
14463+ lockdep_off();
14464+ fi_write_unlock(file);
14465+ lockdep_on();
14466+ }
4a4d8108 14467+ if (unlikely(err)) {
076b876e 14468+ finfo->fi_hdir = NULL;
ae9dfd79 14469+ au_finfo_fin(file);
1308ab2a 14470+ }
4a4d8108 14471+
4f0767ce 14472+out:
1308ab2a 14473+ return err;
14474+}
dece6358 14475+
4a4d8108 14476+int au_reopen_nondir(struct file *file)
1308ab2a 14477+{
4a4d8108 14478+ int err;
5afbbe0d 14479+ aufs_bindex_t btop;
4a4d8108
AM
14480+ struct dentry *dentry;
14481+ struct file *h_file, *h_file_tmp;
1308ab2a 14482+
2000de60 14483+ dentry = file->f_path.dentry;
5afbbe0d 14484+ btop = au_dbtop(dentry);
4a4d8108 14485+ h_file_tmp = NULL;
5afbbe0d 14486+ if (au_fbtop(file) == btop) {
4a4d8108
AM
14487+ h_file = au_hf_top(file);
14488+ if (file->f_mode == h_file->f_mode)
14489+ return 0; /* success */
14490+ h_file_tmp = h_file;
14491+ get_file(h_file_tmp);
5afbbe0d 14492+ au_set_h_fptr(file, btop, NULL);
4a4d8108
AM
14493+ }
14494+ AuDebugOn(au_fi(file)->fi_hdir);
86dc4139
AM
14495+ /*
14496+ * it can happen
14497+ * file exists on both of rw and ro
5afbbe0d 14498+ * open --> dbtop and fbtop are both 0
86dc4139
AM
14499+ * prepend a branch as rw, "rw" become ro
14500+ * remove rw/file
14501+ * delete the top branch, "rw" becomes rw again
5afbbe0d
AM
14502+ * --> dbtop is 1, fbtop is still 0
14503+ * write --> fbtop is 0 but dbtop is 1
86dc4139 14504+ */
5afbbe0d 14505+ /* AuDebugOn(au_fbtop(file) < btop); */
1308ab2a 14506+
5afbbe0d 14507+ h_file = au_h_open(dentry, btop, vfsub_file_flags(file) & ~O_TRUNC,
392086de 14508+ file, /*force_wr*/0);
4a4d8108 14509+ err = PTR_ERR(h_file);
86dc4139
AM
14510+ if (IS_ERR(h_file)) {
14511+ if (h_file_tmp) {
5afbbe0d
AM
14512+ au_sbr_get(dentry->d_sb, btop);
14513+ au_set_h_fptr(file, btop, h_file_tmp);
86dc4139
AM
14514+ h_file_tmp = NULL;
14515+ }
4a4d8108 14516+ goto out; /* todo: close all? */
86dc4139 14517+ }
4a4d8108
AM
14518+
14519+ err = 0;
5afbbe0d
AM
14520+ au_set_fbtop(file, btop);
14521+ au_set_h_fptr(file, btop, h_file);
4a4d8108
AM
14522+ au_update_figen(file);
14523+ /* todo: necessary? */
14524+ /* file->f_ra = h_file->f_ra; */
14525+
4f0767ce 14526+out:
4a4d8108
AM
14527+ if (h_file_tmp)
14528+ fput(h_file_tmp);
14529+ return err;
1facf9fc 14530+}
14531+
1308ab2a 14532+/* ---------------------------------------------------------------------- */
14533+
4a4d8108
AM
14534+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
14535+ struct dentry *hi_wh)
1facf9fc 14536+{
4a4d8108 14537+ int err;
5afbbe0d 14538+ aufs_bindex_t btop;
4a4d8108
AM
14539+ struct au_dinfo *dinfo;
14540+ struct dentry *h_dentry;
14541+ struct au_hdentry *hdp;
1facf9fc 14542+
2000de60 14543+ dinfo = au_di(file->f_path.dentry);
4a4d8108 14544+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 14545+
5afbbe0d
AM
14546+ btop = dinfo->di_btop;
14547+ dinfo->di_btop = btgt;
14548+ hdp = au_hdentry(dinfo, btgt);
14549+ h_dentry = hdp->hd_dentry;
14550+ hdp->hd_dentry = hi_wh;
4a4d8108 14551+ err = au_reopen_nondir(file);
5afbbe0d
AM
14552+ hdp->hd_dentry = h_dentry;
14553+ dinfo->di_btop = btop;
1facf9fc 14554+
1facf9fc 14555+ return err;
14556+}
14557+
4a4d8108 14558+static int au_ready_to_write_wh(struct file *file, loff_t len,
86dc4139 14559+ aufs_bindex_t bcpup, struct au_pin *pin)
1facf9fc 14560+{
4a4d8108 14561+ int err;
027c5e7a 14562+ struct inode *inode, *h_inode;
c2b27bf2
AM
14563+ struct dentry *h_dentry, *hi_wh;
14564+ struct au_cp_generic cpg = {
2000de60 14565+ .dentry = file->f_path.dentry,
c2b27bf2
AM
14566+ .bdst = bcpup,
14567+ .bsrc = -1,
14568+ .len = len,
14569+ .pin = pin
14570+ };
1facf9fc 14571+
5afbbe0d 14572+ au_update_dbtop(cpg.dentry);
5527c038 14573+ inode = d_inode(cpg.dentry);
027c5e7a 14574+ h_inode = NULL;
5afbbe0d
AM
14575+ if (au_dbtop(cpg.dentry) <= bcpup
14576+ && au_dbbot(cpg.dentry) >= bcpup) {
c2b27bf2 14577+ h_dentry = au_h_dptr(cpg.dentry, bcpup);
5527c038
JR
14578+ if (h_dentry && d_is_positive(h_dentry))
14579+ h_inode = d_inode(h_dentry);
027c5e7a 14580+ }
4a4d8108 14581+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 14582+ if (!hi_wh && !h_inode)
c2b27bf2 14583+ err = au_sio_cpup_wh(&cpg, file);
4a4d8108
AM
14584+ else
14585+ /* already copied-up after unlink */
14586+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 14587+
4a4d8108 14588+ if (!err
38d290e6
JR
14589+ && (inode->i_nlink > 1
14590+ || (inode->i_state & I_LINKABLE))
c2b27bf2
AM
14591+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
14592+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
1308ab2a 14593+
dece6358 14594+ return err;
1facf9fc 14595+}
14596+
4a4d8108
AM
14597+/*
14598+ * prepare the @file for writing.
14599+ */
14600+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 14601+{
4a4d8108 14602+ int err;
5afbbe0d 14603+ aufs_bindex_t dbtop;
c1595e42 14604+ struct dentry *parent;
86dc4139 14605+ struct inode *inode;
1facf9fc 14606+ struct super_block *sb;
4a4d8108 14607+ struct file *h_file;
c2b27bf2 14608+ struct au_cp_generic cpg = {
2000de60 14609+ .dentry = file->f_path.dentry,
c2b27bf2
AM
14610+ .bdst = -1,
14611+ .bsrc = -1,
14612+ .len = len,
14613+ .pin = pin,
14614+ .flags = AuCpup_DTIME
14615+ };
1facf9fc 14616+
c2b27bf2 14617+ sb = cpg.dentry->d_sb;
5527c038 14618+ inode = d_inode(cpg.dentry);
5afbbe0d 14619+ cpg.bsrc = au_fbtop(file);
c2b27bf2 14620+ err = au_test_ro(sb, cpg.bsrc, inode);
4a4d8108 14621+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
c2b27bf2
AM
14622+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
14623+ /*flags*/0);
1facf9fc 14624+ goto out;
4a4d8108 14625+ }
1facf9fc 14626+
027c5e7a 14627+ /* need to cpup or reopen */
c2b27bf2 14628+ parent = dget_parent(cpg.dentry);
4a4d8108 14629+ di_write_lock_parent(parent);
c2b27bf2
AM
14630+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
14631+ cpg.bdst = err;
4a4d8108
AM
14632+ if (unlikely(err < 0))
14633+ goto out_dgrade;
14634+ err = 0;
14635+
c2b27bf2
AM
14636+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
14637+ err = au_cpup_dirs(cpg.dentry, cpg.bdst);
1facf9fc 14638+ if (unlikely(err))
4a4d8108
AM
14639+ goto out_dgrade;
14640+ }
14641+
c2b27bf2 14642+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108
AM
14643+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
14644+ if (unlikely(err))
14645+ goto out_dgrade;
14646+
5afbbe0d
AM
14647+ dbtop = au_dbtop(cpg.dentry);
14648+ if (dbtop <= cpg.bdst)
c2b27bf2 14649+ cpg.bsrc = cpg.bdst;
027c5e7a 14650+
5afbbe0d 14651+ if (dbtop <= cpg.bdst /* just reopen */
c2b27bf2 14652+ || !d_unhashed(cpg.dentry) /* copyup and reopen */
027c5e7a 14653+ ) {
392086de 14654+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
86dc4139 14655+ if (IS_ERR(h_file))
027c5e7a 14656+ err = PTR_ERR(h_file);
86dc4139 14657+ else {
027c5e7a 14658+ di_downgrade_lock(parent, AuLock_IR);
5afbbe0d 14659+ if (dbtop > cpg.bdst)
c2b27bf2 14660+ err = au_sio_cpup_simple(&cpg);
027c5e7a
AM
14661+ if (!err)
14662+ err = au_reopen_nondir(file);
c2b27bf2 14663+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
027c5e7a 14664+ }
027c5e7a
AM
14665+ } else { /* copyup as wh and reopen */
14666+ /*
14667+ * since writable hfsplus branch is not supported,
14668+ * h_open_pre/post() are unnecessary.
14669+ */
c2b27bf2 14670+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
4a4d8108 14671+ di_downgrade_lock(parent, AuLock_IR);
4a4d8108 14672+ }
4a4d8108
AM
14673+
14674+ if (!err) {
14675+ au_pin_set_parent_lflag(pin, /*lflag*/0);
14676+ goto out_dput; /* success */
14677+ }
14678+ au_unpin(pin);
14679+ goto out_unlock;
1facf9fc 14680+
4f0767ce 14681+out_dgrade:
4a4d8108 14682+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 14683+out_unlock:
4a4d8108 14684+ di_read_unlock(parent, AuLock_IR);
4f0767ce 14685+out_dput:
4a4d8108 14686+ dput(parent);
4f0767ce 14687+out:
1facf9fc 14688+ return err;
14689+}
14690+
4a4d8108
AM
14691+/* ---------------------------------------------------------------------- */
14692+
14693+int au_do_flush(struct file *file, fl_owner_t id,
14694+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 14695+{
4a4d8108 14696+ int err;
1facf9fc 14697+ struct super_block *sb;
4a4d8108 14698+ struct inode *inode;
1facf9fc 14699+
c06a8ce3
AM
14700+ inode = file_inode(file);
14701+ sb = inode->i_sb;
4a4d8108
AM
14702+ si_noflush_read_lock(sb);
14703+ fi_read_lock(file);
b752ccd1 14704+ ii_read_lock_child(inode);
1facf9fc 14705+
4a4d8108
AM
14706+ err = flush(file, id);
14707+ au_cpup_attr_timesizes(inode);
1facf9fc 14708+
b752ccd1 14709+ ii_read_unlock(inode);
4a4d8108 14710+ fi_read_unlock(file);
1308ab2a 14711+ si_read_unlock(sb);
dece6358 14712+ return err;
1facf9fc 14713+}
14714+
4a4d8108
AM
14715+/* ---------------------------------------------------------------------- */
14716+
14717+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 14718+{
4a4d8108 14719+ int err;
4a4d8108
AM
14720+ struct au_pin pin;
14721+ struct au_finfo *finfo;
c2b27bf2 14722+ struct dentry *parent, *hi_wh;
4a4d8108 14723+ struct inode *inode;
1facf9fc 14724+ struct super_block *sb;
c2b27bf2 14725+ struct au_cp_generic cpg = {
2000de60 14726+ .dentry = file->f_path.dentry,
c2b27bf2
AM
14727+ .bdst = -1,
14728+ .bsrc = -1,
14729+ .len = -1,
14730+ .pin = &pin,
14731+ .flags = AuCpup_DTIME
14732+ };
1facf9fc 14733+
4a4d8108
AM
14734+ FiMustWriteLock(file);
14735+
14736+ err = 0;
14737+ finfo = au_fi(file);
c2b27bf2 14738+ sb = cpg.dentry->d_sb;
5527c038 14739+ inode = d_inode(cpg.dentry);
5afbbe0d 14740+ cpg.bdst = au_ibtop(inode);
c2b27bf2 14741+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
1308ab2a 14742+ goto out;
dece6358 14743+
c2b27bf2
AM
14744+ parent = dget_parent(cpg.dentry);
14745+ if (au_test_ro(sb, cpg.bdst, inode)) {
4a4d8108 14746+ di_read_lock_parent(parent, !AuLock_IR);
c2b27bf2
AM
14747+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
14748+ cpg.bdst = err;
4a4d8108
AM
14749+ di_read_unlock(parent, !AuLock_IR);
14750+ if (unlikely(err < 0))
14751+ goto out_parent;
14752+ err = 0;
1facf9fc 14753+ }
1facf9fc 14754+
4a4d8108 14755+ di_read_lock_parent(parent, AuLock_IR);
c2b27bf2 14756+ hi_wh = au_hi_wh(inode, cpg.bdst);
7f207e10
AM
14757+ if (!S_ISDIR(inode->i_mode)
14758+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108 14759+ && au_plink_test(inode)
c2b27bf2 14760+ && !d_unhashed(cpg.dentry)
5afbbe0d 14761+ && cpg.bdst < au_dbtop(cpg.dentry)) {
c2b27bf2 14762+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
4a4d8108
AM
14763+ if (unlikely(err))
14764+ goto out_unlock;
14765+
14766+ /* always superio. */
c2b27bf2 14767+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108 14768+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 14769+ if (!err) {
c2b27bf2 14770+ err = au_sio_cpup_simple(&cpg);
367653fa
AM
14771+ au_unpin(&pin);
14772+ }
4a4d8108
AM
14773+ } else if (hi_wh) {
14774+ /* already copied-up after unlink */
c2b27bf2 14775+ err = au_reopen_wh(file, cpg.bdst, hi_wh);
4a4d8108
AM
14776+ *need_reopen = 0;
14777+ }
1facf9fc 14778+
4f0767ce 14779+out_unlock:
4a4d8108 14780+ di_read_unlock(parent, AuLock_IR);
4f0767ce 14781+out_parent:
4a4d8108 14782+ dput(parent);
4f0767ce 14783+out:
1308ab2a 14784+ return err;
dece6358 14785+}
1facf9fc 14786+
4a4d8108 14787+static void au_do_refresh_dir(struct file *file)
dece6358 14788+{
5afbbe0d 14789+ aufs_bindex_t bindex, bbot, new_bindex, brid;
4a4d8108
AM
14790+ struct au_hfile *p, tmp, *q;
14791+ struct au_finfo *finfo;
1308ab2a 14792+ struct super_block *sb;
4a4d8108 14793+ struct au_fidir *fidir;
1facf9fc 14794+
4a4d8108 14795+ FiMustWriteLock(file);
1facf9fc 14796+
2000de60 14797+ sb = file->f_path.dentry->d_sb;
4a4d8108
AM
14798+ finfo = au_fi(file);
14799+ fidir = finfo->fi_hdir;
14800+ AuDebugOn(!fidir);
14801+ p = fidir->fd_hfile + finfo->fi_btop;
14802+ brid = p->hf_br->br_id;
5afbbe0d
AM
14803+ bbot = fidir->fd_bbot;
14804+ for (bindex = finfo->fi_btop; bindex <= bbot; bindex++, p++) {
4a4d8108
AM
14805+ if (!p->hf_file)
14806+ continue;
1308ab2a 14807+
4a4d8108
AM
14808+ new_bindex = au_br_index(sb, p->hf_br->br_id);
14809+ if (new_bindex == bindex)
14810+ continue;
14811+ if (new_bindex < 0) {
14812+ au_set_h_fptr(file, bindex, NULL);
14813+ continue;
14814+ }
1308ab2a 14815+
4a4d8108
AM
14816+ /* swap two lower inode, and loop again */
14817+ q = fidir->fd_hfile + new_bindex;
14818+ tmp = *q;
14819+ *q = *p;
14820+ *p = tmp;
14821+ if (tmp.hf_file) {
14822+ bindex--;
14823+ p--;
14824+ }
14825+ }
1308ab2a 14826+
4a4d8108 14827+ p = fidir->fd_hfile;
2000de60 14828+ if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
5afbbe0d
AM
14829+ bbot = au_sbbot(sb);
14830+ for (finfo->fi_btop = 0; finfo->fi_btop <= bbot;
4a4d8108
AM
14831+ finfo->fi_btop++, p++)
14832+ if (p->hf_file) {
c06a8ce3 14833+ if (file_inode(p->hf_file))
4a4d8108 14834+ break;
ae9dfd79 14835+ au_hfput(p, /*execed*/0);
4a4d8108
AM
14836+ }
14837+ } else {
5afbbe0d
AM
14838+ bbot = au_br_index(sb, brid);
14839+ for (finfo->fi_btop = 0; finfo->fi_btop < bbot;
4a4d8108
AM
14840+ finfo->fi_btop++, p++)
14841+ if (p->hf_file)
ae9dfd79 14842+ au_hfput(p, /*execed*/0);
5afbbe0d 14843+ bbot = au_sbbot(sb);
4a4d8108 14844+ }
1308ab2a 14845+
5afbbe0d
AM
14846+ p = fidir->fd_hfile + bbot;
14847+ for (fidir->fd_bbot = bbot; fidir->fd_bbot >= finfo->fi_btop;
4a4d8108
AM
14848+ fidir->fd_bbot--, p--)
14849+ if (p->hf_file) {
c06a8ce3 14850+ if (file_inode(p->hf_file))
4a4d8108 14851+ break;
ae9dfd79 14852+ au_hfput(p, /*execed*/0);
4a4d8108
AM
14853+ }
14854+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 14855+}
14856+
4a4d8108
AM
14857+/*
14858+ * after branch manipulating, refresh the file.
14859+ */
14860+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 14861+{
e2f27e51 14862+ int err, need_reopen, nbr;
5afbbe0d 14863+ aufs_bindex_t bbot, bindex;
4a4d8108 14864+ struct dentry *dentry;
e2f27e51 14865+ struct super_block *sb;
1308ab2a 14866+ struct au_finfo *finfo;
4a4d8108 14867+ struct au_hfile *hfile;
1facf9fc 14868+
2000de60 14869+ dentry = file->f_path.dentry;
e2f27e51
AM
14870+ sb = dentry->d_sb;
14871+ nbr = au_sbbot(sb) + 1;
1308ab2a 14872+ finfo = au_fi(file);
4a4d8108
AM
14873+ if (!finfo->fi_hdir) {
14874+ hfile = &finfo->fi_htop;
14875+ AuDebugOn(!hfile->hf_file);
e2f27e51 14876+ bindex = au_br_index(sb, hfile->hf_br->br_id);
4a4d8108
AM
14877+ AuDebugOn(bindex < 0);
14878+ if (bindex != finfo->fi_btop)
5afbbe0d 14879+ au_set_fbtop(file, bindex);
4a4d8108 14880+ } else {
e2f27e51 14881+ err = au_fidir_realloc(finfo, nbr, /*may_shrink*/0);
4a4d8108
AM
14882+ if (unlikely(err))
14883+ goto out;
14884+ au_do_refresh_dir(file);
14885+ }
1facf9fc 14886+
4a4d8108
AM
14887+ err = 0;
14888+ need_reopen = 1;
14889+ if (!au_test_mmapped(file))
14890+ err = au_file_refresh_by_inode(file, &need_reopen);
e2f27e51
AM
14891+ if (finfo->fi_hdir)
14892+ /* harmless if err */
14893+ au_fidir_realloc(finfo, nbr, /*may_shrink*/1);
027c5e7a 14894+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
14895+ err = reopen(file);
14896+ if (!err) {
14897+ au_update_figen(file);
14898+ goto out; /* success */
14899+ }
14900+
14901+ /* error, close all lower files */
14902+ if (finfo->fi_hdir) {
5afbbe0d
AM
14903+ bbot = au_fbbot_dir(file);
14904+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++)
4a4d8108
AM
14905+ au_set_h_fptr(file, bindex, NULL);
14906+ }
1facf9fc 14907+
4f0767ce 14908+out:
1facf9fc 14909+ return err;
14910+}
14911+
4a4d8108
AM
14912+/* common function to regular file and dir */
14913+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
ae9dfd79 14914+ int wlock, unsigned int fi_lsc)
dece6358 14915+{
1308ab2a 14916+ int err;
4a4d8108 14917+ unsigned int sigen, figen;
5afbbe0d 14918+ aufs_bindex_t btop;
4a4d8108
AM
14919+ unsigned char pseudo_link;
14920+ struct dentry *dentry;
14921+ struct inode *inode;
1facf9fc 14922+
4a4d8108 14923+ err = 0;
2000de60 14924+ dentry = file->f_path.dentry;
5527c038 14925+ inode = d_inode(dentry);
4a4d8108 14926+ sigen = au_sigen(dentry->d_sb);
ae9dfd79 14927+ fi_write_lock_nested(file, fi_lsc);
4a4d8108 14928+ figen = au_figen(file);
ae9dfd79
AM
14929+ if (!fi_lsc)
14930+ di_write_lock_child(dentry);
14931+ else
14932+ di_write_lock_child2(dentry);
5afbbe0d
AM
14933+ btop = au_dbtop(dentry);
14934+ pseudo_link = (btop != au_ibtop(inode));
14935+ if (sigen == figen && !pseudo_link && au_fbtop(file) == btop) {
4a4d8108
AM
14936+ if (!wlock) {
14937+ di_downgrade_lock(dentry, AuLock_IR);
14938+ fi_downgrade_lock(file);
14939+ }
14940+ goto out; /* success */
14941+ }
dece6358 14942+
4a4d8108 14943+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 14944+ if (au_digen_test(dentry, sigen)) {
4a4d8108 14945+ err = au_reval_dpath(dentry, sigen);
027c5e7a 14946+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 14947+ }
dece6358 14948+
027c5e7a
AM
14949+ if (!err)
14950+ err = refresh_file(file, reopen);
4a4d8108
AM
14951+ if (!err) {
14952+ if (!wlock) {
14953+ di_downgrade_lock(dentry, AuLock_IR);
14954+ fi_downgrade_lock(file);
14955+ }
14956+ } else {
14957+ di_write_unlock(dentry);
14958+ fi_write_unlock(file);
14959+ }
1facf9fc 14960+
4f0767ce 14961+out:
1308ab2a 14962+ return err;
14963+}
1facf9fc 14964+
4a4d8108
AM
14965+/* ---------------------------------------------------------------------- */
14966+
14967+/* cf. aufs_nopage() */
14968+/* for madvise(2) */
14969+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 14970+{
4a4d8108
AM
14971+ unlock_page(page);
14972+ return 0;
14973+}
1facf9fc 14974+
4a4d8108 14975+/* it will never be called, but necessary to support O_DIRECT */
5afbbe0d 14976+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
4a4d8108 14977+{ BUG(); return 0; }
1facf9fc 14978+
4a4d8108
AM
14979+/* they will never be called. */
14980+#ifdef CONFIG_AUFS_DEBUG
14981+static int aufs_write_begin(struct file *file, struct address_space *mapping,
14982+ loff_t pos, unsigned len, unsigned flags,
14983+ struct page **pagep, void **fsdata)
14984+{ AuUnsupport(); return 0; }
14985+static int aufs_write_end(struct file *file, struct address_space *mapping,
14986+ loff_t pos, unsigned len, unsigned copied,
14987+ struct page *page, void *fsdata)
14988+{ AuUnsupport(); return 0; }
14989+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
14990+{ AuUnsupport(); return 0; }
1308ab2a 14991+
4a4d8108
AM
14992+static int aufs_set_page_dirty(struct page *page)
14993+{ AuUnsupport(); return 0; }
392086de
AM
14994+static void aufs_invalidatepage(struct page *page, unsigned int offset,
14995+ unsigned int length)
4a4d8108
AM
14996+{ AuUnsupport(); }
14997+static int aufs_releasepage(struct page *page, gfp_t gfp)
14998+{ AuUnsupport(); return 0; }
79b8bda9 14999+#if 0 /* called by memory compaction regardless file */
4a4d8108 15000+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
7eafdf33 15001+ struct page *page, enum migrate_mode mode)
4a4d8108 15002+{ AuUnsupport(); return 0; }
79b8bda9 15003+#endif
e2f27e51
AM
15004+static bool aufs_isolate_page(struct page *page, isolate_mode_t mode)
15005+{ AuUnsupport(); return true; }
15006+static void aufs_putback_page(struct page *page)
15007+{ AuUnsupport(); }
4a4d8108
AM
15008+static int aufs_launder_page(struct page *page)
15009+{ AuUnsupport(); return 0; }
15010+static int aufs_is_partially_uptodate(struct page *page,
38d290e6
JR
15011+ unsigned long from,
15012+ unsigned long count)
4a4d8108 15013+{ AuUnsupport(); return 0; }
392086de
AM
15014+static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
15015+ bool *writeback)
15016+{ AuUnsupport(); }
4a4d8108
AM
15017+static int aufs_error_remove_page(struct address_space *mapping,
15018+ struct page *page)
15019+{ AuUnsupport(); return 0; }
b4510431
AM
15020+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
15021+ sector_t *span)
15022+{ AuUnsupport(); return 0; }
15023+static void aufs_swap_deactivate(struct file *file)
15024+{ AuUnsupport(); }
4a4d8108
AM
15025+#endif /* CONFIG_AUFS_DEBUG */
15026+
15027+const struct address_space_operations aufs_aop = {
15028+ .readpage = aufs_readpage,
15029+ .direct_IO = aufs_direct_IO,
4a4d8108
AM
15030+#ifdef CONFIG_AUFS_DEBUG
15031+ .writepage = aufs_writepage,
4a4d8108
AM
15032+ /* no writepages, because of writepage */
15033+ .set_page_dirty = aufs_set_page_dirty,
15034+ /* no readpages, because of readpage */
15035+ .write_begin = aufs_write_begin,
15036+ .write_end = aufs_write_end,
15037+ /* no bmap, no block device */
15038+ .invalidatepage = aufs_invalidatepage,
15039+ .releasepage = aufs_releasepage,
79b8bda9
AM
15040+ /* is fallback_migrate_page ok? */
15041+ /* .migratepage = aufs_migratepage, */
e2f27e51
AM
15042+ .isolate_page = aufs_isolate_page,
15043+ .putback_page = aufs_putback_page,
4a4d8108
AM
15044+ .launder_page = aufs_launder_page,
15045+ .is_partially_uptodate = aufs_is_partially_uptodate,
392086de 15046+ .is_dirty_writeback = aufs_is_dirty_writeback,
b4510431
AM
15047+ .error_remove_page = aufs_error_remove_page,
15048+ .swap_activate = aufs_swap_activate,
15049+ .swap_deactivate = aufs_swap_deactivate
e8791d4f
AM
15050+#endif /* CONFIG_AUFS_DEBUG */
15051+};
15052diff -urNp -x '*.orig' linux-4.9/fs/aufs/file.h linux-4.9/fs/aufs/file.h
15053--- linux-4.9/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
15054+++ linux-4.9/fs/aufs/file.h 2021-02-24 16:15:09.528240413 +0100
15055@@ -0,0 +1,330 @@
dece6358 15056+/*
ae9dfd79 15057+ * Copyright (C) 2005-2018 Junjiro R. Okajima
dece6358
AM
15058+ *
15059+ * This program, aufs is free software; you can redistribute it and/or modify
15060+ * it under the terms of the GNU General Public License as published by
15061+ * the Free Software Foundation; either version 2 of the License, or
15062+ * (at your option) any later version.
15063+ *
15064+ * This program is distributed in the hope that it will be useful,
15065+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15066+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15067+ * GNU General Public License for more details.
15068+ *
15069+ * You should have received a copy of the GNU General Public License
523b37e3 15070+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 15071+ */
1facf9fc 15072+
15073+/*
e8791d4f 15074+ * file operations
1facf9fc 15075+ */
dece6358 15076+
e8791d4f
AM
15077+#ifndef __AUFS_FILE_H__
15078+#define __AUFS_FILE_H__
027c5e7a 15079+
e8791d4f 15080+#ifdef __KERNEL__
1facf9fc 15081+
e8791d4f
AM
15082+#include <linux/file.h>
15083+#include <linux/fs.h>
15084+#include <linux/poll.h>
15085+#include "rwsem.h"
1facf9fc 15086+
e8791d4f
AM
15087+struct au_branch;
15088+struct au_hfile {
15089+ struct file *hf_file;
15090+ struct au_branch *hf_br;
15091+};
1facf9fc 15092+
e8791d4f
AM
15093+struct au_vdir;
15094+struct au_fidir {
15095+ aufs_bindex_t fd_bbot;
15096+ aufs_bindex_t fd_nent;
15097+ struct au_vdir *fd_vdir_cache;
15098+ struct au_hfile fd_hfile[];
15099+};
1facf9fc 15100+
e8791d4f 15101+static inline int au_fidir_sz(int nent)
4a4d8108 15102+{
e8791d4f
AM
15103+ AuDebugOn(nent < 0);
15104+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
1facf9fc 15105+}
15106+
e8791d4f
AM
15107+struct au_finfo {
15108+ atomic_t fi_generation;
4a4d8108 15109+
e8791d4f
AM
15110+ struct au_rwsem fi_rwsem;
15111+ aufs_bindex_t fi_btop;
4a4d8108 15112+
e8791d4f
AM
15113+ /* do not union them */
15114+ struct { /* for non-dir */
15115+ struct au_hfile fi_htop;
15116+ atomic_t fi_mmapped;
15117+ };
15118+ struct au_fidir *fi_hdir; /* for dir only */
4a4d8108 15119+
e8791d4f
AM
15120+ struct hlist_bl_node fi_hlist;
15121+ struct file *fi_file; /* very ugly */
15122+} ____cacheline_aligned_in_smp;
4a4d8108
AM
15123+
15124+/* ---------------------------------------------------------------------- */
b912730e 15125+
e8791d4f
AM
15126+/* file.c */
15127+extern const struct address_space_operations aufs_aop;
15128+unsigned int au_file_roflags(unsigned int flags);
15129+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
15130+ struct file *file, int force_wr);
15131+struct au_do_open_args {
15132+ int aopen;
15133+ int (*open)(struct file *file, int flags,
15134+ struct file *h_file);
15135+ struct au_fidir *fidir;
15136+ struct file *h_file;
15137+};
15138+int au_do_open(struct file *file, struct au_do_open_args *args);
15139+int au_reopen_nondir(struct file *file);
15140+struct au_pin;
15141+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
15142+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
15143+ int wlock, unsigned int fi_lsc);
15144+int au_do_flush(struct file *file, fl_owner_t id,
15145+ int (*flush)(struct file *file, fl_owner_t id));
b912730e 15146+
e8791d4f
AM
15147+/* poll.c */
15148+#ifdef CONFIG_AUFS_POLL
15149+unsigned int aufs_poll(struct file *file, poll_table *wait);
15150+#endif
b912730e 15151+
e8791d4f
AM
15152+#ifdef CONFIG_AUFS_BR_HFSPLUS
15153+/* hfsplus.c */
15154+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
15155+ int force_wr);
15156+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
15157+ struct file *h_file);
15158+#else
15159+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
15160+ aufs_bindex_t bindex, int force_wr)
15161+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
15162+ struct file *h_file);
15163+#endif
ae9dfd79 15164+
e8791d4f
AM
15165+/* f_op.c */
15166+extern const struct file_operations aufs_file_fop;
15167+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
15168+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
15169+struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc);
b912730e 15170+
e8791d4f
AM
15171+/* finfo.c */
15172+void au_hfput(struct au_hfile *hf, int execed);
15173+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
15174+ struct file *h_file);
b912730e 15175+
e8791d4f
AM
15176+void au_update_figen(struct file *file);
15177+struct au_fidir *au_fidir_alloc(struct super_block *sb);
15178+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink);
1facf9fc 15179+
e8791d4f
AM
15180+void au_fi_init_once(void *_fi);
15181+void au_finfo_fin(struct file *file);
15182+int au_finfo_init(struct file *file, struct au_fidir *fidir);
b912730e 15183+
e8791d4f
AM
15184+/* ioctl.c */
15185+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
15186+#ifdef CONFIG_COMPAT
15187+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
15188+ unsigned long arg);
15189+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
15190+ unsigned long arg);
15191+#endif
b912730e 15192+
e8791d4f 15193+/* ---------------------------------------------------------------------- */
b912730e 15194+
e8791d4f 15195+static inline struct au_finfo *au_fi(struct file *file)
b912730e 15196+{
e8791d4f 15197+ return file->private_data;
b912730e
AM
15198+}
15199+
e8791d4f 15200+/* ---------------------------------------------------------------------- */
1facf9fc 15201+
e49829fe 15202+/*
e8791d4f
AM
15203+ * fi_read_lock, fi_write_lock,
15204+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
e49829fe 15205+ */
e8791d4f 15206+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
e49829fe 15207+
e8791d4f
AM
15208+/* lock subclass for finfo */
15209+enum {
15210+ AuLsc_FI_1,
15211+ AuLsc_FI_2
15212+};
e49829fe 15213+
e8791d4f 15214+static inline void fi_read_lock_nested(struct file *f, unsigned int lsc)
dece6358 15215+{
e8791d4f 15216+ au_rw_read_lock_nested(&au_fi(f)->fi_rwsem, lsc);
dece6358 15217+}
1facf9fc 15218+
e8791d4f 15219+static inline void fi_write_lock_nested(struct file *f, unsigned int lsc)
dece6358 15220+{
e8791d4f 15221+ au_rw_write_lock_nested(&au_fi(f)->fi_rwsem, lsc);
dece6358 15222+}
1facf9fc 15223+
e8791d4f
AM
15224+/*
15225+ * fi_read_lock_1, fi_write_lock_1,
15226+ * fi_read_lock_2, fi_write_lock_2
15227+ */
15228+#define AuReadLockFunc(name) \
15229+static inline void fi_read_lock_##name(struct file *f) \
15230+{ fi_read_lock_nested(f, AuLsc_FI_##name); }
1facf9fc 15231+
e8791d4f
AM
15232+#define AuWriteLockFunc(name) \
15233+static inline void fi_write_lock_##name(struct file *f) \
15234+{ fi_write_lock_nested(f, AuLsc_FI_##name); }
1facf9fc 15235+
e8791d4f
AM
15236+#define AuRWLockFuncs(name) \
15237+ AuReadLockFunc(name) \
15238+ AuWriteLockFunc(name)
1308ab2a 15239+
e8791d4f
AM
15240+AuRWLockFuncs(1);
15241+AuRWLockFuncs(2);
e49829fe 15242+
e8791d4f
AM
15243+#undef AuReadLockFunc
15244+#undef AuWriteLockFunc
15245+#undef AuRWLockFuncs
9dbd164d 15246+
e8791d4f
AM
15247+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
15248+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
15249+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 15250+
e8791d4f 15251+/* ---------------------------------------------------------------------- */
1facf9fc 15252+
e8791d4f
AM
15253+/* todo: hard/soft set? */
15254+static inline aufs_bindex_t au_fbtop(struct file *file)
1facf9fc 15255+{
e8791d4f
AM
15256+ FiMustAnyLock(file);
15257+ return au_fi(file)->fi_btop;
1facf9fc 15258+}
15259+
e8791d4f 15260+static inline aufs_bindex_t au_fbbot_dir(struct file *file)
1facf9fc 15261+{
e8791d4f
AM
15262+ FiMustAnyLock(file);
15263+ AuDebugOn(!au_fi(file)->fi_hdir);
15264+ return au_fi(file)->fi_hdir->fd_bbot;
15265+}
1facf9fc 15266+
e8791d4f
AM
15267+static inline struct au_vdir *au_fvdir_cache(struct file *file)
15268+{
15269+ FiMustAnyLock(file);
15270+ AuDebugOn(!au_fi(file)->fi_hdir);
15271+ return au_fi(file)->fi_hdir->fd_vdir_cache;
15272+}
9dbd164d 15273+
e8791d4f
AM
15274+static inline void au_set_fbtop(struct file *file, aufs_bindex_t bindex)
15275+{
15276+ FiMustWriteLock(file);
15277+ au_fi(file)->fi_btop = bindex;
15278+}
9dbd164d 15279+
e8791d4f
AM
15280+static inline void au_set_fbbot_dir(struct file *file, aufs_bindex_t bindex)
15281+{
15282+ FiMustWriteLock(file);
15283+ AuDebugOn(!au_fi(file)->fi_hdir);
15284+ au_fi(file)->fi_hdir->fd_bbot = bindex;
15285+}
1facf9fc 15286+
e8791d4f
AM
15287+static inline void au_set_fvdir_cache(struct file *file,
15288+ struct au_vdir *vdir_cache)
15289+{
15290+ FiMustWriteLock(file);
15291+ AuDebugOn(!au_fi(file)->fi_hdir);
15292+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
4a4d8108 15293+}
1facf9fc 15294+
e8791d4f 15295+static inline struct file *au_hf_top(struct file *file)
38d290e6 15296+{
e8791d4f
AM
15297+ FiMustAnyLock(file);
15298+ AuDebugOn(au_fi(file)->fi_hdir);
15299+ return au_fi(file)->fi_htop.hf_file;
15300+}
38d290e6 15301+
e8791d4f
AM
15302+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
15303+{
15304+ FiMustAnyLock(file);
15305+ AuDebugOn(!au_fi(file)->fi_hdir);
15306+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
15307+}
38d290e6 15308+
e8791d4f
AM
15309+/* todo: memory barrier? */
15310+static inline unsigned int au_figen(struct file *f)
15311+{
15312+ return atomic_read(&au_fi(f)->fi_generation);
15313+}
38d290e6 15314+
e8791d4f
AM
15315+static inline void au_set_mmapped(struct file *f)
15316+{
15317+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
15318+ return;
15319+ pr_warn("fi_mmapped wrapped around\n");
15320+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
15321+ ;
15322+}
38d290e6 15323+
e8791d4f
AM
15324+static inline void au_unset_mmapped(struct file *f)
15325+{
15326+ atomic_dec(&au_fi(f)->fi_mmapped);
38d290e6
JR
15327+}
15328+
e8791d4f 15329+static inline int au_test_mmapped(struct file *f)
ae9dfd79 15330+{
e8791d4f
AM
15331+ return atomic_read(&au_fi(f)->fi_mmapped);
15332+}
ae9dfd79 15333+
e8791d4f 15334+/* customize vma->vm_file */
ae9dfd79 15335+
e8791d4f
AM
15336+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
15337+ struct file *file)
15338+{
15339+ struct file *f;
ae9dfd79 15340+
e8791d4f
AM
15341+ f = vma->vm_file;
15342+ get_file(file);
15343+ vma->vm_file = file;
15344+ fput(f);
15345+}
ae9dfd79 15346+
e8791d4f
AM
15347+#ifdef CONFIG_MMU
15348+#define AuDbgVmRegion(file, vma) do {} while (0)
ae9dfd79 15349+
e8791d4f
AM
15350+static inline void au_vm_file_reset(struct vm_area_struct *vma,
15351+ struct file *file)
15352+{
15353+ au_do_vm_file_reset(vma, file);
15354+}
15355+#else
15356+#define AuDbgVmRegion(file, vma) \
15357+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
ae9dfd79 15358+
e8791d4f
AM
15359+static inline void au_vm_file_reset(struct vm_area_struct *vma,
15360+ struct file *file)
15361+{
15362+ struct file *f;
ae9dfd79 15363+
e8791d4f
AM
15364+ au_do_vm_file_reset(vma, file);
15365+ f = vma->vm_region->vm_file;
15366+ get_file(file);
15367+ vma->vm_region->vm_file = file;
15368+ fput(f);
ae9dfd79 15369+}
e8791d4f 15370+#endif /* CONFIG_MMU */
ae9dfd79 15371+
e8791d4f
AM
15372+/* handle vma->vm_prfile */
15373+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
15374+ struct file *file)
15375+{
15376+ get_file(file);
15377+ vma->vm_prfile = file;
15378+#ifndef CONFIG_MMU
15379+ get_file(file);
15380+ vma->vm_region->vm_prfile = file;
15381+#endif
15382+}
4a4d8108 15383+
e8791d4f
AM
15384+#endif /* __KERNEL__ */
15385+#endif /* __AUFS_FILE_H__ */
15386diff -urNp -x '*.orig' linux-4.9/fs/aufs/finfo.c linux-4.9/fs/aufs/finfo.c
15387--- linux-4.9/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
15388+++ linux-4.9/fs/aufs/finfo.c 2021-02-24 16:15:09.528240413 +0100
15389@@ -0,0 +1,148 @@
9dbd164d 15390+/*
e8791d4f 15391+ * Copyright (C) 2005-2018 Junjiro R. Okajima
9dbd164d 15392+ *
e8791d4f
AM
15393+ * This program, aufs is free software; you can redistribute it and/or modify
15394+ * it under the terms of the GNU General Public License as published by
15395+ * the Free Software Foundation; either version 2 of the License, or
15396+ * (at your option) any later version.
15397+ *
15398+ * This program is distributed in the hope that it will be useful,
15399+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15400+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15401+ * GNU General Public License for more details.
15402+ *
15403+ * You should have received a copy of the GNU General Public License
15404+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
9dbd164d
AM
15405+ */
15406+
e8791d4f
AM
15407+/*
15408+ * file private data
15409+ */
2dfbb274 15410+
e8791d4f
AM
15411+#include "aufs.h"
15412+
15413+void au_hfput(struct au_hfile *hf, int execed)
2dfbb274 15414+{
e8791d4f
AM
15415+ if (execed)
15416+ allow_write_access(hf->hf_file);
15417+ fput(hf->hf_file);
15418+ hf->hf_file = NULL;
15419+ au_br_put(hf->hf_br);
15420+ hf->hf_br = NULL;
2dfbb274
AM
15421+}
15422+
e8791d4f 15423+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
2dfbb274 15424+{
e8791d4f
AM
15425+ struct au_finfo *finfo = au_fi(file);
15426+ struct au_hfile *hf;
15427+ struct au_fidir *fidir;
2dfbb274 15428+
e8791d4f
AM
15429+ fidir = finfo->fi_hdir;
15430+ if (!fidir) {
15431+ AuDebugOn(finfo->fi_btop != bindex);
15432+ hf = &finfo->fi_htop;
15433+ } else
15434+ hf = fidir->fd_hfile + bindex;
2dfbb274 15435+
e8791d4f
AM
15436+ if (hf && hf->hf_file)
15437+ au_hfput(hf, vfsub_file_execed(file));
15438+ if (val) {
15439+ FiMustWriteLock(file);
15440+ AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
15441+ hf->hf_file = val;
15442+ hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
15443+ }
2dfbb274
AM
15444+}
15445+
e8791d4f 15446+void au_update_figen(struct file *file)
dece6358 15447+{
e8791d4f
AM
15448+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
15449+ /* smp_mb(); */ /* atomic_set */
15450+}
1308ab2a 15451+
e8791d4f 15452+/* ---------------------------------------------------------------------- */
4a4d8108 15453+
e8791d4f
AM
15454+struct au_fidir *au_fidir_alloc(struct super_block *sb)
15455+{
15456+ struct au_fidir *fidir;
15457+ int nbr;
1308ab2a 15458+
e8791d4f
AM
15459+ nbr = au_sbbot(sb) + 1;
15460+ if (nbr < 2)
15461+ nbr = 2; /* initial allocate for 2 branches */
15462+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
15463+ if (fidir) {
15464+ fidir->fd_bbot = -1;
15465+ fidir->fd_nent = nbr;
b912730e 15466+ }
b912730e 15467+
e8791d4f 15468+ return fidir;
4a4d8108
AM
15469+}
15470+
e8791d4f 15471+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink)
4a4d8108
AM
15472+{
15473+ int err;
e8791d4f 15474+ struct au_fidir *fidir, *p;
b912730e 15475+
e8791d4f
AM
15476+ AuRwMustWriteLock(&finfo->fi_rwsem);
15477+ fidir = finfo->fi_hdir;
15478+ AuDebugOn(!fidir);
4a4d8108 15479+
e8791d4f
AM
15480+ err = -ENOMEM;
15481+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
15482+ GFP_NOFS, may_shrink);
15483+ if (p) {
15484+ p->fd_nent = nbr;
15485+ finfo->fi_hdir = p;
15486+ err = 0;
15487+ }
4a4d8108 15488+
4a4d8108 15489+ return err;
dece6358
AM
15490+}
15491+
e8791d4f
AM
15492+/* ---------------------------------------------------------------------- */
15493+
15494+void au_finfo_fin(struct file *file)
dece6358 15495+{
e8791d4f 15496+ struct au_finfo *finfo;
1308ab2a 15497+
e8791d4f 15498+ au_nfiles_dec(file->f_path.dentry->d_sb);
b912730e 15499+
e8791d4f
AM
15500+ finfo = au_fi(file);
15501+ AuDebugOn(finfo->fi_hdir);
15502+ AuRwDestroy(&finfo->fi_rwsem);
15503+ au_cache_free_finfo(finfo);
15504+}
4a4d8108 15505+
e8791d4f
AM
15506+void au_fi_init_once(void *_finfo)
15507+{
15508+ struct au_finfo *finfo = _finfo;
1308ab2a 15509+
e8791d4f 15510+ au_rw_init(&finfo->fi_rwsem);
dece6358 15511+}
4a4d8108 15512+
e8791d4f 15513+int au_finfo_init(struct file *file, struct au_fidir *fidir)
febd17d6
JR
15514+{
15515+ int err;
e8791d4f
AM
15516+ struct au_finfo *finfo;
15517+ struct dentry *dentry;
febd17d6 15518+
e8791d4f
AM
15519+ err = -ENOMEM;
15520+ dentry = file->f_path.dentry;
15521+ finfo = au_cache_alloc_finfo();
15522+ if (unlikely(!finfo))
febd17d6
JR
15523+ goto out;
15524+
e8791d4f
AM
15525+ err = 0;
15526+ au_nfiles_inc(dentry->d_sb);
15527+ au_rw_write_lock(&finfo->fi_rwsem);
15528+ finfo->fi_btop = -1;
15529+ finfo->fi_hdir = fidir;
15530+ atomic_set(&finfo->fi_generation, au_digen(dentry));
15531+ /* smp_mb(); */ /* atomic_set */
15532+
15533+ file->private_data = finfo;
febd17d6
JR
15534+
15535+out:
febd17d6
JR
15536+ return err;
15537+}
e8791d4f
AM
15538diff -urNp -x '*.orig' linux-4.9/fs/aufs/fstype.h linux-4.9/fs/aufs/fstype.h
15539--- linux-4.9/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
15540+++ linux-4.9/fs/aufs/fstype.h 2021-02-24 16:15:09.528240413 +0100
b912730e 15541@@ -0,0 +1,400 @@
4a4d8108 15542+/*
ae9dfd79 15543+ * Copyright (C) 2005-2018 Junjiro R. Okajima
4a4d8108
AM
15544+ *
15545+ * This program, aufs is free software; you can redistribute it and/or modify
15546+ * it under the terms of the GNU General Public License as published by
15547+ * the Free Software Foundation; either version 2 of the License, or
15548+ * (at your option) any later version.
15549+ *
15550+ * This program is distributed in the hope that it will be useful,
15551+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15552+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15553+ * GNU General Public License for more details.
15554+ *
15555+ * You should have received a copy of the GNU General Public License
523b37e3 15556+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
15557+ */
15558+
15559+/*
15560+ * judging filesystem type
15561+ */
15562+
15563+#ifndef __AUFS_FSTYPE_H__
15564+#define __AUFS_FSTYPE_H__
15565+
15566+#ifdef __KERNEL__
15567+
15568+#include <linux/fs.h>
15569+#include <linux/magic.h>
b912730e 15570+#include <linux/nfs_fs.h>
b95c5147 15571+#include <linux/romfs_fs.h>
4a4d8108
AM
15572+
15573+static inline int au_test_aufs(struct super_block *sb)
15574+{
15575+ return sb->s_magic == AUFS_SUPER_MAGIC;
15576+}
15577+
15578+static inline const char *au_sbtype(struct super_block *sb)
15579+{
15580+ return sb->s_type->name;
15581+}
1308ab2a 15582+
15583+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
15584+{
f0c0a007 15585+#if IS_ENABLED(CONFIG_ISO9660_FS)
2000de60 15586+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
15587+#else
15588+ return 0;
15589+#endif
15590+}
15591+
1308ab2a 15592+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 15593+{
f0c0a007 15594+#if IS_ENABLED(CONFIG_ROMFS_FS)
2000de60 15595+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
15596+#else
15597+ return 0;
15598+#endif
15599+}
15600+
1308ab2a 15601+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 15602+{
f0c0a007 15603+#if IS_ENABLED(CONFIG_CRAMFS)
1308ab2a 15604+ return sb->s_magic == CRAMFS_MAGIC;
15605+#endif
15606+ return 0;
15607+}
15608+
15609+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
15610+{
f0c0a007 15611+#if IS_ENABLED(CONFIG_NFS_FS)
1308ab2a 15612+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
15613+#else
15614+ return 0;
15615+#endif
15616+}
15617+
1308ab2a 15618+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 15619+{
f0c0a007 15620+#if IS_ENABLED(CONFIG_FUSE_FS)
1308ab2a 15621+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
15622+#else
15623+ return 0;
15624+#endif
15625+}
15626+
1308ab2a 15627+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 15628+{
f0c0a007 15629+#if IS_ENABLED(CONFIG_XFS_FS)
1308ab2a 15630+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
15631+#else
15632+ return 0;
15633+#endif
15634+}
15635+
1308ab2a 15636+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 15637+{
1308ab2a 15638+#ifdef CONFIG_TMPFS
15639+ return sb->s_magic == TMPFS_MAGIC;
15640+#else
15641+ return 0;
dece6358 15642+#endif
dece6358
AM
15643+}
15644+
1308ab2a 15645+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 15646+{
f0c0a007 15647+#if IS_ENABLED(CONFIG_ECRYPT_FS)
1308ab2a 15648+ return !strcmp(au_sbtype(sb), "ecryptfs");
15649+#else
15650+ return 0;
15651+#endif
1facf9fc 15652+}
15653+
1308ab2a 15654+static inline int au_test_ramfs(struct super_block *sb)
15655+{
15656+ return sb->s_magic == RAMFS_MAGIC;
15657+}
15658+
15659+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
15660+{
f0c0a007 15661+#if IS_ENABLED(CONFIG_UBIFS_FS)
1308ab2a 15662+ return sb->s_magic == UBIFS_SUPER_MAGIC;
15663+#else
15664+ return 0;
15665+#endif
15666+}
15667+
15668+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
15669+{
15670+#ifdef CONFIG_PROC_FS
15671+ return sb->s_magic == PROC_SUPER_MAGIC;
15672+#else
15673+ return 0;
15674+#endif
15675+}
15676+
15677+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
15678+{
15679+#ifdef CONFIG_SYSFS
15680+ return sb->s_magic == SYSFS_MAGIC;
15681+#else
15682+ return 0;
15683+#endif
15684+}
15685+
15686+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
15687+{
f0c0a007 15688+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
1308ab2a 15689+ return sb->s_magic == CONFIGFS_MAGIC;
15690+#else
15691+ return 0;
15692+#endif
15693+}
15694+
15695+static inline int au_test_minix(struct super_block *sb __maybe_unused)
15696+{
f0c0a007 15697+#if IS_ENABLED(CONFIG_MINIX_FS)
1308ab2a 15698+ return sb->s_magic == MINIX3_SUPER_MAGIC
15699+ || sb->s_magic == MINIX2_SUPER_MAGIC
15700+ || sb->s_magic == MINIX2_SUPER_MAGIC2
15701+ || sb->s_magic == MINIX_SUPER_MAGIC
15702+ || sb->s_magic == MINIX_SUPER_MAGIC2;
15703+#else
15704+ return 0;
15705+#endif
15706+}
15707+
1308ab2a 15708+static inline int au_test_fat(struct super_block *sb __maybe_unused)
15709+{
f0c0a007 15710+#if IS_ENABLED(CONFIG_FAT_FS)
1308ab2a 15711+ return sb->s_magic == MSDOS_SUPER_MAGIC;
15712+#else
15713+ return 0;
15714+#endif
15715+}
15716+
15717+static inline int au_test_msdos(struct super_block *sb)
15718+{
15719+ return au_test_fat(sb);
15720+}
15721+
15722+static inline int au_test_vfat(struct super_block *sb)
15723+{
15724+ return au_test_fat(sb);
15725+}
15726+
15727+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
15728+{
15729+#ifdef CONFIG_SECURITYFS
15730+ return sb->s_magic == SECURITYFS_MAGIC;
15731+#else
15732+ return 0;
15733+#endif
15734+}
15735+
15736+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
15737+{
f0c0a007 15738+#if IS_ENABLED(CONFIG_SQUASHFS)
1308ab2a 15739+ return sb->s_magic == SQUASHFS_MAGIC;
15740+#else
15741+ return 0;
15742+#endif
15743+}
15744+
15745+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
15746+{
f0c0a007 15747+#if IS_ENABLED(CONFIG_BTRFS_FS)
1308ab2a 15748+ return sb->s_magic == BTRFS_SUPER_MAGIC;
15749+#else
15750+ return 0;
15751+#endif
15752+}
15753+
15754+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
15755+{
f0c0a007 15756+#if IS_ENABLED(CONFIG_XENFS)
1308ab2a 15757+ return sb->s_magic == XENFS_SUPER_MAGIC;
15758+#else
15759+ return 0;
15760+#endif
15761+}
15762+
15763+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
15764+{
15765+#ifdef CONFIG_DEBUG_FS
15766+ return sb->s_magic == DEBUGFS_MAGIC;
15767+#else
15768+ return 0;
15769+#endif
15770+}
15771+
15772+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
15773+{
f0c0a007 15774+#if IS_ENABLED(CONFIG_NILFS)
1308ab2a 15775+ return sb->s_magic == NILFS_SUPER_MAGIC;
15776+#else
15777+ return 0;
15778+#endif
15779+}
15780+
4a4d8108
AM
15781+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
15782+{
f0c0a007 15783+#if IS_ENABLED(CONFIG_HFSPLUS_FS)
4a4d8108
AM
15784+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
15785+#else
15786+ return 0;
15787+#endif
15788+}
15789+
1308ab2a 15790+/* ---------------------------------------------------------------------- */
15791+/*
15792+ * they can't be an aufs branch.
15793+ */
15794+static inline int au_test_fs_unsuppoted(struct super_block *sb)
15795+{
15796+ return
15797+#ifndef CONFIG_AUFS_BR_RAMFS
15798+ au_test_ramfs(sb) ||
15799+#endif
15800+ au_test_procfs(sb)
15801+ || au_test_sysfs(sb)
15802+ || au_test_configfs(sb)
15803+ || au_test_debugfs(sb)
15804+ || au_test_securityfs(sb)
15805+ || au_test_xenfs(sb)
15806+ || au_test_ecryptfs(sb)
15807+ /* || !strcmp(au_sbtype(sb), "unionfs") */
15808+ || au_test_aufs(sb); /* will be supported in next version */
15809+}
15810+
1308ab2a 15811+static inline int au_test_fs_remote(struct super_block *sb)
15812+{
15813+ return !au_test_tmpfs(sb)
15814+#ifdef CONFIG_AUFS_BR_RAMFS
15815+ && !au_test_ramfs(sb)
15816+#endif
15817+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
15818+}
15819+
15820+/* ---------------------------------------------------------------------- */
15821+
15822+/*
15823+ * Note: these functions (below) are created after reading ->getattr() in all
15824+ * filesystems under linux/fs. it means we have to do so in every update...
15825+ */
15826+
15827+/*
15828+ * some filesystems require getattr to refresh the inode attributes before
15829+ * referencing.
15830+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
15831+ * and leave the work for d_revalidate()
15832+ */
15833+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
15834+{
15835+ return au_test_nfs(sb)
15836+ || au_test_fuse(sb)
1308ab2a 15837+ /* || au_test_btrfs(sb) */ /* untested */
1308ab2a 15838+ ;
15839+}
15840+
15841+/*
15842+ * filesystems which don't maintain i_size or i_blocks.
15843+ */
15844+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
15845+{
15846+ return au_test_xfs(sb)
4a4d8108
AM
15847+ || au_test_btrfs(sb)
15848+ || au_test_ubifs(sb)
15849+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 15850+ /* || au_test_minix(sb) */ /* untested */
15851+ ;
15852+}
15853+
15854+/*
15855+ * filesystems which don't store the correct value in some of their inode
15856+ * attributes.
15857+ */
15858+static inline int au_test_fs_bad_iattr(struct super_block *sb)
15859+{
15860+ return au_test_fs_bad_iattr_size(sb)
1308ab2a 15861+ || au_test_fat(sb)
15862+ || au_test_msdos(sb)
15863+ || au_test_vfat(sb);
1facf9fc 15864+}
15865+
15866+/* they don't check i_nlink in link(2) */
15867+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
15868+{
15869+ return au_test_tmpfs(sb)
15870+#ifdef CONFIG_AUFS_BR_RAMFS
15871+ || au_test_ramfs(sb)
15872+#endif
4a4d8108 15873+ || au_test_ubifs(sb)
4a4d8108 15874+ || au_test_hfsplus(sb);
1facf9fc 15875+}
15876+
15877+/*
15878+ * filesystems which set S_NOATIME and S_NOCMTIME.
15879+ */
15880+static inline int au_test_fs_notime(struct super_block *sb)
15881+{
15882+ return au_test_nfs(sb)
15883+ || au_test_fuse(sb)
dece6358 15884+ || au_test_ubifs(sb)
1facf9fc 15885+ ;
15886+}
15887+
1facf9fc 15888+/* temporary support for i#1 in cramfs */
15889+static inline int au_test_fs_unique_ino(struct inode *inode)
15890+{
15891+ if (au_test_cramfs(inode->i_sb))
15892+ return inode->i_ino != 1;
15893+ return 1;
15894+}
15895+
15896+/* ---------------------------------------------------------------------- */
15897+
15898+/*
15899+ * the filesystem where the xino files are placed must support i/o after unlink and
15900+ * maintain i_size and i_blocks.
15901+ */
15902+static inline int au_test_fs_bad_xino(struct super_block *sb)
15903+{
15904+ return au_test_fs_remote(sb)
15905+ || au_test_fs_bad_iattr_size(sb)
1facf9fc 15906+ /* don't want unnecessary work for xino */
15907+ || au_test_aufs(sb)
1308ab2a 15908+ || au_test_ecryptfs(sb)
15909+ || au_test_nilfs(sb);
1facf9fc 15910+}
15911+
15912+static inline int au_test_fs_trunc_xino(struct super_block *sb)
15913+{
15914+ return au_test_tmpfs(sb)
15915+ || au_test_ramfs(sb);
15916+}
15917+
15918+/*
15919+ * test if the @sb is real-readonly.
15920+ */
15921+static inline int au_test_fs_rr(struct super_block *sb)
15922+{
15923+ return au_test_squashfs(sb)
15924+ || au_test_iso9660(sb)
15925+ || au_test_cramfs(sb)
15926+ || au_test_romfs(sb);
15927+}
15928+
b912730e
AM
15929+/*
15930+ * test if the @inode is nfs with 'noacl' option
15931+ * NFS always sets MS_POSIXACL regardless of its mount option 'noacl'.
15932+ */
15933+static inline int au_test_nfs_noacl(struct inode *inode)
15934+{
15935+ return au_test_nfs(inode->i_sb)
15936+ /* && IS_POSIXACL(inode) */
15937+ && !nfs_server_capable(inode, NFS_CAP_ACLS);
15938+}
15939+
1facf9fc 15940+#endif /* __KERNEL__ */
15941+#endif /* __AUFS_FSTYPE_H__ */
e8791d4f
AM
15942diff -urNp -x '*.orig' linux-4.9/fs/aufs/hbl.h linux-4.9/fs/aufs/hbl.h
15943--- linux-4.9/fs/aufs/hbl.h 1970-01-01 01:00:00.000000000 +0100
15944+++ linux-4.9/fs/aufs/hbl.h 2021-02-24 16:15:09.528240413 +0100
ae9dfd79
AM
15945@@ -0,0 +1,64 @@
15946+/*
15947+ * Copyright (C) 2017-2018 Junjiro R. Okajima
15948+ *
15949+ * This program, aufs is free software; you can redistribute it and/or modify
15950+ * it under the terms of the GNU General Public License as published by
15951+ * the Free Software Foundation; either version 2 of the License, or
15952+ * (at your option) any later version.
15953+ *
15954+ * This program is distributed in the hope that it will be useful,
15955+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15956+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15957+ * GNU General Public License for more details.
15958+ *
15959+ * You should have received a copy of the GNU General Public License
15960+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
15961+ */
15962+
15963+/*
15964+ * helpers for hlist_bl.h
15965+ */
15966+
15967+#ifndef __AUFS_HBL_H__
15968+#define __AUFS_HBL_H__
15969+
15970+#ifdef __KERNEL__
15971+
15972+#include <linux/list_bl.h>
15973+
15974+static inline void au_hbl_add(struct hlist_bl_node *node,
15975+ struct hlist_bl_head *hbl)
15976+{
15977+ hlist_bl_lock(hbl);
15978+ hlist_bl_add_head(node, hbl);
15979+ hlist_bl_unlock(hbl);
15980+}
15981+
15982+static inline void au_hbl_del(struct hlist_bl_node *node,
15983+ struct hlist_bl_head *hbl)
15984+{
15985+ hlist_bl_lock(hbl);
15986+ hlist_bl_del(node);
15987+ hlist_bl_unlock(hbl);
15988+}
15989+
15990+#define au_hbl_for_each(pos, head) \
15991+ for (pos = hlist_bl_first(head); \
15992+ pos; \
15993+ pos = pos->next)
15994+
15995+static inline unsigned long au_hbl_count(struct hlist_bl_head *hbl)
15996+{
15997+ unsigned long cnt;
15998+ struct hlist_bl_node *pos;
15999+
16000+ cnt = 0;
16001+ hlist_bl_lock(hbl);
16002+ au_hbl_for_each(pos, hbl)
16003+ cnt++;
16004+ hlist_bl_unlock(hbl);
16005+ return cnt;
16006+}
16007+
16008+#endif /* __KERNEL__ */
16009+#endif /* __AUFS_HBL_H__ */
e8791d4f
AM
16010diff -urNp -x '*.orig' linux-4.9/fs/aufs/hfsnotify.c linux-4.9/fs/aufs/hfsnotify.c
16011--- linux-4.9/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
16012+++ linux-4.9/fs/aufs/hfsnotify.c 2021-02-24 16:15:09.528240413 +0100
5afbbe0d 16013@@ -0,0 +1,287 @@
1facf9fc 16014+/*
ae9dfd79 16015+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 16016+ *
16017+ * This program, aufs is free software; you can redistribute it and/or modify
16018+ * it under the terms of the GNU General Public License as published by
16019+ * the Free Software Foundation; either version 2 of the License, or
16020+ * (at your option) any later version.
dece6358
AM
16021+ *
16022+ * This program is distributed in the hope that it will be useful,
16023+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16024+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16025+ * GNU General Public License for more details.
16026+ *
16027+ * You should have received a copy of the GNU General Public License
523b37e3 16028+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 16029+ */
16030+
16031+/*
4a4d8108 16032+ * fsnotify for the lower directories
1facf9fc 16033+ */
16034+
16035+#include "aufs.h"
16036+
4a4d8108
AM
16037+/* FS_IN_IGNORED is unnecessary */
16038+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
16039+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 16040+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
7eafdf33 16041+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
1facf9fc 16042+
0c5527e5 16043+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 16044+{
0c5527e5
AM
16045+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
16046+ hn_mark);
5afbbe0d 16047+ /* AuDbg("here\n"); */
ae9dfd79
AM
16048+ au_cache_free_hnotify(hn);
16049+ smp_mb__before_atomic(); /* for atomic64_dec */
1716fcea
AM
16050+ if (atomic64_dec_and_test(&au_hfsn_ifree))
16051+ wake_up(&au_hfsn_wq);
4a4d8108 16052+}
1facf9fc 16053+
027c5e7a 16054+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 16055+{
1716fcea 16056+ int err;
027c5e7a
AM
16057+ struct au_hnotify *hn;
16058+ struct super_block *sb;
16059+ struct au_branch *br;
0c5527e5 16060+ struct fsnotify_mark *mark;
027c5e7a 16061+ aufs_bindex_t bindex;
1facf9fc 16062+
027c5e7a
AM
16063+ hn = hinode->hi_notify;
16064+ sb = hn->hn_aufs_inode->i_sb;
16065+ bindex = au_br_index(sb, hinode->hi_id);
16066+ br = au_sbr(sb, bindex);
1716fcea
AM
16067+ AuDebugOn(!br->br_hfsn);
16068+
0c5527e5
AM
16069+ mark = &hn->hn_mark;
16070+ fsnotify_init_mark(mark, au_hfsn_free_mark);
16071+ mark->mask = AuHfsnMask;
7f207e10
AM
16072+ /*
16073+ * by udba rename or rmdir, aufs assigns a new inode to the known
16074+ * h_inode, so specify 1 to allow dups.
16075+ */
c1595e42 16076+ lockdep_off();
1716fcea 16077+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
027c5e7a 16078+ /*mnt*/NULL, /*allow_dups*/1);
c1595e42 16079+ lockdep_on();
1716fcea
AM
16080+
16081+ return err;
1facf9fc 16082+}
16083+
7eafdf33 16084+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
1facf9fc 16085+{
0c5527e5 16086+ struct fsnotify_mark *mark;
7eafdf33 16087+ unsigned long long ull;
1716fcea 16088+ struct fsnotify_group *group;
7eafdf33
AM
16089+
16090+ ull = atomic64_inc_return(&au_hfsn_ifree);
16091+ BUG_ON(!ull);
953406b4 16092+
0c5527e5 16093+ mark = &hn->hn_mark;
1716fcea
AM
16094+ spin_lock(&mark->lock);
16095+ group = mark->group;
16096+ fsnotify_get_group(group);
16097+ spin_unlock(&mark->lock);
c1595e42 16098+ lockdep_off();
1716fcea 16099+ fsnotify_destroy_mark(mark, group);
5afbbe0d 16100+ fsnotify_put_mark(mark);
1716fcea 16101+ fsnotify_put_group(group);
c1595e42 16102+ lockdep_on();
7f207e10 16103+
7eafdf33
AM
16104+ /* free hn by myself */
16105+ return 0;
1facf9fc 16106+}
16107+
16108+/* ---------------------------------------------------------------------- */
16109+
4a4d8108 16110+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 16111+{
0c5527e5 16112+ struct fsnotify_mark *mark;
1facf9fc 16113+
0c5527e5
AM
16114+ mark = &hinode->hi_notify->hn_mark;
16115+ spin_lock(&mark->lock);
1facf9fc 16116+ if (do_set) {
0c5527e5
AM
16117+ AuDebugOn(mark->mask & AuHfsnMask);
16118+ mark->mask |= AuHfsnMask;
1facf9fc 16119+ } else {
0c5527e5
AM
16120+ AuDebugOn(!(mark->mask & AuHfsnMask));
16121+ mark->mask &= ~AuHfsnMask;
1facf9fc 16122+ }
0c5527e5 16123+ spin_unlock(&mark->lock);
4a4d8108 16124+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 16125+}
16126+
4a4d8108 16127+/* ---------------------------------------------------------------------- */
1facf9fc 16128+
4a4d8108
AM
16129+/* #define AuDbgHnotify */
16130+#ifdef AuDbgHnotify
16131+static char *au_hfsn_name(u32 mask)
16132+{
16133+#ifdef CONFIG_AUFS_DEBUG
c06a8ce3
AM
16134+#define test_ret(flag) \
16135+ do { \
16136+ if (mask & flag) \
16137+ return #flag; \
16138+ } while (0)
4a4d8108
AM
16139+ test_ret(FS_ACCESS);
16140+ test_ret(FS_MODIFY);
16141+ test_ret(FS_ATTRIB);
16142+ test_ret(FS_CLOSE_WRITE);
16143+ test_ret(FS_CLOSE_NOWRITE);
16144+ test_ret(FS_OPEN);
16145+ test_ret(FS_MOVED_FROM);
16146+ test_ret(FS_MOVED_TO);
16147+ test_ret(FS_CREATE);
16148+ test_ret(FS_DELETE);
16149+ test_ret(FS_DELETE_SELF);
16150+ test_ret(FS_MOVE_SELF);
16151+ test_ret(FS_UNMOUNT);
16152+ test_ret(FS_Q_OVERFLOW);
16153+ test_ret(FS_IN_IGNORED);
b912730e 16154+ test_ret(FS_ISDIR);
4a4d8108
AM
16155+ test_ret(FS_IN_ONESHOT);
16156+ test_ret(FS_EVENT_ON_CHILD);
16157+ return "";
16158+#undef test_ret
16159+#else
16160+ return "??";
16161+#endif
1facf9fc 16162+}
4a4d8108 16163+#endif
1facf9fc 16164+
16165+/* ---------------------------------------------------------------------- */
16166+
1716fcea
AM
16167+static void au_hfsn_free_group(struct fsnotify_group *group)
16168+{
16169+ struct au_br_hfsnotify *hfsn = group->private;
16170+
5afbbe0d 16171+ /* AuDbg("here\n"); */
ae9dfd79 16172+ kfree(hfsn);
1716fcea
AM
16173+}
16174+
4a4d8108 16175+static int au_hfsn_handle_event(struct fsnotify_group *group,
fb47a38f 16176+ struct inode *inode,
0c5527e5
AM
16177+ struct fsnotify_mark *inode_mark,
16178+ struct fsnotify_mark *vfsmount_mark,
fb47a38f
JR
16179+ u32 mask, void *data, int data_type,
16180+ const unsigned char *file_name, u32 cookie)
1facf9fc 16181+{
16182+ int err;
4a4d8108
AM
16183+ struct au_hnotify *hnotify;
16184+ struct inode *h_dir, *h_inode;
fb47a38f 16185+ struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
4a4d8108 16186+
fb47a38f 16187+ AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 16188+
16189+ err = 0;
0c5527e5 16190+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108 16191+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 16192+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 16193+ goto out;
1facf9fc 16194+
fb47a38f
JR
16195+ h_dir = inode;
16196+ h_inode = NULL;
4a4d8108 16197+#ifdef AuDbgHnotify
392086de 16198+ au_debug_on();
4a4d8108
AM
16199+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
16200+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
16201+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
16202+ h_dir->i_ino, mask, au_hfsn_name(mask),
16203+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
16204+ /* WARN_ON(1); */
1facf9fc 16205+ }
392086de 16206+ au_debug_off();
1facf9fc 16207+#endif
4a4d8108 16208+
0c5527e5
AM
16209+ AuDebugOn(!inode_mark);
16210+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
16211+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 16212+
4a4d8108
AM
16213+out:
16214+ return err;
16215+}
1facf9fc 16216+
4a4d8108 16217+static struct fsnotify_ops au_hfsn_ops = {
1716fcea
AM
16218+ .handle_event = au_hfsn_handle_event,
16219+ .free_group_priv = au_hfsn_free_group
4a4d8108
AM
16220+};
16221+
16222+/* ---------------------------------------------------------------------- */
16223+
027c5e7a
AM
16224+static void au_hfsn_fin_br(struct au_branch *br)
16225+{
1716fcea 16226+ struct au_br_hfsnotify *hfsn;
027c5e7a 16227+
1716fcea 16228+ hfsn = br->br_hfsn;
c1595e42
JR
16229+ if (hfsn) {
16230+ lockdep_off();
1716fcea 16231+ fsnotify_put_group(hfsn->hfsn_group);
c1595e42
JR
16232+ lockdep_on();
16233+ }
027c5e7a
AM
16234+}
16235+
1716fcea 16236+static int au_hfsn_init_br(struct au_branch *br, int perm)
4a4d8108
AM
16237+{
16238+ int err;
1716fcea
AM
16239+ struct fsnotify_group *group;
16240+ struct au_br_hfsnotify *hfsn;
1facf9fc 16241+
4a4d8108 16242+ err = 0;
1716fcea
AM
16243+ br->br_hfsn = NULL;
16244+ if (!au_br_hnotifyable(perm))
027c5e7a 16245+ goto out;
027c5e7a 16246+
1716fcea
AM
16247+ err = -ENOMEM;
16248+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
16249+ if (unlikely(!hfsn))
027c5e7a
AM
16250+ goto out;
16251+
1716fcea
AM
16252+ err = 0;
16253+ group = fsnotify_alloc_group(&au_hfsn_ops);
16254+ if (IS_ERR(group)) {
16255+ err = PTR_ERR(group);
0c5527e5 16256+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
1716fcea 16257+ goto out_hfsn;
4a4d8108 16258+ }
1facf9fc 16259+
1716fcea
AM
16260+ group->private = hfsn;
16261+ hfsn->hfsn_group = group;
16262+ br->br_hfsn = hfsn;
16263+ goto out; /* success */
16264+
16265+out_hfsn:
ae9dfd79 16266+ kfree(hfsn);
027c5e7a 16267+out:
1716fcea
AM
16268+ return err;
16269+}
16270+
16271+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
16272+{
16273+ int err;
16274+
16275+ err = 0;
16276+ if (!br->br_hfsn)
16277+ err = au_hfsn_init_br(br, perm);
16278+
1facf9fc 16279+ return err;
16280+}
16281+
7eafdf33
AM
16282+/* ---------------------------------------------------------------------- */
16283+
16284+static void au_hfsn_fin(void)
16285+{
16286+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
16287+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
16288+}
16289+
4a4d8108
AM
16290+const struct au_hnotify_op au_hnotify_op = {
16291+ .ctl = au_hfsn_ctl,
16292+ .alloc = au_hfsn_alloc,
16293+ .free = au_hfsn_free,
1facf9fc 16294+
7eafdf33
AM
16295+ .fin = au_hfsn_fin,
16296+
027c5e7a
AM
16297+ .reset_br = au_hfsn_reset_br,
16298+ .fin_br = au_hfsn_fin_br,
16299+ .init_br = au_hfsn_init_br
4a4d8108 16300+};
e8791d4f
AM
16301diff -urNp -x '*.orig' linux-4.9/fs/aufs/hfsplus.c linux-4.9/fs/aufs/hfsplus.c
16302--- linux-4.9/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
16303+++ linux-4.9/fs/aufs/hfsplus.c 2021-02-24 16:15:09.528240413 +0100
523b37e3 16304@@ -0,0 +1,56 @@
4a4d8108 16305+/*
ae9dfd79 16306+ * Copyright (C) 2010-2018 Junjiro R. Okajima
4a4d8108
AM
16307+ *
16308+ * This program, aufs is free software; you can redistribute it and/or modify
16309+ * it under the terms of the GNU General Public License as published by
16310+ * the Free Software Foundation; either version 2 of the License, or
16311+ * (at your option) any later version.
16312+ *
16313+ * This program is distributed in the hope that it will be useful,
16314+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16315+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16316+ * GNU General Public License for more details.
16317+ *
16318+ * You should have received a copy of the GNU General Public License
523b37e3 16319+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 16320+ */
1facf9fc 16321+
4a4d8108
AM
16322+/*
16323+ * special support for filesystems which aqucires an inode mutex
16324+ * at final closing a file, eg, hfsplus.
16325+ *
16326+ * This trick is very simple and stupid, just to open the file before really
16327+ * necessary open to tell hfsplus that this is not the final closing.
16328+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
16329+ * and au_h_open_post() after releasing it.
16330+ */
1facf9fc 16331+
4a4d8108 16332+#include "aufs.h"
1facf9fc 16333+
392086de
AM
16334+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
16335+ int force_wr)
4a4d8108
AM
16336+{
16337+ struct file *h_file;
16338+ struct dentry *h_dentry;
1facf9fc 16339+
4a4d8108
AM
16340+ h_dentry = au_h_dptr(dentry, bindex);
16341+ AuDebugOn(!h_dentry);
5527c038 16342+ AuDebugOn(d_is_negative(h_dentry));
4a4d8108
AM
16343+
16344+ h_file = NULL;
16345+ if (au_test_hfsplus(h_dentry->d_sb)
7e9cd9fe 16346+ && d_is_reg(h_dentry))
4a4d8108
AM
16347+ h_file = au_h_open(dentry, bindex,
16348+ O_RDONLY | O_NOATIME | O_LARGEFILE,
392086de 16349+ /*file*/NULL, force_wr);
4a4d8108 16350+ return h_file;
1facf9fc 16351+}
16352+
4a4d8108
AM
16353+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
16354+ struct file *h_file)
16355+{
16356+ if (h_file) {
16357+ fput(h_file);
16358+ au_sbr_put(dentry->d_sb, bindex);
16359+ }
16360+}
e8791d4f
AM
16361diff -urNp -x '*.orig' linux-4.9/fs/aufs/hnotify.c linux-4.9/fs/aufs/hnotify.c
16362--- linux-4.9/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
16363+++ linux-4.9/fs/aufs/hnotify.c 2021-02-24 16:15:09.528240413 +0100
ae9dfd79 16364@@ -0,0 +1,719 @@
e49829fe 16365+/*
ae9dfd79 16366+ * Copyright (C) 2005-2018 Junjiro R. Okajima
e49829fe
JR
16367+ *
16368+ * This program, aufs is free software; you can redistribute it and/or modify
16369+ * it under the terms of the GNU General Public License as published by
16370+ * the Free Software Foundation; either version 2 of the License, or
16371+ * (at your option) any later version.
16372+ *
16373+ * This program is distributed in the hope that it will be useful,
16374+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16375+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16376+ * GNU General Public License for more details.
16377+ *
16378+ * You should have received a copy of the GNU General Public License
523b37e3 16379+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
16380+ */
16381+
16382+/*
7f207e10 16383+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
16384+ */
16385+
16386+#include "aufs.h"
16387+
027c5e7a 16388+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
16389+{
16390+ int err;
7f207e10 16391+ struct au_hnotify *hn;
1facf9fc 16392+
4a4d8108
AM
16393+ err = -ENOMEM;
16394+ hn = au_cache_alloc_hnotify();
16395+ if (hn) {
16396+ hn->hn_aufs_inode = inode;
027c5e7a
AM
16397+ hinode->hi_notify = hn;
16398+ err = au_hnotify_op.alloc(hinode);
16399+ AuTraceErr(err);
16400+ if (unlikely(err)) {
16401+ hinode->hi_notify = NULL;
ae9dfd79 16402+ au_cache_free_hnotify(hn);
4a4d8108
AM
16403+ /*
16404+ * The upper dir was removed by udba, but the same named
16405+ * dir left. In this case, aufs assigns a new inode
16406+ * number and sets the monitor again.
16407+ * For the lower dir, the old monitor is still left.
16408+ */
16409+ if (err == -EEXIST)
16410+ err = 0;
16411+ }
1308ab2a 16412+ }
1308ab2a 16413+
027c5e7a 16414+ AuTraceErr(err);
1308ab2a 16415+ return err;
dece6358 16416+}
1facf9fc 16417+
4a4d8108 16418+void au_hn_free(struct au_hinode *hinode)
dece6358 16419+{
4a4d8108 16420+ struct au_hnotify *hn;
1facf9fc 16421+
4a4d8108
AM
16422+ hn = hinode->hi_notify;
16423+ if (hn) {
4a4d8108 16424+ hinode->hi_notify = NULL;
7eafdf33 16425+ if (au_hnotify_op.free(hinode, hn))
ae9dfd79 16426+ au_cache_free_hnotify(hn);
4a4d8108
AM
16427+ }
16428+}
dece6358 16429+
4a4d8108 16430+/* ---------------------------------------------------------------------- */
dece6358 16431+
4a4d8108
AM
16432+void au_hn_ctl(struct au_hinode *hinode, int do_set)
16433+{
16434+ if (hinode->hi_notify)
16435+ au_hnotify_op.ctl(hinode, do_set);
16436+}
16437+
16438+void au_hn_reset(struct inode *inode, unsigned int flags)
16439+{
5afbbe0d 16440+ aufs_bindex_t bindex, bbot;
4a4d8108
AM
16441+ struct inode *hi;
16442+ struct dentry *iwhdentry;
1facf9fc 16443+
5afbbe0d
AM
16444+ bbot = au_ibbot(inode);
16445+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
4a4d8108
AM
16446+ hi = au_h_iptr(inode, bindex);
16447+ if (!hi)
16448+ continue;
1308ab2a 16449+
febd17d6 16450+ /* inode_lock_nested(hi, AuLsc_I_CHILD); */
4a4d8108
AM
16451+ iwhdentry = au_hi_wh(inode, bindex);
16452+ if (iwhdentry)
16453+ dget(iwhdentry);
16454+ au_igrab(hi);
16455+ au_set_h_iptr(inode, bindex, NULL, 0);
16456+ au_set_h_iptr(inode, bindex, au_igrab(hi),
16457+ flags & ~AuHi_XINO);
16458+ iput(hi);
16459+ dput(iwhdentry);
febd17d6 16460+ /* inode_unlock(hi); */
1facf9fc 16461+ }
1facf9fc 16462+}
16463+
1308ab2a 16464+/* ---------------------------------------------------------------------- */
1facf9fc 16465+
4a4d8108 16466+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 16467+{
4a4d8108 16468+ int err;
5afbbe0d 16469+ aufs_bindex_t bindex, bbot, bfound, btop;
4a4d8108 16470+ struct inode *h_i;
1facf9fc 16471+
4a4d8108
AM
16472+ err = 0;
16473+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 16474+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
16475+ goto out;
16476+ }
1facf9fc 16477+
4a4d8108 16478+ bfound = -1;
5afbbe0d
AM
16479+ bbot = au_ibbot(inode);
16480+ btop = au_ibtop(inode);
4a4d8108 16481+#if 0 /* reserved for future use */
5afbbe0d 16482+ if (bindex == bbot) {
4a4d8108
AM
16483+ /* keep this ino in rename case */
16484+ goto out;
16485+ }
16486+#endif
5afbbe0d 16487+ for (bindex = btop; bindex <= bbot; bindex++)
4a4d8108
AM
16488+ if (au_h_iptr(inode, bindex) == h_inode) {
16489+ bfound = bindex;
16490+ break;
16491+ }
16492+ if (bfound < 0)
1308ab2a 16493+ goto out;
1facf9fc 16494+
5afbbe0d 16495+ for (bindex = btop; bindex <= bbot; bindex++) {
4a4d8108
AM
16496+ h_i = au_h_iptr(inode, bindex);
16497+ if (!h_i)
16498+ continue;
1facf9fc 16499+
4a4d8108
AM
16500+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
16501+ /* ignore this error */
16502+ /* bad action? */
1facf9fc 16503+ }
1facf9fc 16504+
4a4d8108 16505+ /* children inode number will be broken */
1facf9fc 16506+
4f0767ce 16507+out:
4a4d8108
AM
16508+ AuTraceErr(err);
16509+ return err;
1facf9fc 16510+}
16511+
4a4d8108 16512+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 16513+{
4a4d8108
AM
16514+ int err, i, j, ndentry;
16515+ struct au_dcsub_pages dpages;
16516+ struct au_dpage *dpage;
16517+ struct dentry **dentries;
1facf9fc 16518+
4a4d8108
AM
16519+ err = au_dpages_init(&dpages, GFP_NOFS);
16520+ if (unlikely(err))
16521+ goto out;
16522+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
16523+ if (unlikely(err))
16524+ goto out_dpages;
1facf9fc 16525+
4a4d8108
AM
16526+ for (i = 0; i < dpages.ndpage; i++) {
16527+ dpage = dpages.dpages + i;
16528+ dentries = dpage->dentries;
16529+ ndentry = dpage->ndentry;
16530+ for (j = 0; j < ndentry; j++) {
16531+ struct dentry *d;
16532+
16533+ d = dentries[j];
16534+ if (IS_ROOT(d))
16535+ continue;
16536+
4a4d8108 16537+ au_digen_dec(d);
5527c038 16538+ if (d_really_is_positive(d))
4a4d8108
AM
16539+ /* todo: reset children xino?
16540+ cached children only? */
5527c038 16541+ au_iigen_dec(d_inode(d));
1308ab2a 16542+ }
dece6358 16543+ }
1facf9fc 16544+
4f0767ce 16545+out_dpages:
4a4d8108 16546+ au_dpages_free(&dpages);
dece6358 16547+
027c5e7a 16548+#if 0
4a4d8108
AM
16549+ /* discard children */
16550+ dentry_unhash(dentry);
16551+ dput(dentry);
027c5e7a 16552+#endif
4f0767ce 16553+out:
dece6358
AM
16554+ return err;
16555+}
16556+
1308ab2a 16557+/*
4a4d8108 16558+ * return 0 if processed (or @inode is the aufs root), non-zero otherwise.
1308ab2a 16559+ */
4a4d8108
AM
16560+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
16561+ const unsigned int isdir)
dece6358 16562+{
1308ab2a 16563+ int err;
4a4d8108
AM
16564+ struct dentry *d;
16565+ struct qstr *dname;
1facf9fc 16566+
4a4d8108
AM
16567+ err = 1;
16568+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 16569+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
16570+ err = 0;
16571+ goto out;
16572+ }
dece6358 16573+
4a4d8108
AM
16574+ if (!isdir) {
16575+ AuDebugOn(!name);
16576+ au_iigen_dec(inode);
027c5e7a 16577+ spin_lock(&inode->i_lock);
c1595e42 16578+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 16579+ spin_lock(&d->d_lock);
4a4d8108
AM
16580+ dname = &d->d_name;
16581+ if (dname->len != nlen
027c5e7a
AM
16582+ || memcmp(dname->name, name, nlen)) {
16583+ spin_unlock(&d->d_lock);
4a4d8108 16584+ continue; /* not the alias named @name */
027c5e7a 16585+ }
4a4d8108 16586+ err = 0;
4a4d8108
AM
16587+ au_digen_dec(d);
16588+ spin_unlock(&d->d_lock);
16589+ break;
1facf9fc 16590+ }
027c5e7a 16591+ spin_unlock(&inode->i_lock);
1308ab2a 16592+ } else {
027c5e7a 16593+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
c1595e42 16594+ d = d_find_any_alias(inode);
4a4d8108
AM
16595+ if (!d) {
16596+ au_iigen_dec(inode);
16597+ goto out;
16598+ }
1facf9fc 16599+
027c5e7a 16600+ spin_lock(&d->d_lock);
4a4d8108 16601+ dname = &d->d_name;
027c5e7a
AM
16602+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
16603+ spin_unlock(&d->d_lock);
4a4d8108 16604+ err = hn_gen_tree(d);
027c5e7a
AM
16605+ spin_lock(&d->d_lock);
16606+ }
16607+ spin_unlock(&d->d_lock);
4a4d8108
AM
16608+ dput(d);
16609+ }
1facf9fc 16610+
4f0767ce 16611+out:
4a4d8108 16612+ AuTraceErr(err);
1308ab2a 16613+ return err;
16614+}
dece6358 16615+
4a4d8108 16616+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 16617+{
4a4d8108 16618+ int err;
1facf9fc 16619+
5527c038 16620+ if (IS_ROOT(dentry)) {
0c3ec466 16621+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
16622+ return 0;
16623+ }
1308ab2a 16624+
4a4d8108
AM
16625+ err = 0;
16626+ if (!isdir) {
4a4d8108 16627+ au_digen_dec(dentry);
5527c038
JR
16628+ if (d_really_is_positive(dentry))
16629+ au_iigen_dec(d_inode(dentry));
4a4d8108 16630+ } else {
027c5e7a 16631+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
5527c038 16632+ if (d_really_is_positive(dentry))
4a4d8108
AM
16633+ err = hn_gen_tree(dentry);
16634+ }
16635+
16636+ AuTraceErr(err);
16637+ return err;
1facf9fc 16638+}
16639+
4a4d8108 16640+/* ---------------------------------------------------------------------- */
1facf9fc 16641+
4a4d8108
AM
16642+/* hnotify job flags */
16643+#define AuHnJob_XINO0 1
16644+#define AuHnJob_GEN (1 << 1)
16645+#define AuHnJob_DIRENT (1 << 2)
16646+#define AuHnJob_ISDIR (1 << 3)
16647+#define AuHnJob_TRYXINO0 (1 << 4)
16648+#define AuHnJob_MNTPNT (1 << 5)
16649+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
16650+#define au_fset_hnjob(flags, name) \
16651+ do { (flags) |= AuHnJob_##name; } while (0)
16652+#define au_fclr_hnjob(flags, name) \
16653+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 16654+
4a4d8108
AM
16655+enum {
16656+ AuHn_CHILD,
16657+ AuHn_PARENT,
16658+ AuHnLast
16659+};
1facf9fc 16660+
4a4d8108
AM
16661+struct au_hnotify_args {
16662+ struct inode *h_dir, *dir, *h_child_inode;
16663+ u32 mask;
16664+ unsigned int flags[AuHnLast];
16665+ unsigned int h_child_nlen;
16666+ char h_child_name[];
16667+};
1facf9fc 16668+
4a4d8108
AM
16669+struct hn_job_args {
16670+ unsigned int flags;
16671+ struct inode *inode, *h_inode, *dir, *h_dir;
16672+ struct dentry *dentry;
16673+ char *h_name;
16674+ int h_nlen;
16675+};
1308ab2a 16676+
4a4d8108
AM
16677+static int hn_job(struct hn_job_args *a)
16678+{
16679+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
076b876e 16680+ int e;
1308ab2a 16681+
4a4d8108
AM
16682+ /* reset xino */
16683+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
16684+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 16685+
4a4d8108
AM
16686+ if (au_ftest_hnjob(a->flags, TRYXINO0)
16687+ && a->inode
16688+ && a->h_inode) {
ae9dfd79 16689+ vfsub_inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
38d290e6
JR
16690+ if (!a->h_inode->i_nlink
16691+ && !(a->h_inode->i_state & I_LINKABLE))
4a4d8108 16692+ hn_xino(a->inode, a->h_inode); /* ignore this error */
ae9dfd79 16693+ inode_unlock_shared(a->h_inode);
1308ab2a 16694+ }
1facf9fc 16695+
4a4d8108
AM
16696+ /* make the generation obsolete */
16697+ if (au_ftest_hnjob(a->flags, GEN)) {
076b876e 16698+ e = -1;
4a4d8108 16699+ if (a->inode)
076b876e 16700+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
4a4d8108 16701+ isdir);
076b876e 16702+ if (e && a->dentry)
4a4d8108
AM
16703+ hn_gen_by_name(a->dentry, isdir);
16704+ /* ignore this error */
1facf9fc 16705+ }
1facf9fc 16706+
4a4d8108
AM
16707+ /* make dir entries obsolete */
16708+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
16709+ struct au_vdir *vdir;
1facf9fc 16710+
4a4d8108
AM
16711+ vdir = au_ivdir(a->inode);
16712+ if (vdir)
16713+ vdir->vd_jiffy = 0;
16714+ /* IMustLock(a->inode); */
16715+ /* a->inode->i_version++; */
16716+ }
1facf9fc 16717+
4a4d8108
AM
16718+ /* can do nothing but warn */
16719+ if (au_ftest_hnjob(a->flags, MNTPNT)
16720+ && a->dentry
16721+ && d_mountpoint(a->dentry))
523b37e3 16722+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
1facf9fc 16723+
4a4d8108 16724+ return 0;
1308ab2a 16725+}
1facf9fc 16726+
1308ab2a 16727+/* ---------------------------------------------------------------------- */
1facf9fc 16728+
4a4d8108
AM
16729+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
16730+ struct inode *dir)
1308ab2a 16731+{
4a4d8108
AM
16732+ struct dentry *dentry, *d, *parent;
16733+ struct qstr *dname;
1308ab2a 16734+
c1595e42 16735+ parent = d_find_any_alias(dir);
4a4d8108
AM
16736+ if (!parent)
16737+ return NULL;
1308ab2a 16738+
4a4d8108 16739+ dentry = NULL;
027c5e7a 16740+ spin_lock(&parent->d_lock);
c1595e42 16741+ list_for_each_entry(d, &parent->d_subdirs, d_child) {
523b37e3 16742+ /* AuDbg("%pd\n", d); */
027c5e7a 16743+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
16744+ dname = &d->d_name;
16745+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
16746+ goto cont_unlock;
16747+ if (au_di(d))
16748+ au_digen_dec(d);
16749+ else
16750+ goto cont_unlock;
c1595e42 16751+ if (au_dcount(d) > 0) {
027c5e7a 16752+ dentry = dget_dlock(d);
4a4d8108 16753+ spin_unlock(&d->d_lock);
027c5e7a 16754+ break;
dece6358 16755+ }
1facf9fc 16756+
f6b6e03d 16757+cont_unlock:
027c5e7a 16758+ spin_unlock(&d->d_lock);
1308ab2a 16759+ }
027c5e7a 16760+ spin_unlock(&parent->d_lock);
4a4d8108 16761+ dput(parent);
1facf9fc 16762+
4a4d8108
AM
16763+ if (dentry)
16764+ di_write_lock_child(dentry);
1308ab2a 16765+
4a4d8108
AM
16766+ return dentry;
16767+}
dece6358 16768+
4a4d8108
AM
16769+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
16770+ aufs_bindex_t bindex, ino_t h_ino)
16771+{
16772+ struct inode *inode;
16773+ ino_t ino;
16774+ int err;
16775+
16776+ inode = NULL;
16777+ err = au_xino_read(sb, bindex, h_ino, &ino);
16778+ if (!err && ino)
16779+ inode = ilookup(sb, ino);
16780+ if (!inode)
16781+ goto out;
16782+
16783+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 16784+ pr_warn("wrong root branch\n");
4a4d8108
AM
16785+ iput(inode);
16786+ inode = NULL;
16787+ goto out;
1308ab2a 16788+ }
16789+
4a4d8108 16790+ ii_write_lock_child(inode);
1308ab2a 16791+
4f0767ce 16792+out:
4a4d8108 16793+ return inode;
dece6358
AM
16794+}
16795+
4a4d8108 16796+static void au_hn_bh(void *_args)
1facf9fc 16797+{
4a4d8108
AM
16798+ struct au_hnotify_args *a = _args;
16799+ struct super_block *sb;
5afbbe0d 16800+ aufs_bindex_t bindex, bbot, bfound;
4a4d8108 16801+ unsigned char xino, try_iput;
1facf9fc 16802+ int err;
1308ab2a 16803+ struct inode *inode;
4a4d8108
AM
16804+ ino_t h_ino;
16805+ struct hn_job_args args;
16806+ struct dentry *dentry;
16807+ struct au_sbinfo *sbinfo;
1facf9fc 16808+
4a4d8108
AM
16809+ AuDebugOn(!_args);
16810+ AuDebugOn(!a->h_dir);
16811+ AuDebugOn(!a->dir);
16812+ AuDebugOn(!a->mask);
16813+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
16814+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
16815+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 16816+
4a4d8108
AM
16817+ inode = NULL;
16818+ dentry = NULL;
16819+ /*
16820+ * do not lock a->dir->i_mutex here
16821+ * because of d_revalidate() may cause a deadlock.
16822+ */
16823+ sb = a->dir->i_sb;
16824+ AuDebugOn(!sb);
16825+ sbinfo = au_sbi(sb);
16826+ AuDebugOn(!sbinfo);
7f207e10 16827+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 16828+
ae9dfd79
AM
16829+ if (au_opt_test(sbinfo->si_mntflags, DIRREN))
16830+ switch (a->mask & FS_EVENTS_POSS_ON_CHILD) {
16831+ case FS_MOVED_FROM:
16832+ case FS_MOVED_TO:
16833+ AuWarn1("DIRREN with UDBA may not work correctly "
16834+ "for the direct rename(2)\n");
16835+ }
16836+
4a4d8108
AM
16837+ ii_read_lock_parent(a->dir);
16838+ bfound = -1;
5afbbe0d
AM
16839+ bbot = au_ibbot(a->dir);
16840+ for (bindex = au_ibtop(a->dir); bindex <= bbot; bindex++)
4a4d8108
AM
16841+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
16842+ bfound = bindex;
16843+ break;
16844+ }
16845+ ii_read_unlock(a->dir);
16846+ if (unlikely(bfound < 0))
16847+ goto out;
1facf9fc 16848+
4a4d8108
AM
16849+ xino = !!au_opt_test(au_mntflags(sb), XINO);
16850+ h_ino = 0;
16851+ if (a->h_child_inode)
16852+ h_ino = a->h_child_inode->i_ino;
1facf9fc 16853+
4a4d8108
AM
16854+ if (a->h_child_nlen
16855+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
16856+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
16857+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
16858+ a->dir);
16859+ try_iput = 0;
5527c038
JR
16860+ if (dentry && d_really_is_positive(dentry))
16861+ inode = d_inode(dentry);
4a4d8108
AM
16862+ if (xino && !inode && h_ino
16863+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
16864+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
16865+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
16866+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
16867+ try_iput = 1;
f0c0a007 16868+ }
1facf9fc 16869+
4a4d8108
AM
16870+ args.flags = a->flags[AuHn_CHILD];
16871+ args.dentry = dentry;
16872+ args.inode = inode;
16873+ args.h_inode = a->h_child_inode;
16874+ args.dir = a->dir;
16875+ args.h_dir = a->h_dir;
16876+ args.h_name = a->h_child_name;
16877+ args.h_nlen = a->h_child_nlen;
16878+ err = hn_job(&args);
16879+ if (dentry) {
027c5e7a 16880+ if (au_di(dentry))
4a4d8108
AM
16881+ di_write_unlock(dentry);
16882+ dput(dentry);
16883+ }
16884+ if (inode && try_iput) {
16885+ ii_write_unlock(inode);
16886+ iput(inode);
16887+ }
1facf9fc 16888+
4a4d8108
AM
16889+ ii_write_lock_parent(a->dir);
16890+ args.flags = a->flags[AuHn_PARENT];
16891+ args.dentry = NULL;
16892+ args.inode = a->dir;
16893+ args.h_inode = a->h_dir;
16894+ args.dir = NULL;
16895+ args.h_dir = NULL;
16896+ args.h_name = NULL;
16897+ args.h_nlen = 0;
16898+ err = hn_job(&args);
16899+ ii_write_unlock(a->dir);
1facf9fc 16900+
4f0767ce 16901+out:
4a4d8108
AM
16902+ iput(a->h_child_inode);
16903+ iput(a->h_dir);
16904+ iput(a->dir);
027c5e7a
AM
16905+ si_write_unlock(sb);
16906+ au_nwt_done(&sbinfo->si_nowait);
ae9dfd79 16907+ kfree(a);
dece6358 16908+}
1facf9fc 16909+
4a4d8108
AM
16910+/* ---------------------------------------------------------------------- */
16911+
16912+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
16913+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 16914+{
4a4d8108 16915+ int err, len;
53392da6 16916+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
16917+ unsigned char isdir, isroot, wh;
16918+ struct inode *dir;
16919+ struct au_hnotify_args *args;
16920+ char *p, *h_child_name;
dece6358 16921+
1308ab2a 16922+ err = 0;
4a4d8108
AM
16923+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
16924+ dir = igrab(hnotify->hn_aufs_inode);
16925+ if (!dir)
16926+ goto out;
1facf9fc 16927+
4a4d8108
AM
16928+ isroot = (dir->i_ino == AUFS_ROOT_INO);
16929+ wh = 0;
16930+ h_child_name = (void *)h_child_qstr->name;
16931+ len = h_child_qstr->len;
16932+ if (h_child_name) {
16933+ if (len > AUFS_WH_PFX_LEN
16934+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
16935+ h_child_name += AUFS_WH_PFX_LEN;
16936+ len -= AUFS_WH_PFX_LEN;
16937+ wh = 1;
16938+ }
1facf9fc 16939+ }
dece6358 16940+
4a4d8108
AM
16941+ isdir = 0;
16942+ if (h_child_inode)
16943+ isdir = !!S_ISDIR(h_child_inode->i_mode);
16944+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
16945+ flags[AuHn_CHILD] = 0;
16946+ if (isdir)
16947+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
16948+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
16949+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
16950+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
16951+ case FS_MOVED_FROM:
16952+ case FS_MOVED_TO:
16953+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
16954+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
16955+ /*FALLTHROUGH*/
16956+ case FS_CREATE:
fb47a38f 16957+ AuDebugOn(!h_child_name);
4a4d8108 16958+ break;
1facf9fc 16959+
4a4d8108
AM
16960+ case FS_DELETE:
16961+ /*
16962+ * aufs never be able to get this child inode.
16963+ * revalidation should be in d_revalidate()
16964+ * by checking i_nlink, i_generation or d_unhashed().
16965+ */
16966+ AuDebugOn(!h_child_name);
16967+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
16968+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
16969+ break;
dece6358 16970+
4a4d8108
AM
16971+ default:
16972+ AuDebugOn(1);
16973+ }
1308ab2a 16974+
4a4d8108
AM
16975+ if (wh)
16976+ h_child_inode = NULL;
1308ab2a 16977+
4a4d8108
AM
16978+ err = -ENOMEM;
16979+ /* iput() and kfree() will be called in au_hnotify() */
4a4d8108 16980+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
16981+ if (unlikely(!args)) {
16982+ AuErr1("no memory\n");
16983+ iput(dir);
16984+ goto out;
16985+ }
16986+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
16987+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
16988+ args->mask = mask;
16989+ args->dir = dir;
16990+ args->h_dir = igrab(h_dir);
16991+ if (h_child_inode)
16992+ h_child_inode = igrab(h_child_inode); /* can be NULL */
16993+ args->h_child_inode = h_child_inode;
16994+ args->h_child_nlen = len;
e8791d4f
AM
16995+ if (len) {
16996+ p = (void *)args;
16997+ p += sizeof(*args);
16998+ memcpy(p, h_child_name, len);
16999+ p[len] = 0;
17000+ }
17001+
17002+ /* NFS fires the event for silly-renamed one from kworker */
17003+ f = 0;
17004+ if (!dir->i_nlink
17005+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
17006+ f = AuWkq_NEST;
17007+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
17008+ if (unlikely(err)) {
17009+ pr_err("wkq %d\n", err);
17010+ iput(args->h_child_inode);
17011+ iput(args->h_dir);
17012+ iput(args->dir);
17013+ kfree(args);
1308ab2a 17014+ }
1facf9fc 17015+
4f0767ce 17016+out:
027c5e7a
AM
17017+ return err;
17018+}
17019+
e8791d4f
AM
17020+/* ---------------------------------------------------------------------- */
17021+
17022+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
b95c5147 17023+{
e8791d4f 17024+ int err;
b95c5147 17025+
e8791d4f 17026+ AuDebugOn(!(udba & AuOptMask_UDBA));
b95c5147 17027+
e8791d4f
AM
17028+ err = 0;
17029+ if (au_hnotify_op.reset_br)
17030+ err = au_hnotify_op.reset_br(udba, br, perm);
b95c5147 17031+
e8791d4f 17032+ return err;
b95c5147
AM
17033+}
17034+
e8791d4f 17035+int au_hnotify_init_br(struct au_branch *br, int perm)
027c5e7a 17036+{
e8791d4f 17037+ int err;
027c5e7a 17038+
e8791d4f
AM
17039+ err = 0;
17040+ if (au_hnotify_op.init_br)
17041+ err = au_hnotify_op.init_br(br, perm);
027c5e7a 17042+
4a4d8108
AM
17043+ return err;
17044+}
1facf9fc 17045+
e8791d4f 17046+void au_hnotify_fin_br(struct au_branch *br)
4a4d8108 17047+{
e8791d4f
AM
17048+ if (au_hnotify_op.fin_br)
17049+ au_hnotify_op.fin_br(br);
17050+}
4a4d8108 17051+
e8791d4f
AM
17052+static void au_hn_destroy_cache(void)
17053+{
17054+ kmem_cache_destroy(au_cache[AuCache_HNOTIFY]);
17055+ au_cache[AuCache_HNOTIFY] = NULL;
dece6358
AM
17056+}
17057+
e8791d4f 17058+int __init au_hnotify_init(void)
dece6358 17059+{
4a4d8108 17060+ int err;
dece6358 17061+
e8791d4f
AM
17062+ err = -ENOMEM;
17063+ au_cache[AuCache_HNOTIFY] = AuCache(au_hnotify);
17064+ if (au_cache[AuCache_HNOTIFY]) {
17065+ err = 0;
17066+ if (au_hnotify_op.init)
17067+ err = au_hnotify_op.init();
4a4d8108 17068+ if (unlikely(err))
e8791d4f 17069+ au_hn_destroy_cache();
4a4d8108 17070+ }
e8791d4f
AM
17071+ AuTraceErr(err);
17072+ return err;
17073+}
dece6358 17074+
e8791d4f
AM
17075+void au_hnotify_fin(void)
17076+{
17077+ if (au_hnotify_op.fin)
17078+ au_hnotify_op.fin();
dece6358 17079+
e8791d4f
AM
17080+ /* cf. au_cache_fin() */
17081+ if (au_cache[AuCache_HNOTIFY])
17082+ au_hn_destroy_cache();
4a4d8108 17083+}
e8791d4f
AM
17084diff -urNp -x '*.orig' linux-4.9/fs/aufs/i_op.c linux-4.9/fs/aufs/i_op.c
17085--- linux-4.9/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
17086+++ linux-4.9/fs/aufs/i_op.c 2021-02-24 16:15:09.531573855 +0100
17087@@ -0,0 +1,1459 @@
17088+/*
17089+ * Copyright (C) 2005-2018 Junjiro R. Okajima
17090+ *
17091+ * This program, aufs is free software; you can redistribute it and/or modify
17092+ * it under the terms of the GNU General Public License as published by
17093+ * the Free Software Foundation; either version 2 of the License, or
17094+ * (at your option) any later version.
17095+ *
17096+ * This program is distributed in the hope that it will be useful,
17097+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17098+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17099+ * GNU General Public License for more details.
17100+ *
17101+ * You should have received a copy of the GNU General Public License
17102+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17103+ */
dece6358 17104+
027c5e7a 17105+/*
e8791d4f 17106+ * inode operations (except add/del/rename)
027c5e7a 17107+ */
e8791d4f
AM
17108+
17109+#include <linux/device_cgroup.h>
17110+#include <linux/fs_stack.h>
17111+#include <linux/namei.h>
17112+#include <linux/security.h>
17113+#include "aufs.h"
17114+
17115+static int h_permission(struct inode *h_inode, int mask,
17116+ struct path *h_path, int brperm)
4a4d8108
AM
17117+{
17118+ int err;
e8791d4f 17119+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
dece6358 17120+
e8791d4f
AM
17121+ err = -EPERM;
17122+ if (write_mask && IS_IMMUTABLE(h_inode))
4a4d8108
AM
17123+ goto out;
17124+
e8791d4f
AM
17125+ err = -EACCES;
17126+ if (((mask & MAY_EXEC)
17127+ && S_ISREG(h_inode->i_mode)
17128+ && (path_noexec(h_path)
17129+ || !(h_inode->i_mode & S_IXUGO))))
17130+ goto out;
1facf9fc 17131+
e8791d4f
AM
17132+ /*
17133+ * - skip the lower fs test in the case of write to ro branch.
17134+ * - nfs dir permission write check is optimized, but a policy for
17135+ * link/rename requires a real check.
17136+ * - nfs always sets MS_POSIXACL regardless its mount option 'noacl.'
17137+ * in this case, generic_permission() returns -EOPNOTSUPP.
17138+ */
17139+ if ((write_mask && !au_br_writable(brperm))
17140+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
17141+ && write_mask && !(mask & MAY_READ))
17142+ || !h_inode->i_op->permission) {
17143+ /* AuLabel(generic_permission); */
17144+ /* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
17145+ err = generic_permission(h_inode, mask);
17146+ if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
17147+ err = h_inode->i_op->permission(h_inode, mask);
17148+ AuTraceErr(err);
17149+ } else {
17150+ /* AuLabel(h_inode->permission); */
17151+ err = h_inode->i_op->permission(h_inode, mask);
17152+ AuTraceErr(err);
4a4d8108 17153+ }
ae9dfd79 17154+
e8791d4f
AM
17155+ if (!err)
17156+ err = devcgroup_inode_permission(h_inode, mask);
17157+ if (!err)
17158+ err = security_inode_permission(h_inode, mask);
1308ab2a 17159+
e8791d4f
AM
17160+#if 0
17161+ if (!err) {
17162+ /* todo: do we need to call ima_path_check()? */
17163+ struct path h_path = {
17164+ .dentry =
17165+ .mnt = h_mnt
17166+ };
17167+ err = ima_path_check(&h_path,
17168+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
17169+ IMA_COUNT_LEAVE);
1308ab2a 17170+ }
e8791d4f 17171+#endif
1facf9fc 17172+
4f0767ce 17173+out:
1facf9fc 17174+ return err;
17175+}
17176+
e8791d4f 17177+static int aufs_permission(struct inode *inode, int mask)
1facf9fc 17178+{
e8791d4f
AM
17179+ int err;
17180+ aufs_bindex_t bindex, bbot;
17181+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
17182+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
17183+ struct inode *h_inode;
4a4d8108 17184+ struct super_block *sb;
e8791d4f 17185+ struct au_branch *br;
b752ccd1 17186+
e8791d4f
AM
17187+ /* todo: support rcu-walk? */
17188+ if (mask & MAY_NOT_BLOCK)
17189+ return -ECHILD;
b752ccd1 17190+
e8791d4f
AM
17191+ sb = inode->i_sb;
17192+ si_read_lock(sb, AuLock_FLUSH);
17193+ ii_read_lock_child(inode);
17194+#if 0
17195+ err = au_iigen_test(inode, au_sigen(sb));
4a4d8108 17196+ if (unlikely(err))
e8791d4f
AM
17197+ goto out;
17198+#endif
b752ccd1 17199+
e8791d4f
AM
17200+ if (!isdir
17201+ || write_mask
17202+ || au_opt_test(au_mntflags(sb), DIRPERM1)) {
17203+ err = au_busy_or_stale();
17204+ h_inode = au_h_iptr(inode, au_ibtop(inode));
17205+ if (unlikely(!h_inode
17206+ || (h_inode->i_mode & S_IFMT)
17207+ != (inode->i_mode & S_IFMT)))
17208+ goto out;
17209+
17210+ err = 0;
17211+ bindex = au_ibtop(inode);
17212+ br = au_sbr(sb, bindex);
17213+ err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
17214+ if (write_mask
17215+ && !err
17216+ && !special_file(h_inode->i_mode)) {
17217+ /* test whether the upper writable branch exists */
17218+ err = -EROFS;
17219+ for (; bindex >= 0; bindex--)
17220+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
17221+ err = 0;
17222+ break;
17223+ }
dece6358 17224+ }
e8791d4f 17225+ goto out;
dece6358 17226+ }
1facf9fc 17227+
e8791d4f
AM
17228+ /* non-write to dir */
17229+ err = 0;
17230+ bbot = au_ibbot(inode);
17231+ for (bindex = au_ibtop(inode); !err && bindex <= bbot; bindex++) {
17232+ h_inode = au_h_iptr(inode, bindex);
17233+ if (h_inode) {
17234+ err = au_busy_or_stale();
17235+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
17236+ break;
1308ab2a 17237+
e8791d4f
AM
17238+ br = au_sbr(sb, bindex);
17239+ err = h_permission(h_inode, mask, &br->br_path,
17240+ br->br_perm);
ae9dfd79 17241+ }
4a4d8108
AM
17242+ }
17243+
4f0767ce 17244+out:
e8791d4f
AM
17245+ ii_read_unlock(inode);
17246+ si_read_unlock(sb);
17247+ return err;
1facf9fc 17248+}
17249+
4a4d8108 17250+/* ---------------------------------------------------------------------- */
1facf9fc 17251+
e8791d4f
AM
17252+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
17253+ unsigned int flags)
4a4d8108 17254+{
e8791d4f
AM
17255+ struct dentry *ret, *parent;
17256+ struct inode *inode;
17257+ struct super_block *sb;
17258+ int err, npositive;
1facf9fc 17259+
e8791d4f 17260+ IMustLock(dir);
1facf9fc 17261+
e8791d4f
AM
17262+ /* todo: support rcu-walk? */
17263+ ret = ERR_PTR(-ECHILD);
17264+ if (flags & LOOKUP_RCU)
17265+ goto out;
17266+
17267+ ret = ERR_PTR(-ENAMETOOLONG);
17268+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17269+ goto out;
17270+
17271+ sb = dir->i_sb;
17272+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
17273+ ret = ERR_PTR(err);
17274+ if (unlikely(err))
17275+ goto out;
17276+
17277+ err = au_di_init(dentry);
17278+ ret = ERR_PTR(err);
17279+ if (unlikely(err))
17280+ goto out_si;
17281+
17282+ inode = NULL;
17283+ npositive = 0; /* suppress a warning */
17284+ parent = dentry->d_parent; /* dir inode is locked */
17285+ di_read_lock_parent(parent, AuLock_IR);
17286+ err = au_alive_dir(parent);
17287+ if (!err)
17288+ err = au_digen_test(parent, au_sigen(sb));
17289+ if (!err) {
17290+ /* regardless LOOKUP_CREATE, always ALLOW_NEG */
17291+ npositive = au_lkup_dentry(dentry, au_dbtop(parent),
17292+ AuLkup_ALLOW_NEG);
17293+ err = npositive;
1facf9fc 17294+ }
e8791d4f
AM
17295+ di_read_unlock(parent, AuLock_IR);
17296+ ret = ERR_PTR(err);
17297+ if (unlikely(err < 0))
17298+ goto out_unlock;
1facf9fc 17299+
e8791d4f
AM
17300+ if (npositive) {
17301+ inode = au_new_inode(dentry, /*must_new*/0);
17302+ if (IS_ERR(inode)) {
17303+ ret = (void *)inode;
17304+ inode = NULL;
17305+ goto out_unlock;
17306+ }
17307+ }
dece6358 17308+
e8791d4f
AM
17309+ if (inode)
17310+ atomic_inc(&inode->i_count);
17311+ ret = d_splice_alias(inode, dentry);
17312+#if 0
17313+ if (unlikely(d_need_lookup(dentry))) {
17314+ spin_lock(&dentry->d_lock);
17315+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
17316+ spin_unlock(&dentry->d_lock);
17317+ } else
17318+#endif
17319+ if (inode) {
17320+ if (!IS_ERR(ret)) {
17321+ iput(inode);
17322+ if (ret && ret != dentry)
17323+ ii_write_unlock(inode);
17324+ } else {
17325+ ii_write_unlock(inode);
17326+ iput(inode);
17327+ inode = NULL;
17328+ }
17329+ }
1facf9fc 17330+
e8791d4f
AM
17331+out_unlock:
17332+ di_write_unlock(dentry);
17333+out_si:
17334+ si_read_unlock(sb);
17335+out:
17336+ return ret;
1facf9fc 17337+}
dece6358 17338+
e8791d4f 17339+/* ---------------------------------------------------------------------- */
dece6358 17340+
e8791d4f
AM
17341+struct aopen_node {
17342+ struct hlist_bl_node hblist;
17343+ struct file *file, *h_file;
17344+};
1308ab2a 17345+
e8791d4f
AM
17346+static int au_do_aopen(struct inode *inode, struct file *file)
17347+{
17348+ struct hlist_bl_head *aopen;
17349+ struct hlist_bl_node *pos;
17350+ struct aopen_node *node;
17351+ struct au_do_open_args args = {
17352+ .aopen = 1,
17353+ .open = au_do_open_nondir
17354+ };
1308ab2a 17355+
e8791d4f
AM
17356+ aopen = &au_sbi(inode->i_sb)->si_aopen;
17357+ hlist_bl_lock(aopen);
17358+ hlist_bl_for_each_entry(node, pos, aopen, hblist)
17359+ if (node->file == file) {
17360+ args.h_file = node->h_file;
17361+ break;
17362+ }
17363+ hlist_bl_unlock(aopen);
17364+ /* AuDebugOn(!args.h_file); */
1facf9fc 17365+
e8791d4f
AM
17366+ return au_do_open(file, &args);
17367+}
1facf9fc 17368+
e8791d4f
AM
17369+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
17370+ struct file *file, unsigned int open_flag,
17371+ umode_t create_mode, int *opened)
17372+{
17373+ int err, unlocked, h_opened = *opened;
17374+ unsigned int lkup_flags;
17375+ struct dentry *parent, *d;
17376+ struct hlist_bl_head *aopen;
17377+ struct vfsub_aopen_args args = {
17378+ .open_flag = open_flag,
17379+ .create_mode = create_mode,
17380+ .opened = &h_opened
17381+ };
17382+ struct aopen_node aopen_node = {
17383+ .file = file
17384+ };
dece6358 17385+
e8791d4f
AM
17386+ IMustLock(dir);
17387+ AuDbg("open_flag 0%o\n", open_flag);
17388+ AuDbgDentry(dentry);
dece6358 17389+
e8791d4f
AM
17390+ err = 0;
17391+ if (!au_di(dentry)) {
17392+ lkup_flags = LOOKUP_OPEN;
17393+ if (open_flag & O_CREAT)
17394+ lkup_flags |= LOOKUP_CREATE;
17395+ d = aufs_lookup(dir, dentry, lkup_flags);
17396+ if (IS_ERR(d)) {
17397+ err = PTR_ERR(d);
17398+ AuTraceErr(err);
17399+ goto out;
17400+ } else if (d) {
17401+ /*
17402+ * obsoleted dentry found.
17403+ * another error will be returned later.
17404+ */
17405+ d_drop(d);
17406+ AuDbgDentry(d);
17407+ dput(d);
17408+ }
17409+ AuDbgDentry(dentry);
17410+ }
537831f9 17411+
e8791d4f
AM
17412+ if (d_is_positive(dentry)
17413+ || d_unhashed(dentry)
17414+ || d_unlinked(dentry)
17415+ || !(open_flag & O_CREAT))
17416+ goto out_no_open;
537831f9 17417+
e8791d4f
AM
17418+ unlocked = 0;
17419+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
17420+ if (unlikely(err))
17421+ goto out;
1facf9fc 17422+
e8791d4f
AM
17423+ parent = dentry->d_parent; /* dir is locked */
17424+ di_write_lock_parent(parent);
17425+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
17426+ if (unlikely(err))
17427+ goto out_unlock;
1facf9fc 17428+
e8791d4f
AM
17429+ AuDbgDentry(dentry);
17430+ if (d_is_positive(dentry))
17431+ goto out_unlock;
1308ab2a 17432+
e8791d4f
AM
17433+ args.file = get_empty_filp();
17434+ err = PTR_ERR(args.file);
17435+ if (IS_ERR(args.file))
17436+ goto out_unlock;
4a4d8108 17437+
e8791d4f
AM
17438+ args.file->f_flags = file->f_flags;
17439+ err = au_aopen_or_create(dir, dentry, &args);
17440+ AuTraceErr(err);
17441+ AuDbgFile(args.file);
17442+ if (unlikely(err < 0)) {
17443+ if (h_opened & FILE_OPENED)
17444+ fput(args.file);
17445+ else
17446+ put_filp(args.file);
17447+ goto out_unlock;
17448+ }
17449+ di_write_unlock(parent);
17450+ di_write_unlock(dentry);
17451+ unlocked = 1;
4a4d8108 17452+
e8791d4f
AM
17453+ /* some filesystems don't set FILE_CREATED while succeeded? */
17454+ *opened |= FILE_CREATED;
17455+ if (h_opened & FILE_OPENED)
17456+ aopen_node.h_file = args.file;
17457+ else {
17458+ put_filp(args.file);
17459+ args.file = NULL;
17460+ }
17461+ aopen = &au_sbi(dir->i_sb)->si_aopen;
17462+ au_hbl_add(&aopen_node.hblist, aopen);
17463+ err = finish_open(file, dentry, au_do_aopen, opened);
17464+ au_hbl_del(&aopen_node.hblist, aopen);
17465+ AuTraceErr(err);
17466+ AuDbgFile(file);
17467+ if (aopen_node.h_file)
17468+ fput(aopen_node.h_file);
86dc4139 17469+
e8791d4f
AM
17470+out_unlock:
17471+ if (unlocked)
17472+ si_read_unlock(dentry->d_sb);
17473+ else {
17474+ di_write_unlock(parent);
17475+ aufs_read_unlock(dentry, AuLock_DW);
17476+ }
17477+ AuDbgDentry(dentry);
17478+ if (unlikely(err < 0))
17479+ goto out;
17480+out_no_open:
17481+ if (err >= 0 && !(*opened & FILE_CREATED)) {
17482+ AuLabel(out_no_open);
17483+ dget(dentry);
17484+ err = finish_no_open(file, dentry);
17485+ }
17486+out:
17487+ AuDbg("%pd%s%s\n", dentry,
17488+ (*opened & FILE_CREATED) ? " created" : "",
17489+ (*opened & FILE_OPENED) ? " opened" : "");
17490+ AuTraceErr(err);
17491+ return err;
17492+}
1facf9fc 17493+
86dc4139 17494+
1308ab2a 17495+/* ---------------------------------------------------------------------- */
17496+
e8791d4f
AM
17497+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
17498+ const unsigned char add_entry, aufs_bindex_t bcpup,
17499+ aufs_bindex_t btop)
1facf9fc 17500+{
e8791d4f
AM
17501+ int err;
17502+ struct dentry *h_parent;
17503+ struct inode *h_dir;
1facf9fc 17504+
e8791d4f
AM
17505+ if (add_entry)
17506+ IMustLock(d_inode(parent));
17507+ else
17508+ di_write_lock_parent(parent);
1facf9fc 17509+
e8791d4f
AM
17510+ err = 0;
17511+ if (!au_h_dptr(parent, bcpup)) {
17512+ if (btop > bcpup)
17513+ err = au_cpup_dirs(dentry, bcpup);
17514+ else if (btop < bcpup)
17515+ err = au_cpdown_dirs(dentry, bcpup);
17516+ else
17517+ BUG();
17518+ }
17519+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
17520+ h_parent = au_h_dptr(parent, bcpup);
17521+ h_dir = d_inode(h_parent);
17522+ vfsub_inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
17523+ err = au_lkup_neg(dentry, bcpup, /*wh*/0);
17524+ /* todo: no unlock here */
17525+ inode_unlock_shared(h_dir);
1facf9fc 17526+
e8791d4f
AM
17527+ AuDbg("bcpup %d\n", bcpup);
17528+ if (!err) {
17529+ if (d_really_is_negative(dentry))
17530+ au_set_h_dptr(dentry, btop, NULL);
17531+ au_update_dbrange(dentry, /*do_put_zero*/0);
17532+ }
17533+ }
1facf9fc 17534+
e8791d4f
AM
17535+ if (!add_entry)
17536+ di_write_unlock(parent);
17537+ if (!err)
17538+ err = bcpup; /* success */
1308ab2a 17539+
e8791d4f
AM
17540+ AuTraceErr(err);
17541+ return err;
17542+}
1facf9fc 17543+
e8791d4f
AM
17544+/*
17545+ * decide the branch and the parent dir where we will create a new entry.
17546+ * returns new bindex or an error.
17547+ * copyup the parent dir if needed.
17548+ */
4a4d8108 17549+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
e8791d4f
AM
17550+ struct au_wr_dir_args *args)
17551+{
17552+ int err;
17553+ unsigned int flags;
17554+ aufs_bindex_t bcpup, btop, src_btop;
17555+ const unsigned char add_entry
17556+ = au_ftest_wrdir(args->flags, ADD_ENTRY)
17557+ | au_ftest_wrdir(args->flags, TMPFILE);
17558+ struct super_block *sb;
17559+ struct dentry *parent;
17560+ struct au_sbinfo *sbinfo;
c1595e42 17561+
e8791d4f
AM
17562+ sb = dentry->d_sb;
17563+ sbinfo = au_sbi(sb);
17564+ parent = dget_parent(dentry);
17565+ btop = au_dbtop(dentry);
17566+ bcpup = btop;
17567+ if (args->force_btgt < 0) {
17568+ if (src_dentry) {
17569+ src_btop = au_dbtop(src_dentry);
17570+ if (src_btop < btop)
17571+ bcpup = src_btop;
17572+ } else if (add_entry) {
17573+ flags = 0;
17574+ if (au_ftest_wrdir(args->flags, ISDIR))
17575+ au_fset_wbr(flags, DIR);
17576+ err = AuWbrCreate(sbinfo, dentry, flags);
17577+ bcpup = err;
17578+ }
c1595e42 17579+
e8791d4f
AM
17580+ if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
17581+ if (add_entry)
17582+ err = AuWbrCopyup(sbinfo, dentry);
17583+ else {
17584+ if (!IS_ROOT(dentry)) {
17585+ di_read_lock_parent(parent, !AuLock_IR);
17586+ err = AuWbrCopyup(sbinfo, dentry);
17587+ di_read_unlock(parent, !AuLock_IR);
17588+ } else
17589+ err = AuWbrCopyup(sbinfo, dentry);
17590+ }
17591+ bcpup = err;
17592+ if (unlikely(err < 0))
17593+ goto out;
17594+ }
17595+ } else {
17596+ bcpup = args->force_btgt;
17597+ AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
17598+ }
c1595e42 17599+
e8791d4f
AM
17600+ AuDbg("btop %d, bcpup %d\n", btop, bcpup);
17601+ err = bcpup;
17602+ if (bcpup == btop)
17603+ goto out; /* success */
c1595e42 17604+
e8791d4f
AM
17605+ /* copyup the new parent into the branch we process */
17606+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, btop);
17607+ if (err >= 0) {
17608+ if (d_really_is_negative(dentry)) {
17609+ au_set_h_dptr(dentry, btop, NULL);
17610+ au_set_dbtop(dentry, bcpup);
17611+ au_set_dbbot(dentry, bcpup);
17612+ }
17613+ AuDebugOn(add_entry
17614+ && !au_ftest_wrdir(args->flags, TMPFILE)
17615+ && !au_h_dptr(dentry, bcpup));
17616+ }
1facf9fc 17617+
e8791d4f
AM
17618+out:
17619+ dput(parent);
17620+ return err;
17621+}
1facf9fc 17622+
e8791d4f 17623+/* ---------------------------------------------------------------------- */
1308ab2a 17624+
e8791d4f
AM
17625+void au_pin_hdir_unlock(struct au_pin *p)
17626+{
17627+ if (p->hdir)
17628+ au_hn_inode_unlock(p->hdir);
17629+}
1facf9fc 17630+
e8791d4f
AM
17631+int au_pin_hdir_lock(struct au_pin *p)
17632+{
17633+ int err;
1308ab2a 17634+
e8791d4f
AM
17635+ err = 0;
17636+ if (!p->hdir)
17637+ goto out;
1facf9fc 17638+
e8791d4f
AM
17639+ /* even if an error happens later, keep this lock */
17640+ au_hn_inode_lock_nested(p->hdir, p->lsc_hi);
1facf9fc 17641+
e8791d4f
AM
17642+ err = -EBUSY;
17643+ if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
17644+ goto out;
1facf9fc 17645+
e8791d4f
AM
17646+ err = 0;
17647+ if (p->h_dentry)
17648+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
17649+ p->h_parent, p->br);
1facf9fc 17650+
e8791d4f
AM
17651+out:
17652+ return err;
17653+}
1308ab2a 17654+
e8791d4f
AM
17655+int au_pin_hdir_relock(struct au_pin *p)
17656+{
17657+ int err, i;
17658+ struct inode *h_i;
17659+ struct dentry *h_d[] = {
17660+ p->h_dentry,
17661+ p->h_parent
17662+ };
1facf9fc 17663+
e8791d4f
AM
17664+ err = au_pin_hdir_lock(p);
17665+ if (unlikely(err))
17666+ goto out;
c1595e42 17667+
e8791d4f
AM
17668+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
17669+ if (!h_d[i])
17670+ continue;
17671+ if (d_is_positive(h_d[i])) {
17672+ h_i = d_inode(h_d[i]);
17673+ err = !h_i->i_nlink;
17674+ }
17675+ }
c1595e42 17676+
e8791d4f
AM
17677+out:
17678+ return err;
17679+}
c1595e42 17680+
e8791d4f
AM
17681+static void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
17682+{
17683+#if !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) && defined(CONFIG_RWSEM_SPIN_ON_OWNER)
17684+ p->hdir->hi_inode->i_rwsem.owner = task;
c1595e42 17685+#endif
e8791d4f 17686+}
c1595e42 17687+
e8791d4f
AM
17688+void au_pin_hdir_acquire_nest(struct au_pin *p)
17689+{
17690+ if (p->hdir) {
17691+ rwsem_acquire_nest(&p->hdir->hi_inode->i_rwsem.dep_map,
17692+ p->lsc_hi, 0, NULL, _RET_IP_);
17693+ au_pin_hdir_set_owner(p, current);
17694+ }
17695+}
1308ab2a 17696+
e8791d4f
AM
17697+void au_pin_hdir_release(struct au_pin *p)
17698+{
17699+ if (p->hdir) {
17700+ au_pin_hdir_set_owner(p, p->task);
17701+ rwsem_release(&p->hdir->hi_inode->i_rwsem.dep_map, 1, _RET_IP_);
17702+ }
17703+}
1308ab2a 17704+
e8791d4f
AM
17705+struct dentry *au_pinned_h_parent(struct au_pin *pin)
17706+{
17707+ if (pin && pin->parent)
17708+ return au_h_dptr(pin->parent, pin->bindex);
17709+ return NULL;
4a4d8108
AM
17710+}
17711+
e8791d4f
AM
17712+void au_unpin(struct au_pin *p)
17713+{
17714+ if (p->hdir)
17715+ au_pin_hdir_unlock(p);
17716+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
17717+ vfsub_mnt_drop_write(p->h_mnt);
17718+ if (!p->hdir)
17719+ return;
17720+
17721+ if (!au_ftest_pin(p->flags, DI_LOCKED))
17722+ di_read_unlock(p->parent, AuLock_IR);
17723+ iput(p->hdir->hi_inode);
17724+ dput(p->parent);
17725+ p->parent = NULL;
17726+ p->hdir = NULL;
17727+ p->h_mnt = NULL;
17728+ /* do not clear p->task */
4a4d8108
AM
17729+}
17730+
e8791d4f
AM
17731+int au_do_pin(struct au_pin *p)
17732+{
17733+ int err;
17734+ struct super_block *sb;
17735+ struct inode *h_dir;
4a4d8108 17736+
e8791d4f
AM
17737+ err = 0;
17738+ sb = p->dentry->d_sb;
17739+ p->br = au_sbr(sb, p->bindex);
17740+ if (IS_ROOT(p->dentry)) {
17741+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
17742+ p->h_mnt = au_br_mnt(p->br);
17743+ err = vfsub_mnt_want_write(p->h_mnt);
17744+ if (unlikely(err)) {
17745+ au_fclr_pin(p->flags, MNT_WRITE);
17746+ goto out_err;
17747+ }
17748+ }
17749+ goto out;
17750+ }
4a4d8108 17751+
e8791d4f
AM
17752+ p->h_dentry = NULL;
17753+ if (p->bindex <= au_dbbot(p->dentry))
17754+ p->h_dentry = au_h_dptr(p->dentry, p->bindex);
1facf9fc 17755+
e8791d4f
AM
17756+ p->parent = dget_parent(p->dentry);
17757+ if (!au_ftest_pin(p->flags, DI_LOCKED))
17758+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
1facf9fc 17759+
e8791d4f
AM
17760+ h_dir = NULL;
17761+ p->h_parent = au_h_dptr(p->parent, p->bindex);
17762+ p->hdir = au_hi(d_inode(p->parent), p->bindex);
17763+ if (p->hdir)
17764+ h_dir = p->hdir->hi_inode;
1facf9fc 17765+
e8791d4f
AM
17766+ /*
17767+ * udba case, or
17768+ * if DI_LOCKED is not set, then p->parent may be different
17769+ * and h_parent can be NULL.
17770+ */
17771+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
17772+ err = -EBUSY;
17773+ if (!au_ftest_pin(p->flags, DI_LOCKED))
17774+ di_read_unlock(p->parent, AuLock_IR);
17775+ dput(p->parent);
17776+ p->parent = NULL;
17777+ goto out_err;
17778+ }
1308ab2a 17779+
e8791d4f
AM
17780+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
17781+ p->h_mnt = au_br_mnt(p->br);
17782+ err = vfsub_mnt_want_write(p->h_mnt);
17783+ if (unlikely(err)) {
17784+ au_fclr_pin(p->flags, MNT_WRITE);
17785+ if (!au_ftest_pin(p->flags, DI_LOCKED))
17786+ di_read_unlock(p->parent, AuLock_IR);
17787+ dput(p->parent);
17788+ p->parent = NULL;
17789+ goto out_err;
17790+ }
17791+ }
17792+
17793+ au_igrab(h_dir);
17794+ err = au_pin_hdir_lock(p);
17795+ if (!err)
17796+ goto out; /* success */
17797+
17798+ au_unpin(p);
17799+
17800+out_err:
17801+ pr_err("err %d\n", err);
17802+ err = au_busy_or_stale();
17803+out:
17804+ return err;
027c5e7a
AM
17805+}
17806+
e8791d4f
AM
17807+void au_pin_init(struct au_pin *p, struct dentry *dentry,
17808+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
17809+ unsigned int udba, unsigned char flags)
4a4d8108 17810+{
e8791d4f
AM
17811+ p->dentry = dentry;
17812+ p->udba = udba;
17813+ p->lsc_di = lsc_di;
17814+ p->lsc_hi = lsc_hi;
17815+ p->flags = flags;
17816+ p->bindex = bindex;
537831f9 17817+
e8791d4f
AM
17818+ p->parent = NULL;
17819+ p->hdir = NULL;
17820+ p->h_mnt = NULL;
537831f9 17821+
e8791d4f
AM
17822+ p->h_dentry = NULL;
17823+ p->h_parent = NULL;
17824+ p->br = NULL;
17825+ p->task = current;
4a4d8108 17826+}
1308ab2a 17827+
e8791d4f
AM
17828+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
17829+ unsigned int udba, unsigned char flags)
4a4d8108 17830+{
e8791d4f
AM
17831+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
17832+ udba, flags);
17833+ return au_do_pin(pin);
4a4d8108 17834+}
1308ab2a 17835+
e8791d4f
AM
17836+/* ---------------------------------------------------------------------- */
17837+
17838+/*
17839+ * ->setattr() and ->getattr() are called in various cases.
17840+ * chmod, stat: dentry is revalidated.
17841+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
17842+ * unhashed.
17843+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
17844+ */
17845+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
17846+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
4a4d8108 17847+{
e8791d4f
AM
17848+ int err;
17849+ struct dentry *parent;
537831f9 17850+
e8791d4f
AM
17851+ err = 0;
17852+ if (au_digen_test(dentry, sigen)) {
17853+ parent = dget_parent(dentry);
17854+ di_read_lock_parent(parent, AuLock_IR);
17855+ err = au_refresh_dentry(dentry, parent);
17856+ di_read_unlock(parent, AuLock_IR);
17857+ dput(parent);
17858+ }
17859+
17860+ AuTraceErr(err);
17861+ return err;
027c5e7a
AM
17862+}
17863+
e8791d4f
AM
17864+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
17865+ struct au_icpup_args *a)
027c5e7a
AM
17866+{
17867+ int err;
e8791d4f
AM
17868+ loff_t sz;
17869+ aufs_bindex_t btop, ibtop;
17870+ struct dentry *hi_wh, *parent;
17871+ struct inode *inode;
17872+ struct au_wr_dir_args wr_dir_args = {
17873+ .force_btgt = -1,
17874+ .flags = 0
17875+ };
17876+
17877+ if (d_is_dir(dentry))
17878+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
17879+ /* plink or hi_wh() case */
17880+ btop = au_dbtop(dentry);
17881+ inode = d_inode(dentry);
17882+ ibtop = au_ibtop(inode);
17883+ if (btop != ibtop && !au_test_ro(inode->i_sb, ibtop, inode))
17884+ wr_dir_args.force_btgt = ibtop;
17885+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
17886+ if (unlikely(err < 0))
17887+ goto out;
17888+ a->btgt = err;
17889+ if (err != btop)
17890+ au_fset_icpup(a->flags, DID_CPUP);
17891+
17892+ err = 0;
17893+ a->pin_flags = AuPin_MNT_WRITE;
17894+ parent = NULL;
17895+ if (!IS_ROOT(dentry)) {
17896+ au_fset_pin(a->pin_flags, DI_LOCKED);
17897+ parent = dget_parent(dentry);
17898+ di_write_lock_parent(parent);
17899+ }
17900+
17901+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
17902+ if (unlikely(err))
17903+ goto out_parent;
17904+
17905+ sz = -1;
17906+ a->h_path.dentry = au_h_dptr(dentry, btop);
17907+ a->h_inode = d_inode(a->h_path.dentry);
17908+ if (ia && (ia->ia_valid & ATTR_SIZE)) {
17909+ vfsub_inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
17910+ if (ia->ia_size < i_size_read(a->h_inode))
17911+ sz = ia->ia_size;
17912+ inode_unlock_shared(a->h_inode);
17913+ }
17914+
17915+ hi_wh = NULL;
17916+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
17917+ hi_wh = au_hi_wh(inode, a->btgt);
17918+ if (!hi_wh) {
17919+ struct au_cp_generic cpg = {
17920+ .dentry = dentry,
17921+ .bdst = a->btgt,
17922+ .bsrc = -1,
17923+ .len = sz,
17924+ .pin = &a->pin
17925+ };
17926+ err = au_sio_cpup_wh(&cpg, /*file*/NULL);
17927+ if (unlikely(err))
17928+ goto out_unlock;
17929+ hi_wh = au_hi_wh(inode, a->btgt);
17930+ /* todo: revalidate hi_wh? */
17931+ }
17932+ }
17933+
17934+ if (parent) {
17935+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
17936+ di_downgrade_lock(parent, AuLock_IR);
17937+ dput(parent);
17938+ parent = NULL;
17939+ }
17940+ if (!au_ftest_icpup(a->flags, DID_CPUP))
17941+ goto out; /* success */
027c5e7a 17942+
e8791d4f
AM
17943+ if (!d_unhashed(dentry)) {
17944+ struct au_cp_generic cpg = {
17945+ .dentry = dentry,
17946+ .bdst = a->btgt,
17947+ .bsrc = btop,
17948+ .len = sz,
17949+ .pin = &a->pin,
17950+ .flags = AuCpup_DTIME | AuCpup_HOPEN
17951+ };
17952+ err = au_sio_cpup_simple(&cpg);
17953+ if (!err)
17954+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
17955+ } else if (!hi_wh)
17956+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
17957+ else
17958+ a->h_path.dentry = hi_wh; /* do not dget here */
027c5e7a 17959+
e8791d4f
AM
17960+out_unlock:
17961+ a->h_inode = d_inode(a->h_path.dentry);
17962+ if (!err)
17963+ goto out; /* success */
17964+ au_unpin(&a->pin);
17965+out_parent:
17966+ if (parent) {
17967+ di_write_unlock(parent);
17968+ dput(parent);
17969+ }
17970+out:
17971+ if (!err)
17972+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
027c5e7a 17973+ return err;
4a4d8108 17974+}
1308ab2a 17975+
e8791d4f 17976+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
5afbbe0d 17977+{
e8791d4f
AM
17978+ int err;
17979+ struct inode *inode, *delegated;
17980+ struct super_block *sb;
17981+ struct file *file;
17982+ struct au_icpup_args *a;
5afbbe0d 17983+
e8791d4f
AM
17984+ inode = d_inode(dentry);
17985+ IMustLock(inode);
5afbbe0d 17986+
e8791d4f
AM
17987+ err = setattr_prepare(dentry, ia);
17988+ if (unlikely(err))
17989+ goto out;
1308ab2a 17990+
e8791d4f
AM
17991+ err = -ENOMEM;
17992+ a = kzalloc(sizeof(*a), GFP_NOFS);
17993+ if (unlikely(!a))
17994+ goto out;
1308ab2a 17995+
e8791d4f
AM
17996+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
17997+ ia->ia_valid &= ~ATTR_MODE;
1308ab2a 17998+
e8791d4f
AM
17999+ file = NULL;
18000+ sb = dentry->d_sb;
18001+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18002+ if (unlikely(err))
18003+ goto out_kfree;
1308ab2a 18004+
e8791d4f
AM
18005+ if (ia->ia_valid & ATTR_FILE) {
18006+ /* currently ftruncate(2) only */
18007+ AuDebugOn(!d_is_reg(dentry));
18008+ file = ia->ia_file;
18009+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1,
18010+ /*fi_lsc*/0);
18011+ if (unlikely(err))
18012+ goto out_si;
18013+ ia->ia_file = au_hf_top(file);
18014+ a->udba = AuOpt_UDBA_NONE;
18015+ } else {
18016+ /* fchmod() doesn't pass ia_file */
18017+ a->udba = au_opt_udba(sb);
18018+ di_write_lock_child(dentry);
18019+ /* no d_unlinked(), to set UDBA_NONE for root */
18020+ if (d_unhashed(dentry))
18021+ a->udba = AuOpt_UDBA_NONE;
18022+ if (a->udba != AuOpt_UDBA_NONE) {
18023+ AuDebugOn(IS_ROOT(dentry));
18024+ err = au_reval_for_attr(dentry, au_sigen(sb));
18025+ if (unlikely(err))
18026+ goto out_dentry;
18027+ }
18028+ }
1308ab2a 18029+
e8791d4f
AM
18030+ err = au_pin_and_icpup(dentry, ia, a);
18031+ if (unlikely(err < 0))
18032+ goto out_dentry;
18033+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
18034+ ia->ia_file = NULL;
18035+ ia->ia_valid &= ~ATTR_FILE;
18036+ }
1308ab2a 18037+
e8791d4f
AM
18038+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
18039+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
18040+ == (ATTR_MODE | ATTR_CTIME)) {
18041+ err = security_path_chmod(&a->h_path, ia->ia_mode);
18042+ if (unlikely(err))
18043+ goto out_unlock;
18044+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
18045+ && (ia->ia_valid & ATTR_CTIME)) {
18046+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
18047+ if (unlikely(err))
18048+ goto out_unlock;
18049+ }
1308ab2a 18050+
e8791d4f
AM
18051+ if (ia->ia_valid & ATTR_SIZE) {
18052+ struct file *f;
1facf9fc 18053+
e8791d4f
AM
18054+ if (ia->ia_size < i_size_read(inode))
18055+ /* unmap only */
18056+ truncate_setsize(inode, ia->ia_size);
dece6358 18057+
e8791d4f
AM
18058+ f = NULL;
18059+ if (ia->ia_valid & ATTR_FILE)
18060+ f = ia->ia_file;
18061+ inode_unlock(a->h_inode);
18062+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
18063+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
18064+ } else {
18065+ delegated = NULL;
18066+ while (1) {
18067+ err = vfsub_notify_change(&a->h_path, ia, &delegated);
18068+ if (delegated) {
18069+ err = break_deleg_wait(&delegated);
18070+ if (!err)
18071+ continue;
18072+ }
18073+ break;
18074+ }
18075+ }
18076+ /*
18077+ * regardless of the aufs 'acl' option setting.
18078+ * why don't all acl-aware fs call this func from their ->setattr()?
18079+ */
18080+ if (!err && (ia->ia_valid & ATTR_MODE))
18081+ err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
18082+ if (!err)
18083+ au_cpup_attr_changeable(inode);
1facf9fc 18084+
e8791d4f
AM
18085+out_unlock:
18086+ inode_unlock(a->h_inode);
18087+ au_unpin(&a->pin);
18088+ if (unlikely(err))
18089+ au_update_dbtop(dentry);
18090+out_dentry:
18091+ di_write_unlock(dentry);
18092+ if (file) {
18093+ fi_write_unlock(file);
18094+ ia->ia_file = file;
18095+ ia->ia_valid |= ATTR_FILE;
18096+ }
18097+out_si:
18098+ si_read_unlock(sb);
18099+out_kfree:
18100+ kfree(a);
18101+out:
18102+ AuTraceErr(err);
18103+ return err;
1facf9fc 18104+}
18105+
e8791d4f
AM
18106+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
18107+static int au_h_path_to_set_attr(struct dentry *dentry,
18108+ struct au_icpup_args *a, struct path *h_path)
1facf9fc 18109+{
e8791d4f
AM
18110+ int err;
18111+ struct super_block *sb;
1facf9fc 18112+
e8791d4f
AM
18113+ sb = dentry->d_sb;
18114+ a->udba = au_opt_udba(sb);
18115+ /* no d_unlinked(), to set UDBA_NONE for root */
18116+ if (d_unhashed(dentry))
18117+ a->udba = AuOpt_UDBA_NONE;
18118+ if (a->udba != AuOpt_UDBA_NONE) {
18119+ AuDebugOn(IS_ROOT(dentry));
18120+ err = au_reval_for_attr(dentry, au_sigen(sb));
18121+ if (unlikely(err))
18122+ goto out;
18123+ }
18124+ err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
18125+ if (unlikely(err < 0))
18126+ goto out;
1facf9fc 18127+
e8791d4f
AM
18128+ h_path->dentry = a->h_path.dentry;
18129+ h_path->mnt = au_sbr_mnt(sb, a->btgt);
1308ab2a 18130+
e8791d4f
AM
18131+out:
18132+ return err;
4a4d8108
AM
18133+}
18134+
e8791d4f
AM
18135+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
18136+ struct au_sxattr *arg)
4a4d8108 18137+{
e8791d4f
AM
18138+ int err;
18139+ struct path h_path;
18140+ struct super_block *sb;
18141+ struct au_icpup_args *a;
18142+ struct inode *h_inode;
4a4d8108 18143+
e8791d4f 18144+ IMustLock(inode);
7eafdf33 18145+
e8791d4f
AM
18146+ err = -ENOMEM;
18147+ a = kzalloc(sizeof(*a), GFP_NOFS);
18148+ if (unlikely(!a))
18149+ goto out;
4a4d8108 18150+
e8791d4f
AM
18151+ sb = dentry->d_sb;
18152+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18153+ if (unlikely(err))
18154+ goto out_kfree;
027c5e7a 18155+
e8791d4f
AM
18156+ h_path.dentry = NULL; /* silence gcc */
18157+ di_write_lock_child(dentry);
18158+ err = au_h_path_to_set_attr(dentry, a, &h_path);
18159+ if (unlikely(err))
18160+ goto out_di;
4a4d8108 18161+
e8791d4f
AM
18162+ inode_unlock(a->h_inode);
18163+ switch (arg->type) {
18164+ case AU_XATTR_SET:
18165+ AuDebugOn(d_is_negative(h_path.dentry));
18166+ err = vfsub_setxattr(h_path.dentry,
18167+ arg->u.set.name, arg->u.set.value,
18168+ arg->u.set.size, arg->u.set.flags);
18169+ break;
18170+ case AU_ACL_SET:
18171+ err = -EOPNOTSUPP;
18172+ h_inode = d_inode(h_path.dentry);
18173+ if (h_inode->i_op->set_acl)
18174+ /* this will call posix_acl_update_mode */
18175+ err = h_inode->i_op->set_acl(h_inode,
18176+ arg->u.acl_set.acl,
18177+ arg->u.acl_set.type);
18178+ break;
18179+ }
18180+ if (!err)
18181+ au_cpup_attr_timesizes(inode);
4a4d8108 18182+
e8791d4f
AM
18183+ au_unpin(&a->pin);
18184+ if (unlikely(err))
18185+ au_update_dbtop(dentry);
4a4d8108 18186+
e8791d4f
AM
18187+out_di:
18188+ di_write_unlock(dentry);
18189+ si_read_unlock(sb);
18190+out_kfree:
18191+ kfree(a);
18192+out:
18193+ AuTraceErr(err);
18194+ return err;
1308ab2a 18195+}
e8791d4f 18196+#endif
1308ab2a 18197+
e8791d4f
AM
18198+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
18199+ unsigned int nlink)
53392da6 18200+{
e8791d4f 18201+ unsigned int n;
53392da6 18202+
e8791d4f
AM
18203+ inode->i_mode = st->mode;
18204+ /* don't i_[ug]id_write() here */
18205+ inode->i_uid = st->uid;
18206+ inode->i_gid = st->gid;
18207+ inode->i_atime = st->atime;
18208+ inode->i_mtime = st->mtime;
18209+ inode->i_ctime = st->ctime;
4a4d8108 18210+
e8791d4f
AM
18211+ au_cpup_attr_nlink(inode, /*force*/0);
18212+ if (S_ISDIR(inode->i_mode)) {
18213+ n = inode->i_nlink;
18214+ n -= nlink;
18215+ n += st->nlink;
18216+ smp_mb(); /* for i_nlink */
18217+ /* 0 can happen */
18218+ set_nlink(inode, n);
18219+ }
1308ab2a 18220+
e8791d4f
AM
18221+ spin_lock(&inode->i_lock);
18222+ inode->i_blocks = st->blocks;
18223+ i_size_write(inode, st->size);
18224+ spin_unlock(&inode->i_lock);
4a4d8108 18225+}
1308ab2a 18226+
e8791d4f
AM
18227+/*
18228+ * common routine for aufs_getattr() and au_getxattr().
18229+ * returns zero or negative (an error).
18230+ * @dentry will be read-locked on success.
18231+ */
18232+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path,
18233+ int locked)
4a4d8108 18234+{
e8791d4f
AM
18235+ int err;
18236+ unsigned int mnt_flags, sigen;
18237+ unsigned char udba_none;
18238+ aufs_bindex_t bindex;
18239+ struct super_block *sb, *h_sb;
18240+ struct inode *inode;
dece6358 18241+
e8791d4f
AM
18242+ h_path->mnt = NULL;
18243+ h_path->dentry = NULL;
1facf9fc 18244+
e8791d4f
AM
18245+ err = 0;
18246+ sb = dentry->d_sb;
18247+ mnt_flags = au_mntflags(sb);
18248+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
ae9dfd79 18249+
e8791d4f
AM
18250+ if (unlikely(locked))
18251+ goto body; /* skip locking dinfo */
4a4d8108 18252+
e8791d4f
AM
18253+ /* support fstat(2) */
18254+ if (!d_unlinked(dentry) && !udba_none) {
18255+ sigen = au_sigen(sb);
18256+ err = au_digen_test(dentry, sigen);
18257+ if (!err) {
18258+ di_read_lock_child(dentry, AuLock_IR);
18259+ err = au_dbrange_test(dentry);
18260+ if (unlikely(err)) {
18261+ di_read_unlock(dentry, AuLock_IR);
18262+ goto out;
18263+ }
18264+ } else {
18265+ AuDebugOn(IS_ROOT(dentry));
18266+ di_write_lock_child(dentry);
18267+ err = au_dbrange_test(dentry);
18268+ if (!err)
18269+ err = au_reval_for_attr(dentry, sigen);
18270+ if (!err)
18271+ di_downgrade_lock(dentry, AuLock_IR);
18272+ else {
18273+ di_write_unlock(dentry);
18274+ goto out;
18275+ }
18276+ }
18277+ } else
18278+ di_read_lock_child(dentry, AuLock_IR);
4a4d8108 18279+
e8791d4f
AM
18280+body:
18281+ inode = d_inode(dentry);
18282+ bindex = au_ibtop(inode);
18283+ h_path->mnt = au_sbr_mnt(sb, bindex);
18284+ h_sb = h_path->mnt->mnt_sb;
18285+ if (!force
18286+ && !au_test_fs_bad_iattr(h_sb)
18287+ && udba_none)
18288+ goto out; /* success */
4a4d8108 18289+
e8791d4f
AM
18290+ if (au_dbtop(dentry) == bindex)
18291+ h_path->dentry = au_h_dptr(dentry, bindex);
18292+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
18293+ h_path->dentry = au_plink_lkup(inode, bindex);
18294+ if (IS_ERR(h_path->dentry))
18295+ /* pretending success */
18296+ h_path->dentry = NULL;
18297+ else
18298+ dput(h_path->dentry);
18299+ }
4a4d8108 18300+
e8791d4f
AM
18301+out:
18302+ return err;
18303+}
18304+
18305+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
18306+ struct dentry *dentry, struct kstat *st)
4a4d8108 18307+{
e8791d4f
AM
18308+ int err;
18309+ unsigned char positive;
18310+ struct path h_path;
18311+ struct inode *inode;
4a4d8108 18312+ struct super_block *sb;
1e00d052 18313+
e8791d4f
AM
18314+ inode = d_inode(dentry);
18315+ sb = dentry->d_sb;
18316+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18317+ if (unlikely(err))
18318+ goto out;
18319+ err = au_h_path_getattr(dentry, /*force*/0, &h_path, /*locked*/0);
18320+ if (unlikely(err))
18321+ goto out_si;
18322+ if (unlikely(!h_path.dentry))
18323+ /* illegally overlapped or something */
18324+ goto out_fill; /* pretending success */
1e00d052 18325+
e8791d4f
AM
18326+ positive = d_is_positive(h_path.dentry);
18327+ if (positive)
18328+ err = vfs_getattr(&h_path, st);
18329+ if (!err) {
18330+ if (positive)
18331+ au_refresh_iattr(inode, st,
18332+ d_inode(h_path.dentry)->i_nlink);
18333+ goto out_fill; /* success */
1e00d052 18334+ }
e8791d4f
AM
18335+ AuTraceErr(err);
18336+ goto out_di;
1e00d052 18337+
e8791d4f
AM
18338+out_fill:
18339+ generic_fillattr(inode, st);
18340+out_di:
18341+ di_read_unlock(dentry, AuLock_IR);
18342+out_si:
18343+ si_read_unlock(sb);
18344+out:
18345+ AuTraceErr(err);
18346+ return err;
18347+}
18348+
18349+/* ---------------------------------------------------------------------- */
18350+
18351+static const char *aufs_get_link(struct dentry *dentry, struct inode *inode,
18352+ struct delayed_call *done)
18353+{
18354+ const char *ret;
18355+ struct dentry *h_dentry;
18356+ struct inode *h_inode;
18357+ int err;
18358+ aufs_bindex_t bindex;
18359+
18360+ ret = NULL; /* suppress a warning */
18361+ err = -ECHILD;
18362+ if (!dentry)
4a4d8108 18363+ goto out;
4a4d8108 18364+
e8791d4f
AM
18365+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
18366+ if (unlikely(err))
18367+ goto out;
1e00d052 18368+
e8791d4f
AM
18369+ err = au_d_hashed_positive(dentry);
18370+ if (unlikely(err))
18371+ goto out_unlock;
1e00d052 18372+
e8791d4f
AM
18373+ err = -EINVAL;
18374+ inode = d_inode(dentry);
18375+ bindex = au_ibtop(inode);
18376+ h_inode = au_h_iptr(inode, bindex);
18377+ if (unlikely(!h_inode->i_op->get_link))
18378+ goto out_unlock;
18379+
18380+ err = -EBUSY;
18381+ h_dentry = NULL;
18382+ if (au_dbtop(dentry) <= bindex) {
18383+ h_dentry = au_h_dptr(dentry, bindex);
18384+ if (h_dentry)
18385+ dget(h_dentry);
18386+ }
18387+ if (!h_dentry) {
18388+ h_dentry = d_find_any_alias(h_inode);
18389+ if (IS_ERR(h_dentry)) {
18390+ err = PTR_ERR(h_dentry);
18391+ goto out_unlock;
4a4d8108 18392+ }
4a4d8108 18393+ }
e8791d4f
AM
18394+ if (unlikely(!h_dentry))
18395+ goto out_unlock;
4a4d8108 18396+
e8791d4f
AM
18397+ err = 0;
18398+ AuDbg("%pf\n", h_inode->i_op->get_link);
18399+ AuDbgDentry(h_dentry);
18400+ ret = vfs_get_link(h_dentry, done);
18401+ dput(h_dentry);
18402+ if (IS_ERR(ret))
18403+ err = PTR_ERR(ret);
4a4d8108 18404+
e8791d4f
AM
18405+out_unlock:
18406+ aufs_read_unlock(dentry, AuLock_IR);
4f0767ce 18407+out:
e8791d4f
AM
18408+ if (unlikely(err))
18409+ ret = ERR_PTR(err);
18410+ AuTraceErrPtr(ret);
18411+ return ret;
4a4d8108
AM
18412+}
18413+
18414+/* ---------------------------------------------------------------------- */
18415+
e8791d4f 18416+static int au_is_special(struct inode *inode)
4a4d8108 18417+{
e8791d4f
AM
18418+ return (inode->i_mode & (S_IFBLK | S_IFCHR | S_IFIFO | S_IFSOCK));
18419+}
4a4d8108 18420+
e8791d4f
AM
18421+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
18422+{
18423+ int err;
18424+ aufs_bindex_t bindex;
18425+ struct super_block *sb;
18426+ struct inode *h_inode;
18427+ struct vfsmount *h_mnt;
4a4d8108 18428+
e8791d4f
AM
18429+ sb = inode->i_sb;
18430+ WARN_ONCE((flags & S_ATIME) && !IS_NOATIME(inode),
18431+ "unexpected s_flags 0x%lx", sb->s_flags);
4a4d8108 18432+
e8791d4f
AM
18433+ /* mmap_sem might be acquired already, cf. aufs_mmap() */
18434+ lockdep_off();
18435+ si_read_lock(sb, AuLock_FLUSH);
18436+ ii_write_lock_child(inode);
18437+ lockdep_on();
027c5e7a 18438+
e8791d4f
AM
18439+ err = 0;
18440+ bindex = au_ibtop(inode);
18441+ h_inode = au_h_iptr(inode, bindex);
18442+ if (!au_test_ro(sb, bindex, inode)) {
18443+ h_mnt = au_sbr_mnt(sb, bindex);
18444+ err = vfsub_mnt_want_write(h_mnt);
18445+ if (!err) {
18446+ err = vfsub_update_time(h_inode, ts, flags);
18447+ vfsub_mnt_drop_write(h_mnt);
18448+ }
18449+ } else if (au_is_special(h_inode)) {
18450+ /*
18451+ * Never copy-up here.
18452+ * These special files may already be opened and used for
18453+ * communicating. If we copied it up, then the communication
18454+ * would be corrupted.
18455+ */
18456+ AuWarn1("timestamps for i%lu are ignored "
18457+ "since it is on readonly branch (hi%lu).\n",
18458+ inode->i_ino, h_inode->i_ino);
18459+ } else if (flags & ~S_ATIME) {
18460+ err = -EIO;
18461+ AuIOErr1("unexpected flags 0x%x\n", flags);
18462+ AuDebugOn(1);
18463+ }
076b876e 18464+
e8791d4f
AM
18465+ lockdep_off();
18466+ if (!err)
18467+ au_cpup_attr_timesizes(inode);
18468+ ii_write_unlock(inode);
18469+ si_read_unlock(sb);
18470+ lockdep_on();
076b876e 18471+
e8791d4f
AM
18472+ if (!err && (flags & S_VERSION))
18473+ inode_inc_iversion(inode);
4a4d8108 18474+
4a4d8108
AM
18475+ return err;
18476+}
18477+
e8791d4f 18478+/* ---------------------------------------------------------------------- */
4a4d8108 18479+
e8791d4f
AM
18480+/* no getattr version will be set by module.c:aufs_init() */
18481+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
18482+ aufs_iop[] = {
18483+ [AuIop_SYMLINK] = {
18484+ .permission = aufs_permission,
18485+#ifdef CONFIG_FS_POSIX_ACL
18486+ .get_acl = aufs_get_acl,
18487+ .set_acl = aufs_set_acl, /* unsupport for symlink? */
18488+#endif
c2b27bf2 18489+
e8791d4f
AM
18490+ .setattr = aufs_setattr,
18491+ .getattr = aufs_getattr,
4a4d8108 18492+
e8791d4f
AM
18493+#ifdef CONFIG_AUFS_XATTR
18494+ .listxattr = aufs_listxattr,
18495+#endif
4a4d8108 18496+
e8791d4f
AM
18497+ .readlink = generic_readlink,
18498+ .get_link = aufs_get_link,
b752ccd1 18499+
e8791d4f
AM
18500+ /* .update_time = aufs_update_time */
18501+ },
18502+ [AuIop_DIR] = {
18503+ .create = aufs_create,
18504+ .lookup = aufs_lookup,
18505+ .link = aufs_link,
18506+ .unlink = aufs_unlink,
18507+ .symlink = aufs_symlink,
18508+ .mkdir = aufs_mkdir,
18509+ .rmdir = aufs_rmdir,
18510+ .mknod = aufs_mknod,
18511+ .rename = aufs_rename,
b752ccd1 18512+
e8791d4f
AM
18513+ .permission = aufs_permission,
18514+#ifdef CONFIG_FS_POSIX_ACL
18515+ .get_acl = aufs_get_acl,
18516+ .set_acl = aufs_set_acl,
18517+#endif
b752ccd1 18518+
e8791d4f
AM
18519+ .setattr = aufs_setattr,
18520+ .getattr = aufs_getattr,
027c5e7a 18521+
e8791d4f
AM
18522+#ifdef CONFIG_AUFS_XATTR
18523+ .listxattr = aufs_listxattr,
18524+#endif
076b876e 18525+
e8791d4f
AM
18526+ .update_time = aufs_update_time,
18527+ .atomic_open = aufs_atomic_open,
18528+ .tmpfile = aufs_tmpfile
18529+ },
18530+ [AuIop_OTHER] = {
18531+ .permission = aufs_permission,
18532+#ifdef CONFIG_FS_POSIX_ACL
18533+ .get_acl = aufs_get_acl,
18534+ .set_acl = aufs_set_acl,
18535+#endif
b752ccd1 18536+
e8791d4f
AM
18537+ .setattr = aufs_setattr,
18538+ .getattr = aufs_getattr,
b752ccd1 18539+
e8791d4f
AM
18540+#ifdef CONFIG_AUFS_XATTR
18541+ .listxattr = aufs_listxattr,
b752ccd1 18542+#endif
e8791d4f
AM
18543+
18544+ .update_time = aufs_update_time
18545+ }
18546+};
18547diff -urNp -x '*.orig' linux-4.9/fs/aufs/i_op_add.c linux-4.9/fs/aufs/i_op_add.c
18548--- linux-4.9/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
18549+++ linux-4.9/fs/aufs/i_op_add.c 2021-02-24 16:15:09.531573855 +0100
f2c43d5f 18550@@ -0,0 +1,928 @@
4a4d8108 18551+/*
ae9dfd79 18552+ * Copyright (C) 2005-2018 Junjiro R. Okajima
4a4d8108
AM
18553+ *
18554+ * This program, aufs is free software; you can redistribute it and/or modify
18555+ * it under the terms of the GNU General Public License as published by
18556+ * the Free Software Foundation; either version 2 of the License, or
18557+ * (at your option) any later version.
18558+ *
18559+ * This program is distributed in the hope that it will be useful,
18560+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18561+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18562+ * GNU General Public License for more details.
18563+ *
18564+ * You should have received a copy of the GNU General Public License
523b37e3 18565+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
18566+ */
18567+
18568+/*
18569+ * inode operations (add entry)
18570+ */
18571+
18572+#include "aufs.h"
18573+
18574+/*
18575+ * final procedure of adding a new entry, except link(2).
18576+ * remove whiteout, instantiate, copyup the parent dir's times and size
18577+ * and update version.
18578+ * if it failed, re-create the removed whiteout.
18579+ */
18580+static int epilog(struct inode *dir, aufs_bindex_t bindex,
18581+ struct dentry *wh_dentry, struct dentry *dentry)
18582+{
18583+ int err, rerr;
18584+ aufs_bindex_t bwh;
18585+ struct path h_path;
076b876e 18586+ struct super_block *sb;
4a4d8108
AM
18587+ struct inode *inode, *h_dir;
18588+ struct dentry *wh;
18589+
18590+ bwh = -1;
076b876e 18591+ sb = dir->i_sb;
4a4d8108 18592+ if (wh_dentry) {
5527c038 18593+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
4a4d8108
AM
18594+ IMustLock(h_dir);
18595+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
18596+ bwh = au_dbwh(dentry);
18597+ h_path.dentry = wh_dentry;
076b876e 18598+ h_path.mnt = au_sbr_mnt(sb, bindex);
4a4d8108
AM
18599+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
18600+ dentry);
18601+ if (unlikely(err))
18602+ goto out;
18603+ }
18604+
18605+ inode = au_new_inode(dentry, /*must_new*/1);
18606+ if (!IS_ERR(inode)) {
18607+ d_instantiate(dentry, inode);
5527c038 18608+ dir = d_inode(dentry->d_parent); /* dir inode is locked */
4a4d8108 18609+ IMustLock(dir);
b912730e 18610+ au_dir_ts(dir, bindex);
4a4d8108 18611+ dir->i_version++;
076b876e 18612+ au_fhsm_wrote(sb, bindex, /*force*/0);
4a4d8108
AM
18613+ return 0; /* success */
18614+ }
18615+
18616+ err = PTR_ERR(inode);
18617+ if (!wh_dentry)
18618+ goto out;
18619+
18620+ /* revert */
18621+ /* dir inode is locked */
18622+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
18623+ rerr = PTR_ERR(wh);
18624+ if (IS_ERR(wh)) {
523b37e3
AM
18625+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
18626+ dentry, err, rerr);
4a4d8108
AM
18627+ err = -EIO;
18628+ } else
18629+ dput(wh);
18630+
4f0767ce 18631+out:
4a4d8108
AM
18632+ return err;
18633+}
18634+
027c5e7a
AM
18635+static int au_d_may_add(struct dentry *dentry)
18636+{
18637+ int err;
18638+
18639+ err = 0;
18640+ if (unlikely(d_unhashed(dentry)))
18641+ err = -ENOENT;
5527c038 18642+ if (unlikely(d_really_is_positive(dentry)))
027c5e7a
AM
18643+ err = -EEXIST;
18644+ return err;
18645+}
18646+
4a4d8108
AM
18647+/*
18648+ * simple tests for the adding inode operations.
18649+ * following the checks in vfs, plus the parent-child relationship.
18650+ */
18651+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
18652+ struct dentry *h_parent, int isdir)
18653+{
18654+ int err;
18655+ umode_t h_mode;
18656+ struct dentry *h_dentry;
18657+ struct inode *h_inode;
18658+
18659+ err = -ENAMETOOLONG;
18660+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
18661+ goto out;
18662+
18663+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 18664+ if (d_really_is_negative(dentry)) {
4a4d8108 18665+ err = -EEXIST;
5527c038 18666+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
18667+ goto out;
18668+ } else {
18669+ /* rename(2) case */
18670+ err = -EIO;
5527c038
JR
18671+ if (unlikely(d_is_negative(h_dentry)))
18672+ goto out;
18673+ h_inode = d_inode(h_dentry);
18674+ if (unlikely(!h_inode->i_nlink))
4a4d8108
AM
18675+ goto out;
18676+
18677+ h_mode = h_inode->i_mode;
18678+ if (!isdir) {
18679+ err = -EISDIR;
18680+ if (unlikely(S_ISDIR(h_mode)))
18681+ goto out;
18682+ } else if (unlikely(!S_ISDIR(h_mode))) {
18683+ err = -ENOTDIR;
18684+ goto out;
18685+ }
18686+ }
18687+
18688+ err = 0;
18689+ /* expected parent dir is locked */
18690+ if (unlikely(h_parent != h_dentry->d_parent))
18691+ err = -EIO;
18692+
4f0767ce 18693+out:
4a4d8108
AM
18694+ AuTraceErr(err);
18695+ return err;
18696+}
18697+
18698+/*
18699+ * initial procedure of adding a new entry.
18700+ * prepare writable branch and the parent dir, lock it,
18701+ * and lookup whiteout for the new entry.
18702+ */
18703+static struct dentry*
18704+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
18705+ struct dentry *src_dentry, struct au_pin *pin,
18706+ struct au_wr_dir_args *wr_dir_args)
18707+{
18708+ struct dentry *wh_dentry, *h_parent;
18709+ struct super_block *sb;
18710+ struct au_branch *br;
18711+ int err;
18712+ unsigned int udba;
18713+ aufs_bindex_t bcpup;
18714+
523b37e3 18715+ AuDbg("%pd\n", dentry);
4a4d8108
AM
18716+
18717+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
18718+ bcpup = err;
18719+ wh_dentry = ERR_PTR(err);
18720+ if (unlikely(err < 0))
18721+ goto out;
18722+
18723+ sb = dentry->d_sb;
18724+ udba = au_opt_udba(sb);
18725+ err = au_pin(pin, dentry, bcpup, udba,
18726+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
18727+ wh_dentry = ERR_PTR(err);
18728+ if (unlikely(err))
18729+ goto out;
18730+
18731+ h_parent = au_pinned_h_parent(pin);
18732+ if (udba != AuOpt_UDBA_NONE
5afbbe0d 18733+ && au_dbtop(dentry) == bcpup)
4a4d8108
AM
18734+ err = au_may_add(dentry, bcpup, h_parent,
18735+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
18736+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
18737+ err = -ENAMETOOLONG;
18738+ wh_dentry = ERR_PTR(err);
18739+ if (unlikely(err))
18740+ goto out_unpin;
18741+
18742+ br = au_sbr(sb, bcpup);
18743+ if (dt) {
18744+ struct path tmp = {
18745+ .dentry = h_parent,
86dc4139 18746+ .mnt = au_br_mnt(br)
4a4d8108
AM
18747+ };
18748+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
18749+ }
18750+
18751+ wh_dentry = NULL;
18752+ if (bcpup != au_dbwh(dentry))
18753+ goto out; /* success */
18754+
2000de60
JR
18755+ /*
18756+ * ENAMETOOLONG here means that if we allowed creating such a name, then
18757+ * it could not be removed in the future. So we don't allow such a
18758+ * name here and we don't handle ENAMETOOLONG differently here.
18759+ */
4a4d8108
AM
18760+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
18761+
4f0767ce 18762+out_unpin:
4a4d8108
AM
18763+ if (IS_ERR(wh_dentry))
18764+ au_unpin(pin);
4f0767ce 18765+out:
4a4d8108
AM
18766+ return wh_dentry;
18767+}
18768+
18769+/* ---------------------------------------------------------------------- */
18770+
18771+enum { Mknod, Symlink, Creat };
18772+struct simple_arg {
18773+ int type;
18774+ union {
18775+ struct {
b912730e
AM
18776+ umode_t mode;
18777+ bool want_excl;
18778+ bool try_aopen;
18779+ struct vfsub_aopen_args *aopen;
4a4d8108
AM
18780+ } c;
18781+ struct {
18782+ const char *symname;
18783+ } s;
18784+ struct {
7eafdf33 18785+ umode_t mode;
4a4d8108
AM
18786+ dev_t dev;
18787+ } m;
18788+ } u;
18789+};
18790+
18791+static int add_simple(struct inode *dir, struct dentry *dentry,
18792+ struct simple_arg *arg)
18793+{
076b876e 18794+ int err, rerr;
5afbbe0d 18795+ aufs_bindex_t btop;
4a4d8108 18796+ unsigned char created;
b912730e
AM
18797+ const unsigned char try_aopen
18798+ = (arg->type == Creat && arg->u.c.try_aopen);
4a4d8108
AM
18799+ struct dentry *wh_dentry, *parent;
18800+ struct inode *h_dir;
b912730e
AM
18801+ struct super_block *sb;
18802+ struct au_branch *br;
c2b27bf2
AM
18803+ /* to reduce stack size */
18804+ struct {
18805+ struct au_dtime dt;
18806+ struct au_pin pin;
18807+ struct path h_path;
18808+ struct au_wr_dir_args wr_dir_args;
18809+ } *a;
4a4d8108 18810+
523b37e3 18811+ AuDbg("%pd\n", dentry);
4a4d8108
AM
18812+ IMustLock(dir);
18813+
c2b27bf2
AM
18814+ err = -ENOMEM;
18815+ a = kmalloc(sizeof(*a), GFP_NOFS);
18816+ if (unlikely(!a))
18817+ goto out;
18818+ a->wr_dir_args.force_btgt = -1;
18819+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
18820+
4a4d8108 18821+ parent = dentry->d_parent; /* dir inode is locked */
b912730e
AM
18822+ if (!try_aopen) {
18823+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
18824+ if (unlikely(err))
18825+ goto out_free;
18826+ }
027c5e7a
AM
18827+ err = au_d_may_add(dentry);
18828+ if (unlikely(err))
18829+ goto out_unlock;
b912730e
AM
18830+ if (!try_aopen)
18831+ di_write_lock_parent(parent);
c2b27bf2
AM
18832+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
18833+ &a->pin, &a->wr_dir_args);
4a4d8108
AM
18834+ err = PTR_ERR(wh_dentry);
18835+ if (IS_ERR(wh_dentry))
027c5e7a 18836+ goto out_parent;
4a4d8108 18837+
5afbbe0d 18838+ btop = au_dbtop(dentry);
b912730e 18839+ sb = dentry->d_sb;
5afbbe0d
AM
18840+ br = au_sbr(sb, btop);
18841+ a->h_path.dentry = au_h_dptr(dentry, btop);
b912730e 18842+ a->h_path.mnt = au_br_mnt(br);
c2b27bf2 18843+ h_dir = au_pinned_h_dir(&a->pin);
4a4d8108
AM
18844+ switch (arg->type) {
18845+ case Creat:
b912730e
AM
18846+ err = 0;
18847+ if (!try_aopen || !h_dir->i_op->atomic_open)
18848+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
18849+ arg->u.c.want_excl);
18850+ else
18851+ err = vfsub_atomic_open(h_dir, a->h_path.dentry,
18852+ arg->u.c.aopen, br);
4a4d8108
AM
18853+ break;
18854+ case Symlink:
c2b27bf2 18855+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
4a4d8108
AM
18856+ break;
18857+ case Mknod:
c2b27bf2
AM
18858+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
18859+ arg->u.m.dev);
4a4d8108
AM
18860+ break;
18861+ default:
18862+ BUG();
18863+ }
18864+ created = !err;
18865+ if (!err)
5afbbe0d 18866+ err = epilog(dir, btop, wh_dentry, dentry);
4a4d8108
AM
18867+
18868+ /* revert */
5527c038 18869+ if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
523b37e3
AM
18870+ /* no delegation since it is just created */
18871+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
18872+ /*force*/0);
4a4d8108 18873+ if (rerr) {
523b37e3
AM
18874+ AuIOErr("%pd revert failure(%d, %d)\n",
18875+ dentry, err, rerr);
4a4d8108
AM
18876+ err = -EIO;
18877+ }
c2b27bf2 18878+ au_dtime_revert(&a->dt);
4a4d8108
AM
18879+ }
18880+
b912730e
AM
18881+ if (!err && try_aopen && !h_dir->i_op->atomic_open)
18882+ *arg->u.c.aopen->opened |= FILE_CREATED;
18883+
c2b27bf2 18884+ au_unpin(&a->pin);
4a4d8108
AM
18885+ dput(wh_dentry);
18886+
027c5e7a 18887+out_parent:
b912730e
AM
18888+ if (!try_aopen)
18889+ di_write_unlock(parent);
027c5e7a 18890+out_unlock:
4a4d8108 18891+ if (unlikely(err)) {
5afbbe0d 18892+ au_update_dbtop(dentry);
4a4d8108
AM
18893+ d_drop(dentry);
18894+ }
b912730e
AM
18895+ if (!try_aopen)
18896+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2 18897+out_free:
ae9dfd79 18898+ kfree(a);
027c5e7a 18899+out:
4a4d8108
AM
18900+ return err;
18901+}
18902+
7eafdf33
AM
18903+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
18904+ dev_t dev)
4a4d8108
AM
18905+{
18906+ struct simple_arg arg = {
18907+ .type = Mknod,
18908+ .u.m = {
18909+ .mode = mode,
18910+ .dev = dev
18911+ }
18912+ };
18913+ return add_simple(dir, dentry, &arg);
18914+}
18915+
18916+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
18917+{
18918+ struct simple_arg arg = {
18919+ .type = Symlink,
18920+ .u.s.symname = symname
18921+ };
18922+ return add_simple(dir, dentry, &arg);
18923+}
18924+
7eafdf33 18925+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 18926+ bool want_excl)
4a4d8108
AM
18927+{
18928+ struct simple_arg arg = {
18929+ .type = Creat,
18930+ .u.c = {
b4510431
AM
18931+ .mode = mode,
18932+ .want_excl = want_excl
4a4d8108
AM
18933+ }
18934+ };
18935+ return add_simple(dir, dentry, &arg);
18936+}
18937+
b912730e
AM
18938+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
18939+ struct vfsub_aopen_args *aopen_args)
18940+{
18941+ struct simple_arg arg = {
18942+ .type = Creat,
18943+ .u.c = {
18944+ .mode = aopen_args->create_mode,
18945+ .want_excl = aopen_args->open_flag & O_EXCL,
18946+ .try_aopen = true,
18947+ .aopen = aopen_args
18948+ }
18949+ };
18950+ return add_simple(dir, dentry, &arg);
18951+}
18952+
38d290e6
JR
18953+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
18954+{
18955+ int err;
18956+ aufs_bindex_t bindex;
18957+ struct super_block *sb;
18958+ struct dentry *parent, *h_parent, *h_dentry;
18959+ struct inode *h_dir, *inode;
18960+ struct vfsmount *h_mnt;
18961+ struct au_wr_dir_args wr_dir_args = {
18962+ .force_btgt = -1,
18963+ .flags = AuWrDir_TMPFILE
18964+ };
18965+
18966+ /* copy-up may happen */
febd17d6 18967+ inode_lock(dir);
38d290e6
JR
18968+
18969+ sb = dir->i_sb;
18970+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18971+ if (unlikely(err))
18972+ goto out;
18973+
18974+ err = au_di_init(dentry);
18975+ if (unlikely(err))
18976+ goto out_si;
18977+
18978+ err = -EBUSY;
18979+ parent = d_find_any_alias(dir);
18980+ AuDebugOn(!parent);
18981+ di_write_lock_parent(parent);
5527c038 18982+ if (unlikely(d_inode(parent) != dir))
38d290e6
JR
18983+ goto out_parent;
18984+
18985+ err = au_digen_test(parent, au_sigen(sb));
18986+ if (unlikely(err))
18987+ goto out_parent;
18988+
5afbbe0d
AM
18989+ bindex = au_dbtop(parent);
18990+ au_set_dbtop(dentry, bindex);
18991+ au_set_dbbot(dentry, bindex);
38d290e6
JR
18992+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
18993+ bindex = err;
18994+ if (unlikely(err < 0))
18995+ goto out_parent;
18996+
18997+ err = -EOPNOTSUPP;
18998+ h_dir = au_h_iptr(dir, bindex);
18999+ if (unlikely(!h_dir->i_op->tmpfile))
19000+ goto out_parent;
19001+
19002+ h_mnt = au_sbr_mnt(sb, bindex);
19003+ err = vfsub_mnt_want_write(h_mnt);
19004+ if (unlikely(err))
19005+ goto out_parent;
19006+
19007+ h_parent = au_h_dptr(parent, bindex);
5527c038 19008+ err = inode_permission(d_inode(h_parent), MAY_WRITE | MAY_EXEC);
38d290e6
JR
19009+ if (unlikely(err))
19010+ goto out_mnt;
19011+
19012+ err = -ENOMEM;
19013+ h_dentry = d_alloc(h_parent, &dentry->d_name);
19014+ if (unlikely(!h_dentry))
19015+ goto out_mnt;
19016+
19017+ err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode);
19018+ if (unlikely(err))
19019+ goto out_dentry;
19020+
5afbbe0d
AM
19021+ au_set_dbtop(dentry, bindex);
19022+ au_set_dbbot(dentry, bindex);
38d290e6
JR
19023+ au_set_h_dptr(dentry, bindex, dget(h_dentry));
19024+ inode = au_new_inode(dentry, /*must_new*/1);
19025+ if (IS_ERR(inode)) {
19026+ err = PTR_ERR(inode);
19027+ au_set_h_dptr(dentry, bindex, NULL);
5afbbe0d
AM
19028+ au_set_dbtop(dentry, -1);
19029+ au_set_dbbot(dentry, -1);
38d290e6
JR
19030+ } else {
19031+ if (!inode->i_nlink)
19032+ set_nlink(inode, 1);
19033+ d_tmpfile(dentry, inode);
19034+ au_di(dentry)->di_tmpfile = 1;
19035+
19036+ /* update without i_mutex */
5afbbe0d 19037+ if (au_ibtop(dir) == au_dbtop(dentry))
38d290e6
JR
19038+ au_cpup_attr_timesizes(dir);
19039+ }
19040+
19041+out_dentry:
19042+ dput(h_dentry);
19043+out_mnt:
19044+ vfsub_mnt_drop_write(h_mnt);
19045+out_parent:
19046+ di_write_unlock(parent);
19047+ dput(parent);
19048+ di_write_unlock(dentry);
5afbbe0d 19049+ if (unlikely(err)) {
38d290e6
JR
19050+ au_di_fin(dentry);
19051+ dentry->d_fsdata = NULL;
19052+ }
19053+out_si:
19054+ si_read_unlock(sb);
19055+out:
febd17d6 19056+ inode_unlock(dir);
38d290e6
JR
19057+ return err;
19058+}
19059+
4a4d8108
AM
19060+/* ---------------------------------------------------------------------- */
19061+
19062+struct au_link_args {
19063+ aufs_bindex_t bdst, bsrc;
19064+ struct au_pin pin;
19065+ struct path h_path;
19066+ struct dentry *src_parent, *parent;
19067+};
19068+
19069+static int au_cpup_before_link(struct dentry *src_dentry,
19070+ struct au_link_args *a)
19071+{
19072+ int err;
19073+ struct dentry *h_src_dentry;
c2b27bf2
AM
19074+ struct au_cp_generic cpg = {
19075+ .dentry = src_dentry,
19076+ .bdst = a->bdst,
19077+ .bsrc = a->bsrc,
19078+ .len = -1,
19079+ .pin = &a->pin,
19080+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
19081+ };
4a4d8108
AM
19082+
19083+ di_read_lock_parent(a->src_parent, AuLock_IR);
19084+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
19085+ if (unlikely(err))
19086+ goto out;
19087+
19088+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
4a4d8108
AM
19089+ err = au_pin(&a->pin, src_dentry, a->bdst,
19090+ au_opt_udba(src_dentry->d_sb),
19091+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
19092+ if (unlikely(err))
19093+ goto out;
367653fa 19094+
c2b27bf2 19095+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
19096+ au_unpin(&a->pin);
19097+
4f0767ce 19098+out:
4a4d8108
AM
19099+ di_read_unlock(a->src_parent, AuLock_IR);
19100+ return err;
19101+}
19102+
86dc4139
AM
19103+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
19104+ struct au_link_args *a)
4a4d8108
AM
19105+{
19106+ int err;
19107+ unsigned char plink;
5afbbe0d 19108+ aufs_bindex_t bbot;
4a4d8108 19109+ struct dentry *h_src_dentry;
523b37e3 19110+ struct inode *h_inode, *inode, *delegated;
4a4d8108
AM
19111+ struct super_block *sb;
19112+ struct file *h_file;
19113+
19114+ plink = 0;
19115+ h_inode = NULL;
19116+ sb = src_dentry->d_sb;
5527c038 19117+ inode = d_inode(src_dentry);
5afbbe0d 19118+ if (au_ibtop(inode) <= a->bdst)
4a4d8108
AM
19119+ h_inode = au_h_iptr(inode, a->bdst);
19120+ if (!h_inode || !h_inode->i_nlink) {
19121+ /* copyup src_dentry as the name of dentry. */
5afbbe0d
AM
19122+ bbot = au_dbbot(dentry);
19123+ if (bbot < a->bsrc)
19124+ au_set_dbbot(dentry, a->bsrc);
86dc4139
AM
19125+ au_set_h_dptr(dentry, a->bsrc,
19126+ dget(au_h_dptr(src_dentry, a->bsrc)));
19127+ dget(a->h_path.dentry);
19128+ au_set_h_dptr(dentry, a->bdst, NULL);
c1595e42
JR
19129+ AuDbg("temporary d_inode...\n");
19130+ spin_lock(&dentry->d_lock);
5527c038 19131+ dentry->d_inode = d_inode(src_dentry); /* tmp */
c1595e42 19132+ spin_unlock(&dentry->d_lock);
392086de 19133+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
86dc4139 19134+ if (IS_ERR(h_file))
4a4d8108 19135+ err = PTR_ERR(h_file);
86dc4139 19136+ else {
c2b27bf2
AM
19137+ struct au_cp_generic cpg = {
19138+ .dentry = dentry,
19139+ .bdst = a->bdst,
19140+ .bsrc = -1,
19141+ .len = -1,
19142+ .pin = &a->pin,
19143+ .flags = AuCpup_KEEPLINO
19144+ };
19145+ err = au_sio_cpup_simple(&cpg);
86dc4139
AM
19146+ au_h_open_post(dentry, a->bsrc, h_file);
19147+ if (!err) {
19148+ dput(a->h_path.dentry);
19149+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
19150+ } else
19151+ au_set_h_dptr(dentry, a->bdst,
19152+ a->h_path.dentry);
19153+ }
c1595e42 19154+ spin_lock(&dentry->d_lock);
86dc4139 19155+ dentry->d_inode = NULL; /* restore */
c1595e42
JR
19156+ spin_unlock(&dentry->d_lock);
19157+ AuDbg("temporary d_inode...done\n");
86dc4139 19158+ au_set_h_dptr(dentry, a->bsrc, NULL);
5afbbe0d 19159+ au_set_dbbot(dentry, bbot);
4a4d8108
AM
19160+ } else {
19161+ /* the inode of src_dentry already exists on a.bdst branch */
19162+ h_src_dentry = d_find_alias(h_inode);
19163+ if (!h_src_dentry && au_plink_test(inode)) {
19164+ plink = 1;
19165+ h_src_dentry = au_plink_lkup(inode, a->bdst);
19166+ err = PTR_ERR(h_src_dentry);
19167+ if (IS_ERR(h_src_dentry))
19168+ goto out;
19169+
5527c038 19170+ if (unlikely(d_is_negative(h_src_dentry))) {
4a4d8108
AM
19171+ dput(h_src_dentry);
19172+ h_src_dentry = NULL;
19173+ }
19174+
19175+ }
19176+ if (h_src_dentry) {
523b37e3 19177+ delegated = NULL;
4a4d8108 19178+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
19179+ &a->h_path, &delegated);
19180+ if (unlikely(err == -EWOULDBLOCK)) {
19181+ pr_warn("cannot retry for NFSv4 delegation"
19182+ " for an internal link\n");
19183+ iput(delegated);
19184+ }
4a4d8108
AM
19185+ dput(h_src_dentry);
19186+ } else {
19187+ AuIOErr("no dentry found for hi%lu on b%d\n",
19188+ h_inode->i_ino, a->bdst);
19189+ err = -EIO;
19190+ }
19191+ }
19192+
19193+ if (!err && !plink)
19194+ au_plink_append(inode, a->bdst, a->h_path.dentry);
19195+
19196+out:
2cbb1c4b 19197+ AuTraceErr(err);
4a4d8108
AM
19198+ return err;
19199+}
19200+
19201+int aufs_link(struct dentry *src_dentry, struct inode *dir,
19202+ struct dentry *dentry)
19203+{
19204+ int err, rerr;
19205+ struct au_dtime dt;
19206+ struct au_link_args *a;
19207+ struct dentry *wh_dentry, *h_src_dentry;
523b37e3 19208+ struct inode *inode, *delegated;
4a4d8108
AM
19209+ struct super_block *sb;
19210+ struct au_wr_dir_args wr_dir_args = {
19211+ /* .force_btgt = -1, */
19212+ .flags = AuWrDir_ADD_ENTRY
19213+ };
19214+
19215+ IMustLock(dir);
5527c038 19216+ inode = d_inode(src_dentry);
4a4d8108
AM
19217+ IMustLock(inode);
19218+
4a4d8108
AM
19219+ err = -ENOMEM;
19220+ a = kzalloc(sizeof(*a), GFP_NOFS);
19221+ if (unlikely(!a))
19222+ goto out;
19223+
19224+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
19225+ err = aufs_read_and_write_lock2(dentry, src_dentry,
19226+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
19227+ if (unlikely(err))
19228+ goto out_kfree;
38d290e6 19229+ err = au_d_linkable(src_dentry);
027c5e7a
AM
19230+ if (unlikely(err))
19231+ goto out_unlock;
19232+ err = au_d_may_add(dentry);
19233+ if (unlikely(err))
19234+ goto out_unlock;
e49829fe 19235+
4a4d8108 19236+ a->src_parent = dget_parent(src_dentry);
5afbbe0d 19237+ wr_dir_args.force_btgt = au_ibtop(inode);
4a4d8108
AM
19238+
19239+ di_write_lock_parent(a->parent);
19240+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
19241+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
19242+ &wr_dir_args);
19243+ err = PTR_ERR(wh_dentry);
19244+ if (IS_ERR(wh_dentry))
027c5e7a 19245+ goto out_parent;
4a4d8108
AM
19246+
19247+ err = 0;
19248+ sb = dentry->d_sb;
5afbbe0d 19249+ a->bdst = au_dbtop(dentry);
4a4d8108
AM
19250+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
19251+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
5afbbe0d 19252+ a->bsrc = au_ibtop(inode);
2cbb1c4b 19253+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
38d290e6
JR
19254+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
19255+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
2cbb1c4b 19256+ if (!h_src_dentry) {
5afbbe0d 19257+ a->bsrc = au_dbtop(src_dentry);
2cbb1c4b
JR
19258+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
19259+ AuDebugOn(!h_src_dentry);
38d290e6
JR
19260+ } else if (IS_ERR(h_src_dentry)) {
19261+ err = PTR_ERR(h_src_dentry);
2cbb1c4b 19262+ goto out_parent;
38d290e6 19263+ }
2cbb1c4b 19264+
f2c43d5f
AM
19265+ /*
19266+ * aufs doesn't touch the credential so
19267+ * security_dentry_create_files_as() is unnecrssary.
19268+ */
4a4d8108
AM
19269+ if (au_opt_test(au_mntflags(sb), PLINK)) {
19270+ if (a->bdst < a->bsrc
19271+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
86dc4139 19272+ err = au_cpup_or_link(src_dentry, dentry, a);
523b37e3
AM
19273+ else {
19274+ delegated = NULL;
4a4d8108 19275+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
19276+ &a->h_path, &delegated);
19277+ if (unlikely(err == -EWOULDBLOCK)) {
19278+ pr_warn("cannot retry for NFSv4 delegation"
19279+ " for an internal link\n");
19280+ iput(delegated);
19281+ }
19282+ }
2cbb1c4b 19283+ dput(h_src_dentry);
4a4d8108
AM
19284+ } else {
19285+ /*
19286+ * copyup src_dentry to the branch we process,
19287+ * and then link(2) to it.
19288+ */
2cbb1c4b 19289+ dput(h_src_dentry);
4a4d8108
AM
19290+ if (a->bdst < a->bsrc
19291+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
19292+ au_unpin(&a->pin);
19293+ di_write_unlock(a->parent);
19294+ err = au_cpup_before_link(src_dentry, a);
19295+ di_write_lock_parent(a->parent);
19296+ if (!err)
19297+ err = au_pin(&a->pin, dentry, a->bdst,
19298+ au_opt_udba(sb),
19299+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
19300+ if (unlikely(err))
19301+ goto out_wh;
19302+ }
19303+ if (!err) {
19304+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
19305+ err = -ENOENT;
5527c038 19306+ if (h_src_dentry && d_is_positive(h_src_dentry)) {
523b37e3 19307+ delegated = NULL;
4a4d8108
AM
19308+ err = vfsub_link(h_src_dentry,
19309+ au_pinned_h_dir(&a->pin),
523b37e3
AM
19310+ &a->h_path, &delegated);
19311+ if (unlikely(err == -EWOULDBLOCK)) {
19312+ pr_warn("cannot retry"
19313+ " for NFSv4 delegation"
19314+ " for an internal link\n");
19315+ iput(delegated);
19316+ }
19317+ }
4a4d8108
AM
19318+ }
19319+ }
19320+ if (unlikely(err))
19321+ goto out_unpin;
19322+
19323+ if (wh_dentry) {
19324+ a->h_path.dentry = wh_dentry;
19325+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
19326+ dentry);
19327+ if (unlikely(err))
19328+ goto out_revert;
19329+ }
19330+
b912730e 19331+ au_dir_ts(dir, a->bdst);
4a4d8108 19332+ dir->i_version++;
4a4d8108
AM
19333+ inc_nlink(inode);
19334+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
19335+ d_instantiate(dentry, au_igrab(inode));
19336+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
19337+ /* some filesystem calls d_drop() */
19338+ d_drop(dentry);
076b876e
AM
19339+ /* some filesystems consume an inode even for a hardlink */
19340+ au_fhsm_wrote(sb, a->bdst, /*force*/0);
4a4d8108
AM
19341+ goto out_unpin; /* success */
19342+
4f0767ce 19343+out_revert:
523b37e3
AM
19344+ /* no delegation since it is just created */
19345+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
19346+ /*delegated*/NULL, /*force*/0);
027c5e7a 19347+ if (unlikely(rerr)) {
523b37e3 19348+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
027c5e7a
AM
19349+ err = -EIO;
19350+ }
4a4d8108 19351+ au_dtime_revert(&dt);
4f0767ce 19352+out_unpin:
4a4d8108 19353+ au_unpin(&a->pin);
4f0767ce 19354+out_wh:
4a4d8108 19355+ dput(wh_dentry);
027c5e7a
AM
19356+out_parent:
19357+ di_write_unlock(a->parent);
19358+ dput(a->src_parent);
4f0767ce 19359+out_unlock:
4a4d8108 19360+ if (unlikely(err)) {
5afbbe0d 19361+ au_update_dbtop(dentry);
4a4d8108
AM
19362+ d_drop(dentry);
19363+ }
4a4d8108 19364+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 19365+out_kfree:
ae9dfd79 19366+ kfree(a);
4f0767ce 19367+out:
86dc4139 19368+ AuTraceErr(err);
4a4d8108
AM
19369+ return err;
19370+}
19371+
7eafdf33 19372+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4a4d8108
AM
19373+{
19374+ int err, rerr;
19375+ aufs_bindex_t bindex;
19376+ unsigned char diropq;
19377+ struct path h_path;
19378+ struct dentry *wh_dentry, *parent, *opq_dentry;
febd17d6 19379+ struct inode *h_inode;
4a4d8108
AM
19380+ struct super_block *sb;
19381+ struct {
19382+ struct au_pin pin;
19383+ struct au_dtime dt;
19384+ } *a; /* reduce the stack usage */
19385+ struct au_wr_dir_args wr_dir_args = {
19386+ .force_btgt = -1,
19387+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
19388+ };
19389+
19390+ IMustLock(dir);
19391+
19392+ err = -ENOMEM;
19393+ a = kmalloc(sizeof(*a), GFP_NOFS);
19394+ if (unlikely(!a))
19395+ goto out;
19396+
027c5e7a
AM
19397+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
19398+ if (unlikely(err))
19399+ goto out_free;
19400+ err = au_d_may_add(dentry);
19401+ if (unlikely(err))
19402+ goto out_unlock;
19403+
4a4d8108
AM
19404+ parent = dentry->d_parent; /* dir inode is locked */
19405+ di_write_lock_parent(parent);
19406+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
19407+ &a->pin, &wr_dir_args);
19408+ err = PTR_ERR(wh_dentry);
19409+ if (IS_ERR(wh_dentry))
027c5e7a 19410+ goto out_parent;
4a4d8108
AM
19411+
19412+ sb = dentry->d_sb;
5afbbe0d 19413+ bindex = au_dbtop(dentry);
4a4d8108
AM
19414+ h_path.dentry = au_h_dptr(dentry, bindex);
19415+ h_path.mnt = au_sbr_mnt(sb, bindex);
19416+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
19417+ if (unlikely(err))
027c5e7a 19418+ goto out_unpin;
4a4d8108
AM
19419+
19420+ /* make the dir opaque */
19421+ diropq = 0;
febd17d6 19422+ h_inode = d_inode(h_path.dentry);
4a4d8108
AM
19423+ if (wh_dentry
19424+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
febd17d6 19425+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 19426+ opq_dentry = au_diropq_create(dentry, bindex);
febd17d6 19427+ inode_unlock(h_inode);
4a4d8108
AM
19428+ err = PTR_ERR(opq_dentry);
19429+ if (IS_ERR(opq_dentry))
19430+ goto out_dir;
19431+ dput(opq_dentry);
19432+ diropq = 1;
19433+ }
19434+
19435+ err = epilog(dir, bindex, wh_dentry, dentry);
19436+ if (!err) {
19437+ inc_nlink(dir);
027c5e7a 19438+ goto out_unpin; /* success */
4a4d8108
AM
19439+ }
19440+
19441+ /* revert */
19442+ if (diropq) {
19443+ AuLabel(revert opq);
febd17d6 19444+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 19445+ rerr = au_diropq_remove(dentry, bindex);
febd17d6 19446+ inode_unlock(h_inode);
4a4d8108 19447+ if (rerr) {
523b37e3
AM
19448+ AuIOErr("%pd reverting diropq failed(%d, %d)\n",
19449+ dentry, err, rerr);
4a4d8108
AM
19450+ err = -EIO;
19451+ }
19452+ }
19453+
4f0767ce 19454+out_dir:
4a4d8108
AM
19455+ AuLabel(revert dir);
19456+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
19457+ if (rerr) {
523b37e3
AM
19458+ AuIOErr("%pd reverting dir failed(%d, %d)\n",
19459+ dentry, err, rerr);
4a4d8108
AM
19460+ err = -EIO;
19461+ }
4a4d8108 19462+ au_dtime_revert(&a->dt);
027c5e7a 19463+out_unpin:
4a4d8108
AM
19464+ au_unpin(&a->pin);
19465+ dput(wh_dentry);
027c5e7a
AM
19466+out_parent:
19467+ di_write_unlock(parent);
19468+out_unlock:
4a4d8108 19469+ if (unlikely(err)) {
5afbbe0d 19470+ au_update_dbtop(dentry);
4a4d8108
AM
19471+ d_drop(dentry);
19472+ }
4a4d8108 19473+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 19474+out_free:
ae9dfd79 19475+ kfree(a);
4f0767ce 19476+out:
4a4d8108
AM
19477+ return err;
19478+}
e8791d4f
AM
19479diff -urNp -x '*.orig' linux-4.9/fs/aufs/i_op_del.c linux-4.9/fs/aufs/i_op_del.c
19480--- linux-4.9/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
19481+++ linux-4.9/fs/aufs/i_op_del.c 2021-02-24 16:15:09.531573855 +0100
19482@@ -0,0 +1,511 @@
4a4d8108 19483+/*
ae9dfd79 19484+ * Copyright (C) 2005-2018 Junjiro R. Okajima
4a4d8108
AM
19485+ *
19486+ * This program, aufs is free software; you can redistribute it and/or modify
19487+ * it under the terms of the GNU General Public License as published by
19488+ * the Free Software Foundation; either version 2 of the License, or
19489+ * (at your option) any later version.
19490+ *
19491+ * This program is distributed in the hope that it will be useful,
19492+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19493+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19494+ * GNU General Public License for more details.
19495+ *
19496+ * You should have received a copy of the GNU General Public License
523b37e3 19497+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 19498+ */
1facf9fc 19499+
1308ab2a 19500+/*
e8791d4f 19501+ * inode operations (del entry)
1308ab2a 19502+ */
4a4d8108 19503+
4a4d8108
AM
19504+#include "aufs.h"
19505+
e8791d4f
AM
19506+/*
19507+ * decide if a new whiteout for @dentry is necessary or not.
19508+ * when it is necessary, prepare the parent dir for the upper branch whose
19509+ * branch index is @bcpup for creation. the actual creation of the whiteout will
19510+ * be done by caller.
19511+ * return value:
19512+ * 0: wh is unnecessary
19513+ * plus: wh is necessary
19514+ * minus: error
19515+ */
19516+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1facf9fc 19517+{
e8791d4f
AM
19518+ int need_wh, err;
19519+ aufs_bindex_t btop;
19520+ struct super_block *sb;
e2f27e51 19521+
e8791d4f
AM
19522+ sb = dentry->d_sb;
19523+ btop = au_dbtop(dentry);
19524+ if (*bcpup < 0) {
19525+ *bcpup = btop;
19526+ if (au_test_ro(sb, btop, d_inode(dentry))) {
19527+ err = AuWbrCopyup(au_sbi(sb), dentry);
19528+ *bcpup = err;
19529+ if (unlikely(err < 0))
19530+ goto out;
19531+ }
19532+ } else
19533+ AuDebugOn(btop < *bcpup
19534+ || au_test_ro(sb, *bcpup, d_inode(dentry)));
19535+ AuDbg("bcpup %d, btop %d\n", *bcpup, btop);
4a4d8108 19536+
e8791d4f
AM
19537+ if (*bcpup != btop) {
19538+ err = au_cpup_dirs(dentry, *bcpup);
19539+ if (unlikely(err))
19540+ goto out;
19541+ need_wh = 1;
1308ab2a 19542+ } else {
e8791d4f
AM
19543+ struct au_dinfo *dinfo, *tmp;
19544+
19545+ need_wh = -ENOMEM;
19546+ dinfo = au_di(dentry);
19547+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
19548+ if (tmp) {
19549+ au_di_cp(tmp, dinfo);
19550+ au_di_swap(tmp, dinfo);
19551+ /* returns the number of positive dentries */
19552+ need_wh = au_lkup_dentry(dentry, btop + 1,
19553+ /* AuLkup_IGNORE_PERM */ 0);
19554+ au_di_swap(tmp, dinfo);
19555+ au_rw_write_unlock(&tmp->di_rwsem);
19556+ au_di_free(tmp);
19557+ }
4a4d8108 19558+ }
e8791d4f
AM
19559+ AuDbg("need_wh %d\n", need_wh);
19560+ err = need_wh;
1facf9fc 19561+
e8791d4f
AM
19562+out:
19563+ return err;
19564+}
4a4d8108 19565+
e8791d4f
AM
19566+/*
19567+ * simple tests for the del-entry operations.
19568+ * following the checks in vfs, plus the parent-child relationship.
19569+ */
19570+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
19571+ struct dentry *h_parent, int isdir)
19572+{
19573+ int err;
19574+ umode_t h_mode;
19575+ struct dentry *h_dentry, *h_latest;
19576+ struct inode *h_inode;
19577+
19578+ h_dentry = au_h_dptr(dentry, bindex);
19579+ if (d_really_is_positive(dentry)) {
19580+ err = -ENOENT;
19581+ if (unlikely(d_is_negative(h_dentry)))
19582+ goto out;
19583+ h_inode = d_inode(h_dentry);
19584+ if (unlikely(!h_inode->i_nlink))
19585+ goto out;
19586+
19587+ h_mode = h_inode->i_mode;
19588+ if (!isdir) {
19589+ err = -EISDIR;
19590+ if (unlikely(S_ISDIR(h_mode)))
19591+ goto out;
19592+ } else if (unlikely(!S_ISDIR(h_mode))) {
19593+ err = -ENOTDIR;
19594+ goto out;
19595+ }
19596+ } else {
19597+ /* rename(2) case */
19598+ err = -EIO;
19599+ if (unlikely(d_is_positive(h_dentry)))
19600+ goto out;
1308ab2a 19601+ }
e8791d4f
AM
19602+
19603+ err = -ENOENT;
19604+ /* expected parent dir is locked */
19605+ if (unlikely(h_parent != h_dentry->d_parent))
19606+ goto out;
19607+ err = 0;
19608+
19609+ /*
19610+ * rmdir a dir may break the consistency on some filesystem.
19611+ * let's try heavy test.
19612+ */
19613+ err = -EACCES;
19614+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
19615+ && au_test_h_perm(d_inode(h_parent),
19616+ MAY_EXEC | MAY_WRITE)))
19617+ goto out;
19618+
19619+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
19620+ err = -EIO;
19621+ if (IS_ERR(h_latest))
19622+ goto out;
19623+ if (h_latest == h_dentry)
19624+ err = 0;
19625+ dput(h_latest);
dece6358 19626+
4f0767ce 19627+out:
1308ab2a 19628+ return err;
19629+}
dece6358 19630+
e8791d4f
AM
19631+/*
19632+ * decide the branch where we operate for @dentry. the branch index will be set
19633+ * @rbcpup. after deciding it, 'pin' it and store the timestamps of the parent
19634+ * dir for reverting.
19635+ * when a new whiteout is necessary, create it.
19636+ */
19637+static struct dentry*
19638+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
19639+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 19640+{
e8791d4f 19641+ struct dentry *wh_dentry;
4a4d8108 19642+ struct super_block *sb;
e8791d4f
AM
19643+ struct path h_path;
19644+ int err, need_wh;
19645+ unsigned int udba;
19646+ aufs_bindex_t bcpup;
1facf9fc 19647+
e8791d4f
AM
19648+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
19649+ wh_dentry = ERR_PTR(need_wh);
19650+ if (unlikely(need_wh < 0))
19651+ goto out;
027c5e7a 19652+
e8791d4f
AM
19653+ sb = dentry->d_sb;
19654+ udba = au_opt_udba(sb);
19655+ bcpup = *rbcpup;
19656+ err = au_pin(pin, dentry, bcpup, udba,
19657+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
19658+ wh_dentry = ERR_PTR(err);
027c5e7a
AM
19659+ if (unlikely(err))
19660+ goto out;
1facf9fc 19661+
e8791d4f
AM
19662+ h_path.dentry = au_pinned_h_parent(pin);
19663+ if (udba != AuOpt_UDBA_NONE
19664+ && au_dbtop(dentry) == bcpup) {
19665+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
19666+ wh_dentry = ERR_PTR(err);
19667+ if (unlikely(err))
19668+ goto out_unpin;
4a4d8108 19669+ }
dece6358 19670+
e8791d4f
AM
19671+ h_path.mnt = au_sbr_mnt(sb, bcpup);
19672+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
19673+ wh_dentry = NULL;
19674+ if (!need_wh)
19675+ goto out; /* success, no need to create whiteout */
4a4d8108 19676+
e8791d4f
AM
19677+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
19678+ if (IS_ERR(wh_dentry))
19679+ goto out_unpin;
1308ab2a 19680+
e8791d4f
AM
19681+ /* returns with the parent is locked and wh_dentry is dget-ed */
19682+ goto out; /* success */
19683+
19684+out_unpin:
19685+ au_unpin(pin);
4f0767ce 19686+out:
e8791d4f 19687+ return wh_dentry;
1308ab2a 19688+}
19689+
e8791d4f
AM
19690+/*
19691+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
19692+ * in order to be revertible and save time for removing many child whiteouts
19693+ * under the dir.
19694+ * returns 1 when there are too many child whiteouts and caller should remove
19695+ * them asynchronously. returns 0 when the number of children is small enough to
19696+ * remove now or the branch fs is a remote fs.
19697+ * otherwise return an error.
19698+ */
19699+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
19700+ struct au_nhash *whlist, struct inode *dir)
4a4d8108 19701+{
e8791d4f
AM
19702+ int rmdir_later, err, dirwh;
19703+ struct dentry *h_dentry;
4a4d8108 19704+ struct super_block *sb;
e8791d4f 19705+ struct inode *inode;
537831f9 19706+
e8791d4f
AM
19707+ sb = dentry->d_sb;
19708+ SiMustAnyLock(sb);
19709+ h_dentry = au_h_dptr(dentry, bindex);
19710+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
7f207e10
AM
19711+ if (unlikely(err))
19712+ goto out;
19713+
e8791d4f
AM
19714+ /* stop monitoring */
19715+ inode = d_inode(dentry);
19716+ au_hn_free(au_hi(inode, bindex));
1308ab2a 19717+
e8791d4f
AM
19718+ if (!au_test_fs_remote(h_dentry->d_sb)) {
19719+ dirwh = au_sbi(sb)->si_dirwh;
19720+ rmdir_later = (dirwh <= 1);
19721+ if (!rmdir_later)
19722+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
19723+ dirwh);
19724+ if (rmdir_later)
19725+ return rmdir_later;
9dbd164d 19726+ }
4a4d8108 19727+
e8791d4f
AM
19728+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
19729+ if (unlikely(err)) {
19730+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
19731+ h_dentry, bindex, err);
19732+ err = 0;
7f207e10 19733+ }
1facf9fc 19734+
7f207e10 19735+out:
e8791d4f
AM
19736+ AuTraceErr(err);
19737+ return err;
4a4d8108 19738+}
1facf9fc 19739+
e8791d4f
AM
19740+/*
19741+ * final procedure for deleting an entry.
19742+ * maintain dentry and iattr.
19743+ */
19744+static void epilog(struct inode *dir, struct dentry *dentry,
19745+ aufs_bindex_t bindex)
19746+{
19747+ struct inode *inode;
1facf9fc 19748+
e8791d4f
AM
19749+ inode = d_inode(dentry);
19750+ d_drop(dentry);
19751+ inode->i_ctime = dir->i_ctime;
b912730e 19752+
e8791d4f
AM
19753+ au_dir_ts(dir, bindex);
19754+ dir->i_version++;
19755+}
19756+
19757+/*
19758+ * when an error happened, remove the created whiteout and revert everything.
19759+ */
19760+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
19761+ aufs_bindex_t bwh, struct dentry *wh_dentry,
19762+ struct dentry *dentry, struct au_dtime *dt)
b912730e 19763+{
e8791d4f
AM
19764+ int rerr;
19765+ struct path h_path = {
19766+ .dentry = wh_dentry,
19767+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
b912730e
AM
19768+ };
19769+
e8791d4f
AM
19770+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
19771+ if (!rerr) {
19772+ au_set_dbwh(dentry, bwh);
19773+ au_dtime_revert(dt);
19774+ return 0;
19775+ }
b912730e 19776+
e8791d4f
AM
19777+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
19778+ return -EIO;
b912730e
AM
19779+}
19780+
e8791d4f
AM
19781+/* ---------------------------------------------------------------------- */
19782+
19783+int aufs_unlink(struct inode *dir, struct dentry *dentry)
b912730e 19784+{
e8791d4f
AM
19785+ int err;
19786+ aufs_bindex_t bwh, bindex, btop;
19787+ struct inode *inode, *h_dir, *delegated;
19788+ struct dentry *parent, *wh_dentry;
19789+ /* to reduce stack size */
19790+ struct {
19791+ struct au_dtime dt;
19792+ struct au_pin pin;
19793+ struct path h_path;
19794+ } *a;
b912730e
AM
19795+
19796+ IMustLock(dir);
b912730e 19797+
e8791d4f
AM
19798+ err = -ENOMEM;
19799+ a = kmalloc(sizeof(*a), GFP_NOFS);
19800+ if (unlikely(!a))
b912730e
AM
19801+ goto out;
19802+
e8791d4f
AM
19803+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
19804+ if (unlikely(err))
19805+ goto out_free;
19806+ err = au_d_hashed_positive(dentry);
b912730e
AM
19807+ if (unlikely(err))
19808+ goto out_unlock;
e8791d4f
AM
19809+ inode = d_inode(dentry);
19810+ IMustLock(inode);
19811+ err = -EISDIR;
19812+ if (unlikely(d_is_dir(dentry)))
19813+ goto out_unlock; /* possible? */
b912730e 19814+
e8791d4f
AM
19815+ btop = au_dbtop(dentry);
19816+ bwh = au_dbwh(dentry);
19817+ bindex = -1;
19818+ parent = dentry->d_parent; /* dir inode is locked */
19819+ di_write_lock_parent(parent);
19820+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
19821+ &a->pin);
19822+ err = PTR_ERR(wh_dentry);
19823+ if (IS_ERR(wh_dentry))
19824+ goto out_parent;
b912730e 19825+
e8791d4f
AM
19826+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, btop);
19827+ a->h_path.dentry = au_h_dptr(dentry, btop);
19828+ dget(a->h_path.dentry);
19829+ if (bindex == btop) {
19830+ h_dir = au_pinned_h_dir(&a->pin);
19831+ delegated = NULL;
19832+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
19833+ if (unlikely(err == -EWOULDBLOCK)) {
19834+ pr_warn("cannot retry for NFSv4 delegation"
19835+ " for an internal unlink\n");
19836+ iput(delegated);
19837+ }
19838+ } else {
19839+ /* dir inode is locked */
19840+ h_dir = d_inode(wh_dentry->d_parent);
19841+ IMustLock(h_dir);
19842+ err = 0;
b912730e
AM
19843+ }
19844+
e8791d4f
AM
19845+ if (!err) {
19846+ vfsub_drop_nlink(inode);
19847+ epilog(dir, dentry, bindex);
b912730e 19848+
e8791d4f
AM
19849+ /* update target timestamps */
19850+ if (bindex == btop) {
19851+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
19852+ /*ignore*/
19853+ inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
19854+ } else
19855+ /* todo: this timestamp may be reverted later */
19856+ inode->i_ctime = h_dir->i_ctime;
19857+ goto out_unpin; /* success */
b912730e 19858+ }
1facf9fc 19859+
e8791d4f
AM
19860+ /* revert */
19861+ if (wh_dentry) {
19862+ int rerr;
027c5e7a 19863+
e8791d4f
AM
19864+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
19865+ &a->dt);
19866+ if (rerr)
19867+ err = rerr;
1308ab2a 19868+ }
1facf9fc 19869+
e8791d4f
AM
19870+out_unpin:
19871+ au_unpin(&a->pin);
19872+ dput(wh_dentry);
19873+ dput(a->h_path.dentry);
19874+out_parent:
19875+ di_write_unlock(parent);
19876+out_unlock:
19877+ aufs_read_unlock(dentry, AuLock_DW);
19878+out_free:
19879+ kfree(a);
19880+out:
4a4d8108
AM
19881+ return err;
19882+}
1facf9fc 19883+
e8791d4f 19884+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
4a4d8108 19885+{
e8791d4f
AM
19886+ int err, rmdir_later;
19887+ aufs_bindex_t bwh, bindex, btop;
19888+ struct inode *inode;
19889+ struct dentry *parent, *wh_dentry, *h_dentry;
19890+ struct au_whtmp_rmdir *args;
19891+ /* to reduce stack size */
19892+ struct {
19893+ struct au_dtime dt;
19894+ struct au_pin pin;
19895+ } *a;
19896+
19897+ IMustLock(dir);
19898+
19899+ err = -ENOMEM;
19900+ a = kmalloc(sizeof(*a), GFP_NOFS);
19901+ if (unlikely(!a))
19902+ goto out;
19903+
19904+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
19905+ if (unlikely(err))
19906+ goto out_free;
19907+ err = au_alive_dir(dentry);
19908+ if (unlikely(err))
19909+ goto out_unlock;
19910+ inode = d_inode(dentry);
19911+ IMustLock(inode);
19912+ err = -ENOTDIR;
19913+ if (unlikely(!d_is_dir(dentry)))
19914+ goto out_unlock; /* possible? */
19915+
19916+ err = -ENOMEM;
19917+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
19918+ if (unlikely(!args))
19919+ goto out_unlock;
19920+
19921+ parent = dentry->d_parent; /* dir inode is locked */
19922+ di_write_lock_parent(parent);
19923+ err = au_test_empty(dentry, &args->whlist);
19924+ if (unlikely(err))
19925+ goto out_parent;
1facf9fc 19926+
5afbbe0d 19927+ btop = au_dbtop(dentry);
e8791d4f
AM
19928+ bwh = au_dbwh(dentry);
19929+ bindex = -1;
19930+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
19931+ &a->pin);
19932+ err = PTR_ERR(wh_dentry);
19933+ if (IS_ERR(wh_dentry))
19934+ goto out_parent;
1facf9fc 19935+
e8791d4f
AM
19936+ h_dentry = au_h_dptr(dentry, btop);
19937+ dget(h_dentry);
19938+ rmdir_later = 0;
19939+ if (bindex == btop) {
19940+ err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
19941+ if (err > 0) {
19942+ rmdir_later = err;
19943+ err = 0;
4a4d8108
AM
19944+ }
19945+ } else {
e8791d4f
AM
19946+ /* stop monitoring */
19947+ au_hn_free(au_hi(inode, btop));
19948+
19949+ /* dir inode is locked */
19950+ IMustLock(d_inode(wh_dentry->d_parent));
19951+ err = 0;
1308ab2a 19952+ }
027c5e7a 19953+
e8791d4f
AM
19954+ if (!err) {
19955+ vfsub_dead_dir(inode);
19956+ au_set_dbdiropq(dentry, -1);
19957+ epilog(dir, dentry, bindex);
4a4d8108 19958+
e8791d4f
AM
19959+ if (rmdir_later) {
19960+ au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
19961+ args = NULL;
027c5e7a 19962+ }
e8791d4f
AM
19963+
19964+ goto out_unpin; /* success */
19965+ }
19966+
19967+ /* revert */
19968+ AuLabel(revert);
19969+ if (wh_dentry) {
19970+ int rerr;
19971+
19972+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
19973+ &a->dt);
19974+ if (rerr)
19975+ err = rerr;
027c5e7a 19976+ }
86dc4139 19977+
e8791d4f
AM
19978+out_unpin:
19979+ au_unpin(&a->pin);
19980+ dput(wh_dentry);
19981+ dput(h_dentry);
19982+out_parent:
19983+ di_write_unlock(parent);
19984+ if (args)
19985+ au_whtmp_rmdir_free(args);
19986+out_unlock:
19987+ aufs_read_unlock(dentry, AuLock_DW);
19988+out_free:
19989+ kfree(a);
86dc4139 19990+out:
e8791d4f 19991+ AuTraceErr(err);
86dc4139
AM
19992+ return err;
19993+}
e8791d4f
AM
19994diff -urNp -x '*.orig' linux-4.9/fs/aufs/i_op_ren.c linux-4.9/fs/aufs/i_op_ren.c
19995--- linux-4.9/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
19996+++ linux-4.9/fs/aufs/i_op_ren.c 2021-02-24 16:15:09.531573855 +0100
19997@@ -0,0 +1,1246 @@
19998+/*
19999+ * Copyright (C) 2005-2018 Junjiro R. Okajima
20000+ *
20001+ * This program, aufs is free software; you can redistribute it and/or modify
20002+ * it under the terms of the GNU General Public License as published by
20003+ * the Free Software Foundation; either version 2 of the License, or
20004+ * (at your option) any later version.
20005+ *
20006+ * This program is distributed in the hope that it will be useful,
20007+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20008+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20009+ * GNU General Public License for more details.
20010+ *
20011+ * You should have received a copy of the GNU General Public License
20012+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
20013+ */
86dc4139 20014+
e8791d4f
AM
20015+/*
20016+ * inode operation (rename entry)
20017+ * todo: this is crazy monster
20018+ */
86dc4139 20019+
e8791d4f 20020+#include "aufs.h"
86dc4139 20021+
e8791d4f
AM
20022+enum { AuSRC, AuDST, AuSrcDst };
20023+enum { AuPARENT, AuCHILD, AuParentChild };
86dc4139 20024+
e8791d4f
AM
20025+#define AuRen_ISDIR_SRC 1
20026+#define AuRen_ISDIR_DST (1 << 1)
20027+#define AuRen_ISSAMEDIR (1 << 2)
20028+#define AuRen_WHSRC (1 << 3)
20029+#define AuRen_WHDST (1 << 4)
20030+#define AuRen_MNT_WRITE (1 << 5)
20031+#define AuRen_DT_DSTDIR (1 << 6)
20032+#define AuRen_DIROPQ_SRC (1 << 7)
20033+#define AuRen_DIROPQ_DST (1 << 8)
20034+#define AuRen_DIRREN (1 << 9)
20035+#define AuRen_DROPPED_SRC (1 << 10)
20036+#define AuRen_DROPPED_DST (1 << 11)
20037+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
20038+#define au_fset_ren(flags, name) \
20039+ do { (flags) |= AuRen_##name; } while (0)
20040+#define au_fclr_ren(flags, name) \
20041+ do { (flags) &= ~AuRen_##name; } while (0)
86dc4139 20042+
e8791d4f
AM
20043+#ifndef CONFIG_AUFS_DIRREN
20044+#undef AuRen_DIRREN
20045+#define AuRen_DIRREN 0
20046+#endif
86dc4139 20047+
e8791d4f
AM
20048+struct au_ren_args {
20049+ struct {
20050+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
20051+ *wh_dentry;
20052+ struct inode *dir, *inode;
20053+ struct au_hinode *hdir, *hinode;
20054+ struct au_dtime dt[AuParentChild];
20055+ aufs_bindex_t btop, bdiropq;
20056+ } sd[AuSrcDst];
86dc4139 20057+
e8791d4f
AM
20058+#define src_dentry sd[AuSRC].dentry
20059+#define src_dir sd[AuSRC].dir
20060+#define src_inode sd[AuSRC].inode
20061+#define src_h_dentry sd[AuSRC].h_dentry
20062+#define src_parent sd[AuSRC].parent
20063+#define src_h_parent sd[AuSRC].h_parent
20064+#define src_wh_dentry sd[AuSRC].wh_dentry
20065+#define src_hdir sd[AuSRC].hdir
20066+#define src_hinode sd[AuSRC].hinode
20067+#define src_h_dir sd[AuSRC].hdir->hi_inode
20068+#define src_dt sd[AuSRC].dt
20069+#define src_btop sd[AuSRC].btop
20070+#define src_bdiropq sd[AuSRC].bdiropq
86dc4139 20071+
e8791d4f
AM
20072+#define dst_dentry sd[AuDST].dentry
20073+#define dst_dir sd[AuDST].dir
20074+#define dst_inode sd[AuDST].inode
20075+#define dst_h_dentry sd[AuDST].h_dentry
20076+#define dst_parent sd[AuDST].parent
20077+#define dst_h_parent sd[AuDST].h_parent
20078+#define dst_wh_dentry sd[AuDST].wh_dentry
20079+#define dst_hdir sd[AuDST].hdir
20080+#define dst_hinode sd[AuDST].hinode
20081+#define dst_h_dir sd[AuDST].hdir->hi_inode
20082+#define dst_dt sd[AuDST].dt
20083+#define dst_btop sd[AuDST].btop
20084+#define dst_bdiropq sd[AuDST].bdiropq
86dc4139 20085+
e8791d4f
AM
20086+ struct dentry *h_trap;
20087+ struct au_branch *br;
20088+ struct path h_path;
20089+ struct au_nhash whlist;
20090+ aufs_bindex_t btgt, src_bwh;
86dc4139 20091+
e8791d4f
AM
20092+ struct {
20093+ unsigned short auren_flags;
20094+ unsigned char flags; /* syscall parameter */
20095+ unsigned char exchange;
20096+ } __packed;
86dc4139 20097+
e8791d4f
AM
20098+ struct au_whtmp_rmdir *thargs;
20099+ struct dentry *h_dst;
20100+ struct au_hinode *h_root;
20101+};
86dc4139 20102+
e8791d4f 20103+/* ---------------------------------------------------------------------- */
86dc4139 20104+
e8791d4f
AM
20105+/*
20106+ * functions for reverting.
20107+ * when an error happened in a single rename system call, we should revert
20108+ * everything as if nothing happened.
20109+ * we don't need to revert the copied-up/down the parent dir since they are
20110+ * harmless.
20111+ */
86dc4139 20112+
e8791d4f
AM
20113+#define RevertFailure(fmt, ...) do { \
20114+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
20115+ ##__VA_ARGS__, err, rerr); \
20116+ err = -EIO; \
20117+} while (0)
1facf9fc 20118+
e8791d4f 20119+static void au_ren_do_rev_diropq(int err, struct au_ren_args *a, int idx)
86dc4139 20120+{
e8791d4f
AM
20121+ int rerr;
20122+ struct dentry *d;
20123+#define src_or_dst(member) a->sd[idx].member /* field of the sd[] slot chosen by idx */
1308ab2a 20124+
e8791d4f
AM
20125+ d = src_or_dst(dentry); /* {src,dst}_dentry */
20126+ au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
20127+ rerr = au_diropq_remove(d, a->btgt);
20128+ au_hn_inode_unlock(src_or_dst(hinode));
20129+ au_set_dbdiropq(d, src_or_dst(bdiropq)); /* put back the recorded diropq index */
20130+ if (rerr)
20131+ RevertFailure("remove diropq %pd", d);
20132+
20133+#undef src_or_dst
dece6358 20134+}
1facf9fc 20135+
e8791d4f 20136+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
dece6358 20137+{
e8791d4f
AM
20138+ if (au_ftest_ren(a->auren_flags, DIROPQ_SRC))
20139+ au_ren_do_rev_diropq(err, a, AuSRC);
20140+ if (au_ftest_ren(a->auren_flags, DIROPQ_DST))
20141+ au_ren_do_rev_diropq(err, a, AuDST);
4a4d8108 20142+}
1308ab2a 20143+
e8791d4f 20144+static void au_ren_rev_rename(int err, struct au_ren_args *a)
4a4d8108 20145+{
e8791d4f
AM
20146+ int rerr;
20147+ struct inode *delegated;
dece6358 20148+
e8791d4f
AM
20149+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
20150+ a->src_h_parent);
20151+ rerr = PTR_ERR(a->h_path.dentry);
20152+ if (IS_ERR(a->h_path.dentry)) {
20153+ RevertFailure("lkup one %pd", a->src_dentry);
20154+ return;
4a4d8108 20155+ }
1308ab2a 20156+
e8791d4f
AM
20157+ delegated = NULL;
20158+ rerr = vfsub_rename(a->dst_h_dir,
20159+ au_h_dptr(a->src_dentry, a->btgt),
20160+ a->src_h_dir, &a->h_path, &delegated, a->flags);
20161+ if (unlikely(rerr == -EWOULDBLOCK)) {
20162+ pr_warn("cannot retry for NFSv4 delegation"
20163+ " for an internal rename\n");
20164+ iput(delegated);
dece6358 20165+ }
e8791d4f
AM
20166+ d_drop(a->h_path.dentry);
20167+ dput(a->h_path.dentry);
20168+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
20169+ if (rerr)
20170+ RevertFailure("rename %pd", a->src_dentry);
1facf9fc 20171+}
20172+
e8791d4f 20173+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
4a4d8108 20174+{
e8791d4f
AM
20175+ int rerr;
20176+ struct inode *delegated;
86dc4139 20177+
e8791d4f
AM
20178+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
20179+ a->dst_h_parent);
20180+ rerr = PTR_ERR(a->h_path.dentry);
20181+ if (IS_ERR(a->h_path.dentry)) {
20182+ RevertFailure("lkup one %pd", a->dst_dentry);
20183+ return;
20184+ }
20185+ if (d_is_positive(a->h_path.dentry)) {
20186+ d_drop(a->h_path.dentry);
20187+ dput(a->h_path.dentry);
20188+ return;
20189+ }
20190+
20191+ delegated = NULL;
20192+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
20193+ &delegated, a->flags);
20194+ if (unlikely(rerr == -EWOULDBLOCK)) {
20195+ pr_warn("cannot retry for NFSv4 delegation"
20196+ " for an internal rename\n");
20197+ iput(delegated);
20198+ }
20199+ d_drop(a->h_path.dentry);
20200+ dput(a->h_path.dentry);
20201+ if (!rerr)
20202+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
20203+ else
20204+ RevertFailure("rename %pd", a->h_dst);
4a4d8108
AM
20205+}
20206+
e8791d4f 20207+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
4a4d8108 20208+{
e8791d4f
AM
20209+ int rerr;
20210+
20211+ a->h_path.dentry = a->src_wh_dentry;
20212+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
20213+ au_set_dbwh(a->src_dentry, a->src_bwh);
20214+ if (rerr)
20215+ RevertFailure("unlink %pd", a->src_wh_dentry);
4a4d8108 20216+}
e8791d4f 20217+#undef RevertFailure
4a4d8108 20218+
dece6358
AM
20219+/* ---------------------------------------------------------------------- */
20220+
1308ab2a 20221+/*
e8791d4f
AM
20222+ * when we have to copyup the renaming entry, do it with the rename-target name
20223+ * in order to minimize the cost (the later actual rename is unnecessary).
20224+ * otherwise rename it on the target branch.
1308ab2a 20225+ */
e8791d4f 20226+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 20227+{
4a4d8108 20228+ int err;
e8791d4f
AM
20229+ struct dentry *d;
20230+ struct inode *delegated;
1facf9fc 20231+
e8791d4f
AM
20232+ d = a->src_dentry;
20233+ if (au_dbtop(d) == a->btgt) {
20234+ a->h_path.dentry = a->dst_h_dentry;
20235+ AuDebugOn(au_dbtop(d) != a->btgt);
20236+ delegated = NULL;
20237+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
20238+ a->dst_h_dir, &a->h_path, &delegated,
20239+ a->flags);
20240+ if (unlikely(err == -EWOULDBLOCK)) {
20241+ pr_warn("cannot retry for NFSv4 delegation"
20242+ " for an internal rename\n");
20243+ iput(delegated);
20244+ }
20245+ } else
20246+ BUG();
20247+
20248+ if (!err && a->h_dst)
20249+ /* it will be set to dinfo later */
20250+ dget(a->h_dst);
1facf9fc 20251+
1308ab2a 20252+ return err;
20253+}
dece6358 20254+
e8791d4f
AM
20255+/* cf. aufs_rmdir() */
20256+static int au_ren_del_whtmp(struct au_ren_args *a)
1308ab2a 20257+{
20258+ int err;
e8791d4f 20259+ struct inode *dir;
4a4d8108 20260+
e8791d4f
AM
20261+ dir = a->dst_dir;
20262+ SiMustAnyLock(dir->i_sb);
20263+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
20264+ au_sbi(dir->i_sb)->si_dirwh)
20265+ || au_test_fs_remote(a->h_dst->d_sb)) {
20266+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
20267+ if (unlikely(err))
20268+ pr_warn("failed removing whtmp dir %pd (%d), "
20269+ "ignored.\n", a->h_dst, err);
20270+ } else {
20271+ au_nhash_wh_free(&a->thargs->whlist);
20272+ a->thargs->whlist = a->whlist;
20273+ a->whlist.nh_num = 0;
20274+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
20275+ dput(a->h_dst);
20276+ a->thargs = NULL;
4a4d8108
AM
20277+ }
20278+
e8791d4f
AM
20279+ return 0;
20280+}
4a4d8108 20281+
e8791d4f
AM
20282+/* make it 'opaque' dir. */
20283+static int au_ren_do_diropq(struct au_ren_args *a, int idx)
20284+{
20285+ int err;
20286+ struct dentry *d, *diropq;
20287+#define src_or_dst(member) a->sd[idx].member
4a4d8108 20288+
e8791d4f
AM
20289+ err = 0;
20290+ d = src_or_dst(dentry); /* {src,dst}_dentry */
20291+ src_or_dst(bdiropq) = au_dbdiropq(d);
20292+ src_or_dst(hinode) = au_hi(src_or_dst(inode), a->btgt);
20293+ au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
20294+ diropq = au_diropq_create(d, a->btgt);
20295+ au_hn_inode_unlock(src_or_dst(hinode));
20296+ if (IS_ERR(diropq))
20297+ err = PTR_ERR(diropq);
20298+ else
20299+ dput(diropq);
4a4d8108 20300+
e8791d4f
AM
20301+#undef src_or_dst_
20302+ return err;
20303+}
4a4d8108 20304+
e8791d4f
AM
20305+static int au_ren_diropq(struct au_ren_args *a)
20306+{
20307+ int err;
20308+ unsigned char always;
20309+ struct dentry *d;
1308ab2a 20310+
e8791d4f
AM
20311+ err = 0;
20312+ d = a->dst_dentry; /* already renamed on the branch */
20313+ always = !!au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ);
20314+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
20315+ && !au_ftest_ren(a->auren_flags, DIRREN)
20316+ && a->btgt != au_dbdiropq(a->src_dentry)
20317+ && (a->dst_wh_dentry
20318+ || a->btgt <= au_dbdiropq(d)
20319+ /* hide the lower to keep xino */
20320+ /* the lowers may not be a dir, but we hide them anyway */
20321+ || a->btgt < au_dbbot(d)
20322+ || always)) {
20323+ AuDbg("here\n");
20324+ err = au_ren_do_diropq(a, AuSRC);
20325+ if (unlikely(err))
20326+ goto out;
20327+ au_fset_ren(a->auren_flags, DIROPQ_SRC);
20328+ }
20329+ if (!a->exchange)
dece6358 20330+ goto out; /* success */
e8791d4f
AM
20331+
20332+ d = a->src_dentry; /* already renamed on the branch */
20333+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)
20334+ && a->btgt != au_dbdiropq(a->dst_dentry)
20335+ && (a->btgt < au_dbdiropq(d)
20336+ || a->btgt < au_dbbot(d)
20337+ || always)) {
20338+ AuDbgDentry(a->src_dentry);
20339+ AuDbgDentry(a->dst_dentry);
20340+ err = au_ren_do_diropq(a, AuDST);
20341+ if (unlikely(err))
20342+ goto out_rev_src;
20343+ au_fset_ren(a->auren_flags, DIROPQ_DST);
4a4d8108 20344+ }
e8791d4f
AM
20345+ goto out; /* success */
20346+
20347+out_rev_src:
20348+ AuDbg("err %d, reverting src\n", err);
20349+ au_ren_rev_diropq(err, a);
4f0767ce 20350+out:
1facf9fc 20351+ return err;
20352+}
20353+
e8791d4f 20354+static int do_rename(struct au_ren_args *a)
1facf9fc 20355+{
4a4d8108 20356+ int err;
e8791d4f 20357+ struct dentry *d, *h_d;
dece6358 20358+
e8791d4f
AM
20359+ if (!a->exchange) {
20360+ /* prepare workqueue args for asynchronous rmdir */
20361+ h_d = a->dst_h_dentry;
20362+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)
20363+ /* && !au_ftest_ren(a->auren_flags, DIRREN) */
20364+ && d_is_positive(h_d)) {
20365+ err = -ENOMEM;
20366+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb,
20367+ GFP_NOFS);
20368+ if (unlikely(!a->thargs))
20369+ goto out;
20370+ a->h_dst = dget(h_d);
20371+ }
f2c43d5f 20372+
e8791d4f
AM
20373+ /* create whiteout for src_dentry */
20374+ if (au_ftest_ren(a->auren_flags, WHSRC)) {
20375+ a->src_bwh = au_dbwh(a->src_dentry);
20376+ AuDebugOn(a->src_bwh >= 0);
20377+ a->src_wh_dentry = au_wh_create(a->src_dentry, a->btgt,
20378+ a->src_h_parent);
20379+ err = PTR_ERR(a->src_wh_dentry);
20380+ if (IS_ERR(a->src_wh_dentry))
20381+ goto out_thargs;
20382+ }
1facf9fc 20383+
e8791d4f
AM
20384+ /* lookup whiteout for dentry */
20385+ if (au_ftest_ren(a->auren_flags, WHDST)) {
20386+ h_d = au_wh_lkup(a->dst_h_parent,
20387+ &a->dst_dentry->d_name, a->br);
20388+ err = PTR_ERR(h_d);
20389+ if (IS_ERR(h_d))
20390+ goto out_whsrc;
20391+ if (d_is_negative(h_d))
20392+ dput(h_d);
20393+ else
20394+ a->dst_wh_dentry = h_d;
20395+ }
dece6358 20396+
e8791d4f
AM
20397+ /* rename dentry to tmpwh */
20398+ if (a->thargs) {
20399+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
20400+ if (unlikely(err))
20401+ goto out_whdst;
e49829fe 20402+
e8791d4f
AM
20403+ d = a->dst_dentry;
20404+ au_set_h_dptr(d, a->btgt, NULL);
20405+ err = au_lkup_neg(d, a->btgt, /*wh*/0);
4a4d8108 20406+ if (unlikely(err))
e8791d4f
AM
20407+ goto out_whtmp;
20408+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
4a4d8108 20409+ }
dece6358 20410+ }
dece6358 20411+
e8791d4f
AM
20412+ BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_btop != a->btgt);
20413+#if 0
20414+ BUG_ON(!au_ftest_ren(a->auren_flags, DIRREN)
20415+ && d_is_positive(a->dst_h_dentry)
20416+ && a->src_btop != a->btgt);
20417+#endif
dece6358 20418+
e8791d4f
AM
20419+ /* rename by vfs_rename or cpup */
20420+ err = au_ren_or_cpup(a);
20421+ if (unlikely(err))
20422+ /* leave the copied-up one */
20423+ goto out_whtmp;
dece6358 20424+
e8791d4f
AM
20425+ /* make dir opaque */
20426+ err = au_ren_diropq(a);
20427+ if (unlikely(err))
20428+ goto out_rename;
1308ab2a 20429+
e8791d4f
AM
20430+ /* update target timestamps */
20431+ if (a->exchange) {
20432+ AuDebugOn(au_dbtop(a->dst_dentry) != a->btgt);
20433+ a->h_path.dentry = au_h_dptr(a->dst_dentry, a->btgt);
20434+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
20435+ a->dst_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
20436+ }
20437+ AuDebugOn(au_dbtop(a->src_dentry) != a->btgt);
20438+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
20439+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
20440+ a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
1308ab2a 20441+
e8791d4f
AM
20442+ if (!a->exchange) {
20443+ /* remove whiteout for dentry */
20444+ if (a->dst_wh_dentry) {
20445+ a->h_path.dentry = a->dst_wh_dentry;
20446+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
20447+ a->dst_dentry);
20448+ if (unlikely(err))
20449+ goto out_diropq;
523b37e3 20450+ }
e8791d4f
AM
20451+
20452+ /* remove whtmp */
20453+ if (a->thargs)
20454+ au_ren_del_whtmp(a); /* ignore this error */
20455+
20456+ au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
523b37e3 20457+ }
e8791d4f
AM
20458+ err = 0;
20459+ goto out_success;
1308ab2a 20460+
e8791d4f
AM
20461+out_diropq:
20462+ au_ren_rev_diropq(err, a);
20463+out_rename:
20464+ au_ren_rev_rename(err, a);
20465+ dput(a->h_dst);
20466+out_whtmp:
20467+ if (a->thargs)
20468+ au_ren_rev_whtmp(err, a);
20469+out_whdst:
20470+ dput(a->dst_wh_dentry);
20471+ a->dst_wh_dentry = NULL;
20472+out_whsrc:
20473+ if (a->src_wh_dentry)
20474+ au_ren_rev_whsrc(err, a);
20475+out_success:
20476+ dput(a->src_wh_dentry);
20477+ dput(a->dst_wh_dentry);
20478+out_thargs:
20479+ if (a->thargs) {
20480+ dput(a->h_dst);
20481+ au_whtmp_rmdir_free(a->thargs);
20482+ a->thargs = NULL;
4a4d8108 20483+ }
4f0767ce 20484+out:
4a4d8108 20485+ return err;
1facf9fc 20486+}
20487+
e8791d4f
AM
20488+/* ---------------------------------------------------------------------- */
20489+
20490+/*
20491+ * test if @dentry dir can be rename destination or not.
20492+ * success means, it is a logically empty dir.
20493+ */
20494+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
20495+{
20496+ return au_test_empty(dentry, whlist);
20497+}
20498+
20499+/*
20500+ * test if @a->src_dentry dir can be rename source or not.
20501+ * if it can, return 0.
20502+ * success means,
20503+ * - it is a logically empty dir.
20504+ * - or, it exists on writable branch and has no children including whiteouts
20505+ * on the lower branch unless DIRREN is on.
20506+ */
20507+static int may_rename_srcdir(struct au_ren_args *a)
c1595e42
JR
20508+{
20509+ int err;
e8791d4f
AM
20510+ unsigned int rdhash;
20511+ aufs_bindex_t btop, btgt;
20512+ struct dentry *dentry;
c1595e42 20513+ struct super_block *sb;
e8791d4f 20514+ struct au_sbinfo *sbinfo;
c1595e42 20515+
e8791d4f 20516+ dentry = a->src_dentry;
c1595e42 20517+ sb = dentry->d_sb;
e8791d4f
AM
20518+ sbinfo = au_sbi(sb);
20519+ if (au_opt_test(sbinfo->si_mntflags, DIRREN))
20520+ au_fset_ren(a->auren_flags, DIRREN);
20521+
20522+ btgt = a->btgt;
20523+ btop = au_dbtop(dentry);
20524+ if (btop != btgt) {
20525+ struct au_nhash whlist;
20526+
20527+ SiMustAnyLock(sb);
20528+ rdhash = sbinfo->si_rdhash;
20529+ if (!rdhash)
20530+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
20531+ dentry));
20532+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
c1595e42
JR
20533+ if (unlikely(err))
20534+ goto out;
e8791d4f
AM
20535+ err = au_test_empty(dentry, &whlist);
20536+ au_nhash_wh_free(&whlist);
c1595e42 20537+ goto out;
e8791d4f 20538+ }
c1595e42 20539+
e8791d4f
AM
20540+ if (btop == au_dbtaildir(dentry))
20541+ return 0; /* success */
20542+
20543+ err = au_test_empty_lower(dentry);
c1595e42
JR
20544+
20545+out:
e8791d4f
AM
20546+ if (err == -ENOTEMPTY) {
20547+ if (au_ftest_ren(a->auren_flags, DIRREN)) {
20548+ err = 0;
20549+ } else {
20550+ AuWarn1("renaming dir who has child(ren) on multiple "
20551+ "branches, is not supported\n");
20552+ err = -EXDEV;
20553+ }
20554+ }
c1595e42
JR
20555+ return err;
20556+}
20557+
e8791d4f
AM
20558+/* side effect: sets whlist and h_dentry */
20559+static int au_ren_may_dir(struct au_ren_args *a)
c1595e42
JR
20560+{
20561+ int err;
e8791d4f
AM
20562+ unsigned int rdhash;
20563+ struct dentry *d;
c1595e42 20564+
e8791d4f
AM
20565+ d = a->dst_dentry;
20566+ SiMustAnyLock(d->d_sb);
c1595e42 20567+
e8791d4f
AM
20568+ err = 0;
20569+ if (au_ftest_ren(a->auren_flags, ISDIR_DST) && a->dst_inode) {
20570+ rdhash = au_sbi(d->d_sb)->si_rdhash;
20571+ if (!rdhash)
20572+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
20573+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
20574+ if (unlikely(err))
20575+ goto out;
c1595e42 20576+
e8791d4f
AM
20577+ if (!a->exchange) {
20578+ au_set_dbtop(d, a->dst_btop);
20579+ err = may_rename_dstdir(d, &a->whlist);
20580+ au_set_dbtop(d, a->btgt);
20581+ } else
20582+ err = may_rename_srcdir(a);
c1595e42 20583+ }
e8791d4f 20584+ a->dst_h_dentry = au_h_dptr(d, au_dbtop(d));
c1595e42 20585+ if (unlikely(err))
e8791d4f 20586+ goto out;
c1595e42 20587+
e8791d4f
AM
20588+ d = a->src_dentry;
20589+ a->src_h_dentry = au_h_dptr(d, au_dbtop(d));
20590+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
20591+ err = may_rename_srcdir(a);
20592+ if (unlikely(err)) {
20593+ au_nhash_wh_free(&a->whlist);
20594+ a->whlist.nh_num = 0;
20595+ }
20596+ }
c1595e42 20597+out:
c1595e42
JR
20598+ return err;
20599+}
1facf9fc 20600+
e8791d4f 20601+/* ---------------------------------------------------------------------- */
1facf9fc 20602+
c1595e42 20603+/*
e8791d4f
AM
20604+ * simple tests for rename.
20605+ * following the checks in vfs, plus the parent-child relationship.
c1595e42 20606+ */
e8791d4f 20607+static int au_may_ren(struct au_ren_args *a)
1facf9fc 20608+{
e8791d4f
AM
20609+ int err, isdir;
20610+ struct inode *h_inode;
1facf9fc 20611+
e8791d4f
AM
20612+ if (a->src_btop == a->btgt) {
20613+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
20614+ au_ftest_ren(a->auren_flags, ISDIR_SRC));
20615+ if (unlikely(err))
20616+ goto out;
20617+ err = -EINVAL;
20618+ if (unlikely(a->src_h_dentry == a->h_trap))
20619+ goto out;
4a4d8108 20620+ }
c1595e42 20621+
e8791d4f
AM
20622+ err = 0;
20623+ if (a->dst_btop != a->btgt)
20624+ goto out;
c1595e42 20625+
e8791d4f
AM
20626+ err = -ENOTEMPTY;
20627+ if (unlikely(a->dst_h_dentry == a->h_trap))
c1595e42 20628+ goto out;
4a4d8108 20629+
e8791d4f
AM
20630+ err = -EIO;
20631+ isdir = !!au_ftest_ren(a->auren_flags, ISDIR_DST);
20632+ if (d_really_is_negative(a->dst_dentry)) {
20633+ if (d_is_negative(a->dst_h_dentry))
20634+ err = au_may_add(a->dst_dentry, a->btgt,
20635+ a->dst_h_parent, isdir);
20636+ } else {
20637+ if (unlikely(d_is_negative(a->dst_h_dentry)))
20638+ goto out;
20639+ h_inode = d_inode(a->dst_h_dentry);
20640+ if (h_inode->i_nlink)
20641+ err = au_may_del(a->dst_dentry, a->btgt,
20642+ a->dst_h_parent, isdir);
1facf9fc 20643+ }
4a4d8108 20644+
7f207e10 20645+out:
e8791d4f
AM
20646+ if (unlikely(err == -ENOENT || err == -EEXIST))
20647+ err = -EIO;
7f207e10 20648+ AuTraceErr(err);
4a4d8108 20649+ return err;
1facf9fc 20650+}
20651+
20652+/* ---------------------------------------------------------------------- */
20653+
e8791d4f
AM
20654+/*
20655+ * locking order
20656+ * (VFS)
20657+ * - src_dir and dir by lock_rename()
20658+ * - inode if exists
20659+ * (aufs)
20660+ * - lock all
20661+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
20662+ * + si_read_lock
20663+ * + di_write_lock2_child()
20664+ * + di_write_lock_child()
20665+ * + ii_write_lock_child()
20666+ * + di_write_lock_child2()
20667+ * + ii_write_lock_child2()
20668+ * + src_parent and parent
20669+ * + di_write_lock_parent()
20670+ * + ii_write_lock_parent()
20671+ * + di_write_lock_parent2()
20672+ * + ii_write_lock_parent2()
20673+ * + lower src_dir and dir by vfsub_lock_rename()
20674+ * + verify every relationship between child and parent. if any
20675+ * of them failed, unlock all and return -EBUSY.
20676+ */
20677+static void au_ren_unlock(struct au_ren_args *a)
20678+{
20679+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
20680+ a->dst_h_parent, a->dst_hdir);
20681+ if (au_ftest_ren(a->auren_flags, DIRREN)
20682+ && a->h_root)
20683+ au_hn_inode_unlock(a->h_root);
20684+ if (au_ftest_ren(a->auren_flags, MNT_WRITE))
20685+ vfsub_mnt_drop_write(au_br_mnt(a->br));
20686+}
20687+
20688+static int au_ren_lock(struct au_ren_args *a)
4a4d8108
AM
20689+{
20690+ int err;
e8791d4f 20691+ unsigned int udba;
1facf9fc 20692+
e8791d4f
AM
20693+ err = 0;
20694+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
20695+ a->src_hdir = au_hi(a->src_dir, a->btgt);
20696+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
20697+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
febd17d6 20698+
e8791d4f 20699+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
027c5e7a 20700+ if (unlikely(err))
c2c0f25c 20701+ goto out;
e8791d4f
AM
20702+ au_fset_ren(a->auren_flags, MNT_WRITE);
20703+ if (au_ftest_ren(a->auren_flags, DIRREN)) {
20704+ struct dentry *root;
20705+ struct inode *dir;
027c5e7a 20706+
e8791d4f
AM
20707+ /*
20708+ * sbinfo is already locked, so this ii_read_lock is
20709+ * unnecessary. but our debugging feature checks it.
20710+ */
20711+ root = a->src_inode->i_sb->s_root;
20712+ if (root != a->src_parent && root != a->dst_parent) {
20713+ dir = d_inode(root);
20714+ ii_read_lock_parent3(dir);
20715+ a->h_root = au_hi(dir, a->btgt);
20716+ ii_read_unlock(dir);
20717+ au_hn_inode_lock_nested(a->h_root, AuLsc_I_PARENT3);
c2c0f25c
AM
20718+ }
20719+ }
e8791d4f
AM
20720+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
20721+ a->dst_h_parent, a->dst_hdir);
20722+ udba = au_opt_udba(a->src_dentry->d_sb);
20723+ if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
20724+ || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
20725+ err = au_busy_or_stale();
20726+ if (!err && au_dbtop(a->src_dentry) == a->btgt)
20727+ err = au_h_verify(a->src_h_dentry, udba,
20728+ d_inode(a->src_h_parent), a->src_h_parent,
20729+ a->br);
20730+ if (!err && au_dbtop(a->dst_dentry) == a->btgt)
20731+ err = au_h_verify(a->dst_h_dentry, udba,
20732+ d_inode(a->dst_h_parent), a->dst_h_parent,
20733+ a->br);
20734+ if (!err)
20735+ goto out; /* success */
1facf9fc 20736+
e8791d4f
AM
20737+ err = au_busy_or_stale();
20738+ au_ren_unlock(a);
c2c0f25c 20739+
4f0767ce 20740+out:
e8791d4f 20741+ return err;
4a4d8108 20742+}
1facf9fc 20743+
4a4d8108 20744+/* ---------------------------------------------------------------------- */
1facf9fc 20745+
e8791d4f 20746+static void au_ren_refresh_dir(struct au_ren_args *a)
e2f27e51 20747+{
e8791d4f
AM
20748+ struct inode *dir;
20749+
20750+ dir = a->dst_dir;
20751+ dir->i_version++;
20752+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
20753+ /* is this updating defined in POSIX? */
20754+ au_cpup_attr_timesizes(a->src_inode);
20755+ au_cpup_attr_nlink(dir, /*force*/1);
20756+ }
20757+ au_dir_ts(dir, a->btgt);
20758+
20759+ if (a->exchange) {
20760+ dir = a->src_dir;
20761+ dir->i_version++;
20762+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)) {
20763+ /* is this updating defined in POSIX? */
20764+ au_cpup_attr_timesizes(a->dst_inode);
20765+ au_cpup_attr_nlink(dir, /*force*/1);
20766+ }
20767+ au_dir_ts(dir, a->btgt);
20768+ }
20769+
20770+ if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
20771+ return;
20772+
20773+ dir = a->src_dir;
20774+ dir->i_version++;
20775+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC))
20776+ au_cpup_attr_nlink(dir, /*force*/1);
20777+ au_dir_ts(dir, a->btgt);
e2f27e51
AM
20778+}
20779+
e8791d4f 20780+static void au_ren_refresh(struct au_ren_args *a)
4a4d8108 20781+{
e8791d4f
AM
20782+ aufs_bindex_t bbot, bindex;
20783+ struct dentry *d, *h_d;
20784+ struct inode *i, *h_i;
0c3ec466 20785+ struct super_block *sb;
0c3ec466 20786+
e8791d4f
AM
20787+ d = a->dst_dentry;
20788+ d_drop(d);
20789+ if (a->h_dst)
20790+ /* already dget-ed by au_ren_or_cpup() */
20791+ au_set_h_dptr(d, a->btgt, a->h_dst);
e2f27e51 20792+
e8791d4f
AM
20793+ i = a->dst_inode;
20794+ if (i) {
20795+ if (!a->exchange) {
20796+ if (!au_ftest_ren(a->auren_flags, ISDIR_DST))
20797+ vfsub_drop_nlink(i);
20798+ else {
20799+ vfsub_dead_dir(i);
20800+ au_cpup_attr_timesizes(i);
20801+ }
20802+ au_update_dbrange(d, /*do_put_zero*/1);
20803+ } else
20804+ au_cpup_attr_nlink(i, /*force*/1);
20805+ } else {
20806+ bbot = a->btgt;
20807+ for (bindex = au_dbtop(d); bindex < bbot; bindex++)
20808+ au_set_h_dptr(d, bindex, NULL);
20809+ bbot = au_dbbot(d);
20810+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++)
20811+ au_set_h_dptr(d, bindex, NULL);
20812+ au_update_dbrange(d, /*do_put_zero*/0);
20813+ }
e2f27e51 20814+
e8791d4f
AM
20815+ if (a->exchange
20816+ || au_ftest_ren(a->auren_flags, DIRREN)) {
20817+ d_drop(a->src_dentry);
20818+ if (au_ftest_ren(a->auren_flags, DIRREN))
20819+ au_set_dbwh(a->src_dentry, -1);
20820+ return;
20821+ }
20822+
20823+ d = a->src_dentry;
20824+ au_set_dbwh(d, -1);
20825+ bbot = au_dbbot(d);
20826+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
20827+ h_d = au_h_dptr(d, bindex);
20828+ if (h_d)
20829+ au_set_h_dptr(d, bindex, NULL);
e2f27e51 20830+ }
e8791d4f 20831+ au_set_dbbot(d, a->btgt);
e2f27e51 20832+
e8791d4f
AM
20833+ sb = d->d_sb;
20834+ i = a->src_inode;
20835+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
20836+ return; /* success */
38d290e6 20837+
e8791d4f
AM
20838+ bbot = au_ibbot(i);
20839+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
20840+ h_i = au_h_iptr(i, bindex);
20841+ if (h_i) {
20842+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
20843+ /* ignore this error */
20844+ au_set_h_iptr(i, bindex, NULL, 0);
20845+ }
20846+ }
20847+ au_set_ibbot(i, a->btgt);
4a4d8108 20848+}
1facf9fc 20849+
4a4d8108 20850+/* ---------------------------------------------------------------------- */
1308ab2a 20851+
e8791d4f
AM
20852+/* mainly for link(2) and rename(2) */
20853+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
20854+{
20855+ aufs_bindex_t bdiropq, bwh;
20856+ struct dentry *parent;
20857+ struct au_branch *br;
b95c5147 20858+
e8791d4f
AM
20859+ parent = dentry->d_parent;
20860+ IMustLock(d_inode(parent)); /* dir is locked */
c1595e42 20861+
e8791d4f
AM
20862+ bdiropq = au_dbdiropq(parent);
20863+ bwh = au_dbwh(dentry);
20864+ br = au_sbr(dentry->d_sb, btgt);
20865+ if (au_br_rdonly(br)
20866+ || (0 <= bdiropq && bdiropq < btgt)
20867+ || (0 <= bwh && bwh < btgt))
20868+ btgt = -1;
0c3ec466 20869+
e8791d4f
AM
20870+ AuDbg("btgt %d\n", btgt);
20871+ return btgt;
20872+}
c1595e42 20873+
e8791d4f
AM
20874+/* sets src_btop, dst_btop and btgt */
20875+static int au_ren_wbr(struct au_ren_args *a)
20876+{
20877+ int err;
20878+ struct au_wr_dir_args wr_dir_args = {
20879+ /* .force_btgt = -1, */
20880+ .flags = AuWrDir_ADD_ENTRY
20881+ };
c1595e42 20882+
e8791d4f
AM
20883+ a->src_btop = au_dbtop(a->src_dentry);
20884+ a->dst_btop = au_dbtop(a->dst_dentry);
20885+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
20886+ || au_ftest_ren(a->auren_flags, ISDIR_DST))
20887+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
20888+ wr_dir_args.force_btgt = a->src_btop;
20889+ if (a->dst_inode && a->dst_btop < a->src_btop)
20890+ wr_dir_args.force_btgt = a->dst_btop;
20891+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
20892+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
20893+ a->btgt = err;
20894+ if (a->exchange)
20895+ au_update_dbtop(a->dst_dentry);
0c3ec466 20896+
e8791d4f
AM
20897+ return err;
20898+}
c1595e42 20899+
e8791d4f
AM
20900+static void au_ren_dt(struct au_ren_args *a)
20901+{
20902+ a->h_path.dentry = a->src_h_parent;
20903+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
20904+ if (!au_ftest_ren(a->auren_flags, ISSAMEDIR)) {
20905+ a->h_path.dentry = a->dst_h_parent;
20906+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
b95c5147 20907+ }
1facf9fc 20908+
e8791d4f
AM
20909+ au_fclr_ren(a->auren_flags, DT_DSTDIR);
20910+ if (!au_ftest_ren(a->auren_flags, ISDIR_SRC)
20911+ && !a->exchange)
20912+ return;
dece6358 20913+
e8791d4f
AM
20914+ a->h_path.dentry = a->src_h_dentry;
20915+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
20916+ if (d_is_positive(a->dst_h_dentry)) {
20917+ au_fset_ren(a->auren_flags, DT_DSTDIR);
20918+ a->h_path.dentry = a->dst_h_dentry;
20919+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
20920+ }
20921+}
dece6358 20922+
e8791d4f 20923+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 20924+{
e8791d4f
AM
20925+ struct dentry *h_d;
20926+ struct inode *h_inode;
dece6358 20927+
e8791d4f
AM
20928+ au_dtime_revert(a->src_dt + AuPARENT);
20929+ if (!au_ftest_ren(a->auren_flags, ISSAMEDIR))
20930+ au_dtime_revert(a->dst_dt + AuPARENT);
1308ab2a 20931+
e8791d4f
AM
20932+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC) && err != -EIO) {
20933+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
20934+ h_inode = d_inode(h_d);
20935+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
20936+ au_dtime_revert(a->src_dt + AuCHILD);
20937+ inode_unlock(h_inode);
4a4d8108 20938+
e8791d4f
AM
20939+ if (au_ftest_ren(a->auren_flags, DT_DSTDIR)) {
20940+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
20941+ h_inode = d_inode(h_d);
20942+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
20943+ au_dtime_revert(a->dst_dt + AuCHILD);
20944+ inode_unlock(h_inode);
4a4d8108
AM
20945+ }
20946+ }
1facf9fc 20947+}
20948+
e8791d4f
AM
20949+/* ---------------------------------------------------------------------- */
20950+
/*
 * aufs_rename() - rename _src_dentry (in _src_dir) to _dst_dentry (in
 * _dst_dir), or exchange the two when RENAME_EXCHANGE is set.
 *
 * _flags: RENAME_WHITEOUT is rejected with -EINVAL.  RENAME_EXCHANGE is
 * recorded in a->exchange and selects d_exchange() instead of d_move()
 * once the lower rename has succeeded.
 *
 * The work area 'a' is kzalloc-ed (GFP_NOFS) instead of living on the
 * stack and is freed at out_free.  In order, the function: takes the aufs
 * locks, picks the target branch (au_ren_wbr), checks that the objects may
 * be renamed (au_ren_may_dir), copies up the source (and, for an exchange,
 * the destination) when it is not on the target branch yet, locks the
 * lower directories (au_ren_lock), saves timestamps (au_ren_dt), runs
 * do_rename(), and finally refreshes the aufs dentry/inode state.
 *
 * Returns 0 on success, otherwise a negative errno: -EINVAL, -ENOMEM,
 * -ENOTDIR, -ENOENT, -ENAMETOOLONG, or whatever a helper returned.
 */
20951+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
20952+ struct inode *_dst_dir, struct dentry *_dst_dentry,
20953+ unsigned int _flags)
1facf9fc 20954+{
e8791d4f
AM
20955+ int err, lock_flags;
20956+ void *rev;
20957+ /* reduce stack space */
20958+ struct au_ren_args *a;
20959+ struct au_pin pin;
1facf9fc 20960+
e8791d4f
AM
20961+ AuDbg("%pd, %pd, 0x%x\n", _src_dentry, _dst_dentry, _flags);
20962+ IMustLock(_src_dir);
20963+ IMustLock(_dst_dir);
1facf9fc 20964+
e8791d4f
AM
20965+ err = -EINVAL;
20966+ if (unlikely(_flags & RENAME_WHITEOUT))
20967+ goto out;
20968+
20969+ err = -ENOMEM;
20970+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
20971+ a = kzalloc(sizeof(*a), GFP_NOFS);
20972+ if (unlikely(!a))
20973+ goto out;
20974+
20975+ a->flags = _flags;
20976+ a->exchange = _flags & RENAME_EXCHANGE;
20977+ a->src_dir = _src_dir;
20978+ a->src_dentry = _src_dentry;
20979+ a->src_inode = NULL;
20980+ if (d_really_is_positive(a->src_dentry))
20981+ a->src_inode = d_inode(a->src_dentry);
20982+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
20983+ a->dst_dir = _dst_dir;
20984+ a->dst_dentry = _dst_dentry;
20985+ a->dst_inode = NULL;
20986+ if (d_really_is_positive(a->dst_dentry))
20987+ a->dst_inode = d_inode(a->dst_dentry);
20988+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
20989+ if (a->dst_inode) {
20990+ /*
20991+ * if EXCHANGE && src is non-dir && dst is dir,
20992+ * dst is not locked.
20993+ */
20994+ /* IMustLock(a->dst_inode); */
/* this reference is dropped by iput() at out_free */
20995+ au_igrab(a->dst_inode);
20996+ }
20997+
/*
 * a dir/non-dir mismatch is only allowed for an exchange; both
 * mismatch directions fail with -ENOTDIR here
 */
20998+ err = -ENOTDIR;
20999+ lock_flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
21000+ if (d_is_dir(a->src_dentry)) {
21001+ au_fset_ren(a->auren_flags, ISDIR_SRC);
21002+ if (unlikely(!a->exchange
21003+ && d_really_is_positive(a->dst_dentry)
21004+ && !d_is_dir(a->dst_dentry)))
21005+ goto out_free;
21006+ lock_flags |= AuLock_DIRS;
21007+ }
21008+ if (a->dst_inode && d_is_dir(a->dst_dentry)) {
21009+ au_fset_ren(a->auren_flags, ISDIR_DST);
21010+ if (unlikely(!a->exchange
21011+ && d_really_is_positive(a->src_dentry)
21012+ && !d_is_dir(a->src_dentry)))
21013+ goto out_free;
21014+ lock_flags |= AuLock_DIRS;
4a4d8108 21015+ }
e8791d4f
AM
21016+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
21017+ lock_flags);
21018+ if (unlikely(err))
21019+ goto out_free;
1facf9fc 21020+
e8791d4f
AM
21021+ err = au_d_hashed_positive(a->src_dentry);
21022+ if (unlikely(err))
21023+ goto out_unlock;
4a4d8108 21024+ err = -ENOENT;
e8791d4f
AM
21025+ if (a->dst_inode) {
21026+ /*
21027+ * If it is a dir, the VFS unhashes it before calling this
21028+ * function, so we cannot rely upon d_unhashed().
21029+ */
21030+ if (unlikely(!a->dst_inode->i_nlink))
21031+ goto out_unlock;
21032+ if (!au_ftest_ren(a->auren_flags, ISDIR_DST)) {
21033+ err = au_d_hashed_positive(a->dst_dentry);
21034+ if (unlikely(err && !a->exchange))
21035+ goto out_unlock;
21036+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
21037+ goto out_unlock;
21038+ } else if (unlikely(d_unhashed(a->dst_dentry)))
21039+ goto out_unlock;
4a4d8108
AM
21040+
21041+ /*
e8791d4f
AM
21042+ * is it possible?
21043+ * yes, it happened (in linux-3.3-rcN) but I don't know why.
21044+ * there may exist a problem somewhere else.
4a4d8108 21045+ */
e8791d4f
AM
21046+ err = -EINVAL;
21047+ if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
21048+ goto out_unlock;
4a4d8108 21049+
/*
 * only dst_parent is locked for now, so ISSAMEDIR is set to make the
 * exit path unlock just that one; it is cleared below once both
 * parents are locked
 */
e8791d4f
AM
21050+ au_fset_ren(a->auren_flags, ISSAMEDIR); /* temporary */
21051+ di_write_lock_parent(a->dst_parent);
4a4d8108 21052+
e8791d4f
AM
21053+ /* which branch we process */
/* a non-negative result is the branch index, also stored in a->btgt */
21054+ err = au_ren_wbr(a);
21055+ if (unlikely(err < 0))
21056+ goto out_parent;
21057+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
21058+ a->h_path.mnt = au_br_mnt(a->br);
1facf9fc 21059+
e8791d4f
AM
21060+ /* are they available to be renamed */
21061+ err = au_ren_may_dir(a);
21062+ if (unlikely(err))
21063+ goto out_children;
dece6358 21064+
e8791d4f
AM
21065+ /* prepare the writable parent dir on the same branch */
21066+ if (a->dst_btop == a->btgt) {
21067+ au_fset_ren(a->auren_flags, WHDST);
21068+ } else {
21069+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
21070+ if (unlikely(err))
21071+ goto out_children;
21072+ }
4a4d8108 21073+
/*
 * au_wr_dir_need_wh(): negative is an error, positive sets WHSRC below.
 * An exchange skips the call and keeps err at 0.
 */
e8791d4f
AM
21074+ err = 0;
21075+ if (!a->exchange) {
21076+ if (a->src_dir != a->dst_dir) {
21077+ /*
21078+ * this temporary unlock is safe,
21079+ * because both dir->i_mutex are locked.
21080+ */
21081+ di_write_unlock(a->dst_parent);
21082+ di_write_lock_parent(a->src_parent);
21083+ err = au_wr_dir_need_wh(a->src_dentry,
21084+ au_ftest_ren(a->auren_flags,
21085+ ISDIR_SRC),
21086+ &a->btgt);
21087+ di_write_unlock(a->src_parent);
21088+ di_write_lock2_parent(a->src_parent, a->dst_parent,
21089+ /*isdir*/1);
21090+ au_fclr_ren(a->auren_flags, ISSAMEDIR);
21091+ } else
21092+ err = au_wr_dir_need_wh(a->src_dentry,
21093+ au_ftest_ren(a->auren_flags,
21094+ ISDIR_SRC),
21095+ &a->btgt);
21096+ }
21097+ if (unlikely(err < 0))
21098+ goto out_children;
21099+ if (err)
21100+ au_fset_ren(a->auren_flags, WHSRC);
21101+
21102+ /* cpup src */
21103+ if (a->src_btop != a->btgt) {
21104+ err = au_pin(&pin, a->src_dentry, a->btgt,
21105+ au_opt_udba(a->src_dentry->d_sb),
21106+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
21107+ if (!err) {
21108+ struct au_cp_generic cpg = {
21109+ .dentry = a->src_dentry,
21110+ .bdst = a->btgt,
21111+ .bsrc = a->src_btop,
21112+ .len = -1,
21113+ .pin = &pin,
21114+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21115+ };
21116+ AuDebugOn(au_dbtop(a->src_dentry) != a->src_btop);
21117+ err = au_sio_cpup_simple(&cpg);
21118+ au_unpin(&pin);
21119+ }
21120+ if (unlikely(err))
21121+ goto out_children;
21122+ a->src_btop = a->btgt;
21123+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
21124+ if (!a->exchange)
21125+ au_fset_ren(a->auren_flags, WHSRC);
21126+ }
21127+
21128+ /* cpup dst */
21129+ if (a->exchange && a->dst_inode
21130+ && a->dst_btop != a->btgt) {
21131+ err = au_pin(&pin, a->dst_dentry, a->btgt,
21132+ au_opt_udba(a->dst_dentry->d_sb),
21133+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
21134+ if (!err) {
21135+ struct au_cp_generic cpg = {
21136+ .dentry = a->dst_dentry,
21137+ .bdst = a->btgt,
21138+ .bsrc = a->dst_btop,
21139+ .len = -1,
21140+ .pin = &pin,
21141+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21142+ };
21143+ err = au_sio_cpup_simple(&cpg);
21144+ au_unpin(&pin);
21145+ }
21146+ if (unlikely(err))
21147+ goto out_children;
21148+ a->dst_btop = a->btgt;
21149+ a->dst_h_dentry = au_h_dptr(a->dst_dentry, a->btgt);
21150+ }
21151+
21152+ /* lock them all */
21153+ err = au_ren_lock(a);
4a4d8108 21154+ if (unlikely(err))
e8791d4f
AM
21155+ /* leave the copied-up one */
21156+ goto out_children;
4a4d8108 21157+
e8791d4f
AM
21158+ if (!a->exchange) {
21159+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
21160+ err = au_may_ren(a);
21161+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
21162+ err = -ENAMETOOLONG;
4a4d8108 21163+ if (unlikely(err))
e8791d4f 21164+ goto out_hdir;
4a4d8108
AM
21165+ }
21166+
e8791d4f
AM
21167+ /* store timestamps to be revertible */
21168+ au_ren_dt(a);
4a4d8108 21169+
e8791d4f
AM
21170+ /* store dirren info */
/* 'rev' is written here and read later only under the same DIRREN test */
21171+ if (au_ftest_ren(a->auren_flags, DIRREN)) {
21172+ err = au_dr_rename(a->src_dentry, a->btgt,
21173+ &a->dst_dentry->d_name, &rev);
21174+ AuTraceErr(err);
21175+ if (unlikely(err))
21176+ goto out_dt;
21177+ }
4a4d8108 21178+
e8791d4f
AM
21179+ /* here we go */
21180+ err = do_rename(a);
21181+ if (unlikely(err))
21182+ goto out_dirren;
4a4d8108 21183+
e8791d4f
AM
21184+ if (au_ftest_ren(a->auren_flags, DIRREN))
21185+ au_dr_rename_fin(a->src_dentry, a->btgt, rev);
1facf9fc 21186+
e8791d4f
AM
21187+ /* update dir attributes */
21188+ au_ren_refresh_dir(a);
4a4d8108 21189+
e8791d4f
AM
21190+ /* dput/iput all lower dentries */
21191+ au_ren_refresh(a);
4a4d8108 21192+
e8791d4f 21193+ goto out_hdir; /* success */
4a4d8108 21194+
/*
 * Unwinding.  The success path joins at out_hdir with err == 0, so the
 * labels from there on serve both the success and the failure case.
 */
e8791d4f
AM
21195+out_dirren:
21196+ if (au_ftest_ren(a->auren_flags, DIRREN))
21197+ au_dr_rename_rev(a->src_dentry, a->btgt, rev);
21198+out_dt:
21199+ au_ren_rev_dt(err, a);
21200+out_hdir:
21201+ au_ren_unlock(a);
21202+out_children:
21203+ au_nhash_wh_free(&a->whlist);
21204+ if (err && a->dst_inode && a->dst_btop != a->btgt) {
21205+ AuDbg("btop %d, btgt %d\n", a->dst_btop, a->btgt);
21206+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
21207+ au_set_dbtop(a->dst_dentry, a->dst_btop);
4a4d8108 21208+ }
e8791d4f
AM
21209+out_parent:
21210+ if (!err) {
/* remember the unhashed state so it can be re-applied after d_move()/d_exchange() */
21211+ if (d_unhashed(a->src_dentry))
21212+ au_fset_ren(a->auren_flags, DROPPED_SRC);
21213+ if (d_unhashed(a->dst_dentry))
21214+ au_fset_ren(a->auren_flags, DROPPED_DST);
21215+ if (!a->exchange)
21216+ d_move(a->src_dentry, a->dst_dentry);
21217+ else {
21218+ d_exchange(a->src_dentry, a->dst_dentry);
21219+ if (au_ftest_ren(a->auren_flags, DROPPED_DST))
21220+ d_drop(a->dst_dentry);
21221+ }
21222+ if (au_ftest_ren(a->auren_flags, DROPPED_SRC))
21223+ d_drop(a->src_dentry);
21224+ } else {
21225+ au_update_dbtop(a->dst_dentry);
21226+ if (!a->dst_inode)
21227+ d_drop(a->dst_dentry);
4a4d8108 21228+ }
e8791d4f
AM
21229+ if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
21230+ di_write_unlock(a->dst_parent);
21231+ else
21232+ di_write_unlock2(a->src_parent, a->dst_parent);
21233+out_unlock:
21234+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
21235+out_free:
21236+ iput(a->dst_inode);
21237+ if (a->thargs)
21238+ au_whtmp_rmdir_free(a->thargs);
21239+ kfree(a);
4f0767ce 21240+out:
4a4d8108
AM
21241+ AuTraceErr(err);
21242+ return err;
21243+}
e8791d4f
AM
21244diff -urNp -x '*.orig' linux-4.9/fs/aufs/iinfo.c linux-4.9/fs/aufs/iinfo.c
21245--- linux-4.9/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
21246+++ linux-4.9/fs/aufs/iinfo.c 2021-02-24 16:15:09.528240413 +0100
21247@@ -0,0 +1,285 @@
21248+/*
21249+ * Copyright (C) 2005-2018 Junjiro R. Okajima
21250+ *
21251+ * This program, aufs is free software; you can redistribute it and/or modify
21252+ * it under the terms of the GNU General Public License as published by
21253+ * the Free Software Foundation; either version 2 of the License, or
21254+ * (at your option) any later version.
21255+ *
21256+ * This program is distributed in the hope that it will be useful,
21257+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21258+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21259+ * GNU General Public License for more details.
21260+ *
21261+ * You should have received a copy of the GNU General Public License
21262+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
21263+ */
1308ab2a 21264+
4a4d8108 21265+/*
e8791d4f 21266+ * inode private data
4a4d8108 21267+ */
e8791d4f
AM
21268+
21269+#include "aufs.h"
21270+
21271+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
4a4d8108 21272+{
e8791d4f
AM
21273+ struct inode *h_inode;
21274+ struct au_hinode *hinode;
1308ab2a 21275+
e8791d4f 21276+ IiMustAnyLock(inode);
1308ab2a 21277+
e8791d4f
AM
21278+ hinode = au_hinode(au_ii(inode), bindex);
21279+ h_inode = hinode->hi_inode;
21280+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
21281+ return h_inode;
1facf9fc 21282+}
21283+
e8791d4f
AM
21284+/* todo: hard/soft set? */
21285+void au_hiput(struct au_hinode *hinode)
1facf9fc 21286+{
e8791d4f
AM
21287+ au_hn_free(hinode);
21288+ dput(hinode->hi_whdentry);
21289+ iput(hinode->hi_inode);
21290+}
dece6358 21291+
e8791d4f
AM
21292+unsigned int au_hi_flags(struct inode *inode, int isdir)
21293+{
21294+ unsigned int flags;
21295+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
dece6358 21296+
e8791d4f
AM
21297+ flags = 0;
21298+ if (au_opt_test(mnt_flags, XINO))
21299+ au_fset_hi(flags, XINO);
21300+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
21301+ au_fset_hi(flags, HNOTIFY);
21302+ return flags;
1facf9fc 21303+}
21304+
e8791d4f
AM
21305+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
21306+ struct inode *h_inode, unsigned int flags)
1308ab2a 21307+{
e8791d4f
AM
21308+ struct au_hinode *hinode;
21309+ struct inode *hi;
21310+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 21311+
e8791d4f 21312+ IiMustWriteLock(inode);
027c5e7a 21313+
e8791d4f
AM
21314+ hinode = au_hinode(iinfo, bindex);
21315+ hi = hinode->hi_inode;
21316+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
c2b27bf2 21317+
e8791d4f
AM
21318+ if (hi)
21319+ au_hiput(hinode);
21320+ hinode->hi_inode = h_inode;
21321+ if (h_inode) {
21322+ int err;
21323+ struct super_block *sb = inode->i_sb;
21324+ struct au_branch *br;
1facf9fc 21325+
e8791d4f
AM
21326+ AuDebugOn(inode->i_mode
21327+ && (h_inode->i_mode & S_IFMT)
21328+ != (inode->i_mode & S_IFMT));
21329+ if (bindex == iinfo->ii_btop)
21330+ au_cpup_igen(inode, h_inode);
21331+ br = au_sbr(sb, bindex);
21332+ hinode->hi_id = br->br_id;
21333+ if (au_ftest_hi(flags, XINO)) {
21334+ err = au_xino_write(sb, bindex, h_inode->i_ino,
21335+ inode->i_ino);
21336+ if (unlikely(err))
21337+ AuIOErr1("failed au_xino_write() %d\n", err);
21338+ }
1facf9fc 21339+
e8791d4f
AM
21340+ if (au_ftest_hi(flags, HNOTIFY)
21341+ && au_br_hnotifyable(br->br_perm)) {
21342+ err = au_hn_alloc(hinode, inode);
21343+ if (unlikely(err))
21344+ AuIOErr1("au_hn_alloc() %d\n", err);
523b37e3 21345+ }
4a4d8108 21346+ }
e8791d4f 21347+}
dece6358 21348+
e8791d4f
AM
21349+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
21350+ struct dentry *h_wh)
21351+{
21352+ struct au_hinode *hinode;
4a4d8108 21353+
e8791d4f 21354+ IiMustWriteLock(inode);
1facf9fc 21355+
e8791d4f
AM
21356+ hinode = au_hinode(au_ii(inode), bindex);
21357+ AuDebugOn(hinode->hi_whdentry);
21358+ hinode->hi_whdentry = h_wh;
21359+}
4a4d8108 21360+
e8791d4f
AM
21361+void au_update_iigen(struct inode *inode, int half)
21362+{
21363+ struct au_iinfo *iinfo;
21364+ struct au_iigen *iigen;
21365+ unsigned int sigen;
21366+
21367+ sigen = au_sigen(inode->i_sb);
21368+ iinfo = au_ii(inode);
21369+ iigen = &iinfo->ii_generation;
21370+ spin_lock(&iigen->ig_spin);
21371+ iigen->ig_generation = sigen;
21372+ if (half)
21373+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
21374+ else
21375+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
21376+ spin_unlock(&iigen->ig_spin);
21377+}
21378+
21379+/* it may be called at remount time, too */
21380+void au_update_ibrange(struct inode *inode, int do_put_zero)
21381+{
21382+ struct au_iinfo *iinfo;
21383+ aufs_bindex_t bindex, bbot;
21384+
21385+ AuDebugOn(au_is_bad_inode(inode));
21386+ IiMustWriteLock(inode);
21387+
21388+ iinfo = au_ii(inode);
21389+ if (do_put_zero && iinfo->ii_btop >= 0) {
21390+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
21391+ bindex++) {
21392+ struct inode *h_i;
21393+
21394+ h_i = au_hinode(iinfo, bindex)->hi_inode;
21395+ if (h_i
21396+ && !h_i->i_nlink
21397+ && !(h_i->i_state & I_LINKABLE))
21398+ au_set_h_iptr(inode, bindex, NULL, 0);
21399+ }
dece6358 21400+ }
1facf9fc 21401+
e8791d4f
AM
21402+ iinfo->ii_btop = -1;
21403+ iinfo->ii_bbot = -1;
21404+ bbot = au_sbbot(inode->i_sb);
21405+ for (bindex = 0; bindex <= bbot; bindex++)
21406+ if (au_hinode(iinfo, bindex)->hi_inode) {
21407+ iinfo->ii_btop = bindex;
21408+ break;
21409+ }
21410+ if (iinfo->ii_btop >= 0)
21411+ for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--)
21412+ if (au_hinode(iinfo, bindex)->hi_inode) {
21413+ iinfo->ii_bbot = bindex;
21414+ break;
21415+ }
21416+ AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot);
dece6358
AM
21417+}
21418+
e8791d4f
AM
21419+/* ---------------------------------------------------------------------- */
21420+
21421+void au_icntnr_init_once(void *_c)
1308ab2a 21422+{
e8791d4f
AM
21423+ struct au_icntnr *c = _c;
21424+ struct au_iinfo *iinfo = &c->iinfo;
1facf9fc 21425+
e8791d4f
AM
21426+ spin_lock_init(&iinfo->ii_generation.ig_spin);
21427+ au_rw_init(&iinfo->ii_rwsem);
21428+ inode_init_once(&c->vfs_inode);
21429+}
027c5e7a 21430+
e8791d4f
AM
21431+void au_hinode_init(struct au_hinode *hinode)
21432+{
21433+ hinode->hi_inode = NULL;
21434+ hinode->hi_id = -1;
21435+ au_hn_init(hinode);
21436+ hinode->hi_whdentry = NULL;
21437+}
c2b27bf2 21438+
e8791d4f
AM
21439+int au_iinfo_init(struct inode *inode)
21440+{
21441+ struct au_iinfo *iinfo;
21442+ struct super_block *sb;
21443+ struct au_hinode *hi;
21444+ int nbr, i;
dece6358 21445+
e8791d4f
AM
21446+ sb = inode->i_sb;
21447+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
21448+ nbr = au_sbbot(sb) + 1;
21449+ if (unlikely(nbr <= 0))
21450+ nbr = 1;
21451+ hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
21452+ if (hi) {
21453+ au_ninodes_inc(sb);
dece6358 21454+
e8791d4f
AM
21455+ iinfo->ii_hinode = hi;
21456+ for (i = 0; i < nbr; i++, hi++)
21457+ au_hinode_init(hi);
1facf9fc 21458+
e8791d4f
AM
21459+ iinfo->ii_generation.ig_generation = au_sigen(sb);
21460+ iinfo->ii_btop = -1;
21461+ iinfo->ii_bbot = -1;
21462+ iinfo->ii_vdir = NULL;
21463+ return 0;
21464+ }
21465+ return -ENOMEM;
21466+}
1facf9fc 21467+
e8791d4f
AM
21468+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink)
21469+{
21470+ int err, i;
21471+ struct au_hinode *hip;
4a4d8108 21472+
e8791d4f
AM
21473+ AuRwMustWriteLock(&iinfo->ii_rwsem);
21474+
21475+ err = -ENOMEM;
21476+ hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS,
21477+ may_shrink);
21478+ if (hip) {
21479+ iinfo->ii_hinode = hip;
21480+ i = iinfo->ii_bbot + 1;
21481+ hip += i;
21482+ for (; i < nbr; i++, hip++)
21483+ au_hinode_init(hip);
1facf9fc 21484+ err = 0;
21485+ }
21486+
e8791d4f
AM
21487+ return err;
21488+}
21489+
21490+void au_iinfo_fin(struct inode *inode)
21491+{
21492+ struct au_iinfo *iinfo;
21493+ struct au_hinode *hi;
21494+ struct super_block *sb;
21495+ aufs_bindex_t bindex, bbot;
21496+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 21497+
e8791d4f 21498+ AuDebugOn(au_is_bad_inode(inode));
1308ab2a 21499+
e8791d4f
AM
21500+ sb = inode->i_sb;
21501+ au_ninodes_dec(sb);
21502+ if (si_pid_test(sb))
21503+ au_xino_delete_inode(inode, unlinked);
21504+ else {
21505+ /*
21506+ * it is safe to hide the dependency between sbinfo and
21507+ * sb->s_umount.
21508+ */
21509+ lockdep_off();
21510+ si_noflush_read_lock(sb);
21511+ au_xino_delete_inode(inode, unlinked);
21512+ si_read_unlock(sb);
21513+ lockdep_on();
1facf9fc 21514+ }
21515+
e8791d4f
AM
21516+ iinfo = au_ii(inode);
21517+ if (iinfo->ii_vdir)
21518+ au_vdir_free(iinfo->ii_vdir);
1308ab2a 21519+
e8791d4f
AM
21520+ bindex = iinfo->ii_btop;
21521+ if (bindex >= 0) {
21522+ hi = au_hinode(iinfo, bindex);
21523+ bbot = iinfo->ii_bbot;
21524+ while (bindex++ <= bbot) {
21525+ if (hi->hi_inode)
21526+ au_hiput(hi);
21527+ hi++;
21528+ }
1facf9fc 21529+ }
e8791d4f
AM
21530+ kfree(iinfo->ii_hinode);
21531+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 21532+}
e8791d4f
AM
21533diff -urNp -x '*.orig' linux-4.9/fs/aufs/inode.c linux-4.9/fs/aufs/inode.c
21534--- linux-4.9/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
21535+++ linux-4.9/fs/aufs/inode.c 2021-02-24 16:15:09.528240413 +0100
21536@@ -0,0 +1,527 @@
1facf9fc 21537+/*
ae9dfd79 21538+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 21539+ *
21540+ * This program, aufs is free software; you can redistribute it and/or modify
21541+ * it under the terms of the GNU General Public License as published by
21542+ * the Free Software Foundation; either version 2 of the License, or
21543+ * (at your option) any later version.
dece6358
AM
21544+ *
21545+ * This program is distributed in the hope that it will be useful,
21546+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21547+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21548+ * GNU General Public License for more details.
21549+ *
21550+ * You should have received a copy of the GNU General Public License
523b37e3 21551+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21552+ */
21553+
21554+/*
e8791d4f 21555+ * inode functions
1facf9fc 21556+ */
21557+
21558+#include "aufs.h"
21559+
e8791d4f
AM
21560+struct inode *au_igrab(struct inode *inode)
21561+{
21562+ if (inode) {
21563+ AuDebugOn(!atomic_read(&inode->i_count));
21564+ ihold(inode);
21565+ }
21566+ return inode;
21567+}
1facf9fc 21568+
e8791d4f
AM
21569+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
21570+{
21571+ au_cpup_attr_all(inode, /*force*/0);
21572+ au_update_iigen(inode, /*half*/1);
21573+ if (do_version)
21574+ inode->i_version++;
21575+}
1facf9fc 21576+
e8791d4f
AM
/*
 * Re-map the lower inodes of @inode onto the current branch layout.
 *
 * The slot array is first grown to au_sbbot() + 1 entries.  Every populated
 * slot between ii_btop and ii_bbot is then looked up by its stored branch
 * id (au_br_index): a slot whose branch no longer exists is released and
 * *update is set to 1; a slot whose branch now has another index is swapped
 * into that index.  Afterwards the range is recomputed and the array may
 * be shrunk again.
 *
 * Returns 0, or a negative error from the first au_hinode_realloc() or
 * from au_dy_irefresh().
 */
21577+static int au_ii_refresh(struct inode *inode, int *update)
21578+{
21579+ int err, e, nbr;
21580+ umode_t type;
21581+ aufs_bindex_t bindex, new_bindex;
21582+ struct super_block *sb;
21583+ struct au_iinfo *iinfo;
21584+ struct au_hinode *p, *q, tmp;
1308ab2a 21585+
e8791d4f
AM
21586+ AuDebugOn(au_is_bad_inode(inode));
21587+ IiMustWriteLock(inode);
1308ab2a 21588+
e8791d4f
AM
21589+ *update = 0;
21590+ sb = inode->i_sb;
21591+ nbr = au_sbbot(sb) + 1;
21592+ type = inode->i_mode & S_IFMT;
21593+ iinfo = au_ii(inode);
21594+ err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
21595+ if (unlikely(err))
21596+ goto out;
1facf9fc 21597+
e8791d4f
AM
21598+ AuDebugOn(iinfo->ii_btop < 0);
21599+ p = au_hinode(iinfo, iinfo->ii_btop);
21600+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
21601+ bindex++, p++) {
21602+ if (!p->hi_inode)
21603+ continue;
1facf9fc 21604+
e8791d4f
AM
21605+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
21606+ new_bindex = au_br_index(sb, p->hi_id);
21607+ if (new_bindex == bindex)
21608+ continue;
1facf9fc 21609+
/* a negative index means the branch with this id is gone */
e8791d4f
AM
21610+ if (new_bindex < 0) {
21611+ *update = 1;
21612+ au_hiput(p);
21613+ p->hi_inode = NULL;
21614+ continue;
21615+ }
f2c43d5f 21616+
/* widen the range so the loop bound covers the new position */
e8791d4f
AM
21617+ if (new_bindex < iinfo->ii_btop)
21618+ iinfo->ii_btop = new_bindex;
21619+ if (iinfo->ii_bbot < new_bindex)
21620+ iinfo->ii_bbot = new_bindex;
21621+ /* swap the two lower inodes, and loop again */
21622+ q = au_hinode(iinfo, new_bindex);
21623+ tmp = *q;
21624+ *q = *p;
21625+ *p = tmp;
/* the current slot now holds another inode: step back to revisit it */
21626+ if (tmp.hi_inode) {
21627+ bindex--;
21628+ p--;
21629+ }
21630+ }
21631+ au_update_ibrange(inode, /*do_put_zero*/0);
21632+ au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
21633+ e = au_dy_irefresh(inode);
21634+ if (unlikely(e && !err))
21635+ err = e;
f2c43d5f 21636+
e8791d4f
AM
21637+out:
21638+ AuTraceErr(err);
21639+ return err;
4a4d8108 21640+}
1facf9fc 21641+
e8791d4f 21642+void au_refresh_iop(struct inode *inode, int force_getattr)
4a4d8108 21643+{
e8791d4f
AM
21644+ int type;
21645+ struct au_sbinfo *sbi = au_sbi(inode->i_sb);
21646+ const struct inode_operations *iop
21647+ = force_getattr ? aufs_iop : sbi->si_iop_array;
1facf9fc 21648+
e8791d4f 21649+ if (inode->i_op == iop)
4a4d8108 21650+ return;
1facf9fc 21651+
e8791d4f
AM
21652+ switch (inode->i_mode & S_IFMT) {
21653+ case S_IFDIR:
21654+ type = AuIop_DIR;
21655+ break;
21656+ case S_IFLNK:
21657+ type = AuIop_SYMLINK;
21658+ break;
21659+ default:
21660+ type = AuIop_OTHER;
21661+ break;
523b37e3 21662+ }
e8791d4f
AM
21663+
21664+ inode->i_op = iop + type;
21665+ /* unnecessary smp_wmb() */
1facf9fc 21666+}
21667+
e8791d4f 21668+int au_refresh_hinode_self(struct inode *inode)
1facf9fc 21669+{
e8791d4f 21670+ int err, update;
dece6358 21671+
e8791d4f
AM
21672+ err = au_ii_refresh(inode, &update);
21673+ if (!err)
21674+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
dece6358 21675+
e8791d4f
AM
21676+ AuTraceErr(err);
21677+ return err;
4a4d8108 21678+}
1308ab2a 21679+
e8791d4f 21680+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
4a4d8108 21681+{
e8791d4f
AM
21682+ int err, e, update;
21683+ unsigned int flags;
21684+ umode_t mode;
21685+ aufs_bindex_t bindex, bbot;
21686+ unsigned char isdir;
21687+ struct au_hinode *p;
21688+ struct au_iinfo *iinfo;
1308ab2a 21689+
e8791d4f
AM
21690+ err = au_ii_refresh(inode, &update);
21691+ if (unlikely(err))
21692+ goto out;
1facf9fc 21693+
e8791d4f
AM
21694+ update = 0;
21695+ iinfo = au_ii(inode);
21696+ p = au_hinode(iinfo, iinfo->ii_btop);
21697+ mode = (inode->i_mode & S_IFMT);
21698+ isdir = S_ISDIR(mode);
21699+ flags = au_hi_flags(inode, isdir);
21700+ bbot = au_dbbot(dentry);
21701+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
21702+ struct inode *h_i, *h_inode;
21703+ struct dentry *h_d;
1308ab2a 21704+
e8791d4f
AM
21705+ h_d = au_h_dptr(dentry, bindex);
21706+ if (!h_d || d_is_negative(h_d))
21707+ continue;
1facf9fc 21708+
e8791d4f
AM
21709+ h_inode = d_inode(h_d);
21710+ AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
21711+ if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
21712+ h_i = au_h_iptr(inode, bindex);
21713+ if (h_i) {
21714+ if (h_i == h_inode)
21715+ continue;
21716+ err = -EIO;
21717+ break;
21718+ }
523b37e3 21719+ }
e8791d4f
AM
21720+ if (bindex < iinfo->ii_btop)
21721+ iinfo->ii_btop = bindex;
21722+ if (iinfo->ii_bbot < bindex)
21723+ iinfo->ii_bbot = bindex;
21724+ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
21725+ update = 1;
21726+ }
21727+ au_update_ibrange(inode, /*do_put_zero*/0);
21728+ e = au_dy_irefresh(inode);
21729+ if (unlikely(e && !err))
21730+ err = e;
21731+ if (!err)
21732+ au_refresh_hinode_attr(inode, update && isdir);
1facf9fc 21733+
e8791d4f
AM
21734+out:
21735+ AuTraceErr(err);
dece6358
AM
21736+ return err;
21737+}
1facf9fc 21738+
e8791d4f 21739+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 21740+{
4a4d8108 21741+ int err;
e8791d4f
AM
21742+ unsigned int flags;
21743+ umode_t mode;
21744+ aufs_bindex_t bindex, btop, btail;
21745+ unsigned char isdir;
21746+ struct dentry *h_dentry;
21747+ struct inode *h_inode;
21748+ struct au_iinfo *iinfo;
21749+ struct inode_operations *iop;
1facf9fc 21750+
e8791d4f
AM
21751+ IiMustWriteLock(inode);
21752+
21753+ err = 0;
21754+ isdir = 0;
21755+ iop = au_sbi(inode->i_sb)->si_iop_array;
21756+ btop = au_dbtop(dentry);
21757+ h_dentry = au_h_dptr(dentry, btop);
21758+ h_inode = d_inode(h_dentry);
21759+ mode = h_inode->i_mode;
21760+ switch (mode & S_IFMT) {
21761+ case S_IFREG:
21762+ btail = au_dbtail(dentry);
21763+ inode->i_op = iop + AuIop_OTHER;
21764+ inode->i_fop = &aufs_file_fop;
21765+ err = au_dy_iaop(inode, btop, h_inode);
4a4d8108 21766+ if (unlikely(err))
e8791d4f
AM
21767+ goto out;
21768+ break;
21769+ case S_IFDIR:
21770+ isdir = 1;
21771+ btail = au_dbtaildir(dentry);
21772+ inode->i_op = iop + AuIop_DIR;
21773+ inode->i_fop = &aufs_dir_fop;
21774+ break;
21775+ case S_IFLNK:
21776+ btail = au_dbtail(dentry);
21777+ inode->i_op = iop + AuIop_SYMLINK;
21778+ break;
21779+ case S_IFBLK:
21780+ case S_IFCHR:
21781+ case S_IFIFO:
21782+ case S_IFSOCK:
21783+ btail = au_dbtail(dentry);
21784+ inode->i_op = iop + AuIop_OTHER;
21785+ init_special_inode(inode, mode, h_inode->i_rdev);
21786+ break;
21787+ default:
21788+ AuIOErr("Unknown file type 0%o\n", mode);
21789+ err = -EIO;
21790+ goto out;
4a4d8108
AM
21791+ }
21792+
e8791d4f
AM
21793+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
21794+ flags = au_hi_flags(inode, isdir);
21795+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
21796+ && au_ftest_hi(flags, HNOTIFY)
21797+ && dentry->d_name.len > AUFS_WH_PFX_LEN
21798+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
21799+ au_fclr_hi(flags, HNOTIFY);
21800+ iinfo = au_ii(inode);
21801+ iinfo->ii_btop = btop;
21802+ iinfo->ii_bbot = btail;
21803+ for (bindex = btop; bindex <= btail; bindex++) {
21804+ h_dentry = au_h_dptr(dentry, bindex);
21805+ if (h_dentry)
21806+ au_set_h_iptr(inode, bindex,
21807+ au_igrab(d_inode(h_dentry)), flags);
21808+ }
21809+ au_cpup_attr_all(inode, /*force*/1);
21810+ /*
21811+ * to force calling aufs_get_acl() every time,
21812+ * do not call cache_no_acl() for aufs inode.
21813+ */
21814+
21815+out:
21816+ return err;
1308ab2a 21817+}
1facf9fc 21818+
e8791d4f
AM
21819+/*
21820+ * successful returns with iinfo write_locked
21821+ * minus: errno
21822+ * zero: success, matched
21823+ * plus: no error, but unmatched
21824+ */
21825+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
21826+{
21827+ int err;
e8791d4f
AM
21828+ unsigned int gen, igflags;
21829+ aufs_bindex_t bindex, bbot;
21830+ struct inode *h_inode, *h_dinode;
21831+ struct dentry *h_dentry;
1facf9fc 21832+
e8791d4f
AM
21833+ /*
21834+ * before this function, if aufs got any iinfo lock, it must be only
21835+ * one, the parent dir.
21836+ * it can happen by UDBA and the obsoleted inode number.
21837+ */
21838+ err = -EIO;
21839+ if (unlikely(inode->i_ino == parent_ino(dentry)))
21840+ goto out;
1facf9fc 21841+
e8791d4f
AM
21842+ err = 1;
21843+ ii_write_lock_new_child(inode);
21844+ h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
21845+ h_dinode = d_inode(h_dentry);
21846+ bbot = au_ibbot(inode);
21847+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
21848+ h_inode = au_h_iptr(inode, bindex);
21849+ if (!h_inode || h_inode != h_dinode)
21850+ continue;
21851+
21852+ err = 0;
21853+ gen = au_iigen(inode, &igflags);
21854+ if (gen == au_digen(dentry)
21855+ && !au_ig_ftest(igflags, HALF_REFRESHED))
21856+ break;
21857+
21858+ /* fully refresh inode using dentry */
21859+ err = au_refresh_hinode(inode, dentry);
21860+ if (!err)
21861+ au_update_iigen(inode, /*half*/0);
21862+ break;
21863+ }
21864+
21865+ if (unlikely(err))
21866+ ii_write_unlock(inode);
21867+out:
4a4d8108
AM
21868+ return err;
21869+}
1facf9fc 21870+
e8791d4f
AM
21871+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
21872+ unsigned int d_type, ino_t *ino)
4a4d8108 21873+{
e8791d4f
AM
21874+ int err, idx;
21875+ const int isnondir = d_type != DT_DIR;
1facf9fc 21876+
e8791d4f
AM
21877+ /* prevent hardlinked inode number from race condition */
21878+ if (isnondir) {
21879+ err = au_xinondir_enter(sb, bindex, h_ino, &idx);
f2c43d5f 21880+ if (unlikely(err))
4a4d8108 21881+ goto out;
4a4d8108 21882+ }
1facf9fc 21883+
e8791d4f
AM
21884+ err = au_xino_read(sb, bindex, h_ino, ino);
21885+ if (unlikely(err))
21886+ goto out_xinondir;
21887+
21888+ if (!*ino) {
21889+ err = -EIO;
21890+ *ino = au_xino_new_ino(sb);
21891+ if (unlikely(!*ino))
21892+ goto out_xinondir;
21893+ err = au_xino_write(sb, bindex, h_ino, *ino);
4a4d8108 21894+ if (unlikely(err))
e8791d4f 21895+ goto out_xinondir;
f2c43d5f 21896+ }
dece6358 21897+
e8791d4f
AM
21898+out_xinondir:
21899+ if (isnondir && idx >= 0)
21900+ au_xinondir_leave(sb, bindex, h_ino, idx);
f2c43d5f
AM
21901+out:
21902+ return err;
21903+}
21904+
e8791d4f
AM
/*
 * Find or create the aufs inode for @dentry.
 *
 * The aufs inode number is looked up by au_xino_read() using the lower inode
 * on the top branch of @dentry (au_dbtop()); when the entry is zero a number
 * is taken from au_xino_new_ino().  The inode is then got by
 * au_iget_locked():
 * - a new inode (I_NEW) is set up by set_inode() and returned,
 * - an existing inode is returned only when @must_new is zero, it is not a
 *   dead dir, its i_nlink is non-zero and reval_inode() returns zero,
 * - otherwise the xino entry is reset to zero and the whole procedure is
 *   retried from the label new_ino.
 * When the lower inode is a non-dir with i_nlink > 1 ('hlinked'), the work
 * is bracketed by au_xinondir_enter()/au_xinondir_leave().
 *
 * Returns the inode, or ERR_PTR() on error.
 */
21905+/* successful returns with iinfo write_locked */
21906+/* todo: return with unlocked? */
21907+struct inode *au_new_inode(struct dentry *dentry, int must_new)
f2c43d5f 21908+{
e8791d4f
AM
21909+ struct inode *inode, *h_inode;
21910+ struct dentry *h_dentry;
21911+ struct super_block *sb;
21912+ ino_t h_ino, ino;
21913+ int err, idx, hlinked;
21914+ aufs_bindex_t btop;
f2c43d5f 21915+
e8791d4f
AM
21916+ sb = dentry->d_sb;
21917+ btop = au_dbtop(dentry);
21918+ h_dentry = au_h_dptr(dentry, btop);
21919+ h_inode = d_inode(h_dentry);
21920+ h_ino = h_inode->i_ino;
21921+ hlinked = !d_is_dir(h_dentry) && h_inode->i_nlink > 1;
f2c43d5f 21922+
e8791d4f
AM
21923+new_ino:
21924+ /*
21925+ * stop 'race'-ing between hardlinks under different
21926+ * parents.
21927+ */
21928+ if (hlinked) {
21929+ err = au_xinondir_enter(sb, btop, h_ino, &idx);
21930+ inode = ERR_PTR(err);
21931+ if (unlikely(err))
21932+ goto out;
21933+ }
f2c43d5f 21934+
e8791d4f
AM
21935+ err = au_xino_read(sb, btop, h_ino, &ino);
21936+ inode = ERR_PTR(err);
21937+ if (unlikely(err))
21938+ goto out_xinondir;
f2c43d5f 21939+
e8791d4f
AM
21940+ if (!ino) {
21941+ ino = au_xino_new_ino(sb);
21942+ if (unlikely(!ino)) {
21943+ inode = ERR_PTR(-EIO);
21944+ goto out_xinondir;
f2c43d5f 21945+ }
4a4d8108 21946+ }
1facf9fc 21947+
e8791d4f
AM
21948+ AuDbg("i%lu\n", (unsigned long)ino);
21949+ inode = au_iget_locked(sb, ino);
21950+ err = PTR_ERR(inode);
21951+ if (IS_ERR(inode))
21952+ goto out_xinondir;
1facf9fc 21953+
e8791d4f
AM
21954+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
21955+ if (inode->i_state & I_NEW) {
21956+ ii_write_lock_new_child(inode);
21957+ err = set_inode(inode, dentry);
21958+ if (!err) {
21959+ unlock_new_inode(inode);
21960+ goto out_xinondir; /* success */
21961+ }
1308ab2a 21962+
e8791d4f
AM
21963+ /*
21964+ * iget_failed() calls iput(), but we need to call
21965+ * ii_write_unlock() after iget_failed(). so dirty hack for
21966+ * i_count.
21967+ */
21968+ atomic_inc(&inode->i_count);
21969+ iget_failed(inode);
21970+ ii_write_unlock(inode);
21971+ au_xino_write(sb, btop, h_ino, /*ino*/0);
21972+ /* ignore this error */
21973+ goto out_iput;
21974+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
21975+ /*
21976+ * horrible race condition between lookup, readdir and copyup
21977+ * (or something).
21978+ */
21979+ if (hlinked && idx >= 0)
21980+ au_xinondir_leave(sb, btop, h_ino, idx);
21981+ err = reval_inode(inode, dentry);
21982+ if (unlikely(err < 0)) {
/* au_xinondir_leave() was already called above, skip it at out_xinondir */
21983+ hlinked = 0;
21984+ goto out_iput;
21985+ }
21986+ if (!err)
21987+ goto out; /* success */
21988+ else if (hlinked && idx >= 0) {
21989+ err = au_xinondir_enter(sb, btop, h_ino, &idx);
21990+ if (unlikely(err)) {
21991+ iput(inode);
21992+ inode = ERR_PTR(err);
21993+ goto out;
21994+ }
21995+ }
21996+ }
1308ab2a 21997+
e8791d4f
AM
21998+ if (unlikely(au_test_fs_unique_ino(h_inode)))
21999+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
22000+ " b%d, %s, %pd, hi%lu, i%lu.\n",
22001+ btop, au_sbtype(h_dentry->d_sb), dentry,
22002+ (unsigned long)h_ino, (unsigned long)ino);
/* reset the xino entry of h_ino to zero; on success drop this inode and retry */
22003+ ino = 0;
22004+ err = au_xino_write(sb, btop, h_ino, /*ino*/0);
22005+ if (!err) {
22006+ iput(inode);
22007+ if (hlinked && idx >= 0)
22008+ au_xinondir_leave(sb, btop, h_ino, idx);
22009+ goto new_ino;
f2c43d5f 22010+ }
1facf9fc 22011+
e8791d4f
AM
22012+out_iput:
22013+ iput(inode);
22014+ inode = ERR_PTR(err);
22015+out_xinondir:
22016+ if (hlinked && idx >= 0)
22017+ au_xinondir_leave(sb, btop, h_ino, idx);
22018+out:
22019+ return inode;
22020+}
1facf9fc 22021+
e8791d4f 22022+/* ---------------------------------------------------------------------- */
1308ab2a 22023+
e8791d4f
AM
22024+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
22025+ struct inode *inode)
22026+{
22027+ int err;
22028+ struct inode *hi;
4a4d8108 22029+
e8791d4f
AM
22030+ err = au_br_rdonly(au_sbr(sb, bindex));
22031+
22032+ /* pseudo-link after flushed may happen out of bounds */
22033+ if (!err
22034+ && inode
22035+ && au_ibtop(inode) <= bindex
22036+ && bindex <= au_ibbot(inode)) {
22037+ /*
22038+ * permission check is unnecessary since vfsub routine
22039+ * will be called later
22040+ */
22041+ hi = au_h_iptr(inode, bindex);
22042+ if (hi)
22043+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
4a4d8108 22044+ }
e8791d4f 22045+
4a4d8108 22046+ return err;
dece6358 22047+}
1facf9fc 22048+
e8791d4f
AM
22049+int au_test_h_perm(struct inode *h_inode, int mask)
22050+{
22051+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
22052+ return 0;
22053+ return inode_permission(h_inode, mask);
22054+}
1facf9fc 22055+
e8791d4f 22056+int au_test_h_perm_sio(struct inode *h_inode, int mask)
1308ab2a 22057+{
e8791d4f
AM
22058+ if (au_test_nfs(h_inode->i_sb)
22059+ && (mask & MAY_WRITE)
22060+ && S_ISDIR(h_inode->i_mode))
22061+ mask |= MAY_READ; /* force permission check */
22062+ return au_test_h_perm(h_inode, mask);
1308ab2a 22063+}
e8791d4f
AM
22064diff -urNp -x '*.orig' linux-4.9/fs/aufs/inode.h linux-4.9/fs/aufs/inode.h
22065--- linux-4.9/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
22066+++ linux-4.9/fs/aufs/inode.h 2021-02-24 16:15:09.528240413 +0100
22067@@ -0,0 +1,696 @@
22068+/*
22069+ * Copyright (C) 2005-2018 Junjiro R. Okajima
22070+ *
22071+ * This program, aufs is free software; you can redistribute it and/or modify
22072+ * it under the terms of the GNU General Public License as published by
22073+ * the Free Software Foundation; either version 2 of the License, or
22074+ * (at your option) any later version.
22075+ *
22076+ * This program is distributed in the hope that it will be useful,
22077+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22078+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22079+ * GNU General Public License for more details.
22080+ *
22081+ * You should have received a copy of the GNU General Public License
22082+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22083+ */
1facf9fc 22084+
4a4d8108 22085+/*
e8791d4f 22086+ * inode operations
4a4d8108 22087+ */
ae9dfd79 22088+
e8791d4f
AM
22089+#ifndef __AUFS_INODE_H__
22090+#define __AUFS_INODE_H__
1facf9fc 22091+
e8791d4f 22092+#ifdef __KERNEL__
dece6358 22093+
e8791d4f
AM
22094+#include <linux/fsnotify.h>
22095+#include "rwsem.h"
dece6358 22096+
e8791d4f
AM
22097+struct vfsmount;
22098+
22099+struct au_hnotify {
22100+#ifdef CONFIG_AUFS_HNOTIFY
22101+#ifdef CONFIG_AUFS_HFSNOTIFY
22102+ /* never use fsnotify_add_vfsmount_mark() */
22103+ struct fsnotify_mark hn_mark;
22104+#endif
22105+ struct inode *hn_aufs_inode; /* no get/put */
22106+#endif
22107+} ____cacheline_aligned_in_smp;
22108+
22109+struct au_hinode {
22110+ struct inode *hi_inode;
22111+ aufs_bindex_t hi_id;
22112+#ifdef CONFIG_AUFS_HNOTIFY
22113+ struct au_hnotify *hi_notify;
22114+#endif
22115+
22116+ /* reference to the copied-up whiteout with get/put */
22117+ struct dentry *hi_whdentry;
22118+};
22119+
22120+/* ig_flags */
22121+#define AuIG_HALF_REFRESHED 1
22122+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
22123+#define au_ig_fset(flags, name) \
22124+ do { (flags) |= AuIG_##name; } while (0)
22125+#define au_ig_fclr(flags, name) \
22126+ do { (flags) &= ~AuIG_##name; } while (0)
22127+
22128+struct au_iigen {
22129+ spinlock_t ig_spin;
22130+ __u32 ig_generation, ig_flags;
22131+};
22132+
22133+struct au_vdir;
22134+struct au_iinfo {
22135+ struct au_iigen ii_generation;
22136+ struct super_block *ii_hsb1; /* no get/put */
22137+
22138+ struct au_rwsem ii_rwsem;
22139+ aufs_bindex_t ii_btop, ii_bbot;
22140+ __u32 ii_higen;
22141+ struct au_hinode *ii_hinode;
22142+ struct au_vdir *ii_vdir;
22143+};
22144+
22145+struct au_icntnr {
22146+ struct au_iinfo iinfo;
22147+ struct inode vfs_inode;
22148+ struct hlist_bl_node plink;
22149+} ____cacheline_aligned_in_smp;
22150+
22151+/* au_pin flags */
22152+#define AuPin_DI_LOCKED 1
22153+#define AuPin_MNT_WRITE (1 << 1)
22154+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
22155+#define au_fset_pin(flags, name) \
22156+ do { (flags) |= AuPin_##name; } while (0)
22157+#define au_fclr_pin(flags, name) \
22158+ do { (flags) &= ~AuPin_##name; } while (0)
22159+
22160+struct au_pin {
22161+ /* input */
22162+ struct dentry *dentry;
22163+ unsigned int udba;
22164+ unsigned char lsc_di, lsc_hi, flags;
22165+ aufs_bindex_t bindex;
22166+
22167+ /* output */
22168+ struct dentry *parent;
22169+ struct au_hinode *hdir;
22170+ struct vfsmount *h_mnt;
22171+
22172+ /* temporary unlock/relock for copyup */
22173+ struct dentry *h_dentry, *h_parent;
22174+ struct au_branch *br;
22175+ struct task_struct *task;
22176+};
22177+
22178+void au_pin_hdir_unlock(struct au_pin *p);
22179+int au_pin_hdir_lock(struct au_pin *p);
22180+int au_pin_hdir_relock(struct au_pin *p);
22181+void au_pin_hdir_acquire_nest(struct au_pin *p);
22182+void au_pin_hdir_release(struct au_pin *p);
dece6358 22183+
e8791d4f 22184+/* ---------------------------------------------------------------------- */
1facf9fc 22185+
e8791d4f
AM
22186+static inline struct au_iinfo *au_ii(struct inode *inode)
22187+{
22188+ BUG_ON(is_bad_inode(inode));
22189+ return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
4a4d8108 22190+}
1308ab2a 22191+
e8791d4f
AM
22192+/* ---------------------------------------------------------------------- */
22193+
22194+/* inode.c */
22195+struct inode *au_igrab(struct inode *inode);
22196+void au_refresh_iop(struct inode *inode, int force_getattr);
22197+int au_refresh_hinode_self(struct inode *inode);
22198+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
22199+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
22200+ unsigned int d_type, ino_t *ino);
22201+struct inode *au_new_inode(struct dentry *dentry, int must_new);
22202+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
22203+ struct inode *inode);
22204+int au_test_h_perm(struct inode *h_inode, int mask);
22205+int au_test_h_perm_sio(struct inode *h_inode, int mask);
22206+
22207+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
22208+ ino_t h_ino, unsigned int d_type, ino_t *ino)
1308ab2a 22209+{
e8791d4f
AM
22210+#ifdef CONFIG_AUFS_SHWH
22211+ return au_ino(sb, bindex, h_ino, d_type, ino);
22212+#else
22213+ return 0;
22214+#endif
22215+}
1facf9fc 22216+
e8791d4f
AM
22217+/* i_op.c */
22218+enum {
22219+ AuIop_SYMLINK,
22220+ AuIop_DIR,
22221+ AuIop_OTHER,
22222+ AuIop_Last
22223+};
22224+extern struct inode_operations aufs_iop[AuIop_Last],
22225+ aufs_iop_nogetattr[AuIop_Last];
1facf9fc 22226+
e8791d4f
AM
22227+/* au_wr_dir flags */
22228+#define AuWrDir_ADD_ENTRY 1
22229+#define AuWrDir_ISDIR (1 << 1)
22230+#define AuWrDir_TMPFILE (1 << 2)
22231+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
22232+#define au_fset_wrdir(flags, name) \
22233+ do { (flags) |= AuWrDir_##name; } while (0)
22234+#define au_fclr_wrdir(flags, name) \
22235+ do { (flags) &= ~AuWrDir_##name; } while (0)
1308ab2a 22236+
e8791d4f
AM
22237+struct au_wr_dir_args {
22238+ aufs_bindex_t force_btgt;
22239+ unsigned char flags;
22240+};
22241+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
22242+ struct au_wr_dir_args *args);
4a4d8108 22243+
e8791d4f
AM
22244+struct dentry *au_pinned_h_parent(struct au_pin *pin);
22245+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
22246+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
22247+ unsigned int udba, unsigned char flags);
22248+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
22249+ unsigned int udba, unsigned char flags) __must_check;
22250+int au_do_pin(struct au_pin *pin) __must_check;
22251+void au_unpin(struct au_pin *pin);
22252+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
1facf9fc 22253+
e8791d4f
AM
22254+#define AuIcpup_DID_CPUP 1
22255+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
22256+#define au_fset_icpup(flags, name) \
22257+ do { (flags) |= AuIcpup_##name; } while (0)
22258+#define au_fclr_icpup(flags, name) \
22259+ do { (flags) &= ~AuIcpup_##name; } while (0)
1facf9fc 22260+
e8791d4f
AM
22261+struct au_icpup_args {
22262+ unsigned char flags;
22263+ unsigned char pin_flags;
22264+ aufs_bindex_t btgt;
22265+ unsigned int udba;
22266+ struct au_pin pin;
22267+ struct path h_path;
4a4d8108 22268+ struct inode *h_inode;
e8791d4f 22269+};
1facf9fc 22270+
e8791d4f
AM
22271+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
22272+ struct au_icpup_args *a);
1facf9fc 22273+
e8791d4f
AM
22274+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path,
22275+ int locked);
1facf9fc 22276+
e8791d4f
AM
22277+/* i_op_add.c */
22278+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
22279+ struct dentry *h_parent, int isdir);
22280+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
22281+ dev_t dev);
22282+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
22283+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
22284+ bool want_excl);
22285+struct vfsub_aopen_args;
22286+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
22287+ struct vfsub_aopen_args *args);
22288+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
22289+int aufs_link(struct dentry *src_dentry, struct inode *dir,
22290+ struct dentry *dentry);
22291+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
027c5e7a 22292+
e8791d4f
AM
22293+/* i_op_del.c */
22294+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
22295+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
22296+ struct dentry *h_parent, int isdir);
22297+int aufs_unlink(struct inode *dir, struct dentry *dentry);
22298+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1facf9fc 22299+
e8791d4f
AM
22300+/* i_op_ren.c */
22301+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
22302+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
22303+ struct inode *dir, struct dentry *dentry,
22304+ unsigned int flags);
1facf9fc 22305+
e8791d4f
AM
22306+/* iinfo.c */
22307+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
22308+void au_hiput(struct au_hinode *hinode);
22309+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
22310+ struct dentry *h_wh);
22311+unsigned int au_hi_flags(struct inode *inode, int isdir);
1facf9fc 22312+
e8791d4f
AM
22313+/* hinode flags */
22314+#define AuHi_XINO 1
22315+#define AuHi_HNOTIFY (1 << 1)
22316+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
22317+#define au_fset_hi(flags, name) \
22318+ do { (flags) |= AuHi_##name; } while (0)
22319+#define au_fclr_hi(flags, name) \
22320+ do { (flags) &= ~AuHi_##name; } while (0)
1308ab2a 22321+
e8791d4f
AM
22322+#ifndef CONFIG_AUFS_HNOTIFY
22323+#undef AuHi_HNOTIFY
22324+#define AuHi_HNOTIFY 0
22325+#endif
1308ab2a 22326+
e8791d4f
AM
22327+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
22328+ struct inode *h_inode, unsigned int flags);
86dc4139 22329+
e8791d4f
AM
22330+void au_update_iigen(struct inode *inode, int half);
22331+void au_update_ibrange(struct inode *inode, int do_put_zero);
ae9dfd79 22332+
e8791d4f
AM
22333+void au_icntnr_init_once(void *_c);
22334+void au_hinode_init(struct au_hinode *hinode);
22335+int au_iinfo_init(struct inode *inode);
22336+void au_iinfo_fin(struct inode *inode);
22337+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink);
4a4d8108 22338+
e8791d4f
AM
22339+#ifdef CONFIG_PROC_FS
22340+/* plink.c */
22341+int au_plink_maint(struct super_block *sb, int flags);
22342+struct au_sbinfo;
22343+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
22344+int au_plink_maint_enter(struct super_block *sb);
22345+#ifdef CONFIG_AUFS_DEBUG
22346+void au_plink_list(struct super_block *sb);
22347+#else
22348+AuStubVoid(au_plink_list, struct super_block *sb)
22349+#endif
22350+int au_plink_test(struct inode *inode);
22351+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
22352+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
22353+ struct dentry *h_dentry);
22354+void au_plink_put(struct super_block *sb, int verbose);
22355+void au_plink_clean(struct super_block *sb, int verbose);
22356+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
22357+#else
22358+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
22359+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
22360+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
22361+AuStubVoid(au_plink_list, struct super_block *sb);
22362+AuStubInt0(au_plink_test, struct inode *inode);
22363+AuStub(struct dentry *, au_plink_lkup, return NULL,
22364+ struct inode *inode, aufs_bindex_t bindex);
22365+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
22366+ struct dentry *h_dentry);
22367+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
22368+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
22369+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
22370+#endif /* CONFIG_PROC_FS */
86dc4139 22371+
e8791d4f
AM
22372+#ifdef CONFIG_AUFS_XATTR
22373+/* xattr.c */
22374+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
22375+ unsigned int verbose);
22376+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
22377+void au_xattr_init(struct super_block *sb);
22378+#else
22379+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
22380+ int ignore_flags, unsigned int verbose);
22381+AuStubVoid(au_xattr_init, struct super_block *sb);
22382+#endif
1facf9fc 22383+
e8791d4f
AM
22384+#ifdef CONFIG_FS_POSIX_ACL
22385+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
22386+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
22387+#endif
1facf9fc 22388+
e8791d4f
AM
22389+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
22390+enum {
22391+ AU_XATTR_SET,
22392+ AU_ACL_SET
22393+};
dece6358 22394+
e8791d4f
AM
22395+struct au_sxattr {
22396+ int type;
22397+ union {
22398+ struct {
22399+ const char *name;
22400+ const void *value;
22401+ size_t size;
22402+ int flags;
22403+ } set;
22404+ struct {
22405+ struct posix_acl *acl;
22406+ int type;
22407+ } acl_set;
22408+ } u;
22409+};
22410+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
22411+ struct au_sxattr *arg);
22412+#endif
dece6358 22413+
e8791d4f 22414+/* ---------------------------------------------------------------------- */
f2c43d5f 22415+
e8791d4f
AM
22416+/* lock subclass for iinfo */
22417+enum {
22418+ AuLsc_II_CHILD, /* child first */
22419+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
22420+ AuLsc_II_CHILD3, /* copyup dirs */
22421+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
22422+ AuLsc_II_PARENT2,
22423+ AuLsc_II_PARENT3, /* copyup dirs */
22424+ AuLsc_II_NEW_CHILD
22425+};
dece6358 22426+
e8791d4f
AM
22427+/*
22428+ * ii_read_lock_child, ii_write_lock_child,
22429+ * ii_read_lock_child2, ii_write_lock_child2,
22430+ * ii_read_lock_child3, ii_write_lock_child3,
22431+ * ii_read_lock_parent, ii_write_lock_parent,
22432+ * ii_read_lock_parent2, ii_write_lock_parent2,
22433+ * ii_read_lock_parent3, ii_write_lock_parent3,
22434+ * ii_read_lock_new_child, ii_write_lock_new_child,
22435+ */
22436+#define AuReadLockFunc(name, lsc) \
22437+static inline void ii_read_lock_##name(struct inode *i) \
22438+{ \
22439+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
1facf9fc 22440+}
22441+
e8791d4f
AM
22442+#define AuWriteLockFunc(name, lsc) \
22443+static inline void ii_write_lock_##name(struct inode *i) \
22444+{ \
22445+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
22446+}
dece6358 22447+
e8791d4f
AM
22448+#define AuRWLockFuncs(name, lsc) \
22449+ AuReadLockFunc(name, lsc) \
22450+ AuWriteLockFunc(name, lsc)
027c5e7a 22451+
e8791d4f
AM
22452+AuRWLockFuncs(child, CHILD);
22453+AuRWLockFuncs(child2, CHILD2);
22454+AuRWLockFuncs(child3, CHILD3);
22455+AuRWLockFuncs(parent, PARENT);
22456+AuRWLockFuncs(parent2, PARENT2);
22457+AuRWLockFuncs(parent3, PARENT3);
22458+AuRWLockFuncs(new_child, NEW_CHILD);
027c5e7a 22459+
e8791d4f
AM
22460+#undef AuReadLockFunc
22461+#undef AuWriteLockFunc
22462+#undef AuRWLockFuncs
ae9dfd79 22463+
e8791d4f
AM
22464+/*
22465+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
22466+ */
22467+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
4a4d8108 22468+
e8791d4f
AM
22469+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
22470+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
22471+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
4a4d8108 22472+
e8791d4f
AM
22473+/* ---------------------------------------------------------------------- */
22474+
/* clear i_mode of the embedded VFS inode, under CONFIG_AUFS_DEBUG only */
static inline void au_icntnr_init(struct au_icntnr *c)
{
#if defined(CONFIG_AUFS_DEBUG)
	c->vfs_inode.i_mode = 0;
#endif
}
dece6358 22481+
e8791d4f
AM
22482+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
22483+{
22484+ unsigned int gen;
22485+ struct au_iinfo *iinfo;
22486+ struct au_iigen *iigen;
4a4d8108 22487+
e8791d4f
AM
22488+ iinfo = au_ii(inode);
22489+ iigen = &iinfo->ii_generation;
22490+ spin_lock(&iigen->ig_spin);
22491+ if (igflags)
22492+ *igflags = iigen->ig_flags;
22493+ gen = iigen->ig_generation;
22494+ spin_unlock(&iigen->ig_spin);
22495+
22496+ return gen;
22497+}
22498+
22499+/* tiny test for inode number */
22500+/* tmpfs generation is too rough */
22501+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
1308ab2a 22502+{
e8791d4f 22503+ struct au_iinfo *iinfo;
4a4d8108 22504+
e8791d4f
AM
22505+ iinfo = au_ii(inode);
22506+ AuRwMustAnyLock(&iinfo->ii_rwsem);
22507+ return !(iinfo->ii_hsb1 == h_inode->i_sb
22508+ && iinfo->ii_higen == h_inode->i_generation);
22509+}
4a4d8108 22510+
e8791d4f
AM
22511+static inline void au_iigen_dec(struct inode *inode)
22512+{
22513+ struct au_iinfo *iinfo;
22514+ struct au_iigen *iigen;
4a4d8108 22515+
e8791d4f
AM
22516+ iinfo = au_ii(inode);
22517+ iigen = &iinfo->ii_generation;
22518+ spin_lock(&iigen->ig_spin);
22519+ iigen->ig_generation--;
22520+ spin_unlock(&iigen->ig_spin);
1facf9fc 22521+}
22522+
e8791d4f 22523+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
1facf9fc 22524+{
4a4d8108 22525+ int err;
dece6358 22526+
e8791d4f
AM
22527+ err = 0;
22528+ if (unlikely(inode && au_iigen(inode, NULL) != sigen))
22529+ err = -EIO;
dece6358 22530+
4a4d8108 22531+ return err;
1facf9fc 22532+}
22533+
e8791d4f
AM
22534+/* ---------------------------------------------------------------------- */
22535+
22536+static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo,
22537+ aufs_bindex_t bindex)
1facf9fc 22538+{
e8791d4f
AM
22539+ return iinfo->ii_hinode + bindex;
22540+}
1facf9fc 22541+
e8791d4f
AM
/*
 * Returns one when @inode is a bad inode or its ii_hinode array is NULL,
 * otherwise zero.
 */
static inline int au_is_bad_inode(struct inode *inode)
{
	/* test this first, au_ii() must not be called for a bad inode */
	if (is_bad_inode(inode))
		return 1;

	return !au_hinode(au_ii(inode), 0);
}
dece6358 22546+
e8791d4f
AM
22547+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
22548+ aufs_bindex_t bindex)
22549+{
22550+ IiMustAnyLock(inode);
22551+ return au_hinode(au_ii(inode), bindex)->hi_id;
1308ab2a 22552+}
dece6358 22553+
e8791d4f 22554+static inline aufs_bindex_t au_ibtop(struct inode *inode)
1308ab2a 22555+{
e8791d4f
AM
22556+ IiMustAnyLock(inode);
22557+ return au_ii(inode)->ii_btop;
22558+}
4a4d8108 22559+
e8791d4f
AM
22560+static inline aufs_bindex_t au_ibbot(struct inode *inode)
22561+{
22562+ IiMustAnyLock(inode);
22563+ return au_ii(inode)->ii_bbot;
22564+}
4a4d8108 22565+
e8791d4f
AM
22566+static inline struct au_vdir *au_ivdir(struct inode *inode)
22567+{
22568+ IiMustAnyLock(inode);
22569+ return au_ii(inode)->ii_vdir;
22570+}
4a4d8108 22571+
e8791d4f
AM
22572+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
22573+{
22574+ IiMustAnyLock(inode);
22575+ return au_hinode(au_ii(inode), bindex)->hi_whdentry;
22576+}
22577+
22578+static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex)
22579+{
22580+ IiMustWriteLock(inode);
22581+ au_ii(inode)->ii_btop = bindex;
22582+}
22583+
22584+static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex)
22585+{
22586+ IiMustWriteLock(inode);
22587+ au_ii(inode)->ii_bbot = bindex;
22588+}
22589+
22590+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
22591+{
22592+ IiMustWriteLock(inode);
22593+ au_ii(inode)->ii_vdir = vdir;
22594+}
22595+
22596+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
22597+{
22598+ IiMustAnyLock(inode);
22599+ return au_hinode(au_ii(inode), bindex);
1facf9fc 22600+}
22601+
4a4d8108
AM
22602+/* ---------------------------------------------------------------------- */
22603+
e8791d4f 22604+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
1facf9fc 22605+{
e8791d4f
AM
22606+ if (pin)
22607+ return pin->parent;
22608+ return NULL;
22609+}
4a4d8108 22610+
e8791d4f
AM
22611+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
22612+{
22613+ if (pin && pin->hdir)
22614+ return pin->hdir->hi_inode;
22615+ return NULL;
22616+}
4a4d8108 22617+
e8791d4f
AM
22618+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
22619+{
22620+ if (pin)
22621+ return pin->hdir;
22622+ return NULL;
22623+}
f2c43d5f 22624+
e8791d4f
AM
22625+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
22626+{
22627+ if (pin)
22628+ pin->dentry = dentry;
22629+}
4a4d8108 22630+
e8791d4f
AM
22631+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
22632+ unsigned char lflag)
22633+{
22634+ if (pin) {
22635+ if (lflag)
22636+ au_fset_pin(pin->flags, DI_LOCKED);
22637+ else
22638+ au_fclr_pin(pin->flags, DI_LOCKED);
1facf9fc 22639+ }
e8791d4f 22640+}
1facf9fc 22641+
e8791d4f
AM
22642+#if 0 /* reserved */
22643+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
22644+{
22645+ if (pin) {
22646+ dput(pin->parent);
22647+ pin->parent = dget(parent);
b95c5147 22648+ }
e8791d4f
AM
22649+}
22650+#endif
1facf9fc 22651+
e8791d4f
AM
22652+/* ---------------------------------------------------------------------- */
22653+
22654+struct au_branch;
22655+#ifdef CONFIG_AUFS_HNOTIFY
22656+struct au_hnotify_op {
22657+ void (*ctl)(struct au_hinode *hinode, int do_set);
22658+ int (*alloc)(struct au_hinode *hinode);
027c5e7a 22659+
7eafdf33 22660+ /*
e8791d4f
AM
22661+ * if it returns true, the caller should free hinode->hi_notify,
22662+ * otherwise ->free() frees it.
7eafdf33 22663+ */
e8791d4f
AM
22664+ int (*free)(struct au_hinode *hinode,
22665+ struct au_hnotify *hn) __must_check;
7eafdf33 22666+
e8791d4f
AM
22667+ void (*fin)(void);
22668+ int (*init)(void);
1facf9fc 22669+
e8791d4f
AM
22670+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
22671+ void (*fin_br)(struct au_branch *br);
22672+ int (*init_br)(struct au_branch *br, int perm);
22673+};
1facf9fc 22674+
e8791d4f
AM
22675+/* hnotify.c */
22676+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
22677+void au_hn_free(struct au_hinode *hinode);
22678+void au_hn_ctl(struct au_hinode *hinode, int do_set);
22679+void au_hn_reset(struct inode *inode, unsigned int flags);
22680+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
22681+ struct qstr *h_child_qstr, struct inode *h_child_inode);
22682+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
22683+int au_hnotify_init_br(struct au_branch *br, int perm);
22684+void au_hnotify_fin_br(struct au_branch *br);
22685+int __init au_hnotify_init(void);
22686+void au_hnotify_fin(void);
1facf9fc 22687+
e8791d4f
AM
22688+/* hfsnotify.c */
22689+extern const struct au_hnotify_op au_hnotify_op;
1facf9fc 22690+
e8791d4f
AM
22691+static inline
22692+void au_hn_init(struct au_hinode *hinode)
22693+{
22694+ hinode->hi_notify = NULL;
22695+}
1facf9fc 22696+
e8791d4f
AM
22697+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
22698+{
22699+ return hinode->hi_notify;
22700+}
f2c43d5f 22701+
e8791d4f
AM
22702+#else
22703+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
22704+ struct au_hinode *hinode __maybe_unused,
22705+ struct inode *inode __maybe_unused)
22706+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
22707+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
22708+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
22709+ int do_set __maybe_unused)
22710+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
22711+ unsigned int flags __maybe_unused)
22712+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
22713+ struct au_branch *br __maybe_unused,
22714+ int perm __maybe_unused)
22715+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
22716+ int perm __maybe_unused)
22717+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
22718+AuStubInt0(__init au_hnotify_init, void)
22719+AuStubVoid(au_hnotify_fin, void)
22720+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
22721+#endif /* CONFIG_AUFS_HNOTIFY */
22722+
/* Call au_hn_ctl() on @hdir with do_set == 0. */
static inline void au_hn_suspend(struct au_hinode *hdir)
{
	const int do_set = 0;

	au_hn_ctl(hdir, do_set);
}
22727+
/* Call au_hn_ctl() on @hdir with do_set == 1. */
static inline void au_hn_resume(struct au_hinode *hdir)
{
	const int do_set = 1;

	au_hn_ctl(hdir, do_set);
}
22732+
22733+static inline void au_hn_inode_lock(struct au_hinode *hdir)
22734+{
22735+ inode_lock(hdir->hi_inode);
22736+ au_hn_suspend(hdir);
22737+}
22738+
22739+static inline void au_hn_inode_lock_nested(struct au_hinode *hdir,
22740+ unsigned int sc __maybe_unused)
22741+{
22742+ inode_lock_nested(hdir->hi_inode, sc);
22743+ au_hn_suspend(hdir);
22744+}
22745+
#if 0 /* unused */
#include "vfsub.h"
/*
 * Shared-lock variant: lock @hdir->hi_inode through
 * vfsub_inode_lock_shared_nested() with the subclass @sc, then suspend
 * hnotify on @hdir.
 */
static inline void au_hn_inode_lock_shared_nested(struct au_hinode *hdir,
						  unsigned int sc)
{
	struct inode *h_inode = hdir->hi_inode;

	vfsub_inode_lock_shared_nested(h_inode, sc);
	au_hn_suspend(hdir);
}
#endif
22755+
22756+static inline void au_hn_inode_unlock(struct au_hinode *hdir)
22757+{
22758+ au_hn_resume(hdir);
22759+ inode_unlock(hdir->hi_inode);
22760+}
22761+
22762+#endif /* __KERNEL__ */
22763+#endif /* __AUFS_INODE_H__ */
22764diff -urNp -x '*.orig' linux-4.9/fs/aufs/ioctl.c linux-4.9/fs/aufs/ioctl.c
22765--- linux-4.9/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
22766+++ linux-4.9/fs/aufs/ioctl.c 2021-02-24 16:15:09.531573855 +0100
22767@@ -0,0 +1,219 @@
22768+/*
22769+ * Copyright (C) 2005-2018 Junjiro R. Okajima
22770+ *
22771+ * This program, aufs is free software; you can redistribute it and/or modify
22772+ * it under the terms of the GNU General Public License as published by
22773+ * the Free Software Foundation; either version 2 of the License, or
22774+ * (at your option) any later version.
22775+ *
22776+ * This program is distributed in the hope that it will be useful,
22777+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22778+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22779+ * GNU General Public License for more details.
22780+ *
22781+ * You should have received a copy of the GNU General Public License
22782+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22783+ */
86dc4139 22784+
e8791d4f
AM
22785+/*
22786+ * ioctl
22787+ * plink-management and readdir in userspace.
22788+ * assist the pathconf(3) wrapper library.
22789+ * move-down
22790+ * File-based Hierarchical Storage Management.
22791+ */
1facf9fc 22792+
e8791d4f
AM
22793+#include <linux/compat.h>
22794+#include <linux/file.h>
22795+#include "aufs.h"
1facf9fc 22796+
e8791d4f
AM
22797+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg) /* open the root dir on a writable branch and return a new fd for it, or a negative errno */
22798+{
22799+ int err, fd;
22800+ aufs_bindex_t wbi, bindex, bbot;
22801+ struct file *h_file;
22802+ struct super_block *sb;
22803+ struct dentry *root;
22804+ struct au_branch *br;
22805+ struct aufs_wbr_fd wbrfd = { /* defaults, used as they are when @arg is NULL */
22806+ .oflags = au_dir_roflags,
22807+ .brid = -1
22808+ };
22809+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY /* the only open flags a caller may end up with */
22810+ | O_NOATIME | O_CLOEXEC;
1facf9fc 22811+
e8791d4f 22812+ AuDebugOn(wbrfd.oflags & ~valid);
ae9dfd79 22813+
e8791d4f
AM
22814+ if (arg) {
22815+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
22816+ if (unlikely(err)) {
22817+ err = -EFAULT;
22818+ goto out;
22819+ }
ae9dfd79 22820+
e8791d4f
AM
22821+ err = -EINVAL;
22822+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
22823+ wbrfd.oflags |= au_dir_roflags; /* the directory read-only flags are always forced on */
22824+ AuDbg("0%o\n", wbrfd.oflags);
22825+ if (unlikely(wbrfd.oflags & ~valid)) /* any flag outside 'valid' is rejected with -EINVAL */
22826+ goto out;
22827+ }
4a4d8108 22828+
e8791d4f
AM
22829+ fd = get_unused_fd_flags(0); /* NOTE(review): the flags argument is 0, so an O_CLOEXEC in wbrfd.oflags is not passed to the fd allocation -- confirm this is intended */
22830+ err = fd;
22831+ if (unlikely(fd < 0))
22832+ goto out;
4a4d8108 22833+
e8791d4f
AM
22834+ h_file = ERR_PTR(-EINVAL); /* the result when the requested brid cannot be resolved below */
22835+ wbi = 0;
22836+ br = NULL;
22837+ sb = path->dentry->d_sb;
22838+ root = sb->s_root;
22839+ aufs_read_lock(root, AuLock_IR);
22840+ bbot = au_sbbot(sb);
22841+ if (wbrfd.brid >= 0) { /* a branch id was given: translate it to a branch index */
22842+ wbi = au_br_index(sb, wbrfd.brid);
22843+ if (unlikely(wbi < 0 || wbi > bbot))
22844+ goto out_unlock;
22845+ }
4a4d8108 22846+
e8791d4f
AM
22847+ h_file = ERR_PTR(-ENOENT); /* the result when no writable branch is selected below */
22848+ br = au_sbr(sb, wbi);
22849+ if (!au_br_writable(br->br_perm)) {
22850+ if (arg) /* with a user-supplied request there is no fallback to another branch */
22851+ goto out_unlock;
4a4d8108 22852+
e8791d4f
AM
22853+ bindex = wbi + 1; /* without @arg: scan the following branches for the first writable one */
22854+ wbi = -1;
22855+ for (; bindex <= bbot; bindex++) {
22856+ br = au_sbr(sb, bindex);
22857+ if (au_br_writable(br->br_perm)) {
22858+ wbi = bindex;
22859+ br = au_sbr(sb, wbi);
22860+ break;
22861+ }
ae9dfd79 22862+ }
027c5e7a 22863+ }
e8791d4f
AM
22864+ AuDbg("wbi %d\n", wbi);
22865+ if (wbi >= 0)
22866+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
22867+ /*force_wr*/0);
22868+
027c5e7a 22869+out_unlock:
e8791d4f
AM
22870+ aufs_read_unlock(root, AuLock_IR);
22871+ err = PTR_ERR(h_file);
22872+ if (IS_ERR(h_file))
22873+ goto out_fd;
22874+
22875+ au_br_put(br); /* cf. au_h_open() */ /* presumably drops a branch reference taken inside au_h_open() -- verify there */
22876+ fd_install(fd, h_file);
22877+ err = fd;
22878+ goto out; /* success */
22879+
22880+out_fd:
22881+ put_unused_fd(fd); /* give back the reserved descriptor on failure */
4f0767ce 22882+out:
4a4d8108
AM
22883+ AuTraceErr(err);
22884+ return err;
1308ab2a 22885+}
1facf9fc 22886+
e8791d4f 22887+/* ---------------------------------------------------------------------- */
1facf9fc 22888+
e8791d4f
AM
22889+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
22890+{
22891+ long err;
22892+ struct dentry *dentry;
e49829fe 22893+
e8791d4f
AM
22894+ switch (cmd) {
22895+ case AUFS_CTL_RDU:
22896+ case AUFS_CTL_RDU_INO:
22897+ err = au_rdu_ioctl(file, cmd, arg);
22898+ break;
dece6358 22899+
e8791d4f
AM
22900+ case AUFS_CTL_WBR_FD:
22901+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
22902+ break;
1facf9fc 22903+
e8791d4f
AM
22904+ case AUFS_CTL_IBUSY:
22905+ err = au_ibusy_ioctl(file, arg);
22906+ break;
1facf9fc 22907+
e8791d4f
AM
22908+ case AUFS_CTL_BRINFO:
22909+ err = au_brinfo_ioctl(file, arg);
22910+ break;
1facf9fc 22911+
e8791d4f
AM
22912+ case AUFS_CTL_FHSM_FD:
22913+ dentry = file->f_path.dentry;
22914+ if (IS_ROOT(dentry))
22915+ err = au_fhsm_fd(dentry->d_sb, arg);
22916+ else
22917+ err = -ENOTTY;
22918+ break;
c1595e42 22919+
e8791d4f
AM
22920+ default:
22921+ /* do not call the lower */
22922+ AuDbg("0x%x\n", cmd);
22923+ err = -ENOTTY;
22924+ }
076b876e 22925+
e8791d4f
AM
22926+ AuTraceErr(err);
22927+ return err;
22928+}
1facf9fc 22929+
e8791d4f
AM
22930+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
22931+{
22932+ long err;
ae9dfd79 22933+
e8791d4f
AM
22934+ switch (cmd) {
22935+ case AUFS_CTL_MVDOWN:
22936+ err = au_mvdown(file->f_path.dentry, (void __user *)arg);
22937+ break;
1facf9fc 22938+
e8791d4f
AM
22939+ case AUFS_CTL_WBR_FD:
22940+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
22941+ break;
1facf9fc 22942+
e8791d4f
AM
22943+ default:
22944+ /* do not call the lower */
22945+ AuDbg("0x%x\n", cmd);
22946+ err = -ENOTTY;
22947+ }
1facf9fc 22948+
e8791d4f
AM
22949+ AuTraceErr(err);
22950+ return err;
22951+}
1facf9fc 22952+
e8791d4f
AM
22953+#ifdef CONFIG_COMPAT
22954+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
22955+ unsigned long arg)
22956+{
22957+ long err;
1facf9fc 22958+
e8791d4f
AM
22959+ switch (cmd) {
22960+ case AUFS_CTL_RDU:
22961+ case AUFS_CTL_RDU_INO:
22962+ err = au_rdu_compat_ioctl(file, cmd, arg);
22963+ break;
1308ab2a 22964+
e8791d4f
AM
22965+ case AUFS_CTL_IBUSY:
22966+ err = au_ibusy_compat_ioctl(file, arg);
22967+ break;
4a4d8108 22968+
e8791d4f
AM
22969+ case AUFS_CTL_BRINFO:
22970+ err = au_brinfo_compat_ioctl(file, arg);
22971+ break;
22972+
22973+ default:
22974+ err = aufs_ioctl_dir(file, cmd, arg);
22975+ }
22976+
22977+ AuTraceErr(err);
22978+ return err;
22979+}
22980+
/*
 * compat ioctl entry for non-directories: convert @arg with compat_ptr()
 * and hand the request to aufs_ioctl_nondir().
 */
long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
			      unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return aufs_ioctl_nondir(file, cmd, native_arg);
}
22986+#endif
22987diff -urNp -x '*.orig' linux-4.9/fs/aufs/loop.c linux-4.9/fs/aufs/loop.c
22988--- linux-4.9/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
22989+++ linux-4.9/fs/aufs/loop.c 2021-02-24 16:15:09.541574180 +0100
22990@@ -0,0 +1,163 @@
1facf9fc 22991+/*
ae9dfd79 22992+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 22993+ *
22994+ * This program, aufs is free software; you can redistribute it and/or modify
22995+ * it under the terms of the GNU General Public License as published by
22996+ * the Free Software Foundation; either version 2 of the License, or
22997+ * (at your option) any later version.
dece6358
AM
22998+ *
22999+ * This program is distributed in the hope that it will be useful,
23000+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23001+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23002+ * GNU General Public License for more details.
23003+ *
23004+ * You should have received a copy of the GNU General Public License
523b37e3 23005+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 23006+ */
23007+
23008+/*
23009+ * support for loopback block device as a branch
23010+ */
23011+
1facf9fc 23012+#include "aufs.h"
23013+
392086de
AM
23014+/* added into drivers/block/loop.c */
23015+static struct file *(*backing_file_func)(struct super_block *sb);
23016+
1facf9fc 23017+/*
23018+ * test if two lower dentries have overlapping branches.
23019+ */
b752ccd1 23020+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 23021+{
b752ccd1 23022+ struct super_block *h_sb;
392086de
AM
23023+ struct file *backing_file;
23024+
23025+ if (unlikely(!backing_file_func)) {
23026+ /* don't load "loop" module here */
23027+ backing_file_func = symbol_get(loop_backing_file);
23028+ if (unlikely(!backing_file_func))
23029+ /* "loop" module is not loaded */
23030+ return 0;
23031+ }
1facf9fc 23032+
b752ccd1 23033+ h_sb = h_adding->d_sb;
392086de
AM
23034+ backing_file = backing_file_func(h_sb);
23035+ if (!backing_file)
1facf9fc 23036+ return 0;
23037+
2000de60 23038+ h_adding = backing_file->f_path.dentry;
b752ccd1
AM
23039+ /*
23040+ * h_adding can be local NFS.
23041+ * in this case aufs cannot detect the loop.
23042+ */
23043+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 23044+ return 1;
b752ccd1 23045+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 23046+}
23047+
23048+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
23049+int au_test_loopback_kthread(void)
23050+{
b752ccd1
AM
23051+ int ret;
23052+ struct task_struct *tsk = current;
a2a7ad62 23053+ char c, comm[sizeof(tsk->comm)];
b752ccd1
AM
23054+
23055+ ret = 0;
23056+ if (tsk->flags & PF_KTHREAD) {
a2a7ad62
AM
23057+ get_task_comm(comm, tsk);
23058+ c = comm[4];
b752ccd1 23059+ ret = ('0' <= c && c <= '9'
a2a7ad62 23060+ && !strncmp(comm, "loop", 4));
b752ccd1 23061+ }
1facf9fc 23062+
b752ccd1 23063+ return ret;
1facf9fc 23064+}
87a755f4
AM
23065+
23066+/* ---------------------------------------------------------------------- */
23067+
23068+#define au_warn_loopback_step 16
23069+static int au_warn_loopback_nelem = au_warn_loopback_step;
23070+static unsigned long *au_warn_loopback_array;
23071+
23072+void au_warn_loopback(struct super_block *h_sb)
23073+{
23074+ int i, new_nelem;
23075+ unsigned long *a, magic;
23076+ static DEFINE_SPINLOCK(spin);
23077+
23078+ magic = h_sb->s_magic;
23079+ spin_lock(&spin);
23080+ a = au_warn_loopback_array;
23081+ for (i = 0; i < au_warn_loopback_nelem && *a; i++)
23082+ if (a[i] == magic) {
23083+ spin_unlock(&spin);
23084+ return;
23085+ }
23086+
23087+ /* h_sb is new to us, print it */
23088+ if (i < au_warn_loopback_nelem) {
23089+ a[i] = magic;
23090+ goto pr;
23091+ }
23092+
23093+ /* expand the array */
23094+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
23095+ a = au_kzrealloc(au_warn_loopback_array,
23096+ au_warn_loopback_nelem * sizeof(unsigned long),
e2f27e51
AM
23097+ new_nelem * sizeof(unsigned long), GFP_ATOMIC,
23098+ /*may_shrink*/0);
87a755f4
AM
23099+ if (a) {
23100+ au_warn_loopback_nelem = new_nelem;
23101+ au_warn_loopback_array = a;
23102+ a[i] = magic;
23103+ goto pr;
23104+ }
23105+
23106+ spin_unlock(&spin);
23107+ AuWarn1("realloc failed, ignored\n");
23108+ return;
23109+
23110+pr:
23111+ spin_unlock(&spin);
0c3ec466
AM
23112+ pr_warn("you may want to try another patch for loopback file "
23113+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
87a755f4
AM
23114+}
23115+
23116+int au_loopback_init(void)
23117+{
23118+ int err;
23119+ struct super_block *sb __maybe_unused;
23120+
79b8bda9 23121+ BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
87a755f4
AM
23122+
23123+ err = 0;
23124+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
23125+ sizeof(unsigned long), GFP_NOFS);
23126+ if (unlikely(!au_warn_loopback_array))
23127+ err = -ENOMEM;
23128+
23129+ return err;
23130+}
23131+
23132+void au_loopback_fin(void)
23133+{
79b8bda9
AM
23134+ if (backing_file_func)
23135+ symbol_put(loop_backing_file);
ae9dfd79 23136+ kfree(au_warn_loopback_array);
87a755f4 23137+}
e8791d4f
AM
23138+
23139+/* ---------------------------------------------------------------------- */
23140+
23141+/* support the loopback block device inside aufs */
23142+
23143+struct file *aufs_real_loop(struct file *file)
23144+{
23145+ struct file *f;
23146+
23147+ BUG_ON(!au_test_aufs(file->f_path.dentry->d_sb));
23148+ fi_read_lock(file);
23149+ f = au_hf_top(file);
23150+ fi_read_unlock(file);
23151+ AuDebugOn(!f);
23152+ return f;
23153+}
23154diff -urNp -x '*.orig' linux-4.9/fs/aufs/loop.h linux-4.9/fs/aufs/loop.h
23155--- linux-4.9/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
23156+++ linux-4.9/fs/aufs/loop.h 2021-02-24 16:15:09.541574180 +0100
23157@@ -0,0 +1,58 @@
1facf9fc 23158+/*
ae9dfd79 23159+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 23160+ *
23161+ * This program, aufs is free software; you can redistribute it and/or modify
23162+ * it under the terms of the GNU General Public License as published by
23163+ * the Free Software Foundation; either version 2 of the License, or
23164+ * (at your option) any later version.
dece6358
AM
23165+ *
23166+ * This program is distributed in the hope that it will be useful,
23167+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23168+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23169+ * GNU General Public License for more details.
23170+ *
23171+ * You should have received a copy of the GNU General Public License
523b37e3 23172+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 23173+ */
23174+
23175+/*
23176+ * support for loopback mount as a branch
23177+ */
23178+
23179+#ifndef __AUFS_LOOP_H__
23180+#define __AUFS_LOOP_H__
23181+
23182+#ifdef __KERNEL__
23183+
dece6358
AM
23184+struct dentry;
23185+struct super_block;
1facf9fc 23186+
23187+#ifdef CONFIG_AUFS_BDEV_LOOP
392086de
AM
23188+/* drivers/block/loop.c */
23189+struct file *loop_backing_file(struct super_block *sb);
23190+
1facf9fc 23191+/* loop.c */
b752ccd1 23192+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 23193+int au_test_loopback_kthread(void);
87a755f4
AM
23194+void au_warn_loopback(struct super_block *h_sb);
23195+
23196+int au_loopback_init(void);
23197+void au_loopback_fin(void);
e8791d4f
AM
23198+
23199+struct file *aufs_real_loop(struct file *file);
1facf9fc 23200+#else
e8791d4f
AM
23201+AuStub(struct file *, loop_backing_file, return NULL)
23202+
4a4d8108 23203+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 23204+ struct dentry *h_adding)
4a4d8108 23205+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
23206+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
23207+
23208+AuStubInt0(au_loopback_init, void)
23209+AuStubVoid(au_loopback_fin, void)
e8791d4f
AM
23210+
23211+AuStub(struct file *, aufs_real_loop, return NULL, struct file *file)
1facf9fc 23212+#endif /* CONFIG_AUFS_BDEV_LOOP */
23213+
23214+#endif /* __KERNEL__ */
23215+#endif /* __AUFS_LOOP_H__ */
e8791d4f
AM
23216diff -urNp -x '*.orig' linux-4.9/fs/aufs/magic.mk linux-4.9/fs/aufs/magic.mk
23217--- linux-4.9/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
23218+++ linux-4.9/fs/aufs/magic.mk 2021-02-24 16:15:09.531573855 +0100
7e9cd9fe 23219@@ -0,0 +1,30 @@
1facf9fc 23220+
23221+# defined in ${srctree}/fs/fuse/inode.c
23222+# tristate
23223+ifdef CONFIG_FUSE_FS
23224+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
23225+endif
23226+
1facf9fc 23227+# defined in ${srctree}/fs/xfs/xfs_sb.h
23228+# tristate
23229+ifdef CONFIG_XFS_FS
23230+ccflags-y += -DXFS_SB_MAGIC=0x58465342
23231+endif
23232+
23233+# defined in ${srctree}/fs/configfs/mount.c
23234+# tristate
23235+ifdef CONFIG_CONFIGFS_FS
23236+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
23237+endif
23238+
1facf9fc 23239+# defined in ${srctree}/fs/ubifs/ubifs.h
23240+# tristate
23241+ifdef CONFIG_UBIFS_FS
23242+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
23243+endif
4a4d8108
AM
23244+
23245+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
23246+# tristate
23247+ifdef CONFIG_HFSPLUS_FS
23248+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
23249+endif
e8791d4f
AM
23250diff -urNp -x '*.orig' linux-4.9/fs/aufs/module.c linux-4.9/fs/aufs/module.c
23251--- linux-4.9/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
23252+++ linux-4.9/fs/aufs/module.c 2021-02-24 16:15:09.531573855 +0100
ae9dfd79 23253@@ -0,0 +1,266 @@
1facf9fc 23254+/*
ae9dfd79 23255+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 23256+ *
23257+ * This program, aufs is free software; you can redistribute it and/or modify
23258+ * it under the terms of the GNU General Public License as published by
23259+ * the Free Software Foundation; either version 2 of the License, or
23260+ * (at your option) any later version.
dece6358
AM
23261+ *
23262+ * This program is distributed in the hope that it will be useful,
23263+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23264+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23265+ * GNU General Public License for more details.
23266+ *
23267+ * You should have received a copy of the GNU General Public License
523b37e3 23268+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 23269+ */
23270+
23271+/*
23272+ * module global variables and operations
23273+ */
23274+
23275+#include <linux/module.h>
23276+#include <linux/seq_file.h>
23277+#include "aufs.h"
23278+
e2f27e51
AM
23279+/* shrinkable realloc */
23280+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink) /* resize @p to @new_sz bytes; returns the buffer to use, or NULL on allocation failure */
1facf9fc 23281+{
e2f27e51
AM
23282+ size_t sz; /* ksize() of @p, or 0 when @p is NULL */
23283+ int diff; /* au_kmidx_sub(sz, new_sz), or -1 when @p is NULL */
1facf9fc 23284+
e2f27e51
AM
23285+ sz = 0;
23286+ diff = -1;
23287+ if (p) {
23288+#if 0 /* unused */
23289+ if (!new_sz) {
ae9dfd79 23290+ kfree(p);
e2f27e51
AM
23291+ p = NULL;
23292+ goto out;
23293+ }
23294+#else
23295+ AuDebugOn(!new_sz);
23296+#endif
23297+ sz = ksize(p);
23298+ diff = au_kmidx_sub(sz, new_sz);
23299+ }
23300+ if (sz && !diff) /* current and requested sizes give the same au_kmidx_sub() index: keep @p as it is */
23301+ goto out;
23302+
23303+ if (sz < new_sz)
23304+ /* expand or SLOB */
23305+ p = krealloc(p, new_sz, gfp);
23306+ else if (new_sz < sz && may_shrink) {
23307+ /* shrink */
23308+ void *q;
23309+
23310+ q = kmalloc(new_sz, gfp); /* shrinking is done by allocating a fresh buffer and copying @new_sz bytes */
23311+ if (q) {
23312+ if (p) {
23313+ memcpy(q, p, new_sz);
ae9dfd79 23314+ kfree(p);
e2f27e51
AM
23315+ }
23316+ p = q;
23317+ } else
23318+ p = NULL; /* NULL is returned; the old buffer is not freed on this path */
23319+ }
23320+
23321+out:
23322+ return p; /* unchanged @p when neither branch above applied */
23323+}
23324+
23325+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
23326+ int may_shrink)
23327+{
23328+ p = au_krealloc(p, new_sz, gfp, may_shrink);
23329+ if (p && new_sz > nused)
1facf9fc 23330+ memset(p + nused, 0, new_sz - nused);
23331+ return p;
23332+}
23333+
23334+/* ---------------------------------------------------------------------- */
1facf9fc 23335+/*
23336+ * aufs caches
23337+ */
ae9dfd79 23338+struct kmem_cache *au_cache[AuCache_Last];
5afbbe0d
AM
23339+
23340+static void au_cache_fin(void)
23341+{
23342+ int i;
23343+
23344+ /*
23345+ * Make sure all delayed rcu free inodes are flushed before we
23346+ * destroy cache.
23347+ */
23348+ rcu_barrier();
23349+
23350+ /* excluding AuCache_HNOTIFY */
23351+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
23352+ for (i = 0; i < AuCache_HNOTIFY; i++) {
ae9dfd79
AM
23353+ kmem_cache_destroy(au_cache[i]);
23354+ au_cache[i] = NULL;
5afbbe0d
AM
23355+ }
23356+}
23357+
1facf9fc 23358+static int __init au_cache_init(void)
23359+{
ae9dfd79
AM
23360+ au_cache[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
23361+ if (au_cache[AuCache_DINFO])
027c5e7a 23362+ /* SLAB_DESTROY_BY_RCU */
ae9dfd79 23363+ au_cache[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
f0c0a007 23364+ au_icntnr_init_once);
ae9dfd79
AM
23365+ if (au_cache[AuCache_ICNTNR])
23366+ au_cache[AuCache_FINFO] = AuCacheCtor(au_finfo,
f0c0a007 23367+ au_fi_init_once);
ae9dfd79
AM
23368+ if (au_cache[AuCache_FINFO])
23369+ au_cache[AuCache_VDIR] = AuCache(au_vdir);
23370+ if (au_cache[AuCache_VDIR])
23371+ au_cache[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
23372+ if (au_cache[AuCache_DEHSTR])
1facf9fc 23373+ return 0;
23374+
5afbbe0d 23375+ au_cache_fin();
1facf9fc 23376+ return -ENOMEM;
23377+}
23378+
1facf9fc 23379+/* ---------------------------------------------------------------------- */
23380+
23381+int au_dir_roflags;
23382+
e49829fe 23383+#ifdef CONFIG_AUFS_SBILIST
1e00d052
AM
23384+/*
23385+ * iterate_supers_type() doesn't protect us from
23386+ * remounting (branch management)
23387+ */
ae9dfd79 23388+struct hlist_bl_head au_sbilist;
e49829fe
JR
23389+#endif
23390+
1facf9fc 23391+/*
23392+ * functions for module interface.
23393+ */
23394+MODULE_LICENSE("GPL");
23395+/* MODULE_LICENSE("GPL v2"); */
dece6358 23396+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 23397+MODULE_DESCRIPTION(AUFS_NAME
23398+ " -- Advanced multi layered unification filesystem");
23399+MODULE_VERSION(AUFS_VERSION);
c06a8ce3 23400+MODULE_ALIAS_FS(AUFS_NAME);
1facf9fc 23401+
1facf9fc 23402+/* this module parameter has no meaning when SYSFS is disabled */
23403+int sysaufs_brs = 1;
23404+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
23405+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
23406+
076b876e 23407+/* this module parameter has no meaning when USER_NS is disabled */
8cdd5066 23408+bool au_userns;
076b876e
AM
23409+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
23410+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
23411+
1facf9fc 23412+/* ---------------------------------------------------------------------- */
23413+
23414+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
23415+
23416+int au_seq_path(struct seq_file *seq, struct path *path)
23417+{
79b8bda9
AM
23418+ int err;
23419+
23420+ err = seq_path(seq, path, au_esc_chars);
ae9dfd79 23421+ if (err >= 0)
79b8bda9 23422+ err = 0;
ae9dfd79 23423+ else
79b8bda9
AM
23424+ err = -ENOMEM;
23425+
23426+ return err;
1facf9fc 23427+}
23428+
23429+/* ---------------------------------------------------------------------- */
23430+
23431+static int __init aufs_init(void) /* module entry: set up module-global state, then register the filesystem; returns 0 or the first error */
23432+{
23433+ int err, i;
23434+ char *p;
23435+
23436+ p = au_esc_chars; /* fill the escape set: 0x01-0x20, backslash, DEL, then the terminating NUL */
23437+ for (i = 1; i <= ' '; i++)
23438+ *p++ = i;
23439+ *p++ = '\\';
23440+ *p++ = '\x7f';
23441+ *p = 0;
23442+
23443+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
23444+
b95c5147
AM
23445+ memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop)); /* aufs_iop_nogetattr[] is a copy of aufs_iop[] whose ->getattr is cleared below */
23446+ for (i = 0; i < AuIop_Last; i++)
23447+ aufs_iop_nogetattr[i].getattr = NULL;
23448+
ae9dfd79 23449+ memset(au_cache, 0, sizeof(au_cache)); /* including hnotify */
f0c0a007 23450+
e49829fe 23451+ au_sbilist_init();
1facf9fc 23452+ sysaufs_brs_init();
23453+ au_debug_init();
4a4d8108 23454+ au_dy_init();
1facf9fc 23455+ err = sysaufs_init(); /* from here on each failing step jumps to the label which undoes the steps before it */
23456+ if (unlikely(err))
23457+ goto out; /* NOTE(review): this path skips au_dy_fin(), which the out_sysaufs path calls -- confirm au_dy_init() needs no undo */
e49829fe 23458+ err = au_procfs_init();
4f0767ce 23459+ if (unlikely(err))
953406b4 23460+ goto out_sysaufs;
e49829fe
JR
23461+ err = au_wkq_init();
23462+ if (unlikely(err))
23463+ goto out_procfs;
87a755f4 23464+ err = au_loopback_init();
1facf9fc 23465+ if (unlikely(err))
23466+ goto out_wkq;
87a755f4
AM
23467+ err = au_hnotify_init();
23468+ if (unlikely(err))
23469+ goto out_loopback;
1facf9fc 23470+ err = au_sysrq_init();
23471+ if (unlikely(err))
23472+ goto out_hin;
23473+ err = au_cache_init();
23474+ if (unlikely(err))
23475+ goto out_sysrq;
076b876e
AM
23476+
23477+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0; /* FS_USERNS_MOUNT only when the allow_userns module parameter is set */
1facf9fc 23478+ err = register_filesystem(&aufs_fs_type);
23479+ if (unlikely(err))
23480+ goto out_cache;
076b876e 23481+
4a4d8108
AM
23482+ /* since we define pr_fmt, call printk directly */
23483+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 23484+ goto out; /* success */
23485+
4f0767ce 23486+out_cache:
1facf9fc 23487+ au_cache_fin();
4f0767ce 23488+out_sysrq:
1facf9fc 23489+ au_sysrq_fin();
4f0767ce 23490+out_hin:
4a4d8108 23491+ au_hnotify_fin();
87a755f4
AM
23492+out_loopback:
23493+ au_loopback_fin();
4f0767ce 23494+out_wkq:
1facf9fc 23495+ au_wkq_fin();
e49829fe
JR
23496+out_procfs:
23497+ au_procfs_fin();
4f0767ce 23498+out_sysaufs:
1facf9fc 23499+ sysaufs_fin();
4a4d8108 23500+ au_dy_fin();
4f0767ce 23501+out:
1facf9fc 23502+ return err;
23503+}
23504+
23505+static void __exit aufs_exit(void) /* module exit: unregister the filesystem, then run the *_fin() calls in the same order as the error unwinding of aufs_init() */
23506+{
23507+ unregister_filesystem(&aufs_fs_type);
23508+ au_cache_fin();
23509+ au_sysrq_fin();
4a4d8108 23510+ au_hnotify_fin();
87a755f4 23511+ au_loopback_fin();
1facf9fc 23512+ au_wkq_fin();
e49829fe 23513+ au_procfs_fin();
1facf9fc 23514+ sysaufs_fin();
4a4d8108 23515+ au_dy_fin();
1facf9fc 23516+}
23517+
23518+module_init(aufs_init);
23519+module_exit(aufs_exit);
e8791d4f
AM
23520diff -urNp -x '*.orig' linux-4.9/fs/aufs/module.h linux-4.9/fs/aufs/module.h
23521--- linux-4.9/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
23522+++ linux-4.9/fs/aufs/module.h 2021-02-24 16:15:09.531573855 +0100
ae9dfd79 23523@@ -0,0 +1,101 @@
1facf9fc 23524+/*
ae9dfd79 23525+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 23526+ *
23527+ * This program, aufs is free software; you can redistribute it and/or modify
23528+ * it under the terms of the GNU General Public License as published by
23529+ * the Free Software Foundation; either version 2 of the License, or
23530+ * (at your option) any later version.
dece6358
AM
23531+ *
23532+ * This program is distributed in the hope that it will be useful,
23533+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23534+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23535+ * GNU General Public License for more details.
23536+ *
23537+ * You should have received a copy of the GNU General Public License
523b37e3 23538+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 23539+ */
23540+
23541+/*
23542+ * module initialization and module-global
23543+ */
23544+
23545+#ifndef __AUFS_MODULE_H__
23546+#define __AUFS_MODULE_H__
23547+
23548+#ifdef __KERNEL__
23549+
23550+#include <linux/slab.h>
23551+
dece6358
AM
23552+struct path;
23553+struct seq_file;
23554+
1facf9fc 23555+/* module parameters */
1facf9fc 23556+extern int sysaufs_brs;
8cdd5066 23557+extern bool au_userns;
1facf9fc 23558+
23559+/* ---------------------------------------------------------------------- */
23560+
23561+extern int au_dir_roflags;
23562+
e2f27e51
AM
23563+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink);
23564+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
23565+ int may_shrink);
23566+
23567+static inline int au_kmidx_sub(size_t sz, size_t new_sz)
23568+{
23569+#ifndef CONFIG_SLOB
23570+ return kmalloc_index(sz) - kmalloc_index(new_sz);
23571+#else
23572+ return -1; /* SLOB is untested */
23573+#endif
23574+}
23575+
1facf9fc 23576+int au_seq_path(struct seq_file *seq, struct path *path);
23577+
e49829fe
JR
23578+#ifdef CONFIG_PROC_FS
23579+/* procfs.c */
23580+int __init au_procfs_init(void);
23581+void au_procfs_fin(void);
23582+#else
23583+AuStubInt0(au_procfs_init, void);
23584+AuStubVoid(au_procfs_fin, void);
23585+#endif
23586+
4f0767ce
JR
23587+/* ---------------------------------------------------------------------- */
23588+
ae9dfd79 23589+/* kmem cache */
1facf9fc 23590+enum {
23591+ AuCache_DINFO,
23592+ AuCache_ICNTNR,
23593+ AuCache_FINFO,
23594+ AuCache_VDIR,
23595+ AuCache_DEHSTR,
7eafdf33 23596+ AuCache_HNOTIFY, /* must be last */
1facf9fc 23597+ AuCache_Last
23598+};
23599+
ae9dfd79 23600+extern struct kmem_cache *au_cache[AuCache_Last];
f0c0a007 23601+
4a4d8108
AM
23602+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
23603+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
23604+#define AuCacheCtor(type, ctor) \
23605+ kmem_cache_create(#type, sizeof(struct type), \
23606+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 23607+
1facf9fc 23608+#define AuCacheFuncs(name, index) \
4a4d8108 23609+static inline struct au_##name *au_cache_alloc_##name(void) \
ae9dfd79 23610+{ return kmem_cache_alloc(au_cache[AuCache_##index], GFP_NOFS); } \
4a4d8108 23611+static inline void au_cache_free_##name(struct au_##name *p) \
ae9dfd79 23612+{ kmem_cache_free(au_cache[AuCache_##index], p); }
1facf9fc 23613+
23614+AuCacheFuncs(dinfo, DINFO);
23615+AuCacheFuncs(icntnr, ICNTNR);
23616+AuCacheFuncs(finfo, FINFO);
23617+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
23618+AuCacheFuncs(vdir_dehstr, DEHSTR);
23619+#ifdef CONFIG_AUFS_HNOTIFY
23620+AuCacheFuncs(hnotify, HNOTIFY);
23621+#endif
1facf9fc 23622+
4a4d8108
AM
23623+#endif /* __KERNEL__ */
23624+#endif /* __AUFS_MODULE_H__ */
e8791d4f
AM
23625diff -urNp -x '*.orig' linux-4.9/fs/aufs/mvdown.c linux-4.9/fs/aufs/mvdown.c
23626--- linux-4.9/fs/aufs/mvdown.c 1970-01-01 01:00:00.000000000 +0100
23627+++ linux-4.9/fs/aufs/mvdown.c 2021-02-24 16:15:09.531573855 +0100
5afbbe0d 23628@@ -0,0 +1,704 @@
c2b27bf2 23629+/*
ae9dfd79 23630+ * Copyright (C) 2011-2018 Junjiro R. Okajima
c2b27bf2
AM
23631+ *
23632+ * This program, aufs is free software; you can redistribute it and/or modify
23633+ * it under the terms of the GNU General Public License as published by
23634+ * the Free Software Foundation; either version 2 of the License, or
23635+ * (at your option) any later version.
23636+ *
23637+ * This program is distributed in the hope that it will be useful,
23638+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23639+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23640+ * GNU General Public License for more details.
23641+ *
23642+ * You should have received a copy of the GNU General Public License
523b37e3
AM
23643+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
23644+ */
23645+
23646+/*
23647+ * move-down, opposite of copy-up
c2b27bf2
AM
23648+ */
23649+
23650+#include "aufs.h"
23651+
c2b27bf2
AM
23652+struct au_mvd_args {
23653+ struct {
c2b27bf2
AM
23654+ struct super_block *h_sb;
23655+ struct dentry *h_parent;
23656+ struct au_hinode *hdir;
392086de 23657+ struct inode *h_dir, *h_inode;
c1595e42 23658+ struct au_pin pin;
c2b27bf2
AM
23659+ } info[AUFS_MVDOWN_NARRAY];
23660+
23661+ struct aufs_mvdown mvdown;
23662+ struct dentry *dentry, *parent;
23663+ struct inode *inode, *dir;
23664+ struct super_block *sb;
23665+ aufs_bindex_t bopq, bwh, bfound;
23666+ unsigned char rename_lock;
c2b27bf2
AM
23667+};
23668+
/* shortcuts into the 'mvdown' member of struct au_mvd_args */
#define mvd_errno		mvdown.au_errno
#define mvd_bsrc		mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
#define mvd_src_brid		mvdown.stbr[AUFS_MVDOWN_UPPER].brid
#define mvd_bdst		mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
#define mvd_dst_brid		mvdown.stbr[AUFS_MVDOWN_LOWER].brid

/* shortcuts into info[AUFS_MVDOWN_UPPER], the source branch */
#define mvd_h_src_sb		info[AUFS_MVDOWN_UPPER].h_sb
#define mvd_h_src_parent	info[AUFS_MVDOWN_UPPER].h_parent
#define mvd_hdir_src		info[AUFS_MVDOWN_UPPER].hdir
#define mvd_h_src_dir		info[AUFS_MVDOWN_UPPER].h_dir
#define mvd_h_src_inode		info[AUFS_MVDOWN_UPPER].h_inode
#define mvd_pin_src		info[AUFS_MVDOWN_UPPER].pin

/* shortcuts into info[AUFS_MVDOWN_LOWER], the destination branch */
#define mvd_h_dst_sb		info[AUFS_MVDOWN_LOWER].h_sb
#define mvd_h_dst_parent	info[AUFS_MVDOWN_LOWER].h_parent
#define mvd_hdir_dst		info[AUFS_MVDOWN_LOWER].hdir
#define mvd_h_dst_dir		info[AUFS_MVDOWN_LOWER].h_dir
#define mvd_h_dst_inode		info[AUFS_MVDOWN_LOWER].h_inode
#define mvd_pin_dst		info[AUFS_MVDOWN_LOWER].pin
c2b27bf2
AM
23688+
/* pr_err() the message only when 'flag' is non-zero */
#define AU_MVD_PR(flag, ...) \
	do { \
		if (flag) \
			pr_err(__VA_ARGS__); \
	} while (0)
23693+
076b876e
AM
23694+static int find_lower_writable(struct au_mvd_args *a)
23695+{
23696+ struct super_block *sb;
5afbbe0d 23697+ aufs_bindex_t bindex, bbot;
076b876e
AM
23698+ struct au_branch *br;
23699+
23700+ sb = a->sb;
23701+ bindex = a->mvd_bsrc;
5afbbe0d 23702+ bbot = au_sbbot(sb);
076b876e 23703+ if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
5afbbe0d 23704+ for (bindex++; bindex <= bbot; bindex++) {
076b876e
AM
23705+ br = au_sbr(sb, bindex);
23706+ if (au_br_fhsm(br->br_perm)
23707+ && (!(au_br_sb(br)->s_flags & MS_RDONLY)))
23708+ return bindex;
23709+ }
23710+ else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
5afbbe0d 23711+ for (bindex++; bindex <= bbot; bindex++) {
076b876e
AM
23712+ br = au_sbr(sb, bindex);
23713+ if (!au_br_rdonly(br))
23714+ return bindex;
23715+ }
23716+ else
5afbbe0d 23717+ for (bindex++; bindex <= bbot; bindex++) {
076b876e
AM
23718+ br = au_sbr(sb, bindex);
23719+ if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
23720+ if (au_br_rdonly(br))
23721+ a->mvdown.flags
23722+ |= AUFS_MVDOWN_ROLOWER_R;
23723+ return bindex;
23724+ }
23725+ }
23726+
23727+ return -1;
23728+}
23729+
c2b27bf2 23730+/* make the parent dir on bdst */
392086de 23731+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
23732+{
23733+ int err;
23734+
23735+ err = 0;
23736+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
23737+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
23738+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
23739+ a->mvd_h_dst_parent = NULL;
5afbbe0d 23740+ if (au_dbbot(a->parent) >= a->mvd_bdst)
c2b27bf2
AM
23741+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
23742+ if (!a->mvd_h_dst_parent) {
23743+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
23744+ if (unlikely(err)) {
392086de 23745+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
c2b27bf2
AM
23746+ goto out;
23747+ }
23748+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
23749+ }
23750+
23751+out:
23752+ AuTraceErr(err);
23753+ return err;
23754+}
23755+
23756+/* lock them all */
392086de 23757+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
23758+{
23759+ int err;
23760+ struct dentry *h_trap;
23761+
23762+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
23763+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
c1595e42
JR
23764+ err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
23765+ au_opt_udba(a->sb),
23766+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
23767+ AuTraceErr(err);
23768+ if (unlikely(err)) {
23769+ AU_MVD_PR(dmsg, "pin_dst failed\n");
23770+ goto out;
23771+ }
23772+
c2b27bf2
AM
23773+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
23774+ a->rename_lock = 0;
c1595e42
JR
23775+ au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
23776+ AuLsc_DI_PARENT, AuLsc_I_PARENT3,
23777+ au_opt_udba(a->sb),
23778+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
23779+ err = au_do_pin(&a->mvd_pin_src);
23780+ AuTraceErr(err);
5527c038 23781+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
23782+ if (unlikely(err)) {
23783+ AU_MVD_PR(dmsg, "pin_src failed\n");
23784+ goto out_dst;
23785+ }
23786+ goto out; /* success */
c2b27bf2
AM
23787+ }
23788+
c2b27bf2 23789+ a->rename_lock = 1;
c1595e42
JR
23790+ au_pin_hdir_unlock(&a->mvd_pin_dst);
23791+ err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
23792+ au_opt_udba(a->sb),
23793+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
23794+ AuTraceErr(err);
5527c038 23795+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
23796+ if (unlikely(err)) {
23797+ AU_MVD_PR(dmsg, "pin_src failed\n");
23798+ au_pin_hdir_lock(&a->mvd_pin_dst);
23799+ goto out_dst;
23800+ }
23801+ au_pin_hdir_unlock(&a->mvd_pin_src);
c2b27bf2
AM
23802+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
23803+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
23804+ if (h_trap) {
23805+ err = (h_trap != a->mvd_h_src_parent);
23806+ if (err)
23807+ err = (h_trap != a->mvd_h_dst_parent);
23808+ }
23809+ BUG_ON(err); /* it should never happen */
c1595e42
JR
23810+ if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
23811+ err = -EBUSY;
23812+ AuTraceErr(err);
23813+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
23814+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
23815+ au_pin_hdir_lock(&a->mvd_pin_src);
23816+ au_unpin(&a->mvd_pin_src);
23817+ au_pin_hdir_lock(&a->mvd_pin_dst);
23818+ goto out_dst;
23819+ }
23820+ goto out; /* success */
c2b27bf2 23821+
c1595e42
JR
23822+out_dst:
23823+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
23824+out:
23825+ AuTraceErr(err);
23826+ return err;
23827+}
23828+
392086de 23829+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2 23830+{
c1595e42
JR
23831+ if (!a->rename_lock)
23832+ au_unpin(&a->mvd_pin_src);
23833+ else {
c2b27bf2
AM
23834+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
23835+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
c1595e42
JR
23836+ au_pin_hdir_lock(&a->mvd_pin_src);
23837+ au_unpin(&a->mvd_pin_src);
23838+ au_pin_hdir_lock(&a->mvd_pin_dst);
23839+ }
23840+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
23841+}
23842+
23843+/* copy-down the file */
392086de 23844+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
23845+{
23846+ int err;
23847+ struct au_cp_generic cpg = {
23848+ .dentry = a->dentry,
23849+ .bdst = a->mvd_bdst,
23850+ .bsrc = a->mvd_bsrc,
23851+ .len = -1,
c1595e42 23852+ .pin = &a->mvd_pin_dst,
c2b27bf2
AM
23853+ .flags = AuCpup_DTIME | AuCpup_HOPEN
23854+ };
23855+
23856+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
392086de
AM
23857+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
23858+ au_fset_cpup(cpg.flags, OVERWRITE);
23859+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
23860+ au_fset_cpup(cpg.flags, RWDST);
c2b27bf2
AM
23861+ err = au_sio_cpdown_simple(&cpg);
23862+ if (unlikely(err))
392086de 23863+ AU_MVD_PR(dmsg, "cpdown failed\n");
c2b27bf2
AM
23864+
23865+ AuTraceErr(err);
23866+ return err;
23867+}
23868+
23869+/*
23870+ * unlink the whiteout on bdst if exist which may be created by UDBA while we
23871+ * were sleeping
23872+ */
392086de 23873+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
23874+{
23875+ int err;
23876+ struct path h_path;
23877+ struct au_branch *br;
523b37e3 23878+ struct inode *delegated;
c2b27bf2
AM
23879+
23880+ br = au_sbr(a->sb, a->mvd_bdst);
23881+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
23882+ err = PTR_ERR(h_path.dentry);
23883+ if (IS_ERR(h_path.dentry)) {
392086de 23884+ AU_MVD_PR(dmsg, "wh_lkup failed\n");
c2b27bf2
AM
23885+ goto out;
23886+ }
23887+
23888+ err = 0;
5527c038 23889+ if (d_is_positive(h_path.dentry)) {
c2b27bf2 23890+ h_path.mnt = au_br_mnt(br);
523b37e3 23891+ delegated = NULL;
5527c038 23892+ err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
523b37e3
AM
23893+ &delegated, /*force*/0);
23894+ if (unlikely(err == -EWOULDBLOCK)) {
23895+ pr_warn("cannot retry for NFSv4 delegation"
23896+ " for an internal unlink\n");
23897+ iput(delegated);
23898+ }
c2b27bf2 23899+ if (unlikely(err))
392086de 23900+ AU_MVD_PR(dmsg, "wh_unlink failed\n");
c2b27bf2
AM
23901+ }
23902+ dput(h_path.dentry);
23903+
23904+out:
23905+ AuTraceErr(err);
23906+ return err;
23907+}
23908+
23909+/*
23910+ * unlink the topmost h_dentry
c2b27bf2 23911+ */
392086de 23912+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
23913+{
23914+ int err;
23915+ struct path h_path;
523b37e3 23916+ struct inode *delegated;
c2b27bf2
AM
23917+
23918+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
23919+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
523b37e3
AM
23920+ delegated = NULL;
23921+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
23922+ if (unlikely(err == -EWOULDBLOCK)) {
23923+ pr_warn("cannot retry for NFSv4 delegation"
23924+ " for an internal unlink\n");
23925+ iput(delegated);
23926+ }
c2b27bf2 23927+ if (unlikely(err))
392086de 23928+ AU_MVD_PR(dmsg, "unlink failed\n");
c2b27bf2
AM
23929+
23930+ AuTraceErr(err);
23931+ return err;
23932+}
23933+
076b876e
AM
23934+/* Since mvdown succeeded, we ignore an error of this function */
23935+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
23936+{
23937+ int err;
23938+ struct au_branch *br;
23939+
23940+ a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
23941+ br = au_sbr(a->sb, a->mvd_bsrc);
23942+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
23943+ if (!err) {
23944+ br = au_sbr(a->sb, a->mvd_bdst);
23945+ a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
23946+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
23947+ }
23948+ if (!err)
23949+ a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
23950+ else
23951+ AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
23952+}
23953+
c2b27bf2
AM
23954+/*
23955+ * copy-down the file and unlink the bsrc file.
23956+ * - unlink the bdst whout if exist
23957+ * - copy-down the file (with whtmp name and rename)
23958+ * - unlink the bsrc file
23959+ */
392086de 23960+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
23961+{
23962+ int err;
23963+
392086de 23964+ err = au_do_mkdir(dmsg, a);
c2b27bf2 23965+ if (!err)
392086de 23966+ err = au_do_lock(dmsg, a);
c2b27bf2
AM
23967+ if (unlikely(err))
23968+ goto out;
23969+
23970+ /*
23971+ * do not revert the activities we made on bdst since they should be
23972+ * harmless in aufs.
23973+ */
23974+
392086de 23975+ err = au_do_cpdown(dmsg, a);
c2b27bf2 23976+ if (!err)
392086de
AM
23977+ err = au_do_unlink_wh(dmsg, a);
23978+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
23979+ err = au_do_unlink(dmsg, a);
c2b27bf2
AM
23980+ if (unlikely(err))
23981+ goto out_unlock;
23982+
c1595e42
JR
23983+ AuDbg("%pd2, 0x%x, %d --> %d\n",
23984+ a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
076b876e
AM
23985+ if (find_lower_writable(a) < 0)
23986+ a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
23987+
23988+ if (a->mvdown.flags & AUFS_MVDOWN_STFS)
23989+ au_do_stfs(dmsg, a);
23990+
c2b27bf2 23991+ /* maintain internal array */
392086de
AM
23992+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
23993+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
5afbbe0d 23994+ au_set_dbtop(a->dentry, a->mvd_bdst);
392086de 23995+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
5afbbe0d 23996+ au_set_ibtop(a->inode, a->mvd_bdst);
79b8bda9
AM
23997+ } else {
23998+ /* hide the lower */
23999+ au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
5afbbe0d 24000+ au_set_dbbot(a->dentry, a->mvd_bsrc);
79b8bda9 24001+ au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
5afbbe0d 24002+ au_set_ibbot(a->inode, a->mvd_bsrc);
392086de 24003+ }
5afbbe0d
AM
24004+ if (au_dbbot(a->dentry) < a->mvd_bdst)
24005+ au_set_dbbot(a->dentry, a->mvd_bdst);
24006+ if (au_ibbot(a->inode) < a->mvd_bdst)
24007+ au_set_ibbot(a->inode, a->mvd_bdst);
c2b27bf2
AM
24008+
24009+out_unlock:
392086de 24010+ au_do_unlock(dmsg, a);
c2b27bf2
AM
24011+out:
24012+ AuTraceErr(err);
24013+ return err;
24014+}
24015+
24016+/* ---------------------------------------------------------------------- */
24017+
c2b27bf2 24018+/* make sure the file is idle */
392086de 24019+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
24020+{
24021+ int err, plinked;
c2b27bf2
AM
24022+
24023+ err = 0;
c2b27bf2 24024+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
5afbbe0d 24025+ if (au_dbtop(a->dentry) == a->mvd_bsrc
c1595e42 24026+ && au_dcount(a->dentry) == 1
c2b27bf2 24027+ && atomic_read(&a->inode->i_count) == 1
392086de 24028+ /* && a->mvd_h_src_inode->i_nlink == 1 */
c2b27bf2
AM
24029+ && (!plinked || !au_plink_test(a->inode))
24030+ && a->inode->i_nlink == 1)
24031+ goto out;
24032+
24033+ err = -EBUSY;
392086de 24034+ AU_MVD_PR(dmsg,
c1595e42 24035+ "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
5afbbe0d 24036+ a->mvd_bsrc, au_dbtop(a->dentry), au_dcount(a->dentry),
c2b27bf2 24037+ atomic_read(&a->inode->i_count), a->inode->i_nlink,
392086de 24038+ a->mvd_h_src_inode->i_nlink,
c2b27bf2
AM
24039+ plinked, plinked ? au_plink_test(a->inode) : 0);
24040+
24041+out:
24042+ AuTraceErr(err);
24043+ return err;
24044+}
24045+
24046+/* make sure the parent dir is fine */
392086de 24047+static int au_mvd_args_parent(const unsigned char dmsg,
c2b27bf2
AM
24048+ struct au_mvd_args *a)
24049+{
24050+ int err;
24051+ aufs_bindex_t bindex;
24052+
24053+ err = 0;
24054+ if (unlikely(au_alive_dir(a->parent))) {
24055+ err = -ENOENT;
392086de 24056+ AU_MVD_PR(dmsg, "parent dir is dead\n");
c2b27bf2
AM
24057+ goto out;
24058+ }
24059+
24060+ a->bopq = au_dbdiropq(a->parent);
24061+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
24062+ AuDbg("b%d\n", bindex);
24063+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
24064+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
24065+ err = -EINVAL;
392086de
AM
24066+ a->mvd_errno = EAU_MVDOWN_OPAQUE;
24067+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
c2b27bf2
AM
24068+ a->bopq, a->mvd_bdst);
24069+ }
24070+
24071+out:
24072+ AuTraceErr(err);
24073+ return err;
24074+}
24075+
392086de 24076+static int au_mvd_args_intermediate(const unsigned char dmsg,
c2b27bf2
AM
24077+ struct au_mvd_args *a)
24078+{
24079+ int err;
24080+ struct au_dinfo *dinfo, *tmp;
24081+
24082+ /* lookup the next lower positive entry */
24083+ err = -ENOMEM;
24084+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
24085+ if (unlikely(!tmp))
24086+ goto out;
24087+
24088+ a->bfound = -1;
24089+ a->bwh = -1;
24090+ dinfo = au_di(a->dentry);
24091+ au_di_cp(tmp, dinfo);
24092+ au_di_swap(tmp, dinfo);
24093+
24094+ /* returns the number of positive dentries */
5afbbe0d
AM
24095+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1,
24096+ /* AuLkup_IGNORE_PERM */ 0);
c2b27bf2
AM
24097+ if (!err)
24098+ a->bwh = au_dbwh(a->dentry);
24099+ else if (err > 0)
5afbbe0d 24100+ a->bfound = au_dbtop(a->dentry);
c2b27bf2
AM
24101+
24102+ au_di_swap(tmp, dinfo);
24103+ au_rw_write_unlock(&tmp->di_rwsem);
24104+ au_di_free(tmp);
24105+ if (unlikely(err < 0))
392086de 24106+ AU_MVD_PR(dmsg, "failed look-up lower\n");
c2b27bf2
AM
24107+
24108+ /*
24109+ * here, we have these cases.
24110+ * bfound == -1
24111+ * no positive dentry under bsrc. there are more sub-cases.
24112+ * bwh < 0
24113+ * there no whiteout, we can safely move-down.
24114+ * bwh <= bsrc
24115+ * impossible
24116+ * bsrc < bwh && bwh < bdst
24117+ * there is a whiteout on RO branch. cannot proceed.
24118+ * bwh == bdst
24119+ * there is a whiteout on the RW target branch. it should
24120+ * be removed.
24121+ * bdst < bwh
24122+ * there is a whiteout somewhere unrelated branch.
24123+ * -1 < bfound && bfound <= bsrc
24124+ * impossible.
24125+ * bfound < bdst
24126+ * found, but it is on RO branch between bsrc and bdst. cannot
24127+ * proceed.
24128+ * bfound == bdst
24129+ * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
24130+ * error.
24131+ * bdst < bfound
24132+ * found, after we create the file on bdst, it will be hidden.
24133+ */
24134+
24135+ AuDebugOn(a->bfound == -1
24136+ && a->bwh != -1
24137+ && a->bwh <= a->mvd_bsrc);
24138+ AuDebugOn(-1 < a->bfound
24139+ && a->bfound <= a->mvd_bsrc);
24140+
24141+ err = -EINVAL;
24142+ if (a->bfound == -1
24143+ && a->mvd_bsrc < a->bwh
24144+ && a->bwh != -1
24145+ && a->bwh < a->mvd_bdst) {
392086de
AM
24146+ a->mvd_errno = EAU_MVDOWN_WHITEOUT;
24147+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
c2b27bf2
AM
24148+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
24149+ goto out;
24150+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
392086de
AM
24151+ a->mvd_errno = EAU_MVDOWN_UPPER;
24152+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
c2b27bf2
AM
24153+ a->mvd_bdst, a->bfound);
24154+ goto out;
24155+ }
24156+
24157+ err = 0; /* success */
24158+
24159+out:
24160+ AuTraceErr(err);
24161+ return err;
24162+}
24163+
392086de 24164+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
24165+{
24166+ int err;
24167+
392086de
AM
24168+ err = 0;
24169+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
24170+ && a->bfound == a->mvd_bdst)
24171+ err = -EEXIST;
c2b27bf2
AM
24172+ AuTraceErr(err);
24173+ return err;
24174+}
24175+
392086de 24176+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
24177+{
24178+ int err;
24179+ struct au_branch *br;
24180+
24181+ err = -EISDIR;
24182+ if (unlikely(S_ISDIR(a->inode->i_mode)))
24183+ goto out;
24184+
24185+ err = -EINVAL;
392086de 24186+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
5afbbe0d 24187+ a->mvd_bsrc = au_ibtop(a->inode);
392086de
AM
24188+ else {
24189+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
24190+ if (unlikely(a->mvd_bsrc < 0
5afbbe0d
AM
24191+ || (a->mvd_bsrc < au_dbtop(a->dentry)
24192+ || au_dbbot(a->dentry) < a->mvd_bsrc
392086de 24193+ || !au_h_dptr(a->dentry, a->mvd_bsrc))
5afbbe0d
AM
24194+ || (a->mvd_bsrc < au_ibtop(a->inode)
24195+ || au_ibbot(a->inode) < a->mvd_bsrc
392086de
AM
24196+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
24197+ a->mvd_errno = EAU_MVDOWN_NOUPPER;
24198+ AU_MVD_PR(dmsg, "no upper\n");
24199+ goto out;
24200+ }
24201+ }
5afbbe0d 24202+ if (unlikely(a->mvd_bsrc == au_sbbot(a->sb))) {
392086de
AM
24203+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
24204+ AU_MVD_PR(dmsg, "on the bottom\n");
c2b27bf2
AM
24205+ goto out;
24206+ }
392086de 24207+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
c2b27bf2
AM
24208+ br = au_sbr(a->sb, a->mvd_bsrc);
24209+ err = au_br_rdonly(br);
392086de
AM
24210+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
24211+ if (unlikely(err))
24212+ goto out;
24213+ } else if (!(vfsub_native_ro(a->mvd_h_src_inode)
24214+ || IS_APPEND(a->mvd_h_src_inode))) {
24215+ if (err)
24216+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
24217+ /* go on */
24218+ } else
c2b27bf2
AM
24219+ goto out;
24220+
24221+ err = -EINVAL;
392086de
AM
24222+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
24223+ a->mvd_bdst = find_lower_writable(a);
24224+ if (unlikely(a->mvd_bdst < 0)) {
24225+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
24226+ AU_MVD_PR(dmsg, "no writable lower branch\n");
24227+ goto out;
24228+ }
24229+ } else {
24230+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
24231+ if (unlikely(a->mvd_bdst < 0
5afbbe0d 24232+ || au_sbbot(a->sb) < a->mvd_bdst)) {
392086de
AM
24233+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
24234+ AU_MVD_PR(dmsg, "no lower brid\n");
24235+ goto out;
24236+ }
c2b27bf2
AM
24237+ }
24238+
392086de 24239+ err = au_mvd_args_busy(dmsg, a);
c2b27bf2 24240+ if (!err)
392086de 24241+ err = au_mvd_args_parent(dmsg, a);
c2b27bf2 24242+ if (!err)
392086de 24243+ err = au_mvd_args_intermediate(dmsg, a);
c2b27bf2 24244+ if (!err)
392086de 24245+ err = au_mvd_args_exist(dmsg, a);
c2b27bf2
AM
24246+ if (!err)
24247+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
24248+
24249+out:
24250+ AuTraceErr(err);
24251+ return err;
24252+}
24253+
24254+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
24255+{
392086de
AM
24256+ int err, e;
24257+ unsigned char dmsg;
24258+ struct au_mvd_args *args;
79b8bda9 24259+ struct inode *inode;
c2b27bf2 24260+
79b8bda9 24261+ inode = d_inode(dentry);
c2b27bf2
AM
24262+ err = -EPERM;
24263+ if (unlikely(!capable(CAP_SYS_ADMIN)))
24264+ goto out;
24265+
392086de
AM
24266+ err = -ENOMEM;
24267+ args = kmalloc(sizeof(*args), GFP_NOFS);
24268+ if (unlikely(!args))
24269+ goto out;
24270+
24271+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
24272+ if (!err)
24273+ err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
c2b27bf2
AM
24274+ if (unlikely(err)) {
24275+ err = -EFAULT;
392086de
AM
24276+ AuTraceErr(err);
24277+ goto out_free;
c2b27bf2 24278+ }
392086de
AM
24279+ AuDbg("flags 0x%x\n", args->mvdown.flags);
24280+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
24281+ args->mvdown.au_errno = 0;
24282+ args->dentry = dentry;
79b8bda9 24283+ args->inode = inode;
392086de 24284+ args->sb = dentry->d_sb;
c2b27bf2 24285+
392086de
AM
24286+ err = -ENOENT;
24287+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
24288+ args->parent = dget_parent(dentry);
5527c038 24289+ args->dir = d_inode(args->parent);
febd17d6 24290+ inode_lock_nested(args->dir, I_MUTEX_PARENT);
392086de
AM
24291+ dput(args->parent);
24292+ if (unlikely(args->parent != dentry->d_parent)) {
24293+ AU_MVD_PR(dmsg, "parent dir is moved\n");
c2b27bf2
AM
24294+ goto out_dir;
24295+ }
24296+
febd17d6 24297+ inode_lock_nested(inode, I_MUTEX_CHILD);
b95c5147 24298+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
c2b27bf2
AM
24299+ if (unlikely(err))
24300+ goto out_inode;
24301+
392086de
AM
24302+ di_write_lock_parent(args->parent);
24303+ err = au_mvd_args(dmsg, args);
c2b27bf2
AM
24304+ if (unlikely(err))
24305+ goto out_parent;
24306+
392086de 24307+ err = au_do_mvdown(dmsg, args);
c2b27bf2
AM
24308+ if (unlikely(err))
24309+ goto out_parent;
c2b27bf2 24310+
392086de 24311+ au_cpup_attr_timesizes(args->dir);
79b8bda9
AM
24312+ au_cpup_attr_timesizes(inode);
24313+ if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
24314+ au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
c2b27bf2
AM
24315+ /* au_digen_dec(dentry); */
24316+
24317+out_parent:
392086de 24318+ di_write_unlock(args->parent);
c2b27bf2
AM
24319+ aufs_read_unlock(dentry, AuLock_DW);
24320+out_inode:
febd17d6 24321+ inode_unlock(inode);
c2b27bf2 24322+out_dir:
febd17d6 24323+ inode_unlock(args->dir);
392086de
AM
24324+out_free:
24325+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
24326+ if (unlikely(e))
24327+ err = -EFAULT;
ae9dfd79 24328+ kfree(args);
c2b27bf2
AM
24329+out:
24330+ AuTraceErr(err);
24331+ return err;
24332+}
e8791d4f
AM
24333diff -urNp -x '*.orig' linux-4.9/fs/aufs/opts.c linux-4.9/fs/aufs/opts.c
24334--- linux-4.9/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
24335+++ linux-4.9/fs/aufs/opts.c 2021-02-24 16:15:09.534907296 +0100
ae9dfd79 24336@@ -0,0 +1,1913 @@
1facf9fc 24337+/*
ae9dfd79 24338+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 24339+ *
24340+ * This program, aufs is free software; you can redistribute it and/or modify
24341+ * it under the terms of the GNU General Public License as published by
24342+ * the Free Software Foundation; either version 2 of the License, or
24343+ * (at your option) any later version.
dece6358
AM
24344+ *
24345+ * This program is distributed in the hope that it will be useful,
24346+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24347+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24348+ * GNU General Public License for more details.
24349+ *
24350+ * You should have received a copy of the GNU General Public License
523b37e3 24351+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 24352+ */
24353+
24354+/*
24355+ * mount options/flags
24356+ */
24357+
dece6358 24358+#include <linux/namei.h>
1facf9fc 24359+#include <linux/types.h> /* a distribution requires */
24360+#include <linux/parser.h>
24361+#include "aufs.h"
24362+
24363+/* ---------------------------------------------------------------------- */
24364+
/* tokens of the mount options, see the 'options' table */
enum {
	/* branch management */
	Opt_br,
	Opt_add,
	Opt_del,
	Opt_mod,
	Opt_append,
	Opt_prepend,
	Opt_idel,
	Opt_imod,

	/* directory handling */
	Opt_dirwh,
	Opt_rdcache,
	Opt_rdblk,
	Opt_rdhash,
	Opt_rdblk_def,
	Opt_rdhash_def,

	/* xino and xib */
	Opt_xino,
	Opt_noxino,
	Opt_trunc_xino,
	Opt_trunc_xino_v,
	Opt_notrunc_xino,
	Opt_trunc_xino_path,
	Opt_itrunc_xino,
	Opt_trunc_xib,
	Opt_notrunc_xib,

	Opt_shwh,
	Opt_noshwh,
	Opt_plink,
	Opt_noplink,
	Opt_list_plink,
	Opt_udba,
	Opt_dio,
	Opt_nodio,
	Opt_diropq_a,
	Opt_diropq_w,
	Opt_warn_perm,
	Opt_nowarn_perm,
	Opt_wbr_copyup,
	Opt_wbr_create,
	Opt_fhsm_sec,
	Opt_verbose,
	Opt_noverbose,
	Opt_sum,
	Opt_nosum,
	Opt_wsum,
	Opt_dirperm1,
	Opt_nodirperm1,
	Opt_dirren,
	Opt_nodirren,
	Opt_acl,
	Opt_noacl,

	Opt_tail,
	Opt_ignore,
	Opt_ignore_silent,
	Opt_err
};
24390+
24391+static match_table_t options = {
24392+ {Opt_br, "br=%s"},
24393+ {Opt_br, "br:%s"},
24394+
24395+ {Opt_add, "add=%d:%s"},
24396+ {Opt_add, "add:%d:%s"},
24397+ {Opt_add, "ins=%d:%s"},
24398+ {Opt_add, "ins:%d:%s"},
24399+ {Opt_append, "append=%s"},
24400+ {Opt_append, "append:%s"},
24401+ {Opt_prepend, "prepend=%s"},
24402+ {Opt_prepend, "prepend:%s"},
24403+
24404+ {Opt_del, "del=%s"},
24405+ {Opt_del, "del:%s"},
24406+ /* {Opt_idel, "idel:%d"}, */
24407+ {Opt_mod, "mod=%s"},
24408+ {Opt_mod, "mod:%s"},
24409+ /* {Opt_imod, "imod:%d:%s"}, */
24410+
24411+ {Opt_dirwh, "dirwh=%d"},
24412+
24413+ {Opt_xino, "xino=%s"},
24414+ {Opt_noxino, "noxino"},
24415+ {Opt_trunc_xino, "trunc_xino"},
24416+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
24417+ {Opt_notrunc_xino, "notrunc_xino"},
24418+ {Opt_trunc_xino_path, "trunc_xino=%s"},
24419+ {Opt_itrunc_xino, "itrunc_xino=%d"},
24420+ /* {Opt_zxino, "zxino=%s"}, */
24421+ {Opt_trunc_xib, "trunc_xib"},
24422+ {Opt_notrunc_xib, "notrunc_xib"},
24423+
e49829fe 24424+#ifdef CONFIG_PROC_FS
1facf9fc 24425+ {Opt_plink, "plink"},
e49829fe
JR
24426+#else
24427+ {Opt_ignore_silent, "plink"},
24428+#endif
24429+
1facf9fc 24430+ {Opt_noplink, "noplink"},
e49829fe 24431+
1facf9fc 24432+#ifdef CONFIG_AUFS_DEBUG
24433+ {Opt_list_plink, "list_plink"},
24434+#endif
24435+
24436+ {Opt_udba, "udba=%s"},
24437+
4a4d8108
AM
24438+ {Opt_dio, "dio"},
24439+ {Opt_nodio, "nodio"},
24440+
ae9dfd79
AM
24441+#ifdef CONFIG_AUFS_DIRREN
24442+ {Opt_dirren, "dirren"},
24443+ {Opt_nodirren, "nodirren"},
24444+#else
24445+ {Opt_ignore, "dirren"},
24446+ {Opt_ignore_silent, "nodirren"},
24447+#endif
24448+
076b876e
AM
24449+#ifdef CONFIG_AUFS_FHSM
24450+ {Opt_fhsm_sec, "fhsm_sec=%d"},
24451+#else
ae9dfd79 24452+ {Opt_ignore, "fhsm_sec=%d"},
076b876e
AM
24453+#endif
24454+
1facf9fc 24455+ {Opt_diropq_a, "diropq=always"},
24456+ {Opt_diropq_a, "diropq=a"},
24457+ {Opt_diropq_w, "diropq=whiteouted"},
24458+ {Opt_diropq_w, "diropq=w"},
24459+
24460+ {Opt_warn_perm, "warn_perm"},
24461+ {Opt_nowarn_perm, "nowarn_perm"},
24462+
24463+ /* keep them temporary */
1facf9fc 24464+ {Opt_ignore_silent, "nodlgt"},
ae9dfd79 24465+ {Opt_ignore, "clean_plink"},
1facf9fc 24466+
dece6358
AM
24467+#ifdef CONFIG_AUFS_SHWH
24468+ {Opt_shwh, "shwh"},
24469+#endif
24470+ {Opt_noshwh, "noshwh"},
24471+
076b876e
AM
24472+ {Opt_dirperm1, "dirperm1"},
24473+ {Opt_nodirperm1, "nodirperm1"},
24474+
1facf9fc 24475+ {Opt_verbose, "verbose"},
24476+ {Opt_verbose, "v"},
24477+ {Opt_noverbose, "noverbose"},
24478+ {Opt_noverbose, "quiet"},
24479+ {Opt_noverbose, "q"},
24480+ {Opt_noverbose, "silent"},
24481+
24482+ {Opt_sum, "sum"},
24483+ {Opt_nosum, "nosum"},
24484+ {Opt_wsum, "wsum"},
24485+
24486+ {Opt_rdcache, "rdcache=%d"},
24487+ {Opt_rdblk, "rdblk=%d"},
dece6358 24488+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 24489+ {Opt_rdhash, "rdhash=%d"},
dece6358 24490+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 24491+
24492+ {Opt_wbr_create, "create=%s"},
24493+ {Opt_wbr_create, "create_policy=%s"},
24494+ {Opt_wbr_copyup, "cpup=%s"},
24495+ {Opt_wbr_copyup, "copyup=%s"},
24496+ {Opt_wbr_copyup, "copyup_policy=%s"},
24497+
c1595e42
JR
24498+ /* generic VFS flag */
24499+#ifdef CONFIG_FS_POSIX_ACL
24500+ {Opt_acl, "acl"},
24501+ {Opt_noacl, "noacl"},
24502+#else
ae9dfd79 24503+ {Opt_ignore, "acl"},
c1595e42
JR
24504+ {Opt_ignore_silent, "noacl"},
24505+#endif
24506+
1facf9fc 24507+ /* internal use for the scripts */
24508+ {Opt_ignore_silent, "si=%s"},
24509+
24510+ {Opt_br, "dirs=%s"},
24511+ {Opt_ignore, "debug=%d"},
24512+ {Opt_ignore, "delete=whiteout"},
24513+ {Opt_ignore, "delete=all"},
24514+ {Opt_ignore, "imap=%s"},
24515+
1308ab2a 24516+ /* temporary workaround, due to old mount(8)? */
24517+ {Opt_ignore_silent, "relatime"},
24518+
1facf9fc 24519+ {Opt_err, NULL}
24520+};
24521+
24522+/* ---------------------------------------------------------------------- */
24523+
076b876e 24524+static const char *au_parser_pattern(int val, match_table_t tbl)
1facf9fc 24525+{
076b876e
AM
24526+ struct match_token *p;
24527+
24528+ p = tbl;
24529+ while (p->pattern) {
24530+ if (p->token == val)
24531+ return p->pattern;
24532+ p++;
1facf9fc 24533+ }
24534+ BUG();
24535+ return "??";
24536+}
24537+
076b876e
AM
24538+static const char *au_optstr(int *val, match_table_t tbl)
24539+{
24540+ struct match_token *p;
24541+ int v;
24542+
24543+ v = *val;
2000de60
JR
24544+ if (!v)
24545+ goto out;
076b876e 24546+ p = tbl;
2000de60
JR
24547+ while (p->pattern) {
24548+ if (p->token
24549+ && (v & p->token) == p->token) {
076b876e
AM
24550+ *val &= ~p->token;
24551+ return p->pattern;
24552+ }
24553+ p++;
24554+ }
2000de60
JR
24555+
24556+out:
076b876e
AM
24557+ return NULL;
24558+}
24559+
1facf9fc 24560+/* ---------------------------------------------------------------------- */
24561+
1e00d052 24562+static match_table_t brperm = {
1facf9fc 24563+ {AuBrPerm_RO, AUFS_BRPERM_RO},
24564+ {AuBrPerm_RR, AUFS_BRPERM_RR},
24565+ {AuBrPerm_RW, AUFS_BRPERM_RW},
1e00d052
AM
24566+ {0, NULL}
24567+};
1facf9fc 24568+
86dc4139 24569+static match_table_t brattr = {
076b876e
AM
24570+ /* general */
24571+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
24572+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
c1595e42 24573+ /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
86dc4139 24574+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
2000de60 24575+#ifdef CONFIG_AUFS_FHSM
076b876e 24576+ {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
2000de60
JR
24577+#endif
24578+#ifdef CONFIG_AUFS_XATTR
c1595e42
JR
24579+ {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
24580+ {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
24581+ {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
24582+ {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
24583+ {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
24584+ {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
2000de60 24585+#endif
076b876e
AM
24586+
24587+ /* ro/rr branch */
1e00d052 24588+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
076b876e
AM
24589+
24590+ /* rw branch */
24591+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
1e00d052 24592+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
076b876e 24593+
1e00d052 24594+ {0, NULL}
1facf9fc 24595+};
24596+
1e00d052
AM
24597+static int br_attr_val(char *str, match_table_t table, substring_t args[])
24598+{
24599+ int attr, v;
24600+ char *p;
24601+
24602+ attr = 0;
24603+ do {
24604+ p = strchr(str, '+');
24605+ if (p)
24606+ *p = 0;
24607+ v = match_token(str, table, args);
076b876e
AM
24608+ if (v) {
24609+ if (v & AuBrAttr_CMOO_Mask)
24610+ attr &= ~AuBrAttr_CMOO_Mask;
1e00d052 24611+ attr |= v;
076b876e 24612+ } else {
1e00d052
AM
24613+ if (p)
24614+ *p = '+';
0c3ec466 24615+ pr_warn("ignored branch attribute %s\n", str);
1e00d052
AM
24616+ break;
24617+ }
24618+ if (p)
24619+ str = p + 1;
24620+ } while (p);
24621+
24622+ return attr;
24623+}
24624+
076b876e
AM
24625+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
24626+{
24627+ int sz;
24628+ const char *p;
24629+ char *q;
24630+
076b876e
AM
24631+ q = str->a;
24632+ *q = 0;
24633+ p = au_optstr(&perm, brattr);
24634+ if (p) {
24635+ sz = strlen(p);
24636+ memcpy(q, p, sz + 1);
24637+ q += sz;
24638+ } else
24639+ goto out;
24640+
24641+ do {
24642+ p = au_optstr(&perm, brattr);
24643+ if (p) {
24644+ *q++ = '+';
24645+ sz = strlen(p);
24646+ memcpy(q, p, sz + 1);
24647+ q += sz;
24648+ }
24649+ } while (p);
24650+
24651+out:
c1595e42 24652+ return q - str->a;
076b876e
AM
24653+}
24654+
4a4d8108 24655+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 24656+{
076b876e
AM
24657+ int val, bad, sz;
24658+ char *p;
1facf9fc 24659+ substring_t args[MAX_OPT_ARGS];
076b876e 24660+ au_br_perm_str_t attr;
1facf9fc 24661+
1e00d052
AM
24662+ p = strchr(perm, '+');
24663+ if (p)
24664+ *p = 0;
24665+ val = match_token(perm, brperm, args);
24666+ if (!val) {
24667+ if (p)
24668+ *p = '+';
0c3ec466 24669+ pr_warn("ignored branch permission %s\n", perm);
1e00d052
AM
24670+ val = AuBrPerm_RO;
24671+ goto out;
24672+ }
24673+ if (!p)
24674+ goto out;
24675+
076b876e
AM
24676+ val |= br_attr_val(p + 1, brattr, args);
24677+
24678+ bad = 0;
86dc4139 24679+ switch (val & AuBrPerm_Mask) {
1e00d052
AM
24680+ case AuBrPerm_RO:
24681+ case AuBrPerm_RR:
076b876e
AM
24682+ bad = val & AuBrWAttr_Mask;
24683+ val &= ~AuBrWAttr_Mask;
1e00d052
AM
24684+ break;
24685+ case AuBrPerm_RW:
076b876e
AM
24686+ bad = val & AuBrRAttr_Mask;
24687+ val &= ~AuBrRAttr_Mask;
1e00d052
AM
24688+ break;
24689+ }
c1595e42
JR
24690+
24691+ /*
24692+ * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
24693+ * does not treat it as an error, just warning.
24694+ * this is a tiny guard for the user operation.
24695+ */
24696+ if (val & AuBrAttr_UNPIN) {
24697+ bad |= AuBrAttr_UNPIN;
24698+ val &= ~AuBrAttr_UNPIN;
24699+ }
24700+
076b876e
AM
24701+ if (unlikely(bad)) {
24702+ sz = au_do_optstr_br_attr(&attr, bad);
24703+ AuDebugOn(!sz);
24704+ pr_warn("ignored branch attribute %s\n", attr.a);
24705+ }
1e00d052
AM
24706+
24707+out:
1facf9fc 24708+ return val;
24709+}
24710+
076b876e 24711+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
1facf9fc 24712+{
076b876e
AM
24713+ au_br_perm_str_t attr;
24714+ const char *p;
24715+ char *q;
1e00d052
AM
24716+ int sz;
24717+
076b876e
AM
24718+ q = str->a;
24719+ p = au_optstr(&perm, brperm);
24720+ AuDebugOn(!p || !*p);
24721+ sz = strlen(p);
24722+ memcpy(q, p, sz + 1);
24723+ q += sz;
1e00d052 24724+
076b876e
AM
24725+ sz = au_do_optstr_br_attr(&attr, perm);
24726+ if (sz) {
24727+ *q++ = '+';
24728+ memcpy(q, attr.a, sz + 1);
1e00d052
AM
24729+ }
24730+
076b876e 24731+ AuDebugOn(strlen(str->a) >= sizeof(str->a));
1facf9fc 24732+}
24733+
24734+/* ---------------------------------------------------------------------- */
24735+
24736+static match_table_t udbalevel = {
24737+ {AuOpt_UDBA_REVAL, "reval"},
24738+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
24739+#ifdef CONFIG_AUFS_HNOTIFY
24740+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
24741+#ifdef CONFIG_AUFS_HFSNOTIFY
24742+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 24743+#endif
1facf9fc 24744+#endif
24745+ {-1, NULL}
24746+};
24747+
4a4d8108 24748+static int noinline_for_stack udba_val(char *str)
1facf9fc 24749+{
24750+ substring_t args[MAX_OPT_ARGS];
24751+
7f207e10 24752+ return match_token(str, udbalevel, args);
1facf9fc 24753+}
24754+
24755+const char *au_optstr_udba(int udba)
24756+{
076b876e 24757+ return au_parser_pattern(udba, udbalevel);
1facf9fc 24758+}
24759+
24760+/* ---------------------------------------------------------------------- */
24761+
24762+static match_table_t au_wbr_create_policy = {
24763+ {AuWbrCreate_TDP, "tdp"},
24764+ {AuWbrCreate_TDP, "top-down-parent"},
24765+ {AuWbrCreate_RR, "rr"},
24766+ {AuWbrCreate_RR, "round-robin"},
24767+ {AuWbrCreate_MFS, "mfs"},
24768+ {AuWbrCreate_MFS, "most-free-space"},
24769+ {AuWbrCreate_MFSV, "mfs:%d"},
24770+ {AuWbrCreate_MFSV, "most-free-space:%d"},
24771+
f2c43d5f
AM
24772+ /* top-down regardless the parent, and then mfs */
24773+ {AuWbrCreate_TDMFS, "tdmfs:%d"},
24774+ {AuWbrCreate_TDMFSV, "tdmfs:%d:%d"},
24775+
1facf9fc 24776+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
24777+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
24778+ {AuWbrCreate_PMFS, "pmfs"},
24779+ {AuWbrCreate_PMFSV, "pmfs:%d"},
392086de
AM
24780+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
24781+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
1facf9fc 24782+
24783+ {-1, NULL}
24784+};
24785+
dece6358
AM
24786+/*
24787+ * cf. linux/lib/parser.c and cmdline.c
24788+ * gave up calling memparse() since it uses simple_strtoull() instead of
9dbd164d 24789+ * kstrto...().
dece6358 24790+ */
4a4d8108
AM
24791+static int noinline_for_stack
24792+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 24793+{
24794+ int err;
24795+ unsigned int len;
24796+ char a[32];
24797+
24798+ err = -ERANGE;
24799+ len = s->to - s->from;
24800+ if (len + 1 <= sizeof(a)) {
24801+ memcpy(a, s->from, len);
24802+ a[len] = '\0';
9dbd164d 24803+ err = kstrtoull(a, 0, result);
1facf9fc 24804+ }
24805+ return err;
24806+}
24807+
24808+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
24809+ struct au_opt_wbr_create *create)
24810+{
24811+ int err;
24812+ unsigned long long ull;
24813+
24814+ err = 0;
24815+ if (!au_match_ull(arg, &ull))
24816+ create->mfsrr_watermark = ull;
24817+ else {
4a4d8108 24818+ pr_err("bad integer in %s\n", str);
1facf9fc 24819+ err = -EINVAL;
24820+ }
24821+
24822+ return err;
24823+}
24824+
24825+static int au_wbr_mfs_sec(substring_t *arg, char *str,
24826+ struct au_opt_wbr_create *create)
24827+{
24828+ int n, err;
24829+
24830+ err = 0;
027c5e7a 24831+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 24832+ create->mfs_second = n;
24833+ else {
4a4d8108 24834+ pr_err("bad integer in %s\n", str);
1facf9fc 24835+ err = -EINVAL;
24836+ }
24837+
24838+ return err;
24839+}
24840+
4a4d8108
AM
24841+static int noinline_for_stack
24842+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 24843+{
24844+ int err, e;
24845+ substring_t args[MAX_OPT_ARGS];
24846+
24847+ err = match_token(str, au_wbr_create_policy, args);
24848+ create->wbr_create = err;
24849+ switch (err) {
24850+ case AuWbrCreate_MFSRRV:
f2c43d5f 24851+ case AuWbrCreate_TDMFSV:
392086de 24852+ case AuWbrCreate_PMFSRRV:
1facf9fc 24853+ e = au_wbr_mfs_wmark(&args[0], str, create);
24854+ if (!e)
24855+ e = au_wbr_mfs_sec(&args[1], str, create);
24856+ if (unlikely(e))
24857+ err = e;
24858+ break;
24859+ case AuWbrCreate_MFSRR:
f2c43d5f 24860+ case AuWbrCreate_TDMFS:
392086de 24861+ case AuWbrCreate_PMFSRR:
1facf9fc 24862+ e = au_wbr_mfs_wmark(&args[0], str, create);
24863+ if (unlikely(e)) {
24864+ err = e;
24865+ break;
24866+ }
24867+ /*FALLTHROUGH*/
24868+ case AuWbrCreate_MFS:
24869+ case AuWbrCreate_PMFS:
027c5e7a 24870+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 24871+ break;
24872+ case AuWbrCreate_MFSV:
24873+ case AuWbrCreate_PMFSV:
24874+ e = au_wbr_mfs_sec(&args[0], str, create);
24875+ if (unlikely(e))
24876+ err = e;
24877+ break;
24878+ }
24879+
24880+ return err;
24881+}
24882+
24883+const char *au_optstr_wbr_create(int wbr_create)
24884+{
076b876e 24885+ return au_parser_pattern(wbr_create, au_wbr_create_policy);
1facf9fc 24886+}
24887+
24888+static match_table_t au_wbr_copyup_policy = {
24889+ {AuWbrCopyup_TDP, "tdp"},
24890+ {AuWbrCopyup_TDP, "top-down-parent"},
24891+ {AuWbrCopyup_BUP, "bup"},
24892+ {AuWbrCopyup_BUP, "bottom-up-parent"},
24893+ {AuWbrCopyup_BU, "bu"},
24894+ {AuWbrCopyup_BU, "bottom-up"},
24895+ {-1, NULL}
24896+};
24897+
4a4d8108 24898+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 24899+{
24900+ substring_t args[MAX_OPT_ARGS];
24901+
24902+ return match_token(str, au_wbr_copyup_policy, args);
24903+}
24904+
24905+const char *au_optstr_wbr_copyup(int wbr_copyup)
24906+{
076b876e 24907+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
1facf9fc 24908+}
24909+
24910+/* ---------------------------------------------------------------------- */
24911+
24912+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
24913+
/*
 * Print every parsed option in @opts via the aufs debug macros.
 * Compiled to an empty function unless CONFIG_AUFS_DEBUG is set.
 * The array is walked until the Opt_tail terminator; an option type which is
 * not listed here is a bug.
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	/* reduce stack space */
	union {
		struct au_opt_add *add;
		struct au_opt_del *del;
		struct au_opt_mod *mod;
		struct au_opt_xino *xino;
		struct au_opt_xino_itrunc *xino_itrunc;
		struct au_opt_wbr_create *create;
	} u;
	struct au_opt *opt;

	opt = opts->opt;
	while (opt->type != Opt_tail) {
		switch (opt->type) {
		case Opt_add:
			u.add = &opt->add;
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      u.add->bindex, u.add->pathname, u.add->perm,
			      u.add->path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			u.del = &opt->del;
			AuDbg("del {%s, %p}\n",
			      u.del->pathname, u.del->h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			u.mod = &opt->mod;
			AuDbg("mod {%s, 0x%x, %p}\n",
				  u.mod->path, u.mod->perm, u.mod->h_root);
			break;
		case Opt_append:
			u.add = &opt->add;
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      u.add->bindex, u.add->pathname, u.add->perm,
			      u.add->path.dentry);
			break;
		case Opt_prepend:
			u.add = &opt->add;
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      u.add->bindex, u.add->pathname, u.add->perm,
			      u.add->path.dentry);
			break;
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;
		case Opt_xino:
			u.xino = &opt->xino;
			AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			u.xino_itrunc = &opt->xino_itrunc;
			AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
			break;
		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_dirperm1:
			AuLabel(dirperm1);
			break;
		case Opt_nodirperm1:
			AuLabel(nodirperm1);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
				  opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_dio:
			AuLabel(dio);
			break;
		case Opt_nodio:
			AuLabel(nodio);
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;
		case Opt_wbr_create:
			u.create = &opt->wbr_create;
			AuDbg("create %d, %s\n", u.create->wbr_create,
				  au_optstr_wbr_create(u.create->wbr_create));
			/* print the arguments which the policy carries */
			switch (u.create->wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", u.create->mfs_second);
				break;
			case AuWbrCreate_MFSRR:
			case AuWbrCreate_TDMFS:
			case AuWbrCreate_PMFSRR: /* parsed with a watermark too */
				AuDbg("%llu watermark\n",
					  u.create->mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
			case AuWbrCreate_TDMFSV:
			case AuWbrCreate_PMFSRRV:
				AuDbg("%llu watermark, %d sec\n",
					  u.create->mfsrr_watermark,
					  u.create->mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
				  au_optstr_wbr_copyup(opt->wbr_copyup));
			break;
		case Opt_fhsm_sec:
			AuDbg("fhsm_sec %u\n", opt->fhsm_second);
			break;
		case Opt_dirren:
			AuLabel(dirren);
			break;
		case Opt_nodirren:
			AuLabel(nodirren);
			break;
		case Opt_acl:
			AuLabel(acl);
			break;
		case Opt_noacl:
			AuLabel(noacl);
			break;
		default:
			BUG();
		}
		opt++;
	}
#endif
}
25110+
25111+void au_opts_free(struct au_opts *opts)
25112+{
25113+ struct au_opt *opt;
25114+
25115+ opt = opts->opt;
25116+ while (opt->type != Opt_tail) {
25117+ switch (opt->type) {
25118+ case Opt_add:
25119+ case Opt_append:
25120+ case Opt_prepend:
25121+ path_put(&opt->add.path);
25122+ break;
25123+ case Opt_del:
25124+ case Opt_idel:
25125+ path_put(&opt->del.h_path);
25126+ break;
25127+ case Opt_mod:
25128+ case Opt_imod:
25129+ dput(opt->mod.h_root);
25130+ break;
25131+ case Opt_xino:
25132+ fput(opt->xino.file);
25133+ break;
25134+ }
25135+ opt++;
25136+ }
25137+}
25138+
25139+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
25140+ aufs_bindex_t bindex)
25141+{
25142+ int err;
25143+ struct au_opt_add *add = &opt->add;
25144+ char *p;
25145+
25146+ add->bindex = bindex;
1e00d052 25147+ add->perm = AuBrPerm_RO;
1facf9fc 25148+ add->pathname = opt_str;
25149+ p = strchr(opt_str, '=');
25150+ if (p) {
25151+ *p++ = 0;
25152+ if (*p)
25153+ add->perm = br_perm_val(p);
25154+ }
25155+
25156+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
25157+ if (!err) {
25158+ if (!p) {
25159+ add->perm = AuBrPerm_RO;
25160+ if (au_test_fs_rr(add->path.dentry->d_sb))
25161+ add->perm = AuBrPerm_RR;
25162+ else if (!bindex && !(sb_flags & MS_RDONLY))
25163+ add->perm = AuBrPerm_RW;
25164+ }
25165+ opt->type = Opt_add;
25166+ goto out;
25167+ }
4a4d8108 25168+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 25169+ err = -EINVAL;
25170+
4f0767ce 25171+out:
1facf9fc 25172+ return err;
25173+}
25174+
25175+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
25176+{
25177+ int err;
25178+
25179+ del->pathname = args[0].from;
25180+ AuDbg("del path %s\n", del->pathname);
25181+
25182+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
25183+ if (unlikely(err))
4a4d8108 25184+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 25185+
25186+ return err;
25187+}
25188+
#if 0 /* reserved for future use */
/*
 * Fill @del with the dentry and mount of the branch at index @bindex.
 * Returns 0, or -EINVAL when @bindex is out of the branch range.
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	struct dentry *root = sb->s_root;
	int err = -EINVAL;

	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbbot(sb)) {
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
		err = 0;
	} else
		pr_err("out of bounds, %d\n", bindex);
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
25213+
4a4d8108
AM
25214+static int noinline_for_stack
25215+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 25216+{
25217+ int err;
25218+ struct path path;
25219+ char *p;
25220+
25221+ err = -EINVAL;
25222+ mod->path = args[0].from;
25223+ p = strchr(mod->path, '=');
25224+ if (unlikely(!p)) {
4a4d8108 25225+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 25226+ goto out;
25227+ }
25228+
25229+ *p++ = 0;
25230+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
25231+ if (unlikely(err)) {
4a4d8108 25232+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 25233+ goto out;
25234+ }
25235+
25236+ mod->perm = br_perm_val(p);
25237+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
25238+ mod->h_root = dget(path.dentry);
25239+ path_put(&path);
25240+
4f0767ce 25241+out:
1facf9fc 25242+ return err;
25243+}
25244+
#if 0 /* reserved for future use */
/*
 * Fill @mod for the branch at index @bindex, taking the permission from the
 * second option argument.
 * Returns 0, or -EINVAL when @bindex is out of the branch range.
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	struct dentry *root = sb->s_root;
	int err = -EINVAL;

	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbbot(sb)) {
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
		err = 0;
	} else
		pr_err("out of bounds, %d\n", bindex);
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
25271+
25272+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
25273+ substring_t args[])
25274+{
25275+ int err;
25276+ struct file *file;
25277+
25278+ file = au_xino_create(sb, args[0].from, /*silent*/0);
25279+ err = PTR_ERR(file);
25280+ if (IS_ERR(file))
25281+ goto out;
25282+
25283+ err = -EINVAL;
2000de60 25284+ if (unlikely(file->f_path.dentry->d_sb == sb)) {
1facf9fc 25285+ fput(file);
4a4d8108 25286+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 25287+ goto out;
25288+ }
25289+
25290+ err = 0;
25291+ xino->file = file;
25292+ xino->path = args[0].from;
25293+
4f0767ce 25294+out:
1facf9fc 25295+ return err;
25296+}
25297+
4a4d8108
AM
25298+static int noinline_for_stack
25299+au_opts_parse_xino_itrunc_path(struct super_block *sb,
25300+ struct au_opt_xino_itrunc *xino_itrunc,
25301+ substring_t args[])
1facf9fc 25302+{
25303+ int err;
5afbbe0d 25304+ aufs_bindex_t bbot, bindex;
1facf9fc 25305+ struct path path;
25306+ struct dentry *root;
25307+
25308+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
25309+ if (unlikely(err)) {
4a4d8108 25310+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 25311+ goto out;
25312+ }
25313+
25314+ xino_itrunc->bindex = -1;
25315+ root = sb->s_root;
25316+ aufs_read_lock(root, AuLock_FLUSH);
5afbbe0d
AM
25317+ bbot = au_sbbot(sb);
25318+ for (bindex = 0; bindex <= bbot; bindex++) {
1facf9fc 25319+ if (au_h_dptr(root, bindex) == path.dentry) {
25320+ xino_itrunc->bindex = bindex;
25321+ break;
25322+ }
25323+ }
25324+ aufs_read_unlock(root, !AuLock_IR);
25325+ path_put(&path);
25326+
25327+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 25328+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 25329+ err = -EINVAL;
25330+ }
25331+
4f0767ce 25332+out:
1facf9fc 25333+ return err;
25334+}
25335+
25336+/* called without aufs lock */
25337+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
25338+{
25339+ int err, n, token;
25340+ aufs_bindex_t bindex;
25341+ unsigned char skipped;
25342+ struct dentry *root;
25343+ struct au_opt *opt, *opt_tail;
25344+ char *opt_str;
25345+ /* reduce the stack space */
25346+ union {
25347+ struct au_opt_xino_itrunc *xino_itrunc;
25348+ struct au_opt_wbr_create *create;
25349+ } u;
25350+ struct {
25351+ substring_t args[MAX_OPT_ARGS];
25352+ } *a;
25353+
25354+ err = -ENOMEM;
25355+ a = kmalloc(sizeof(*a), GFP_NOFS);
25356+ if (unlikely(!a))
25357+ goto out;
25358+
25359+ root = sb->s_root;
25360+ err = 0;
25361+ bindex = 0;
25362+ opt = opts->opt;
25363+ opt_tail = opt + opts->max_opt - 1;
25364+ opt->type = Opt_tail;
25365+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
25366+ err = -EINVAL;
25367+ skipped = 0;
25368+ token = match_token(opt_str, options, a->args);
25369+ switch (token) {
25370+ case Opt_br:
25371+ err = 0;
25372+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
25373+ && *opt_str) {
25374+ err = opt_add(opt, opt_str, opts->sb_flags,
25375+ bindex++);
25376+ if (unlikely(!err && ++opt > opt_tail)) {
25377+ err = -E2BIG;
25378+ break;
25379+ }
25380+ opt->type = Opt_tail;
25381+ skipped = 1;
25382+ }
25383+ break;
25384+ case Opt_add:
25385+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 25386+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 25387+ break;
25388+ }
25389+ bindex = n;
25390+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
25391+ bindex);
25392+ if (!err)
25393+ opt->type = token;
25394+ break;
25395+ case Opt_append:
25396+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
25397+ /*dummy bindex*/1);
25398+ if (!err)
25399+ opt->type = token;
25400+ break;
25401+ case Opt_prepend:
25402+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
25403+ /*bindex*/0);
25404+ if (!err)
25405+ opt->type = token;
25406+ break;
25407+ case Opt_del:
25408+ err = au_opts_parse_del(&opt->del, a->args);
25409+ if (!err)
25410+ opt->type = token;
25411+ break;
25412+#if 0 /* reserved for future use */
25413+ case Opt_idel:
25414+ del->pathname = "(indexed)";
25415+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 25416+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 25417+ break;
25418+ }
25419+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
25420+ if (!err)
25421+ opt->type = token;
25422+ break;
25423+#endif
25424+ case Opt_mod:
25425+ err = au_opts_parse_mod(&opt->mod, a->args);
25426+ if (!err)
25427+ opt->type = token;
25428+ break;
25429+#ifdef IMOD /* reserved for future use */
25430+ case Opt_imod:
25431+ u.mod->path = "(indexed)";
25432+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 25433+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 25434+ break;
25435+ }
25436+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
25437+ if (!err)
25438+ opt->type = token;
25439+ break;
25440+#endif
25441+ case Opt_xino:
25442+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
25443+ if (!err)
25444+ opt->type = token;
25445+ break;
25446+
25447+ case Opt_trunc_xino_path:
25448+ err = au_opts_parse_xino_itrunc_path
25449+ (sb, &opt->xino_itrunc, a->args);
25450+ if (!err)
25451+ opt->type = token;
25452+ break;
25453+
25454+ case Opt_itrunc_xino:
25455+ u.xino_itrunc = &opt->xino_itrunc;
25456+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 25457+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 25458+ break;
25459+ }
25460+ u.xino_itrunc->bindex = n;
25461+ aufs_read_lock(root, AuLock_FLUSH);
5afbbe0d 25462+ if (n < 0 || au_sbbot(sb) < n) {
4a4d8108 25463+ pr_err("out of bounds, %d\n", n);
1facf9fc 25464+ aufs_read_unlock(root, !AuLock_IR);
25465+ break;
25466+ }
25467+ aufs_read_unlock(root, !AuLock_IR);
25468+ err = 0;
25469+ opt->type = token;
25470+ break;
25471+
25472+ case Opt_dirwh:
25473+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
25474+ break;
25475+ err = 0;
25476+ opt->type = token;
25477+ break;
25478+
25479+ case Opt_rdcache:
027c5e7a
AM
25480+ if (unlikely(match_int(&a->args[0], &n))) {
25481+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 25482+ break;
027c5e7a
AM
25483+ }
25484+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
25485+ pr_err("rdcache must be smaller than %d\n",
25486+ AUFS_RDCACHE_MAX);
25487+ break;
25488+ }
25489+ opt->rdcache = n;
1facf9fc 25490+ err = 0;
25491+ opt->type = token;
25492+ break;
25493+ case Opt_rdblk:
25494+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 25495+ || n < 0
1facf9fc 25496+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 25497+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 25498+ break;
25499+ }
1308ab2a 25500+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
25501+ pr_err("rdblk must be larger than %d\n",
25502+ NAME_MAX);
1facf9fc 25503+ break;
25504+ }
25505+ opt->rdblk = n;
25506+ err = 0;
25507+ opt->type = token;
25508+ break;
25509+ case Opt_rdhash:
25510+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 25511+ || n < 0
1facf9fc 25512+ || n * sizeof(struct hlist_head)
25513+ > KMALLOC_MAX_SIZE)) {
4a4d8108 25514+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 25515+ break;
25516+ }
25517+ opt->rdhash = n;
25518+ err = 0;
25519+ opt->type = token;
25520+ break;
25521+
25522+ case Opt_trunc_xino:
25523+ case Opt_notrunc_xino:
25524+ case Opt_noxino:
25525+ case Opt_trunc_xib:
25526+ case Opt_notrunc_xib:
dece6358
AM
25527+ case Opt_shwh:
25528+ case Opt_noshwh:
076b876e
AM
25529+ case Opt_dirperm1:
25530+ case Opt_nodirperm1:
1facf9fc 25531+ case Opt_plink:
25532+ case Opt_noplink:
25533+ case Opt_list_plink:
4a4d8108
AM
25534+ case Opt_dio:
25535+ case Opt_nodio:
1facf9fc 25536+ case Opt_diropq_a:
25537+ case Opt_diropq_w:
25538+ case Opt_warn_perm:
25539+ case Opt_nowarn_perm:
1facf9fc 25540+ case Opt_verbose:
25541+ case Opt_noverbose:
25542+ case Opt_sum:
25543+ case Opt_nosum:
25544+ case Opt_wsum:
dece6358
AM
25545+ case Opt_rdblk_def:
25546+ case Opt_rdhash_def:
ae9dfd79
AM
25547+ case Opt_dirren:
25548+ case Opt_nodirren:
c1595e42
JR
25549+ case Opt_acl:
25550+ case Opt_noacl:
1facf9fc 25551+ err = 0;
25552+ opt->type = token;
25553+ break;
25554+
25555+ case Opt_udba:
25556+ opt->udba = udba_val(a->args[0].from);
25557+ if (opt->udba >= 0) {
25558+ err = 0;
25559+ opt->type = token;
25560+ } else
4a4d8108 25561+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 25562+ break;
25563+
25564+ case Opt_wbr_create:
25565+ u.create = &opt->wbr_create;
25566+ u.create->wbr_create
25567+ = au_wbr_create_val(a->args[0].from, u.create);
25568+ if (u.create->wbr_create >= 0) {
25569+ err = 0;
25570+ opt->type = token;
25571+ } else
4a4d8108 25572+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 25573+ break;
25574+ case Opt_wbr_copyup:
25575+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
25576+ if (opt->wbr_copyup >= 0) {
25577+ err = 0;
25578+ opt->type = token;
25579+ } else
4a4d8108 25580+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 25581+ break;
25582+
076b876e
AM
25583+ case Opt_fhsm_sec:
25584+ if (unlikely(match_int(&a->args[0], &n)
25585+ || n < 0)) {
25586+ pr_err("bad integer in %s\n", opt_str);
25587+ break;
25588+ }
25589+ if (sysaufs_brs) {
25590+ opt->fhsm_second = n;
25591+ opt->type = token;
25592+ } else
25593+ pr_warn("ignored %s\n", opt_str);
25594+ err = 0;
25595+ break;
25596+
1facf9fc 25597+ case Opt_ignore:
0c3ec466 25598+ pr_warn("ignored %s\n", opt_str);
1facf9fc 25599+ /*FALLTHROUGH*/
25600+ case Opt_ignore_silent:
25601+ skipped = 1;
25602+ err = 0;
25603+ break;
25604+ case Opt_err:
4a4d8108 25605+ pr_err("unknown option %s\n", opt_str);
1facf9fc 25606+ break;
25607+ }
25608+
25609+ if (!err && !skipped) {
25610+ if (unlikely(++opt > opt_tail)) {
25611+ err = -E2BIG;
25612+ opt--;
25613+ opt->type = Opt_tail;
25614+ break;
25615+ }
25616+ opt->type = Opt_tail;
25617+ }
25618+ }
25619+
ae9dfd79 25620+ kfree(a);
1facf9fc 25621+ dump_opts(opts);
25622+ if (unlikely(err))
25623+ au_opts_free(opts);
25624+
4f0767ce 25625+out:
1facf9fc 25626+ return err;
25627+}
25628+
25629+static int au_opt_wbr_create(struct super_block *sb,
25630+ struct au_opt_wbr_create *create)
25631+{
25632+ int err;
25633+ struct au_sbinfo *sbinfo;
25634+
dece6358
AM
25635+ SiMustWriteLock(sb);
25636+
1facf9fc 25637+ err = 1; /* handled */
25638+ sbinfo = au_sbi(sb);
25639+ if (sbinfo->si_wbr_create_ops->fin) {
25640+ err = sbinfo->si_wbr_create_ops->fin(sb);
25641+ if (!err)
25642+ err = 1;
25643+ }
25644+
25645+ sbinfo->si_wbr_create = create->wbr_create;
25646+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
25647+ switch (create->wbr_create) {
25648+ case AuWbrCreate_MFSRRV:
25649+ case AuWbrCreate_MFSRR:
f2c43d5f
AM
25650+ case AuWbrCreate_TDMFS:
25651+ case AuWbrCreate_TDMFSV:
392086de
AM
25652+ case AuWbrCreate_PMFSRR:
25653+ case AuWbrCreate_PMFSRRV:
1facf9fc 25654+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
25655+ /*FALLTHROUGH*/
25656+ case AuWbrCreate_MFS:
25657+ case AuWbrCreate_MFSV:
25658+ case AuWbrCreate_PMFS:
25659+ case AuWbrCreate_PMFSV:
e49829fe
JR
25660+ sbinfo->si_wbr_mfs.mfs_expire
25661+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 25662+ break;
25663+ }
25664+
25665+ if (sbinfo->si_wbr_create_ops->init)
25666+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
25667+
25668+ return err;
25669+}
25670+
25671+/*
25672+ * returns,
25673+ * plus: processed without an error
25674+ * zero: unprocessed
25675+ */
25676+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
25677+ struct au_opts *opts)
25678+{
25679+ int err;
25680+ struct au_sbinfo *sbinfo;
25681+
dece6358
AM
25682+ SiMustWriteLock(sb);
25683+
1facf9fc 25684+ err = 1; /* handled */
25685+ sbinfo = au_sbi(sb);
25686+ switch (opt->type) {
25687+ case Opt_udba:
25688+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
25689+ sbinfo->si_mntflags |= opt->udba;
25690+ opts->given_udba |= opt->udba;
25691+ break;
25692+
25693+ case Opt_plink:
25694+ au_opt_set(sbinfo->si_mntflags, PLINK);
25695+ break;
25696+ case Opt_noplink:
25697+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 25698+ au_plink_put(sb, /*verbose*/1);
1facf9fc 25699+ au_opt_clr(sbinfo->si_mntflags, PLINK);
25700+ break;
25701+ case Opt_list_plink:
25702+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
25703+ au_plink_list(sb);
25704+ break;
25705+
4a4d8108
AM
25706+ case Opt_dio:
25707+ au_opt_set(sbinfo->si_mntflags, DIO);
25708+ au_fset_opts(opts->flags, REFRESH_DYAOP);
25709+ break;
25710+ case Opt_nodio:
25711+ au_opt_clr(sbinfo->si_mntflags, DIO);
25712+ au_fset_opts(opts->flags, REFRESH_DYAOP);
25713+ break;
25714+
076b876e
AM
25715+ case Opt_fhsm_sec:
25716+ au_fhsm_set(sbinfo, opt->fhsm_second);
25717+ break;
25718+
1facf9fc 25719+ case Opt_diropq_a:
25720+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
25721+ break;
25722+ case Opt_diropq_w:
25723+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
25724+ break;
25725+
25726+ case Opt_warn_perm:
25727+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
25728+ break;
25729+ case Opt_nowarn_perm:
25730+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
25731+ break;
25732+
1facf9fc 25733+ case Opt_verbose:
25734+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
25735+ break;
25736+ case Opt_noverbose:
25737+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
25738+ break;
25739+
25740+ case Opt_sum:
25741+ au_opt_set(sbinfo->si_mntflags, SUM);
25742+ break;
25743+ case Opt_wsum:
25744+ au_opt_clr(sbinfo->si_mntflags, SUM);
25745+ au_opt_set(sbinfo->si_mntflags, SUM_W);
25746+ case Opt_nosum:
25747+ au_opt_clr(sbinfo->si_mntflags, SUM);
25748+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
25749+ break;
25750+
25751+ case Opt_wbr_create:
25752+ err = au_opt_wbr_create(sb, &opt->wbr_create);
25753+ break;
25754+ case Opt_wbr_copyup:
25755+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
25756+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
25757+ break;
25758+
25759+ case Opt_dirwh:
25760+ sbinfo->si_dirwh = opt->dirwh;
25761+ break;
25762+
25763+ case Opt_rdcache:
e49829fe
JR
25764+ sbinfo->si_rdcache
25765+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 25766+ break;
25767+ case Opt_rdblk:
25768+ sbinfo->si_rdblk = opt->rdblk;
25769+ break;
dece6358
AM
25770+ case Opt_rdblk_def:
25771+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
25772+ break;
1facf9fc 25773+ case Opt_rdhash:
25774+ sbinfo->si_rdhash = opt->rdhash;
25775+ break;
dece6358
AM
25776+ case Opt_rdhash_def:
25777+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
25778+ break;
25779+
25780+ case Opt_shwh:
25781+ au_opt_set(sbinfo->si_mntflags, SHWH);
25782+ break;
25783+ case Opt_noshwh:
25784+ au_opt_clr(sbinfo->si_mntflags, SHWH);
25785+ break;
1facf9fc 25786+
076b876e
AM
25787+ case Opt_dirperm1:
25788+ au_opt_set(sbinfo->si_mntflags, DIRPERM1);
25789+ break;
25790+ case Opt_nodirperm1:
25791+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
25792+ break;
25793+
1facf9fc 25794+ case Opt_trunc_xino:
25795+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
25796+ break;
25797+ case Opt_notrunc_xino:
25798+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
25799+ break;
25800+
25801+ case Opt_trunc_xino_path:
25802+ case Opt_itrunc_xino:
25803+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
25804+ if (!err)
25805+ err = 1;
25806+ break;
25807+
25808+ case Opt_trunc_xib:
25809+ au_fset_opts(opts->flags, TRUNC_XIB);
25810+ break;
25811+ case Opt_notrunc_xib:
25812+ au_fclr_opts(opts->flags, TRUNC_XIB);
25813+ break;
25814+
ae9dfd79
AM
25815+ case Opt_dirren:
25816+ err = 1;
25817+ if (!au_opt_test(sbinfo->si_mntflags, DIRREN)) {
25818+ err = au_dr_opt_set(sb);
25819+ if (!err)
25820+ err = 1;
25821+ }
25822+ if (err == 1)
25823+ au_opt_set(sbinfo->si_mntflags, DIRREN);
25824+ break;
25825+ case Opt_nodirren:
25826+ err = 1;
25827+ if (au_opt_test(sbinfo->si_mntflags, DIRREN)) {
25828+ err = au_dr_opt_clr(sb, au_ftest_opts(opts->flags,
25829+ DR_FLUSHED));
25830+ if (!err)
25831+ err = 1;
25832+ }
25833+ if (err == 1)
25834+ au_opt_clr(sbinfo->si_mntflags, DIRREN);
25835+ break;
25836+
c1595e42
JR
25837+ case Opt_acl:
25838+ sb->s_flags |= MS_POSIXACL;
25839+ break;
25840+ case Opt_noacl:
25841+ sb->s_flags &= ~MS_POSIXACL;
25842+ break;
25843+
1facf9fc 25844+ default:
25845+ err = 0;
25846+ break;
25847+ }
25848+
25849+ return err;
25850+}
25851+
25852+/*
25853+ * returns tri-state.
25854+ * plus: processed without an error
25855+ * zero: unprocessed
25856+ * minus: error
25857+ */
25858+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
25859+ struct au_opts *opts)
25860+{
25861+ int err, do_refresh;
25862+
25863+ err = 0;
25864+ switch (opt->type) {
25865+ case Opt_append:
5afbbe0d 25866+ opt->add.bindex = au_sbbot(sb) + 1;
1facf9fc 25867+ if (opt->add.bindex < 0)
25868+ opt->add.bindex = 0;
25869+ goto add;
25870+ case Opt_prepend:
25871+ opt->add.bindex = 0;
f6b6e03d 25872+ add: /* indented label */
1facf9fc 25873+ case Opt_add:
25874+ err = au_br_add(sb, &opt->add,
25875+ au_ftest_opts(opts->flags, REMOUNT));
25876+ if (!err) {
25877+ err = 1;
027c5e7a 25878+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 25879+ }
25880+ break;
25881+
25882+ case Opt_del:
25883+ case Opt_idel:
25884+ err = au_br_del(sb, &opt->del,
25885+ au_ftest_opts(opts->flags, REMOUNT));
25886+ if (!err) {
25887+ err = 1;
25888+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 25889+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 25890+ }
25891+ break;
25892+
25893+ case Opt_mod:
25894+ case Opt_imod:
25895+ err = au_br_mod(sb, &opt->mod,
25896+ au_ftest_opts(opts->flags, REMOUNT),
25897+ &do_refresh);
25898+ if (!err) {
25899+ err = 1;
027c5e7a
AM
25900+ if (do_refresh)
25901+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 25902+ }
25903+ break;
25904+ }
1facf9fc 25905+ return err;
25906+}
25907+
25908+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
25909+ struct au_opt_xino **opt_xino,
25910+ struct au_opts *opts)
25911+{
25912+ int err;
5afbbe0d 25913+ aufs_bindex_t bbot, bindex;
1facf9fc 25914+ struct dentry *root, *parent, *h_root;
25915+
25916+ err = 0;
25917+ switch (opt->type) {
25918+ case Opt_xino:
25919+ err = au_xino_set(sb, &opt->xino,
25920+ !!au_ftest_opts(opts->flags, REMOUNT));
25921+ if (unlikely(err))
25922+ break;
25923+
25924+ *opt_xino = &opt->xino;
25925+ au_xino_brid_set(sb, -1);
25926+
25927+ /* safe d_parent access */
2000de60 25928+ parent = opt->xino.file->f_path.dentry->d_parent;
1facf9fc 25929+ root = sb->s_root;
5afbbe0d
AM
25930+ bbot = au_sbbot(sb);
25931+ for (bindex = 0; bindex <= bbot; bindex++) {
1facf9fc 25932+ h_root = au_h_dptr(root, bindex);
25933+ if (h_root == parent) {
25934+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
25935+ break;
25936+ }
25937+ }
25938+ break;
25939+
25940+ case Opt_noxino:
25941+ au_xino_clr(sb);
25942+ au_xino_brid_set(sb, -1);
25943+ *opt_xino = (void *)-1;
25944+ break;
25945+ }
25946+
25947+ return err;
25948+}
25949+
25950+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
25951+ unsigned int pending)
25952+{
076b876e 25953+ int err, fhsm;
5afbbe0d 25954+ aufs_bindex_t bindex, bbot;
79b8bda9 25955+ unsigned char do_plink, skip, do_free, can_no_dreval;
1facf9fc 25956+ struct au_branch *br;
25957+ struct au_wbr *wbr;
79b8bda9 25958+ struct dentry *root, *dentry;
1facf9fc 25959+ struct inode *dir, *h_dir;
25960+ struct au_sbinfo *sbinfo;
25961+ struct au_hinode *hdir;
25962+
dece6358
AM
25963+ SiMustAnyLock(sb);
25964+
1facf9fc 25965+ sbinfo = au_sbi(sb);
25966+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
25967+
dece6358
AM
25968+ if (!(sb_flags & MS_RDONLY)) {
25969+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
0c3ec466 25970+ pr_warn("first branch should be rw\n");
dece6358 25971+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
febd17d6 25972+ pr_warn_once("shwh should be used with ro\n");
dece6358 25973+ }
1facf9fc 25974+
4a4d8108 25975+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 25976+ && !au_opt_test(sbinfo->si_mntflags, XINO))
febd17d6 25977+ pr_warn_once("udba=*notify requires xino\n");
1facf9fc 25978+
076b876e 25979+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
febd17d6
JR
25980+ pr_warn_once("dirperm1 breaks the protection"
25981+ " by the permission bits on the lower branch\n");
076b876e 25982+
1facf9fc 25983+ err = 0;
076b876e 25984+ fhsm = 0;
1facf9fc 25985+ root = sb->s_root;
5527c038 25986+ dir = d_inode(root);
1facf9fc 25987+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
79b8bda9
AM
25988+ can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
25989+ UDBA_NONE);
5afbbe0d
AM
25990+ bbot = au_sbbot(sb);
25991+ for (bindex = 0; !err && bindex <= bbot; bindex++) {
1facf9fc 25992+ skip = 0;
25993+ h_dir = au_h_iptr(dir, bindex);
25994+ br = au_sbr(sb, bindex);
1facf9fc 25995+
c1595e42
JR
25996+ if ((br->br_perm & AuBrAttr_ICEX)
25997+ && !h_dir->i_op->listxattr)
25998+ br->br_perm &= ~AuBrAttr_ICEX;
25999+#if 0
26000+ if ((br->br_perm & AuBrAttr_ICEX_SEC)
26001+ && (au_br_sb(br)->s_flags & MS_NOSEC))
26002+ br->br_perm &= ~AuBrAttr_ICEX_SEC;
26003+#endif
26004+
26005+ do_free = 0;
1facf9fc 26006+ wbr = br->br_wbr;
26007+ if (wbr)
26008+ wbr_wh_read_lock(wbr);
26009+
1e00d052 26010+ if (!au_br_writable(br->br_perm)) {
1facf9fc 26011+ do_free = !!wbr;
26012+ skip = (!wbr
26013+ || (!wbr->wbr_whbase
26014+ && !wbr->wbr_plink
26015+ && !wbr->wbr_orph));
1e00d052 26016+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 26017+ /* skip = (!br->br_whbase && !br->br_orph); */
26018+ skip = (!wbr || !wbr->wbr_whbase);
26019+ if (skip && wbr) {
26020+ if (do_plink)
26021+ skip = !!wbr->wbr_plink;
26022+ else
26023+ skip = !wbr->wbr_plink;
26024+ }
1e00d052 26025+ } else {
1facf9fc 26026+ /* skip = (br->br_whbase && br->br_orph); */
26027+ skip = (wbr && wbr->wbr_whbase);
26028+ if (skip) {
26029+ if (do_plink)
26030+ skip = !!wbr->wbr_plink;
26031+ else
26032+ skip = !wbr->wbr_plink;
26033+ }
1facf9fc 26034+ }
26035+ if (wbr)
26036+ wbr_wh_read_unlock(wbr);
26037+
79b8bda9
AM
26038+ if (can_no_dreval) {
26039+ dentry = br->br_path.dentry;
26040+ spin_lock(&dentry->d_lock);
26041+ if (dentry->d_flags &
26042+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
26043+ can_no_dreval = 0;
26044+ spin_unlock(&dentry->d_lock);
26045+ }
26046+
076b876e
AM
26047+ if (au_br_fhsm(br->br_perm)) {
26048+ fhsm++;
26049+ AuDebugOn(!br->br_fhsm);
26050+ }
26051+
1facf9fc 26052+ if (skip)
26053+ continue;
26054+
26055+ hdir = au_hi(dir, bindex);
5afbbe0d 26056+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 26057+ if (wbr)
26058+ wbr_wh_write_lock(wbr);
86dc4139 26059+ err = au_wh_init(br, sb);
1facf9fc 26060+ if (wbr)
26061+ wbr_wh_write_unlock(wbr);
5afbbe0d 26062+ au_hn_inode_unlock(hdir);
1facf9fc 26063+
26064+ if (!err && do_free) {
ae9dfd79 26065+ kfree(wbr);
1facf9fc 26066+ br->br_wbr = NULL;
26067+ }
26068+ }
26069+
79b8bda9
AM
26070+ if (can_no_dreval)
26071+ au_fset_si(sbinfo, NO_DREVAL);
26072+ else
26073+ au_fclr_si(sbinfo, NO_DREVAL);
26074+
c1595e42 26075+ if (fhsm >= 2) {
076b876e 26076+ au_fset_si(sbinfo, FHSM);
5afbbe0d 26077+ for (bindex = bbot; bindex >= 0; bindex--) {
c1595e42
JR
26078+ br = au_sbr(sb, bindex);
26079+ if (au_br_fhsm(br->br_perm)) {
26080+ au_fhsm_set_bottom(sb, bindex);
26081+ break;
26082+ }
26083+ }
26084+ } else {
076b876e 26085+ au_fclr_si(sbinfo, FHSM);
c1595e42
JR
26086+ au_fhsm_set_bottom(sb, -1);
26087+ }
076b876e 26088+
1facf9fc 26089+ return err;
26090+}
26091+
26092+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
26093+{
26094+ int err;
26095+ unsigned int tmp;
5afbbe0d 26096+ aufs_bindex_t bindex, bbot;
1facf9fc 26097+ struct au_opt *opt;
26098+ struct au_opt_xino *opt_xino, xino;
26099+ struct au_sbinfo *sbinfo;
027c5e7a 26100+ struct au_branch *br;
076b876e 26101+ struct inode *dir;
1facf9fc 26102+
dece6358
AM
26103+ SiMustWriteLock(sb);
26104+
1facf9fc 26105+ err = 0;
26106+ opt_xino = NULL;
26107+ opt = opts->opt;
26108+ while (err >= 0 && opt->type != Opt_tail)
26109+ err = au_opt_simple(sb, opt++, opts);
26110+ if (err > 0)
26111+ err = 0;
26112+ else if (unlikely(err < 0))
26113+ goto out;
26114+
26115+ /* disable xino and udba temporarily */
26116+ sbinfo = au_sbi(sb);
26117+ tmp = sbinfo->si_mntflags;
26118+ au_opt_clr(sbinfo->si_mntflags, XINO);
26119+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
26120+
26121+ opt = opts->opt;
26122+ while (err >= 0 && opt->type != Opt_tail)
26123+ err = au_opt_br(sb, opt++, opts);
26124+ if (err > 0)
26125+ err = 0;
26126+ else if (unlikely(err < 0))
26127+ goto out;
26128+
5afbbe0d
AM
26129+ bbot = au_sbbot(sb);
26130+ if (unlikely(bbot < 0)) {
1facf9fc 26131+ err = -EINVAL;
4a4d8108 26132+ pr_err("no branches\n");
1facf9fc 26133+ goto out;
26134+ }
26135+
26136+ if (au_opt_test(tmp, XINO))
26137+ au_opt_set(sbinfo->si_mntflags, XINO);
26138+ opt = opts->opt;
26139+ while (!err && opt->type != Opt_tail)
26140+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
26141+ if (unlikely(err))
26142+ goto out;
26143+
26144+ err = au_opts_verify(sb, sb->s_flags, tmp);
26145+ if (unlikely(err))
26146+ goto out;
26147+
26148+ /* restore xino */
26149+ if (au_opt_test(tmp, XINO) && !opt_xino) {
26150+ xino.file = au_xino_def(sb);
26151+ err = PTR_ERR(xino.file);
26152+ if (IS_ERR(xino.file))
26153+ goto out;
26154+
26155+ err = au_xino_set(sb, &xino, /*remount*/0);
26156+ fput(xino.file);
26157+ if (unlikely(err))
26158+ goto out;
26159+ }
26160+
26161+ /* restore udba */
027c5e7a 26162+ tmp &= AuOptMask_UDBA;
1facf9fc 26163+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a 26164+ sbinfo->si_mntflags |= tmp;
5afbbe0d
AM
26165+ bbot = au_sbbot(sb);
26166+ for (bindex = 0; bindex <= bbot; bindex++) {
027c5e7a
AM
26167+ br = au_sbr(sb, bindex);
26168+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
26169+ if (unlikely(err))
26170+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
26171+ bindex, err);
26172+ /* go on even if err */
26173+ }
4a4d8108 26174+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
5527c038 26175+ dir = d_inode(sb->s_root);
4a4d8108 26176+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 26177+ }
26178+
4f0767ce 26179+out:
1facf9fc 26180+ return err;
26181+}
26182+
26183+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
26184+{
26185+ int err, rerr;
79b8bda9 26186+ unsigned char no_dreval;
1facf9fc 26187+ struct inode *dir;
26188+ struct au_opt_xino *opt_xino;
26189+ struct au_opt *opt;
26190+ struct au_sbinfo *sbinfo;
26191+
dece6358
AM
26192+ SiMustWriteLock(sb);
26193+
ae9dfd79
AM
26194+ err = au_dr_opt_flush(sb);
26195+ if (unlikely(err))
26196+ goto out;
26197+ au_fset_opts(opts->flags, DR_FLUSHED);
26198+
5527c038 26199+ dir = d_inode(sb->s_root);
1facf9fc 26200+ sbinfo = au_sbi(sb);
1facf9fc 26201+ opt_xino = NULL;
26202+ opt = opts->opt;
26203+ while (err >= 0 && opt->type != Opt_tail) {
26204+ err = au_opt_simple(sb, opt, opts);
26205+ if (!err)
26206+ err = au_opt_br(sb, opt, opts);
26207+ if (!err)
26208+ err = au_opt_xino(sb, opt, &opt_xino, opts);
26209+ opt++;
26210+ }
26211+ if (err > 0)
26212+ err = 0;
26213+ AuTraceErr(err);
26214+ /* go on even if err */
26215+
79b8bda9 26216+ no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
1facf9fc 26217+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
26218+ if (unlikely(rerr && !err))
26219+ err = rerr;
26220+
79b8bda9 26221+ if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
b95c5147 26222+ au_fset_opts(opts->flags, REFRESH_IDOP);
79b8bda9 26223+
1facf9fc 26224+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
26225+ rerr = au_xib_trunc(sb);
26226+ if (unlikely(rerr && !err))
26227+ err = rerr;
26228+ }
26229+
26230+ /* will be handled by the caller */
027c5e7a 26231+ if (!au_ftest_opts(opts->flags, REFRESH)
79b8bda9
AM
26232+ && (opts->given_udba
26233+ || au_opt_test(sbinfo->si_mntflags, XINO)
b95c5147 26234+ || au_ftest_opts(opts->flags, REFRESH_IDOP)
79b8bda9 26235+ ))
027c5e7a 26236+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 26237+
26238+ AuDbg("status 0x%x\n", opts->flags);
ae9dfd79
AM
26239+
26240+out:
1facf9fc 26241+ return err;
26242+}
26243+
26244+/* ---------------------------------------------------------------------- */
26245+
26246+unsigned int au_opt_udba(struct super_block *sb)
26247+{
26248+ return au_mntflags(sb) & AuOptMask_UDBA;
26249+}
e8791d4f
AM
26250diff -urNp -x '*.orig' linux-4.9/fs/aufs/opts.h linux-4.9/fs/aufs/opts.h
26251--- linux-4.9/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
26252+++ linux-4.9/fs/aufs/opts.h 2021-02-24 16:15:09.534907296 +0100
ae9dfd79 26253@@ -0,0 +1,224 @@
1facf9fc 26254+/*
ae9dfd79 26255+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 26256+ *
26257+ * This program, aufs is free software; you can redistribute it and/or modify
26258+ * it under the terms of the GNU General Public License as published by
26259+ * the Free Software Foundation; either version 2 of the License, or
26260+ * (at your option) any later version.
dece6358
AM
26261+ *
26262+ * This program is distributed in the hope that it will be useful,
26263+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26264+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26265+ * GNU General Public License for more details.
26266+ *
26267+ * You should have received a copy of the GNU General Public License
523b37e3 26268+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26269+ */
26270+
26271+/*
26272+ * mount options/flags
26273+ */
26274+
26275+#ifndef __AUFS_OPTS_H__
26276+#define __AUFS_OPTS_H__
26277+
26278+#ifdef __KERNEL__
26279+
dece6358 26280+#include <linux/path.h>
1facf9fc 26281+
dece6358 26282+struct file;
dece6358 26283+
1facf9fc 26284+/* ---------------------------------------------------------------------- */
26285+
26286+/* mount flags */
26287+#define AuOpt_XINO 1 /* external inode number bitmap
26288+ and translation table */
26289+#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
26290+#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
26291+#define AuOpt_UDBA_REVAL (1 << 3)
4a4d8108 26292+#define AuOpt_UDBA_HNOTIFY (1 << 4)
dece6358
AM
26293+#define AuOpt_SHWH (1 << 5) /* show whiteout */
26294+#define AuOpt_PLINK (1 << 6) /* pseudo-link */
076b876e
AM
26295+#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm
26296+ bits */
dece6358
AM
26297+#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */
26298+#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */
26299+#define AuOpt_SUM_W (1 << 11) /* unimplemented */
26300+#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */
26301+#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */
4a4d8108 26302+#define AuOpt_DIO (1 << 14) /* direct io */
ae9dfd79 26303+#define AuOpt_DIRREN (1 << 15) /* directory rename */
1facf9fc 26304+
4a4d8108
AM
26305+#ifndef CONFIG_AUFS_HNOTIFY
26306+#undef AuOpt_UDBA_HNOTIFY
26307+#define AuOpt_UDBA_HNOTIFY 0
1facf9fc 26308+#endif
ae9dfd79
AM
26309+#ifndef CONFIG_AUFS_DIRREN
26310+#undef AuOpt_DIRREN
26311+#define AuOpt_DIRREN 0
26312+#endif
dece6358
AM
26313+#ifndef CONFIG_AUFS_SHWH
26314+#undef AuOpt_SHWH
26315+#define AuOpt_SHWH 0
26316+#endif
1facf9fc 26317+
26318+#define AuOpt_Def (AuOpt_XINO \
26319+ | AuOpt_UDBA_REVAL \
26320+ | AuOpt_PLINK \
26321+ /* | AuOpt_DIRPERM1 */ \
26322+ | AuOpt_WARN_PERM)
26323+#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
26324+ | AuOpt_UDBA_REVAL \
4a4d8108 26325+ | AuOpt_UDBA_HNOTIFY)
1facf9fc 26326+
26327+#define au_opt_test(flags, name) (flags & AuOpt_##name)
26328+#define au_opt_set(flags, name) do { \
26329+ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
26330+ ((flags) |= AuOpt_##name); \
26331+} while (0)
26332+#define au_opt_set_udba(flags, name) do { \
26333+ (flags) &= ~AuOptMask_UDBA; \
26334+ ((flags) |= AuOpt_##name); \
26335+} while (0)
7f207e10
AM
26336+#define au_opt_clr(flags, name) do { \
26337+ ((flags) &= ~AuOpt_##name); \
26338+} while (0)
1facf9fc 26339+
e49829fe
JR
26340+static inline unsigned int au_opts_plink(unsigned int mntflags)
26341+{
26342+#ifdef CONFIG_PROC_FS
26343+ return mntflags;
26344+#else
26345+ return mntflags & ~AuOpt_PLINK;
26346+#endif
26347+}
26348+
1facf9fc 26349+/* ---------------------------------------------------------------------- */
26350+
26351+/* policies to select one among multiple writable branches */
26352+enum {
26353+ AuWbrCreate_TDP, /* top down parent */
26354+ AuWbrCreate_RR, /* round robin */
26355+ AuWbrCreate_MFS, /* most free space */
26356+ AuWbrCreate_MFSV, /* mfs with seconds */
26357+ AuWbrCreate_MFSRR, /* mfs then rr */
26358+ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */
f2c43d5f
AM
26359+ AuWbrCreate_TDMFS, /* top down regardless parent and mfs */
26360+ AuWbrCreate_TDMFSV, /* top down regardless parent and mfs */
1facf9fc 26361+ AuWbrCreate_PMFS, /* parent and mfs */
26362+ AuWbrCreate_PMFSV, /* parent and mfs with seconds */
392086de
AM
26363+ AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */
26364+ AuWbrCreate_PMFSRRV, /* plus seconds */
1facf9fc 26365+
26366+ AuWbrCreate_Def = AuWbrCreate_TDP
26367+};
26368+
26369+enum {
26370+ AuWbrCopyup_TDP, /* top down parent */
26371+ AuWbrCopyup_BUP, /* bottom up parent */
26372+ AuWbrCopyup_BU, /* bottom up */
26373+
26374+ AuWbrCopyup_Def = AuWbrCopyup_TDP
26375+};
26376+
26377+/* ---------------------------------------------------------------------- */
26378+
26379+struct au_opt_add {
26380+ aufs_bindex_t bindex;
26381+ char *pathname;
26382+ int perm;
26383+ struct path path;
26384+};
26385+
26386+struct au_opt_del {
26387+ char *pathname;
26388+ struct path h_path;
26389+};
26390+
26391+struct au_opt_mod {
26392+ char *path;
26393+ int perm;
26394+ struct dentry *h_root;
26395+};
26396+
26397+struct au_opt_xino {
26398+ char *path;
26399+ struct file *file;
26400+};
26401+
26402+struct au_opt_xino_itrunc {
26403+ aufs_bindex_t bindex;
26404+};
26405+
26406+struct au_opt_wbr_create {
26407+ int wbr_create;
26408+ int mfs_second;
26409+ unsigned long long mfsrr_watermark;
26410+};
26411+
26412+struct au_opt {
26413+ int type;
26414+ union {
26415+ struct au_opt_xino xino;
26416+ struct au_opt_xino_itrunc xino_itrunc;
26417+ struct au_opt_add add;
26418+ struct au_opt_del del;
26419+ struct au_opt_mod mod;
26420+ int dirwh;
26421+ int rdcache;
26422+ unsigned int rdblk;
26423+ unsigned int rdhash;
26424+ int udba;
26425+ struct au_opt_wbr_create wbr_create;
26426+ int wbr_copyup;
076b876e 26427+ unsigned int fhsm_second;
1facf9fc 26428+ };
26429+};
26430+
26431+/* opts flags */
26432+#define AuOpts_REMOUNT 1
027c5e7a
AM
26433+#define AuOpts_REFRESH (1 << 1)
26434+#define AuOpts_TRUNC_XIB (1 << 2)
26435+#define AuOpts_REFRESH_DYAOP (1 << 3)
b95c5147 26436+#define AuOpts_REFRESH_IDOP (1 << 4)
ae9dfd79 26437+#define AuOpts_DR_FLUSHED (1 << 5)
1facf9fc 26438+#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
7f207e10
AM
26439+#define au_fset_opts(flags, name) \
26440+ do { (flags) |= AuOpts_##name; } while (0)
26441+#define au_fclr_opts(flags, name) \
26442+ do { (flags) &= ~AuOpts_##name; } while (0)
1facf9fc 26443+
ae9dfd79
AM
26444+#ifndef CONFIG_AUFS_DIRREN
26445+#undef AuOpts_DR_FLUSHED
26446+#define AuOpts_DR_FLUSHED 0
26447+#endif
26448+
1facf9fc 26449+struct au_opts {
26450+ struct au_opt *opt;
26451+ int max_opt;
26452+
26453+ unsigned int given_udba;
26454+ unsigned int flags;
26455+ unsigned long sb_flags;
26456+};
26457+
26458+/* ---------------------------------------------------------------------- */
26459+
7e9cd9fe 26460+/* opts.c */
076b876e 26461+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
1facf9fc 26462+const char *au_optstr_udba(int udba);
26463+const char *au_optstr_wbr_copyup(int wbr_copyup);
26464+const char *au_optstr_wbr_create(int wbr_create);
26465+
26466+void au_opts_free(struct au_opts *opts);
ae9dfd79 26467+struct super_block;
1facf9fc 26468+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
26469+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
26470+ unsigned int pending);
26471+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
26472+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
26473+
26474+unsigned int au_opt_udba(struct super_block *sb);
26475+
1facf9fc 26476+#endif /* __KERNEL__ */
26477+#endif /* __AUFS_OPTS_H__ */
e8791d4f
AM
26478diff -urNp -x '*.orig' linux-4.9/fs/aufs/plink.c linux-4.9/fs/aufs/plink.c
26479--- linux-4.9/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
26480+++ linux-4.9/fs/aufs/plink.c 2021-02-24 16:15:09.534907296 +0100
ae9dfd79 26481@@ -0,0 +1,515 @@
1facf9fc 26482+/*
ae9dfd79 26483+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 26484+ *
26485+ * This program, aufs is free software; you can redistribute it and/or modify
26486+ * it under the terms of the GNU General Public License as published by
26487+ * the Free Software Foundation; either version 2 of the License, or
26488+ * (at your option) any later version.
dece6358
AM
26489+ *
26490+ * This program is distributed in the hope that it will be useful,
26491+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26492+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26493+ * GNU General Public License for more details.
26494+ *
26495+ * You should have received a copy of the GNU General Public License
523b37e3 26496+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26497+ */
26498+
26499+/*
26500+ * pseudo-link
26501+ */
26502+
26503+#include "aufs.h"
26504+
26505+/*
e49829fe 26506+ * the pseudo-link maintenance mode.
1facf9fc 26507+ * while a user process maintains the pseudo-links,
26508+ * prohibit adding a new plink and branch manipulation.
e49829fe
JR
26509+ *
26510+ * Flags
26511+ * NOPLM:
26512+ * For entry functions which will handle plink, and i_mutex is already held
26513+ * in VFS.
26514+ * They cannot wait and should return an error at once.
26515+ * Callers have to check the error.
26516+ * NOPLMW:
26517+ * For entry functions which will handle plink, but i_mutex is not held
26518+ * in VFS.
26519+ * They can wait for the plink maintenance mode to finish.
26520+ *
26521+ * They behave like F_SETLK and F_SETLKW.
26522+ * If the caller never handles plink, then both flags are unnecessary.
1facf9fc 26523+ */
e49829fe
JR
26524+
26525+int au_plink_maint(struct super_block *sb, int flags)
1facf9fc 26526+{
e49829fe
JR
26527+ int err;
26528+ pid_t pid, ppid;
f0c0a007 26529+ struct task_struct *parent, *prev;
e49829fe 26530+ struct au_sbinfo *sbi;
dece6358
AM
26531+
26532+ SiMustAnyLock(sb);
26533+
e49829fe
JR
26534+ err = 0;
26535+ if (!au_opt_test(au_mntflags(sb), PLINK))
26536+ goto out;
26537+
26538+ sbi = au_sbi(sb);
26539+ pid = sbi->si_plink_maint_pid;
26540+ if (!pid || pid == current->pid)
26541+ goto out;
26542+
26543+ /* todo: it highly depends upon /sbin/mount.aufs */
f0c0a007
AM
26544+ prev = NULL;
26545+ parent = current;
26546+ ppid = 0;
e49829fe 26547+ rcu_read_lock();
f0c0a007
AM
26548+ while (1) {
26549+ parent = rcu_dereference(parent->real_parent);
26550+ if (parent == prev)
26551+ break;
26552+ ppid = task_pid_vnr(parent);
26553+ if (pid == ppid) {
26554+ rcu_read_unlock();
26555+ goto out;
26556+ }
26557+ prev = parent;
26558+ }
e49829fe 26559+ rcu_read_unlock();
e49829fe
JR
26560+
26561+ if (au_ftest_lock(flags, NOPLMW)) {
027c5e7a
AM
26562+ /* if there is no i_mutex lock in VFS, we don't need to wait */
26563+ /* AuDebugOn(!lockdep_depth(current)); */
e49829fe
JR
26564+ while (sbi->si_plink_maint_pid) {
26565+ si_read_unlock(sb);
26566+ /* gave up wake_up_bit() */
26567+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
26568+
26569+ if (au_ftest_lock(flags, FLUSH))
26570+ au_nwt_flush(&sbi->si_nowait);
26571+ si_noflush_read_lock(sb);
26572+ }
26573+ } else if (au_ftest_lock(flags, NOPLM)) {
26574+ AuDbg("ppid %d, pid %d\n", ppid, pid);
26575+ err = -EAGAIN;
26576+ }
26577+
26578+out:
26579+ return err;
4a4d8108
AM
26580+}
26581+
e49829fe 26582+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
4a4d8108 26583+{
4a4d8108 26584+ spin_lock(&sbinfo->si_plink_maint_lock);
027c5e7a 26585+ sbinfo->si_plink_maint_pid = 0;
4a4d8108 26586+ spin_unlock(&sbinfo->si_plink_maint_lock);
027c5e7a 26587+ wake_up_all(&sbinfo->si_plink_wq);
4a4d8108
AM
26588+}
26589+
e49829fe 26590+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
26591+{
26592+ int err;
4a4d8108
AM
26593+ struct au_sbinfo *sbinfo;
26594+
26595+ err = 0;
4a4d8108
AM
26596+ sbinfo = au_sbi(sb);
26597+ /* make sure i am the only one in this fs */
e49829fe
JR
26598+ si_write_lock(sb, AuLock_FLUSH);
26599+ if (au_opt_test(au_mntflags(sb), PLINK)) {
26600+ spin_lock(&sbinfo->si_plink_maint_lock);
26601+ if (!sbinfo->si_plink_maint_pid)
26602+ sbinfo->si_plink_maint_pid = current->pid;
26603+ else
26604+ err = -EBUSY;
26605+ spin_unlock(&sbinfo->si_plink_maint_lock);
26606+ }
4a4d8108
AM
26607+ si_write_unlock(sb);
26608+
26609+ return err;
1facf9fc 26610+}
26611+
26612+/* ---------------------------------------------------------------------- */
26613+
1facf9fc 26614+#ifdef CONFIG_AUFS_DEBUG
26615+void au_plink_list(struct super_block *sb)
26616+{
86dc4139 26617+ int i;
1facf9fc 26618+ struct au_sbinfo *sbinfo;
ae9dfd79
AM
26619+ struct hlist_bl_head *hbl;
26620+ struct hlist_bl_node *pos;
5afbbe0d 26621+ struct au_icntnr *icntnr;
1facf9fc 26622+
dece6358
AM
26623+ SiMustAnyLock(sb);
26624+
1facf9fc 26625+ sbinfo = au_sbi(sb);
26626+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 26627+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 26628+
86dc4139 26629+ for (i = 0; i < AuPlink_NHASH; i++) {
ae9dfd79
AM
26630+ hbl = sbinfo->si_plink + i;
26631+ hlist_bl_lock(hbl);
26632+ hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
5afbbe0d 26633+ AuDbg("%lu\n", icntnr->vfs_inode.i_ino);
ae9dfd79 26634+ hlist_bl_unlock(hbl);
86dc4139 26635+ }
1facf9fc 26636+}
26637+#endif
26638+
26639+/* is the inode pseudo-linked? */
26640+int au_plink_test(struct inode *inode)
26641+{
86dc4139 26642+ int found, i;
1facf9fc 26643+ struct au_sbinfo *sbinfo;
ae9dfd79
AM
26644+ struct hlist_bl_head *hbl;
26645+ struct hlist_bl_node *pos;
5afbbe0d 26646+ struct au_icntnr *icntnr;
1facf9fc 26647+
26648+ sbinfo = au_sbi(inode->i_sb);
dece6358 26649+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 26650+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 26651+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 26652+
26653+ found = 0;
86dc4139 26654+ i = au_plink_hash(inode->i_ino);
ae9dfd79
AM
26655+ hbl = sbinfo->si_plink + i;
26656+ hlist_bl_lock(hbl);
26657+ hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
5afbbe0d 26658+ if (&icntnr->vfs_inode == inode) {
1facf9fc 26659+ found = 1;
26660+ break;
26661+ }
ae9dfd79 26662+ hlist_bl_unlock(hbl);
1facf9fc 26663+ return found;
26664+}
26665+
26666+/* ---------------------------------------------------------------------- */
26667+
26668+/*
26669+ * generate a name for plink.
26670+ * the file will be stored under AUFS_WH_PLINKDIR.
26671+ */
26672+/* 20 is max digits length of ulong 64 */
26673+#define PLINK_NAME_LEN ((20 + 1) * 2)
26674+
26675+static int plink_name(char *name, int len, struct inode *inode,
26676+ aufs_bindex_t bindex)
26677+{
26678+ int rlen;
26679+ struct inode *h_inode;
26680+
26681+ h_inode = au_h_iptr(inode, bindex);
26682+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
26683+ return rlen;
26684+}
26685+
7f207e10
AM
26686+struct au_do_plink_lkup_args {
26687+ struct dentry **errp;
26688+ struct qstr *tgtname;
26689+ struct dentry *h_parent;
26690+ struct au_branch *br;
26691+};
26692+
26693+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
26694+ struct dentry *h_parent,
26695+ struct au_branch *br)
26696+{
26697+ struct dentry *h_dentry;
febd17d6 26698+ struct inode *h_inode;
7f207e10 26699+
febd17d6 26700+ h_inode = d_inode(h_parent);
ae9dfd79 26701+ vfsub_inode_lock_shared_nested(h_inode, AuLsc_I_CHILD2);
b4510431 26702+ h_dentry = vfsub_lkup_one(tgtname, h_parent);
ae9dfd79 26703+ inode_unlock_shared(h_inode);
7f207e10
AM
26704+ return h_dentry;
26705+}
26706+
26707+static void au_call_do_plink_lkup(void *args)
26708+{
26709+ struct au_do_plink_lkup_args *a = args;
26710+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
26711+}
26712+
1facf9fc 26713+/* lookup the plink-ed @inode under the branch at @bindex */
26714+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
26715+{
26716+ struct dentry *h_dentry, *h_parent;
26717+ struct au_branch *br;
7f207e10 26718+ int wkq_err;
1facf9fc 26719+ char a[PLINK_NAME_LEN];
0c3ec466 26720+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 26721+
e49829fe
JR
26722+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
26723+
1facf9fc 26724+ br = au_sbr(inode->i_sb, bindex);
26725+ h_parent = br->br_wbr->wbr_plink;
1facf9fc 26726+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
26727+
2dfbb274 26728+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
7f207e10
AM
26729+ struct au_do_plink_lkup_args args = {
26730+ .errp = &h_dentry,
26731+ .tgtname = &tgtname,
26732+ .h_parent = h_parent,
26733+ .br = br
26734+ };
26735+
26736+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
26737+ if (unlikely(wkq_err))
26738+ h_dentry = ERR_PTR(wkq_err);
26739+ } else
26740+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
26741+
1facf9fc 26742+ return h_dentry;
26743+}
26744+
26745+/* create a pseudo-link */
26746+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
26747+ struct dentry *h_dentry, struct au_branch *br)
26748+{
26749+ int err;
26750+ struct path h_path = {
86dc4139 26751+ .mnt = au_br_mnt(br)
1facf9fc 26752+ };
523b37e3 26753+ struct inode *h_dir, *delegated;
1facf9fc 26754+
5527c038 26755+ h_dir = d_inode(h_parent);
febd17d6 26756+ inode_lock_nested(h_dir, AuLsc_I_CHILD2);
4f0767ce 26757+again:
b4510431 26758+ h_path.dentry = vfsub_lkup_one(tgt, h_parent);
1facf9fc 26759+ err = PTR_ERR(h_path.dentry);
26760+ if (IS_ERR(h_path.dentry))
26761+ goto out;
26762+
26763+ err = 0;
26764+ /* wh.plink dir is not monitored */
7f207e10 26765+ /* todo: is it really safe? */
5527c038
JR
26766+ if (d_is_positive(h_path.dentry)
26767+ && d_inode(h_path.dentry) != d_inode(h_dentry)) {
523b37e3
AM
26768+ delegated = NULL;
26769+ err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
26770+ if (unlikely(err == -EWOULDBLOCK)) {
26771+ pr_warn("cannot retry for NFSv4 delegation"
26772+ " for an internal unlink\n");
26773+ iput(delegated);
26774+ }
1facf9fc 26775+ dput(h_path.dentry);
26776+ h_path.dentry = NULL;
26777+ if (!err)
26778+ goto again;
26779+ }
5527c038 26780+ if (!err && d_is_negative(h_path.dentry)) {
523b37e3
AM
26781+ delegated = NULL;
26782+ err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
26783+ if (unlikely(err == -EWOULDBLOCK)) {
26784+ pr_warn("cannot retry for NFSv4 delegation"
26785+ " for an internal link\n");
26786+ iput(delegated);
26787+ }
26788+ }
1facf9fc 26789+ dput(h_path.dentry);
26790+
4f0767ce 26791+out:
febd17d6 26792+ inode_unlock(h_dir);
1facf9fc 26793+ return err;
26794+}
26795+
26796+struct do_whplink_args {
26797+ int *errp;
26798+ struct qstr *tgt;
26799+ struct dentry *h_parent;
26800+ struct dentry *h_dentry;
26801+ struct au_branch *br;
26802+};
26803+
26804+static void call_do_whplink(void *args)
26805+{
26806+ struct do_whplink_args *a = args;
26807+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
26808+}
26809+
26810+static int whplink(struct dentry *h_dentry, struct inode *inode,
26811+ aufs_bindex_t bindex, struct au_branch *br)
26812+{
26813+ int err, wkq_err;
26814+ struct au_wbr *wbr;
26815+ struct dentry *h_parent;
1facf9fc 26816+ char a[PLINK_NAME_LEN];
0c3ec466 26817+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 26818+
26819+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
26820+ h_parent = wbr->wbr_plink;
1facf9fc 26821+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
26822+
26823+ /* always superio. */
2dfbb274 26824+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
1facf9fc 26825+ struct do_whplink_args args = {
26826+ .errp = &err,
26827+ .tgt = &tgtname,
26828+ .h_parent = h_parent,
26829+ .h_dentry = h_dentry,
26830+ .br = br
26831+ };
26832+ wkq_err = au_wkq_wait(call_do_whplink, &args);
26833+ if (unlikely(wkq_err))
26834+ err = wkq_err;
26835+ } else
26836+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 26837+
26838+ return err;
26839+}
26840+
1facf9fc 26841+/*
26842+ * create a new pseudo-link for @h_dentry on @bindex.
26843+ * the linked inode is held in aufs @inode.
26844+ */
26845+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
26846+ struct dentry *h_dentry)
26847+{
26848+ struct super_block *sb;
26849+ struct au_sbinfo *sbinfo;
ae9dfd79
AM
26850+ struct hlist_bl_head *hbl;
26851+ struct hlist_bl_node *pos;
5afbbe0d 26852+ struct au_icntnr *icntnr;
86dc4139 26853+ int found, err, cnt, i;
1facf9fc 26854+
26855+ sb = inode->i_sb;
26856+ sbinfo = au_sbi(sb);
26857+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 26858+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 26859+
86dc4139 26860+ found = au_plink_test(inode);
4a4d8108 26861+ if (found)
1facf9fc 26862+ return;
4a4d8108 26863+
86dc4139 26864+ i = au_plink_hash(inode->i_ino);
ae9dfd79 26865+ hbl = sbinfo->si_plink + i;
5afbbe0d 26866+ au_igrab(inode);
1facf9fc 26867+
ae9dfd79
AM
26868+ hlist_bl_lock(hbl);
26869+ hlist_bl_for_each_entry(icntnr, pos, hbl, plink) {
5afbbe0d 26870+ if (&icntnr->vfs_inode == inode) {
4a4d8108
AM
26871+ found = 1;
26872+ break;
26873+ }
1facf9fc 26874+ }
5afbbe0d
AM
26875+ if (!found) {
26876+ icntnr = container_of(inode, struct au_icntnr, vfs_inode);
ae9dfd79 26877+ hlist_bl_add_head(&icntnr->plink, hbl);
5afbbe0d 26878+ }
ae9dfd79 26879+ hlist_bl_unlock(hbl);
4a4d8108 26880+ if (!found) {
ae9dfd79 26881+ cnt = au_hbl_count(hbl);
86dc4139
AM
26882+#define msg "unexpectedly unblanced or too many pseudo-links"
26883+ if (cnt > AUFS_PLINK_WARN)
26884+ AuWarn1(msg ", %d\n", cnt);
26885+#undef msg
1facf9fc 26886+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
5afbbe0d
AM
26887+ if (unlikely(err)) {
26888+ pr_warn("err %d, damaged pseudo link.\n", err);
ae9dfd79 26889+ au_hbl_del(&icntnr->plink, hbl);
5afbbe0d 26890+ iput(&icntnr->vfs_inode);
4a4d8108 26891+ }
5afbbe0d
AM
26892+ } else
26893+ iput(&icntnr->vfs_inode);
1facf9fc 26894+}
26895+
26896+/* free all plinks */
e49829fe 26897+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 26898+{
86dc4139 26899+ int i, warned;
1facf9fc 26900+ struct au_sbinfo *sbinfo;
ae9dfd79
AM
26901+ struct hlist_bl_head *hbl;
26902+ struct hlist_bl_node *pos, *tmp;
5afbbe0d 26903+ struct au_icntnr *icntnr;
1facf9fc 26904+
dece6358
AM
26905+ SiMustWriteLock(sb);
26906+
1facf9fc 26907+ sbinfo = au_sbi(sb);
26908+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 26909+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 26910+
1facf9fc 26911+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
26912+ warned = 0;
26913+ for (i = 0; i < AuPlink_NHASH; i++) {
ae9dfd79
AM
26914+ hbl = sbinfo->si_plink + i;
26915+ if (!warned && verbose && !hlist_bl_empty(hbl)) {
86dc4139
AM
26916+ pr_warn("pseudo-link is not flushed");
26917+ warned = 1;
26918+ }
ae9dfd79 26919+ hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink)
5afbbe0d 26920+ iput(&icntnr->vfs_inode);
ae9dfd79 26921+ INIT_HLIST_BL_HEAD(hbl);
86dc4139 26922+ }
1facf9fc 26923+}
26924+
e49829fe
JR
26925+void au_plink_clean(struct super_block *sb, int verbose)
26926+{
26927+ struct dentry *root;
26928+
26929+ root = sb->s_root;
26930+ aufs_write_lock(root);
26931+ if (au_opt_test(au_mntflags(sb), PLINK))
26932+ au_plink_put(sb, verbose);
26933+ aufs_write_unlock(root);
26934+}
26935+
86dc4139
AM
26936+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
26937+{
26938+ int do_put;
5afbbe0d 26939+ aufs_bindex_t btop, bbot, bindex;
86dc4139
AM
26940+
26941+ do_put = 0;
5afbbe0d
AM
26942+ btop = au_ibtop(inode);
26943+ bbot = au_ibbot(inode);
26944+ if (btop >= 0) {
26945+ for (bindex = btop; bindex <= bbot; bindex++) {
86dc4139
AM
26946+ if (!au_h_iptr(inode, bindex)
26947+ || au_ii_br_id(inode, bindex) != br_id)
26948+ continue;
26949+ au_set_h_iptr(inode, bindex, NULL, 0);
26950+ do_put = 1;
26951+ break;
26952+ }
26953+ if (do_put)
5afbbe0d 26954+ for (bindex = btop; bindex <= bbot; bindex++)
86dc4139
AM
26955+ if (au_h_iptr(inode, bindex)) {
26956+ do_put = 0;
26957+ break;
26958+ }
26959+ } else
26960+ do_put = 1;
26961+
26962+ return do_put;
26963+}
26964+
1facf9fc 26965+/* free the plinks on a branch specified by @br_id */
26966+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
26967+{
26968+ struct au_sbinfo *sbinfo;
ae9dfd79
AM
26969+ struct hlist_bl_head *hbl;
26970+ struct hlist_bl_node *pos, *tmp;
5afbbe0d 26971+ struct au_icntnr *icntnr;
1facf9fc 26972+ struct inode *inode;
86dc4139 26973+ int i, do_put;
1facf9fc 26974+
dece6358
AM
26975+ SiMustWriteLock(sb);
26976+
1facf9fc 26977+ sbinfo = au_sbi(sb);
26978+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 26979+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 26980+
ae9dfd79 26981+ /* no bit_lock since sbinfo is write-locked */
86dc4139 26982+ for (i = 0; i < AuPlink_NHASH; i++) {
ae9dfd79
AM
26983+ hbl = sbinfo->si_plink + i;
26984+ hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink) {
5afbbe0d 26985+ inode = au_igrab(&icntnr->vfs_inode);
86dc4139
AM
26986+ ii_write_lock_child(inode);
26987+ do_put = au_plink_do_half_refresh(inode, br_id);
5afbbe0d 26988+ if (do_put) {
ae9dfd79 26989+ hlist_bl_del(&icntnr->plink);
5afbbe0d
AM
26990+ iput(inode);
26991+ }
86dc4139
AM
26992+ ii_write_unlock(inode);
26993+ iput(inode);
dece6358 26994+ }
dece6358
AM
26995+ }
26996+}
e8791d4f
AM
26997diff -urNp -x '*.orig' linux-4.9/fs/aufs/poll.c linux-4.9/fs/aufs/poll.c
26998--- linux-4.9/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
26999+++ linux-4.9/fs/aufs/poll.c 2021-02-24 16:15:09.534907296 +0100
ae9dfd79 27000@@ -0,0 +1,53 @@
dece6358 27001+/*
ae9dfd79 27002+ * Copyright (C) 2005-2018 Junjiro R. Okajima
dece6358
AM
27003+ *
27004+ * This program, aufs is free software; you can redistribute it and/or modify
27005+ * it under the terms of the GNU General Public License as published by
27006+ * the Free Software Foundation; either version 2 of the License, or
27007+ * (at your option) any later version.
27008+ *
27009+ * This program is distributed in the hope that it will be useful,
27010+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27011+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27012+ * GNU General Public License for more details.
27013+ *
27014+ * You should have received a copy of the GNU General Public License
523b37e3 27015+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358
AM
27016+ */
27017+
1308ab2a 27018+/*
27019+ * poll operation
27020+ * There is only one filesystem which implements ->poll operation, currently.
27021+ */
27022+
27023+#include "aufs.h"
27024+
27025+unsigned int aufs_poll(struct file *file, poll_table *wait)
27026+{
27027+ unsigned int mask;
27028+ int err;
27029+ struct file *h_file;
1308ab2a 27030+ struct super_block *sb;
27031+
27032+ /* We should pretend an error happened. */
27033+ mask = POLLERR /* | POLLIN | POLLOUT */;
b912730e 27034+ sb = file->f_path.dentry->d_sb;
e49829fe 27035+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e 27036+
ae9dfd79 27037+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
b912730e
AM
27038+ err = PTR_ERR(h_file);
27039+ if (IS_ERR(h_file))
1308ab2a 27040+ goto out;
27041+
27042+ /* it is not an error if h_file has no operation */
27043+ mask = DEFAULT_POLLMASK;
523b37e3 27044+ if (h_file->f_op->poll)
1308ab2a 27045+ mask = h_file->f_op->poll(h_file, wait);
b912730e 27046+ fput(h_file); /* instead of au_read_post() */
1308ab2a 27047+
4f0767ce 27048+out:
1308ab2a 27049+ si_read_unlock(sb);
ae9dfd79
AM
27050+ if (mask & POLLERR)
27051+ AuDbg("mask 0x%x\n", mask);
1308ab2a 27052+ return mask;
27053+}
e8791d4f
AM
27054diff -urNp -x '*.orig' linux-4.9/fs/aufs/posix_acl.c linux-4.9/fs/aufs/posix_acl.c
27055--- linux-4.9/fs/aufs/posix_acl.c 1970-01-01 01:00:00.000000000 +0100
27056+++ linux-4.9/fs/aufs/posix_acl.c 2021-02-24 16:15:09.534907296 +0100
ae9dfd79 27057@@ -0,0 +1,102 @@
c1595e42 27058+/*
ae9dfd79 27059+ * Copyright (C) 2014-2018 Junjiro R. Okajima
c1595e42
JR
27060+ *
27061+ * This program, aufs is free software; you can redistribute it and/or modify
27062+ * it under the terms of the GNU General Public License as published by
27063+ * the Free Software Foundation; either version 2 of the License, or
27064+ * (at your option) any later version.
27065+ *
27066+ * This program is distributed in the hope that it will be useful,
27067+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27068+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27069+ * GNU General Public License for more details.
27070+ *
27071+ * You should have received a copy of the GNU General Public License
27072+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
27073+ */
27074+
27075+/*
27076+ * posix acl operations
27077+ */
27078+
27079+#include <linux/fs.h>
c1595e42
JR
27080+#include "aufs.h"
27081+
27082+struct posix_acl *aufs_get_acl(struct inode *inode, int type)
27083+{
27084+ struct posix_acl *acl;
27085+ int err;
27086+ aufs_bindex_t bindex;
27087+ struct inode *h_inode;
27088+ struct super_block *sb;
27089+
27090+ acl = NULL;
27091+ sb = inode->i_sb;
27092+ si_read_lock(sb, AuLock_FLUSH);
27093+ ii_read_lock_child(inode);
27094+ if (!(sb->s_flags & MS_POSIXACL))
27095+ goto out;
27096+
5afbbe0d 27097+ bindex = au_ibtop(inode);
c1595e42
JR
27098+ h_inode = au_h_iptr(inode, bindex);
27099+ if (unlikely(!h_inode
27100+ || ((h_inode->i_mode & S_IFMT)
27101+ != (inode->i_mode & S_IFMT)))) {
27102+ err = au_busy_or_stale();
27103+ acl = ERR_PTR(err);
27104+ goto out;
27105+ }
27106+
27107+ /* always topmost only */
27108+ acl = get_acl(h_inode, type);
ae9dfd79
AM
27109+ if (!IS_ERR_OR_NULL(acl))
27110+ set_cached_acl(inode, type, acl);
c1595e42
JR
27111+
27112+out:
27113+ ii_read_unlock(inode);
27114+ si_read_unlock(sb);
27115+
27116+ AuTraceErrPtr(acl);
27117+ return acl;
27118+}
27119+
27120+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
27121+{
27122+ int err;
27123+ ssize_t ssz;
27124+ struct dentry *dentry;
f2c43d5f 27125+ struct au_sxattr arg = {
c1595e42
JR
27126+ .type = AU_ACL_SET,
27127+ .u.acl_set = {
27128+ .acl = acl,
27129+ .type = type
27130+ },
27131+ };
27132+
5afbbe0d
AM
27133+ IMustLock(inode);
27134+
c1595e42
JR
27135+ if (inode->i_ino == AUFS_ROOT_INO)
27136+ dentry = dget(inode->i_sb->s_root);
27137+ else {
27138+ dentry = d_find_alias(inode);
27139+ if (!dentry)
27140+ dentry = d_find_any_alias(inode);
27141+ if (!dentry) {
27142+ pr_warn("cannot handle this inode, "
27143+ "please report to aufs-users ML\n");
27144+ err = -ENOENT;
27145+ goto out;
27146+ }
27147+ }
27148+
f2c43d5f 27149+ ssz = au_sxattr(dentry, inode, &arg);
c1595e42
JR
27150+ dput(dentry);
27151+ err = ssz;
ae9dfd79 27152+ if (ssz >= 0) {
c1595e42 27153+ err = 0;
ae9dfd79
AM
27154+ set_cached_acl(inode, type, acl);
27155+ }
c1595e42
JR
27156+
27157+out:
c1595e42
JR
27158+ return err;
27159+}
e8791d4f
AM
27160diff -urNp -x '*.orig' linux-4.9/fs/aufs/procfs.c linux-4.9/fs/aufs/procfs.c
27161--- linux-4.9/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
27162+++ linux-4.9/fs/aufs/procfs.c 2021-02-24 16:15:09.534907296 +0100
ae9dfd79 27163@@ -0,0 +1,170 @@
e49829fe 27164+/*
ae9dfd79 27165+ * Copyright (C) 2010-2018 Junjiro R. Okajima
e49829fe
JR
27166+ *
27167+ * This program, aufs is free software; you can redistribute it and/or modify
27168+ * it under the terms of the GNU General Public License as published by
27169+ * the Free Software Foundation; either version 2 of the License, or
27170+ * (at your option) any later version.
27171+ *
27172+ * This program is distributed in the hope that it will be useful,
27173+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27174+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27175+ * GNU General Public License for more details.
27176+ *
27177+ * You should have received a copy of the GNU General Public License
523b37e3 27178+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
27179+ */
27180+
27181+/*
27182+ * procfs interfaces
27183+ */
27184+
27185+#include <linux/proc_fs.h>
27186+#include "aufs.h"
27187+
27188+static int au_procfs_plm_release(struct inode *inode, struct file *file)
27189+{
27190+ struct au_sbinfo *sbinfo;
27191+
27192+ sbinfo = file->private_data;
27193+ if (sbinfo) {
27194+ au_plink_maint_leave(sbinfo);
27195+ kobject_put(&sbinfo->si_kobj);
27196+ }
27197+
27198+ return 0;
27199+}
27200+
27201+static void au_procfs_plm_write_clean(struct file *file)
27202+{
27203+ struct au_sbinfo *sbinfo;
27204+
27205+ sbinfo = file->private_data;
27206+ if (sbinfo)
27207+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
27208+}
27209+
27210+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
27211+{
27212+ int err;
27213+ struct super_block *sb;
27214+ struct au_sbinfo *sbinfo;
ae9dfd79 27215+ struct hlist_bl_node *pos;
e49829fe
JR
27216+
27217+ err = -EBUSY;
27218+ if (unlikely(file->private_data))
27219+ goto out;
27220+
27221+ sb = NULL;
53392da6 27222+ /* don't use au_sbilist_lock() here */
ae9dfd79
AM
27223+ hlist_bl_lock(&au_sbilist);
27224+ hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
e49829fe
JR
27225+ if (id == sysaufs_si_id(sbinfo)) {
27226+ kobject_get(&sbinfo->si_kobj);
27227+ sb = sbinfo->si_sb;
27228+ break;
27229+ }
ae9dfd79 27230+ hlist_bl_unlock(&au_sbilist);
e49829fe
JR
27231+
27232+ err = -EINVAL;
27233+ if (unlikely(!sb))
27234+ goto out;
27235+
27236+ err = au_plink_maint_enter(sb);
27237+ if (!err)
27238+ /* keep kobject_get() */
27239+ file->private_data = sbinfo;
27240+ else
27241+ kobject_put(&sbinfo->si_kobj);
27242+out:
27243+ return err;
27244+}
27245+
27246+/*
27247+ * Accept a valid "si=xxxx" only.
27248+ * Once it is accepted successfully, accept "clean" too.
27249+ */
27250+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
27251+ size_t count, loff_t *ppos)
27252+{
27253+ ssize_t err;
27254+ unsigned long id;
27255+ /* last newline is allowed */
27256+ char buf[3 + sizeof(unsigned long) * 2 + 1];
27257+
27258+ err = -EACCES;
27259+ if (unlikely(!capable(CAP_SYS_ADMIN)))
27260+ goto out;
27261+
27262+ err = -EINVAL;
27263+ if (unlikely(count > sizeof(buf)))
27264+ goto out;
27265+
27266+ err = copy_from_user(buf, ubuf, count);
27267+ if (unlikely(err)) {
27268+ err = -EFAULT;
27269+ goto out;
27270+ }
27271+ buf[count] = 0;
27272+
27273+ err = -EINVAL;
27274+ if (!strcmp("clean", buf)) {
27275+ au_procfs_plm_write_clean(file);
27276+ goto out_success;
27277+ } else if (unlikely(strncmp("si=", buf, 3)))
27278+ goto out;
27279+
9dbd164d 27280+ err = kstrtoul(buf + 3, 16, &id);
e49829fe
JR
27281+ if (unlikely(err))
27282+ goto out;
27283+
27284+ err = au_procfs_plm_write_si(file, id);
27285+ if (unlikely(err))
27286+ goto out;
27287+
27288+out_success:
27289+ err = count; /* success */
27290+out:
27291+ return err;
27292+}
27293+
27294+static const struct file_operations au_procfs_plm_fop = {
27295+ .write = au_procfs_plm_write,
27296+ .release = au_procfs_plm_release,
27297+ .owner = THIS_MODULE
27298+};
27299+
27300+/* ---------------------------------------------------------------------- */
27301+
27302+static struct proc_dir_entry *au_procfs_dir;
27303+
27304+void au_procfs_fin(void)
27305+{
27306+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
27307+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
27308+}
27309+
27310+int __init au_procfs_init(void)
27311+{
27312+ int err;
27313+ struct proc_dir_entry *entry;
27314+
27315+ err = -ENOMEM;
27316+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
27317+ if (unlikely(!au_procfs_dir))
27318+ goto out;
27319+
27320+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
27321+ au_procfs_dir, &au_procfs_plm_fop);
27322+ if (unlikely(!entry))
27323+ goto out_dir;
27324+
27325+ err = 0;
27326+ goto out; /* success */
27327+
27328+
27329+out_dir:
27330+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
27331+out:
27332+ return err;
27333+}
e8791d4f
AM
27334diff -urNp -x '*.orig' linux-4.9/fs/aufs/rdu.c linux-4.9/fs/aufs/rdu.c
27335--- linux-4.9/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
27336+++ linux-4.9/fs/aufs/rdu.c 2021-02-24 16:15:09.534907296 +0100
5afbbe0d 27337@@ -0,0 +1,381 @@
1308ab2a 27338+/*
ae9dfd79 27339+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1308ab2a 27340+ *
27341+ * This program, aufs is free software; you can redistribute it and/or modify
27342+ * it under the terms of the GNU General Public License as published by
27343+ * the Free Software Foundation; either version 2 of the License, or
27344+ * (at your option) any later version.
27345+ *
27346+ * This program is distributed in the hope that it will be useful,
27347+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27348+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27349+ * GNU General Public License for more details.
27350+ *
27351+ * You should have received a copy of the GNU General Public License
523b37e3 27352+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1308ab2a 27353+ */
27354+
27355+/*
27356+ * readdir in userspace.
27357+ */
27358+
b752ccd1 27359+#include <linux/compat.h>
4a4d8108 27360+#include <linux/fs_stack.h>
1308ab2a 27361+#include <linux/security.h>
1308ab2a 27362+#include "aufs.h"
27363+
27364+/* bits for struct aufs_rdu.flags */
27365+#define AuRdu_CALLED 1
27366+#define AuRdu_CONT (1 << 1)
27367+#define AuRdu_FULL (1 << 2)
27368+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
7f207e10
AM
27369+#define au_fset_rdu(flags, name) \
27370+ do { (flags) |= AuRdu_##name; } while (0)
27371+#define au_fclr_rdu(flags, name) \
27372+ do { (flags) &= ~AuRdu_##name; } while (0)
1308ab2a 27373+
27374+struct au_rdu_arg {
392086de 27375+ struct dir_context ctx;
1308ab2a 27376+ struct aufs_rdu *rdu;
27377+ union au_rdu_ent_ul ent;
27378+ unsigned long end;
27379+
27380+ struct super_block *sb;
27381+ int err;
27382+};
27383+
392086de 27384+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
1308ab2a 27385+ loff_t offset, u64 h_ino, unsigned int d_type)
27386+{
27387+ int err, len;
392086de 27388+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
1308ab2a 27389+ struct aufs_rdu *rdu = arg->rdu;
27390+ struct au_rdu_ent ent;
27391+
27392+ err = 0;
27393+ arg->err = 0;
27394+ au_fset_rdu(rdu->cookie.flags, CALLED);
27395+ len = au_rdu_len(nlen);
27396+ if (arg->ent.ul + len < arg->end) {
27397+ ent.ino = h_ino;
27398+ ent.bindex = rdu->cookie.bindex;
27399+ ent.type = d_type;
27400+ ent.nlen = nlen;
4a4d8108
AM
27401+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
27402+ ent.type = DT_UNKNOWN;
1308ab2a 27403+
9dbd164d 27404+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 27405+ err = -EFAULT;
27406+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
27407+ goto out;
27408+ if (copy_to_user(arg->ent.e->name, name, nlen))
27409+ goto out;
27410+ /* the terminating NULL */
27411+ if (__put_user(0, arg->ent.e->name + nlen))
27412+ goto out;
27413+ err = 0;
27414+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
27415+ arg->ent.ul += len;
27416+ rdu->rent++;
27417+ } else {
27418+ err = -EFAULT;
27419+ au_fset_rdu(rdu->cookie.flags, FULL);
27420+ rdu->full = 1;
27421+ rdu->tail = arg->ent;
27422+ }
27423+
4f0767ce 27424+out:
1308ab2a 27425+ /* AuTraceErr(err); */
27426+ return err;
27427+}
27428+
27429+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
27430+{
27431+ int err;
27432+ loff_t offset;
27433+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
27434+
92d182d2 27435+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
1308ab2a 27436+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
27437+ err = offset;
27438+ if (unlikely(offset != cookie->h_pos))
27439+ goto out;
27440+
27441+ err = 0;
27442+ do {
27443+ arg->err = 0;
27444+ au_fclr_rdu(cookie->flags, CALLED);
27445+ /* smp_mb(); */
392086de 27446+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1308ab2a 27447+ if (err >= 0)
27448+ err = arg->err;
27449+ } while (!err
27450+ && au_ftest_rdu(cookie->flags, CALLED)
27451+ && !au_ftest_rdu(cookie->flags, FULL));
27452+ cookie->h_pos = h_file->f_pos;
27453+
4f0767ce 27454+out:
1308ab2a 27455+ AuTraceErr(err);
27456+ return err;
27457+}
27458+
27459+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
27460+{
27461+ int err;
5afbbe0d 27462+ aufs_bindex_t bbot;
392086de
AM
27463+ struct au_rdu_arg arg = {
27464+ .ctx = {
2000de60 27465+ .actor = au_rdu_fill
392086de
AM
27466+ }
27467+ };
1308ab2a 27468+ struct dentry *dentry;
27469+ struct inode *inode;
27470+ struct file *h_file;
27471+ struct au_rdu_cookie *cookie = &rdu->cookie;
27472+
27473+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
27474+ if (unlikely(err)) {
27475+ err = -EFAULT;
27476+ AuTraceErr(err);
27477+ goto out;
27478+ }
27479+ rdu->rent = 0;
27480+ rdu->tail = rdu->ent;
27481+ rdu->full = 0;
27482+ arg.rdu = rdu;
27483+ arg.ent = rdu->ent;
27484+ arg.end = arg.ent.ul;
27485+ arg.end += rdu->sz;
27486+
27487+ err = -ENOTDIR;
5afbbe0d 27488+ if (unlikely(!file->f_op->iterate && !file->f_op->iterate_shared))
1308ab2a 27489+ goto out;
27490+
27491+ err = security_file_permission(file, MAY_READ);
27492+ AuTraceErr(err);
27493+ if (unlikely(err))
27494+ goto out;
27495+
2000de60 27496+ dentry = file->f_path.dentry;
5527c038 27497+ inode = d_inode(dentry);
5afbbe0d 27498+ inode_lock_shared(inode);
1308ab2a 27499+
27500+ arg.sb = inode->i_sb;
e49829fe
JR
27501+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
27502+ if (unlikely(err))
27503+ goto out_mtx;
027c5e7a
AM
27504+ err = au_alive_dir(dentry);
27505+ if (unlikely(err))
27506+ goto out_si;
e49829fe 27507+ /* todo: reval? */
1308ab2a 27508+ fi_read_lock(file);
27509+
27510+ err = -EAGAIN;
27511+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
27512+ && cookie->generation != au_figen(file)))
27513+ goto out_unlock;
27514+
27515+ err = 0;
27516+ if (!rdu->blk) {
27517+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
27518+ if (!rdu->blk)
27519+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
27520+ }
5afbbe0d
AM
27521+ bbot = au_fbtop(file);
27522+ if (cookie->bindex < bbot)
27523+ cookie->bindex = bbot;
27524+ bbot = au_fbbot_dir(file);
27525+ /* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
27526+ for (; !err && cookie->bindex <= bbot;
1308ab2a 27527+ cookie->bindex++, cookie->h_pos = 0) {
4a4d8108 27528+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 27529+ if (!h_file)
27530+ continue;
27531+
27532+ au_fclr_rdu(cookie->flags, FULL);
27533+ err = au_rdu_do(h_file, &arg);
27534+ AuTraceErr(err);
27535+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
27536+ break;
27537+ }
27538+ AuDbg("rent %llu\n", rdu->rent);
27539+
27540+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
27541+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
27542+ au_fset_rdu(cookie->flags, CONT);
27543+ cookie->generation = au_figen(file);
27544+ }
27545+
27546+ ii_read_lock_child(inode);
5afbbe0d 27547+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
1308ab2a 27548+ ii_read_unlock(inode);
27549+
4f0767ce 27550+out_unlock:
1308ab2a 27551+ fi_read_unlock(file);
027c5e7a 27552+out_si:
1308ab2a 27553+ si_read_unlock(arg.sb);
4f0767ce 27554+out_mtx:
5afbbe0d 27555+ inode_unlock_shared(inode);
4f0767ce 27556+out:
1308ab2a 27557+ AuTraceErr(err);
27558+ return err;
27559+}
27560+
27561+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
27562+{
27563+ int err;
27564+ ino_t ino;
27565+ unsigned long long nent;
27566+ union au_rdu_ent_ul *u;
27567+ struct au_rdu_ent ent;
27568+ struct super_block *sb;
27569+
27570+ err = 0;
27571+ nent = rdu->nent;
27572+ u = &rdu->ent;
2000de60 27573+ sb = file->f_path.dentry->d_sb;
1308ab2a 27574+ si_read_lock(sb, AuLock_FLUSH);
27575+ while (nent-- > 0) {
9dbd164d 27576+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 27577+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
27578+ if (!err)
27579+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 27580+ if (unlikely(err)) {
27581+ err = -EFAULT;
27582+ AuTraceErr(err);
27583+ break;
27584+ }
27585+
27586+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
27587+ if (!ent.wh)
27588+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
27589+ else
27590+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
27591+ &ino);
27592+ if (unlikely(err)) {
27593+ AuTraceErr(err);
27594+ break;
27595+ }
27596+
27597+ err = __put_user(ino, &u->e->ino);
27598+ if (unlikely(err)) {
27599+ err = -EFAULT;
27600+ AuTraceErr(err);
27601+ break;
27602+ }
27603+ u->ul += au_rdu_len(ent.nlen);
27604+ }
27605+ si_read_unlock(sb);
27606+
27607+ return err;
27608+}
27609+
27610+/* ---------------------------------------------------------------------- */
27611+
27612+static int au_rdu_verify(struct aufs_rdu *rdu)
27613+{
b752ccd1 27614+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 27615+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 27616+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 27617+ rdu->blk,
27618+ rdu->rent, rdu->shwh, rdu->full,
27619+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
27620+ rdu->cookie.generation);
dece6358 27621+
b752ccd1 27622+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 27623+ return 0;
dece6358 27624+
b752ccd1
AM
27625+ AuDbg("%u:%u\n",
27626+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 27627+ return -EINVAL;
27628+}
27629+
27630+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 27631+{
1308ab2a 27632+ long err, e;
27633+ struct aufs_rdu rdu;
27634+ void __user *p = (void __user *)arg;
dece6358 27635+
1308ab2a 27636+ err = copy_from_user(&rdu, p, sizeof(rdu));
27637+ if (unlikely(err)) {
27638+ err = -EFAULT;
27639+ AuTraceErr(err);
27640+ goto out;
27641+ }
27642+ err = au_rdu_verify(&rdu);
dece6358
AM
27643+ if (unlikely(err))
27644+ goto out;
27645+
1308ab2a 27646+ switch (cmd) {
27647+ case AUFS_CTL_RDU:
27648+ err = au_rdu(file, &rdu);
27649+ if (unlikely(err))
27650+ break;
dece6358 27651+
1308ab2a 27652+ e = copy_to_user(p, &rdu, sizeof(rdu));
27653+ if (unlikely(e)) {
27654+ err = -EFAULT;
27655+ AuTraceErr(err);
27656+ }
27657+ break;
27658+ case AUFS_CTL_RDU_INO:
27659+ err = au_rdu_ino(file, &rdu);
27660+ break;
27661+
27662+ default:
4a4d8108 27663+ /* err = -ENOTTY; */
1308ab2a 27664+ err = -EINVAL;
27665+ }
dece6358 27666+
4f0767ce 27667+out:
1308ab2a 27668+ AuTraceErr(err);
27669+ return err;
1facf9fc 27670+}
b752ccd1
AM
27671+
27672+#ifdef CONFIG_COMPAT
27673+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
27674+{
27675+ long err, e;
27676+ struct aufs_rdu rdu;
27677+ void __user *p = compat_ptr(arg);
27678+
27679+ /* todo: get_user()? */
27680+ err = copy_from_user(&rdu, p, sizeof(rdu));
27681+ if (unlikely(err)) {
27682+ err = -EFAULT;
27683+ AuTraceErr(err);
27684+ goto out;
27685+ }
27686+ rdu.ent.e = compat_ptr(rdu.ent.ul);
27687+ err = au_rdu_verify(&rdu);
27688+ if (unlikely(err))
27689+ goto out;
27690+
27691+ switch (cmd) {
27692+ case AUFS_CTL_RDU:
27693+ err = au_rdu(file, &rdu);
27694+ if (unlikely(err))
27695+ break;
27696+
27697+ rdu.ent.ul = ptr_to_compat(rdu.ent.e);
27698+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
27699+ e = copy_to_user(p, &rdu, sizeof(rdu));
27700+ if (unlikely(e)) {
27701+ err = -EFAULT;
27702+ AuTraceErr(err);
27703+ }
27704+ break;
27705+ case AUFS_CTL_RDU_INO:
27706+ err = au_rdu_ino(file, &rdu);
27707+ break;
27708+
27709+ default:
27710+ /* err = -ENOTTY; */
27711+ err = -EINVAL;
27712+ }
27713+
4f0767ce 27714+out:
b752ccd1
AM
27715+ AuTraceErr(err);
27716+ return err;
27717+}
27718+#endif
e8791d4f
AM
27719diff -urNp -x '*.orig' linux-4.9/fs/aufs/rwsem.h linux-4.9/fs/aufs/rwsem.h
27720--- linux-4.9/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
27721+++ linux-4.9/fs/aufs/rwsem.h 2021-02-24 16:15:09.534907296 +0100
5afbbe0d 27722@@ -0,0 +1,198 @@
1facf9fc 27723+/*
ae9dfd79 27724+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 27725+ *
27726+ * This program, aufs is free software; you can redistribute it and/or modify
27727+ * it under the terms of the GNU General Public License as published by
27728+ * the Free Software Foundation; either version 2 of the License, or
27729+ * (at your option) any later version.
dece6358
AM
27730+ *
27731+ * This program is distributed in the hope that it will be useful,
27732+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27733+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27734+ * GNU General Public License for more details.
27735+ *
27736+ * You should have received a copy of the GNU General Public License
523b37e3 27737+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27738+ */
27739+
27740+/*
27741+ * simple read-write semaphore wrappers
27742+ */
27743+
27744+#ifndef __AUFS_RWSEM_H__
27745+#define __AUFS_RWSEM_H__
27746+
27747+#ifdef __KERNEL__
27748+
4a4d8108 27749+#include "debug.h"
dece6358
AM
27750+
27751+struct au_rwsem {
27752+ struct rw_semaphore rwsem;
27753+#ifdef CONFIG_AUFS_DEBUG
27754+ /* just for debugging, not almighty counter */
27755+ atomic_t rcnt, wcnt;
27756+#endif
27757+};
27758+
5afbbe0d
AM
27759+#ifdef CONFIG_LOCKDEP
27760+#define au_lockdep_set_name(rw) \
27761+ lockdep_set_class_and_name(&(rw)->rwsem, \
27762+ /*original key*/(rw)->rwsem.dep_map.key, \
27763+ /*name*/#rw)
27764+#else
27765+#define au_lockdep_set_name(rw) do {} while (0)
27766+#endif
27767+
dece6358
AM
27768+#ifdef CONFIG_AUFS_DEBUG
27769+#define AuDbgCntInit(rw) do { \
27770+ atomic_set(&(rw)->rcnt, 0); \
27771+ atomic_set(&(rw)->wcnt, 0); \
27772+ smp_mb(); /* atomic set */ \
27773+} while (0)
27774+
5afbbe0d
AM
27775+#define AuDbgCnt(rw, cnt) atomic_read(&(rw)->cnt)
27776+#define AuDbgCntInc(rw, cnt) atomic_inc(&(rw)->cnt)
27777+#define AuDbgCntDec(rw, cnt) WARN_ON(atomic_dec_return(&(rw)->cnt) < 0)
27778+#define AuDbgRcntInc(rw) AuDbgCntInc(rw, rcnt)
27779+#define AuDbgRcntDec(rw) AuDbgCntDec(rw, rcnt)
27780+#define AuDbgWcntInc(rw) AuDbgCntInc(rw, wcnt)
27781+#define AuDbgWcntDec(rw) AuDbgCntDec(rw, wcnt)
dece6358 27782+#else
5afbbe0d 27783+#define AuDbgCnt(rw, cnt) 0
dece6358
AM
27784+#define AuDbgCntInit(rw) do {} while (0)
27785+#define AuDbgRcntInc(rw) do {} while (0)
27786+#define AuDbgRcntDec(rw) do {} while (0)
27787+#define AuDbgWcntInc(rw) do {} while (0)
27788+#define AuDbgWcntDec(rw) do {} while (0)
27789+#endif /* CONFIG_AUFS_DEBUG */
27790+
27791+/* to make debugging easier, do not turn them into inline functions */
5afbbe0d 27792+#define AuRwMustNoWaiters(rw) AuDebugOn(rwsem_is_contended(&(rw)->rwsem))
dece6358 27793+/* rwsem_is_locked() is unusable */
5afbbe0d
AM
27794+#define AuRwMustReadLock(rw) AuDebugOn(AuDbgCnt(rw, rcnt) <= 0)
27795+#define AuRwMustWriteLock(rw) AuDebugOn(AuDbgCnt(rw, wcnt) <= 0)
27796+#define AuRwMustAnyLock(rw) AuDebugOn(AuDbgCnt(rw, rcnt) <= 0 \
27797+ && AuDbgCnt(rw, wcnt) <= 0)
27798+#define AuRwDestroy(rw) AuDebugOn(AuDbgCnt(rw, rcnt) \
27799+ || AuDbgCnt(rw, wcnt))
27800+
27801+#define au_rw_init(rw) do { \
27802+ AuDbgCntInit(rw); \
27803+ init_rwsem(&(rw)->rwsem); \
27804+ au_lockdep_set_name(rw); \
27805+ } while (0)
dece6358 27806+
5afbbe0d
AM
27807+#define au_rw_init_wlock(rw) do { \
27808+ au_rw_init(rw); \
27809+ down_write(&(rw)->rwsem); \
27810+ AuDbgWcntInc(rw); \
27811+ } while (0)
dece6358 27812+
5afbbe0d
AM
27813+#define au_rw_init_wlock_nested(rw, lsc) do { \
27814+ au_rw_init(rw); \
27815+ down_write_nested(&(rw)->rwsem, lsc); \
27816+ AuDbgWcntInc(rw); \
27817+ } while (0)
dece6358
AM
27818+
27819+static inline void au_rw_read_lock(struct au_rwsem *rw)
27820+{
27821+ down_read(&rw->rwsem);
27822+ AuDbgRcntInc(rw);
27823+}
27824+
27825+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
27826+{
27827+ down_read_nested(&rw->rwsem, lsc);
27828+ AuDbgRcntInc(rw);
27829+}
27830+
27831+static inline void au_rw_read_unlock(struct au_rwsem *rw)
27832+{
27833+ AuRwMustReadLock(rw);
27834+ AuDbgRcntDec(rw);
27835+ up_read(&rw->rwsem);
27836+}
27837+
27838+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
27839+{
27840+ AuRwMustWriteLock(rw);
27841+ AuDbgRcntInc(rw);
27842+ AuDbgWcntDec(rw);
27843+ downgrade_write(&rw->rwsem);
27844+}
27845+
27846+static inline void au_rw_write_lock(struct au_rwsem *rw)
27847+{
27848+ down_write(&rw->rwsem);
27849+ AuDbgWcntInc(rw);
27850+}
27851+
27852+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
27853+ unsigned int lsc)
27854+{
27855+ down_write_nested(&rw->rwsem, lsc);
27856+ AuDbgWcntInc(rw);
27857+}
1facf9fc 27858+
dece6358
AM
27859+static inline void au_rw_write_unlock(struct au_rwsem *rw)
27860+{
27861+ AuRwMustWriteLock(rw);
27862+ AuDbgWcntDec(rw);
27863+ up_write(&rw->rwsem);
27864+}
27865+
27866+/* why is a _nested version not defined? */
27867+static inline int au_rw_read_trylock(struct au_rwsem *rw)
27868+{
076b876e
AM
27869+ int ret;
27870+
27871+ ret = down_read_trylock(&rw->rwsem);
dece6358
AM
27872+ if (ret)
27873+ AuDbgRcntInc(rw);
27874+ return ret;
27875+}
27876+
27877+static inline int au_rw_write_trylock(struct au_rwsem *rw)
27878+{
076b876e
AM
27879+ int ret;
27880+
27881+ ret = down_write_trylock(&rw->rwsem);
dece6358
AM
27882+ if (ret)
27883+ AuDbgWcntInc(rw);
27884+ return ret;
27885+}
27886+
5afbbe0d 27887+#undef AuDbgCntDec
dece6358
AM
27888+#undef AuDbgRcntInc
27889+#undef AuDbgRcntDec
dece6358 27890+#undef AuDbgWcntDec
1facf9fc 27891+
27892+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
27893+static inline void prefix##_read_lock(param) \
dece6358 27894+{ au_rw_read_lock(rwsem); } \
1facf9fc 27895+static inline void prefix##_write_lock(param) \
dece6358 27896+{ au_rw_write_lock(rwsem); } \
1facf9fc 27897+static inline int prefix##_read_trylock(param) \
dece6358 27898+{ return au_rw_read_trylock(rwsem); } \
1facf9fc 27899+static inline int prefix##_write_trylock(param) \
dece6358 27900+{ return au_rw_write_trylock(rwsem); }
1facf9fc 27901+/* why is a _nested version not defined? */
27902+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 27903+{ au_rw_read_trylock_nested(rwsem, lsc); }
1facf9fc 27904+static inline void prefix##_write_trylock_nested(param, lsc)
dece6358 27905+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 27906+
27907+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
27908+static inline void prefix##_read_unlock(param) \
dece6358 27909+{ au_rw_read_unlock(rwsem); } \
1facf9fc 27910+static inline void prefix##_write_unlock(param) \
dece6358 27911+{ au_rw_write_unlock(rwsem); } \
1facf9fc 27912+static inline void prefix##_downgrade_lock(param) \
dece6358 27913+{ au_rw_dgrade_lock(rwsem); }
1facf9fc 27914+
27915+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
27916+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
27917+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
27918+
27919+#endif /* __KERNEL__ */
27920+#endif /* __AUFS_RWSEM_H__ */
e8791d4f
AM
27921diff -urNp -x '*.orig' linux-4.9/fs/aufs/sbinfo.c linux-4.9/fs/aufs/sbinfo.c
27922--- linux-4.9/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
27923+++ linux-4.9/fs/aufs/sbinfo.c 2021-02-24 16:15:09.534907296 +0100
ae9dfd79 27924@@ -0,0 +1,304 @@
1facf9fc 27925+/*
ae9dfd79 27926+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 27927+ *
27928+ * This program, aufs is free software; you can redistribute it and/or modify
27929+ * it under the terms of the GNU General Public License as published by
27930+ * the Free Software Foundation; either version 2 of the License, or
27931+ * (at your option) any later version.
dece6358
AM
27932+ *
27933+ * This program is distributed in the hope that it will be useful,
27934+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27935+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27936+ * GNU General Public License for more details.
27937+ *
27938+ * You should have received a copy of the GNU General Public License
523b37e3 27939+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27940+ */
27941+
27942+/*
27943+ * superblock private data
27944+ */
27945+
27946+#include "aufs.h"
27947+
27948+/*
27949+ * they are necessary regardless of whether sysfs is disabled.
27950+ */
27951+void au_si_free(struct kobject *kobj)
27952+{
86dc4139 27953+ int i;
1facf9fc 27954+ struct au_sbinfo *sbinfo;
b752ccd1 27955+ char *locked __maybe_unused; /* debug only */
1facf9fc 27956+
27957+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
86dc4139 27958+ for (i = 0; i < AuPlink_NHASH; i++)
ae9dfd79 27959+ AuDebugOn(!hlist_bl_empty(sbinfo->si_plink + i));
f0c0a007 27960+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
5afbbe0d
AM
27961+
27962+ AuDebugOn(percpu_counter_sum(&sbinfo->si_ninodes));
27963+ percpu_counter_destroy(&sbinfo->si_ninodes);
27964+ AuDebugOn(percpu_counter_sum(&sbinfo->si_nfiles));
27965+ percpu_counter_destroy(&sbinfo->si_nfiles);
1facf9fc 27966+
e49829fe 27967+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 27968+ au_br_free(sbinfo);
e49829fe 27969+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1 27970+
ae9dfd79 27971+ kfree(sbinfo->si_branch);
1facf9fc 27972+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 27973+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 27974+
ae9dfd79 27975+ kfree(sbinfo);
1facf9fc 27976+}
27977+
27978+int au_si_alloc(struct super_block *sb)
27979+{
86dc4139 27980+ int err, i;
1facf9fc 27981+ struct au_sbinfo *sbinfo;
27982+
27983+ err = -ENOMEM;
4a4d8108 27984+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 27985+ if (unlikely(!sbinfo))
27986+ goto out;
27987+
27988+ /* will be reallocated separately */
27989+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
27990+ if (unlikely(!sbinfo->si_branch))
febd17d6 27991+ goto out_sbinfo;
1facf9fc 27992+
1facf9fc 27993+ err = sysaufs_si_init(sbinfo);
27994+ if (unlikely(err))
27995+ goto out_br;
27996+
27997+ au_nwt_init(&sbinfo->si_nowait);
dece6358 27998+ au_rw_init_wlock(&sbinfo->si_rwsem);
b752ccd1 27999+
5afbbe0d
AM
28000+ percpu_counter_init(&sbinfo->si_ninodes, 0, GFP_NOFS);
28001+ percpu_counter_init(&sbinfo->si_nfiles, 0, GFP_NOFS);
7f207e10 28002+
5afbbe0d 28003+ sbinfo->si_bbot = -1;
392086de 28004+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
1facf9fc 28005+
28006+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
28007+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
28008+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
28009+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 28010+
076b876e
AM
28011+ au_fhsm_init(sbinfo);
28012+
e49829fe 28013+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 28014+
392086de
AM
28015+ sbinfo->si_xino_jiffy = jiffies;
28016+ sbinfo->si_xino_expire
28017+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
1facf9fc 28018+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 28019+ sbinfo->si_xino_brid = -1;
28020+ /* leave si_xib_last_pindex and si_xib_next_bit */
28021+
ae9dfd79 28022+ INIT_HLIST_BL_HEAD(&sbinfo->si_aopen);
b912730e 28023+
e49829fe 28024+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 28025+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
28026+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
28027+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
28028+
86dc4139 28029+ for (i = 0; i < AuPlink_NHASH; i++)
ae9dfd79 28030+ INIT_HLIST_BL_HEAD(sbinfo->si_plink + i);
1facf9fc 28031+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 28032+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 28033+
ae9dfd79 28034+ INIT_HLIST_BL_HEAD(&sbinfo->si_files);
523b37e3 28035+
b95c5147
AM
28036+ /* with getattr by default */
28037+ sbinfo->si_iop_array = aufs_iop;
28038+
1facf9fc 28039+ /* leave other members for sysaufs and si_mnt. */
28040+ sbinfo->si_sb = sb;
28041+ sb->s_fs_info = sbinfo;
b752ccd1 28042+ si_pid_set(sb);
1facf9fc 28043+ return 0; /* success */
28044+
4f0767ce 28045+out_br:
ae9dfd79 28046+ kfree(sbinfo->si_branch);
4f0767ce 28047+out_sbinfo:
ae9dfd79 28048+ kfree(sbinfo);
4f0767ce 28049+out:
1facf9fc 28050+ return err;
28051+}
28052+
e2f27e51 28053+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink)
1facf9fc 28054+{
28055+ int err, sz;
28056+ struct au_branch **brp;
28057+
dece6358
AM
28058+ AuRwMustWriteLock(&sbinfo->si_rwsem);
28059+
1facf9fc 28060+ err = -ENOMEM;
5afbbe0d 28061+ sz = sizeof(*brp) * (sbinfo->si_bbot + 1);
1facf9fc 28062+ if (unlikely(!sz))
28063+ sz = sizeof(*brp);
e2f27e51
AM
28064+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS,
28065+ may_shrink);
1facf9fc 28066+ if (brp) {
28067+ sbinfo->si_branch = brp;
28068+ err = 0;
28069+ }
28070+
28071+ return err;
28072+}
28073+
28074+/* ---------------------------------------------------------------------- */
28075+
28076+unsigned int au_sigen_inc(struct super_block *sb)
28077+{
28078+ unsigned int gen;
5527c038 28079+ struct inode *inode;
1facf9fc 28080+
dece6358
AM
28081+ SiMustWriteLock(sb);
28082+
1facf9fc 28083+ gen = ++au_sbi(sb)->si_generation;
28084+ au_update_digen(sb->s_root);
5527c038
JR
28085+ inode = d_inode(sb->s_root);
28086+ au_update_iigen(inode, /*half*/0);
28087+ inode->i_version++;
1facf9fc 28088+ return gen;
28089+}
28090+
28091+aufs_bindex_t au_new_br_id(struct super_block *sb)
28092+{
28093+ aufs_bindex_t br_id;
28094+ int i;
28095+ struct au_sbinfo *sbinfo;
28096+
dece6358
AM
28097+ SiMustWriteLock(sb);
28098+
1facf9fc 28099+ sbinfo = au_sbi(sb);
28100+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
28101+ br_id = ++sbinfo->si_last_br_id;
7f207e10 28102+ AuDebugOn(br_id < 0);
1facf9fc 28103+ if (br_id && au_br_index(sb, br_id) < 0)
28104+ return br_id;
28105+ }
28106+
28107+ return -1;
28108+}
28109+
28110+/* ---------------------------------------------------------------------- */
28111+
e49829fe
JR
28112+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
28113+int si_read_lock(struct super_block *sb, int flags)
28114+{
28115+ int err;
28116+
28117+ err = 0;
28118+ if (au_ftest_lock(flags, FLUSH))
28119+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28120+
28121+ si_noflush_read_lock(sb);
28122+ err = au_plink_maint(sb, flags);
28123+ if (unlikely(err))
28124+ si_read_unlock(sb);
28125+
28126+ return err;
28127+}
28128+
28129+int si_write_lock(struct super_block *sb, int flags)
28130+{
28131+ int err;
28132+
28133+ if (au_ftest_lock(flags, FLUSH))
28134+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28135+
28136+ si_noflush_write_lock(sb);
28137+ err = au_plink_maint(sb, flags);
28138+ if (unlikely(err))
28139+ si_write_unlock(sb);
28140+
28141+ return err;
28142+}
28143+
1facf9fc 28144+/* dentry and super_block lock. call at entry point */
e49829fe 28145+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 28146+{
e49829fe 28147+ int err;
027c5e7a 28148+ struct super_block *sb;
e49829fe 28149+
027c5e7a
AM
28150+ sb = dentry->d_sb;
28151+ err = si_read_lock(sb, flags);
28152+ if (unlikely(err))
28153+ goto out;
28154+
28155+ if (au_ftest_lock(flags, DW))
28156+ di_write_lock_child(dentry);
28157+ else
28158+ di_read_lock_child(dentry, flags);
28159+
28160+ if (au_ftest_lock(flags, GEN)) {
28161+ err = au_digen_test(dentry, au_sigen(sb));
79b8bda9
AM
28162+ if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
28163+ AuDebugOn(!err && au_dbrange_test(dentry));
28164+ else if (!err)
28165+ err = au_dbrange_test(dentry);
027c5e7a
AM
28166+ if (unlikely(err))
28167+ aufs_read_unlock(dentry, flags);
e49829fe
JR
28168+ }
28169+
027c5e7a 28170+out:
e49829fe 28171+ return err;
1facf9fc 28172+}
28173+
28174+void aufs_read_unlock(struct dentry *dentry, int flags)
28175+{
28176+ if (au_ftest_lock(flags, DW))
28177+ di_write_unlock(dentry);
28178+ else
28179+ di_read_unlock(dentry, flags);
28180+ si_read_unlock(dentry->d_sb);
28181+}
28182+
28183+void aufs_write_lock(struct dentry *dentry)
28184+{
e49829fe 28185+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 28186+ di_write_lock_child(dentry);
28187+}
28188+
28189+void aufs_write_unlock(struct dentry *dentry)
28190+{
28191+ di_write_unlock(dentry);
28192+ si_write_unlock(dentry->d_sb);
28193+}
28194+
e49829fe 28195+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 28196+{
e49829fe 28197+ int err;
027c5e7a
AM
28198+ unsigned int sigen;
28199+ struct super_block *sb;
e49829fe 28200+
027c5e7a
AM
28201+ sb = d1->d_sb;
28202+ err = si_read_lock(sb, flags);
28203+ if (unlikely(err))
28204+ goto out;
28205+
b95c5147 28206+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
027c5e7a
AM
28207+
28208+ if (au_ftest_lock(flags, GEN)) {
28209+ sigen = au_sigen(sb);
28210+ err = au_digen_test(d1, sigen);
28211+ AuDebugOn(!err && au_dbrange_test(d1));
28212+ if (!err) {
28213+ err = au_digen_test(d2, sigen);
28214+ AuDebugOn(!err && au_dbrange_test(d2));
28215+ }
28216+ if (unlikely(err))
28217+ aufs_read_and_write_unlock2(d1, d2);
28218+ }
28219+
28220+out:
e49829fe 28221+ return err;
1facf9fc 28222+}
28223+
28224+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
28225+{
28226+ di_write_unlock2(d1, d2);
28227+ si_read_unlock(d1->d_sb);
28228+}
e8791d4f
AM
28229diff -urNp -x '*.orig' linux-4.9/fs/aufs/super.c linux-4.9/fs/aufs/super.c
28230--- linux-4.9/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
28231+++ linux-4.9/fs/aufs/super.c 2021-02-24 16:15:09.541574180 +0100
28232@@ -0,0 +1,1049 @@
1facf9fc 28233+/*
ae9dfd79 28234+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 28235+ *
28236+ * This program, aufs is free software; you can redistribute it and/or modify
28237+ * it under the terms of the GNU General Public License as published by
28238+ * the Free Software Foundation; either version 2 of the License, or
28239+ * (at your option) any later version.
dece6358
AM
28240+ *
28241+ * This program is distributed in the hope that it will be useful,
28242+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28243+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28244+ * GNU General Public License for more details.
28245+ *
28246+ * You should have received a copy of the GNU General Public License
523b37e3 28247+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28248+ */
28249+
28250+/*
28251+ * mount and super_block operations
28252+ */
28253+
f6c5ef8b 28254+#include <linux/mm.h>
1facf9fc 28255+#include <linux/seq_file.h>
28256+#include <linux/statfs.h>
7f207e10 28257+#include <linux/vmalloc.h>
1facf9fc 28258+#include "aufs.h"
28259+
28260+/*
28261+ * super_operations
28262+ */
28263+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
28264+{
28265+ struct au_icntnr *c;
28266+
28267+ c = au_cache_alloc_icntnr();
28268+ if (c) {
027c5e7a 28269+ au_icntnr_init(c);
1facf9fc 28270+ c->vfs_inode.i_version = 1; /* sigen(sb); */
28271+ c->iinfo.ii_hinode = NULL;
28272+ return &c->vfs_inode;
28273+ }
28274+ return NULL;
28275+}
28276+
027c5e7a
AM
28277+static void aufs_destroy_inode_cb(struct rcu_head *head)
28278+{
28279+ struct inode *inode = container_of(head, struct inode, i_rcu);
28280+
ae9dfd79 28281+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
027c5e7a
AM
28282+}
28283+
1facf9fc 28284+static void aufs_destroy_inode(struct inode *inode)
28285+{
5afbbe0d
AM
28286+ if (!au_is_bad_inode(inode))
28287+ au_iinfo_fin(inode);
027c5e7a 28288+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 28289+}
28290+
28291+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
28292+{
28293+ struct inode *inode;
28294+ int err;
28295+
28296+ inode = iget_locked(sb, ino);
28297+ if (unlikely(!inode)) {
28298+ inode = ERR_PTR(-ENOMEM);
28299+ goto out;
28300+ }
28301+ if (!(inode->i_state & I_NEW))
28302+ goto out;
28303+
28304+ err = au_xigen_new(inode);
28305+ if (!err)
28306+ err = au_iinfo_init(inode);
28307+ if (!err)
28308+ inode->i_version++;
28309+ else {
28310+ iget_failed(inode);
28311+ inode = ERR_PTR(err);
28312+ }
28313+
4f0767ce 28314+out:
1facf9fc 28315+ /* never return NULL */
28316+ AuDebugOn(!inode);
28317+ AuTraceErrPtr(inode);
28318+ return inode;
28319+}
28320+
28321+/* lock free root dinfo */
28322+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
28323+{
28324+ int err;
5afbbe0d 28325+ aufs_bindex_t bindex, bbot;
1facf9fc 28326+ struct path path;
4a4d8108 28327+ struct au_hdentry *hdp;
1facf9fc 28328+ struct au_branch *br;
076b876e 28329+ au_br_perm_str_t perm;
1facf9fc 28330+
28331+ err = 0;
5afbbe0d
AM
28332+ bbot = au_sbbot(sb);
28333+ bindex = 0;
28334+ hdp = au_hdentry(au_di(sb->s_root), bindex);
28335+ for (; !err && bindex <= bbot; bindex++, hdp++) {
1facf9fc 28336+ br = au_sbr(sb, bindex);
86dc4139 28337+ path.mnt = au_br_mnt(br);
5afbbe0d 28338+ path.dentry = hdp->hd_dentry;
1facf9fc 28339+ err = au_seq_path(seq, &path);
79b8bda9 28340+ if (!err) {
076b876e 28341+ au_optstr_br_perm(&perm, br->br_perm);
79b8bda9 28342+ seq_printf(seq, "=%s", perm.a);
5afbbe0d 28343+ if (bindex != bbot)
79b8bda9 28344+ seq_putc(seq, ':');
1e00d052 28345+ }
1facf9fc 28346+ }
79b8bda9
AM
28347+ if (unlikely(err || seq_has_overflowed(seq)))
28348+ err = -E2BIG;
1facf9fc 28349+
28350+ return err;
28351+}
28352+
f2c43d5f
AM
28353+static void au_gen_fmt(char *fmt, int len __maybe_unused, const char *pat,
28354+ const char *append)
28355+{
28356+ char *p;
28357+
28358+ p = fmt;
28359+ while (*pat != ':')
28360+ *p++ = *pat++;
28361+ *p++ = *pat++;
28362+ strcpy(p, append);
28363+ AuDebugOn(strlen(fmt) >= len);
28364+}
28365+
1facf9fc 28366+static void au_show_wbr_create(struct seq_file *m, int v,
28367+ struct au_sbinfo *sbinfo)
28368+{
28369+ const char *pat;
f2c43d5f
AM
28370+ char fmt[32];
28371+ struct au_wbr_mfs *mfs;
1facf9fc 28372+
dece6358
AM
28373+ AuRwMustAnyLock(&sbinfo->si_rwsem);
28374+
c2b27bf2 28375+ seq_puts(m, ",create=");
1facf9fc 28376+ pat = au_optstr_wbr_create(v);
f2c43d5f 28377+ mfs = &sbinfo->si_wbr_mfs;
1facf9fc 28378+ switch (v) {
28379+ case AuWbrCreate_TDP:
28380+ case AuWbrCreate_RR:
28381+ case AuWbrCreate_MFS:
28382+ case AuWbrCreate_PMFS:
c2b27bf2 28383+ seq_puts(m, pat);
1facf9fc 28384+ break;
f2c43d5f
AM
28385+ case AuWbrCreate_MFSRR:
28386+ case AuWbrCreate_TDMFS:
28387+ case AuWbrCreate_PMFSRR:
28388+ au_gen_fmt(fmt, sizeof(fmt), pat, "%llu");
28389+ seq_printf(m, fmt, mfs->mfsrr_watermark);
1facf9fc 28390+ break;
f2c43d5f 28391+ case AuWbrCreate_MFSV:
1facf9fc 28392+ case AuWbrCreate_PMFSV:
f2c43d5f
AM
28393+ au_gen_fmt(fmt, sizeof(fmt), pat, "%lu");
28394+ seq_printf(m, fmt,
28395+ jiffies_to_msecs(mfs->mfs_expire)
e49829fe 28396+ / MSEC_PER_SEC);
1facf9fc 28397+ break;
1facf9fc 28398+ case AuWbrCreate_MFSRRV:
f2c43d5f 28399+ case AuWbrCreate_TDMFSV:
392086de 28400+ case AuWbrCreate_PMFSRRV:
f2c43d5f
AM
28401+ au_gen_fmt(fmt, sizeof(fmt), pat, "%llu:%lu");
28402+ seq_printf(m, fmt, mfs->mfsrr_watermark,
28403+ jiffies_to_msecs(mfs->mfs_expire) / MSEC_PER_SEC);
392086de 28404+ break;
f2c43d5f
AM
28405+ default:
28406+ BUG();
1facf9fc 28407+ }
28408+}
28409+
7eafdf33 28410+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
1facf9fc 28411+{
28412+#ifdef CONFIG_SYSFS
28413+ return 0;
28414+#else
28415+ int err;
28416+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
28417+ aufs_bindex_t bindex, brid;
1facf9fc 28418+ struct qstr *name;
28419+ struct file *f;
28420+ struct dentry *d, *h_root;
28421+
dece6358
AM
28422+ AuRwMustAnyLock(&sbinfo->si_rwsem);
28423+
1facf9fc 28424+ err = 0;
1facf9fc 28425+ f = au_sbi(sb)->si_xib;
28426+ if (!f)
28427+ goto out;
28428+
28429+ /* stop printing the default xino path on the first writable branch */
28430+ h_root = NULL;
28431+ brid = au_xino_brid(sb);
28432+ if (brid >= 0) {
28433+ bindex = au_br_index(sb, brid);
5afbbe0d 28434+ h_root = au_hdentry(au_di(sb->s_root), bindex)->hd_dentry;
1facf9fc 28435+ }
2000de60 28436+ d = f->f_path.dentry;
1facf9fc 28437+ name = &d->d_name;
28438+ /* safe ->d_parent because the file is unlinked */
28439+ if (d->d_parent == h_root
28440+ && name->len == len
28441+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
28442+ goto out;
28443+
28444+ seq_puts(seq, ",xino=");
28445+ err = au_xino_path(seq, f);
28446+
4f0767ce 28447+out:
1facf9fc 28448+ return err;
28449+#endif
28450+}
28451+
28452+/* seq_file will call me again if the string is too long */
7eafdf33 28453+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
1facf9fc 28454+{
027c5e7a 28455+ int err;
1facf9fc 28456+ unsigned int mnt_flags, v;
28457+ struct super_block *sb;
28458+ struct au_sbinfo *sbinfo;
28459+
28460+#define AuBool(name, str) do { \
28461+ v = au_opt_test(mnt_flags, name); \
28462+ if (v != au_opt_test(AuOpt_Def, name)) \
28463+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
28464+} while (0)
28465+
28466+#define AuStr(name, str) do { \
28467+ v = mnt_flags & AuOptMask_##name; \
28468+ if (v != (AuOpt_Def & AuOptMask_##name)) \
28469+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
28470+} while (0)
28471+
28472+#define AuUInt(name, str, val) do { \
28473+ if (val != AUFS_##name##_DEF) \
28474+ seq_printf(m, "," #str "=%u", val); \
28475+} while (0)
28476+
7eafdf33 28477+ sb = dentry->d_sb;
c1595e42
JR
28478+ if (sb->s_flags & MS_POSIXACL)
28479+ seq_puts(m, ",acl");
28480+
28481+ /* lock free root dinfo */
1facf9fc 28482+ si_noflush_read_lock(sb);
28483+ sbinfo = au_sbi(sb);
28484+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
28485+
28486+ mnt_flags = au_mntflags(sb);
28487+ if (au_opt_test(mnt_flags, XINO)) {
7eafdf33 28488+ err = au_show_xino(m, sb);
1facf9fc 28489+ if (unlikely(err))
28490+ goto out;
28491+ } else
28492+ seq_puts(m, ",noxino");
28493+
28494+ AuBool(TRUNC_XINO, trunc_xino);
28495+ AuStr(UDBA, udba);
dece6358 28496+ AuBool(SHWH, shwh);
1facf9fc 28497+ AuBool(PLINK, plink);
4a4d8108 28498+ AuBool(DIO, dio);
076b876e 28499+ AuBool(DIRPERM1, dirperm1);
1facf9fc 28500+
28501+ v = sbinfo->si_wbr_create;
28502+ if (v != AuWbrCreate_Def)
28503+ au_show_wbr_create(m, v, sbinfo);
28504+
28505+ v = sbinfo->si_wbr_copyup;
28506+ if (v != AuWbrCopyup_Def)
28507+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
28508+
28509+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
28510+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
28511+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
28512+
28513+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
28514+
027c5e7a
AM
28515+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
28516+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 28517+
28518+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
28519+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
28520+
076b876e
AM
28521+ au_fhsm_show(m, sbinfo);
28522+
ae9dfd79 28523+ AuBool(DIRREN, dirren);
1facf9fc 28524+ AuBool(SUM, sum);
28525+ /* AuBool(SUM_W, wsum); */
28526+ AuBool(WARN_PERM, warn_perm);
28527+ AuBool(VERBOSE, verbose);
28528+
4f0767ce 28529+out:
1facf9fc 28530+ /* be sure to print "br:" last */
28531+ if (!sysaufs_brs) {
28532+ seq_puts(m, ",br:");
28533+ au_show_brs(m, sb);
28534+ }
28535+ si_read_unlock(sb);
28536+ return 0;
28537+
1facf9fc 28538+#undef AuBool
28539+#undef AuStr
4a4d8108 28540+#undef AuUInt
1facf9fc 28541+}
28542+
28543+/* ---------------------------------------------------------------------- */
28544+
28545+/* sum mode which returns the summation for statfs(2) */
28546+
28547+static u64 au_add_till_max(u64 a, u64 b)
28548+{
28549+ u64 old;
28550+
28551+ old = a;
28552+ a += b;
92d182d2
AM
28553+ if (old <= a)
28554+ return a;
28555+ return ULLONG_MAX;
28556+}
28557+
28558+static u64 au_mul_till_max(u64 a, long mul)
28559+{
28560+ u64 old;
28561+
28562+ old = a;
28563+ a *= mul;
28564+ if (old <= a)
1facf9fc 28565+ return a;
28566+ return ULLONG_MAX;
28567+}
28568+
28569+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
28570+{
28571+ int err;
92d182d2 28572+ long bsize, factor;
1facf9fc 28573+ u64 blocks, bfree, bavail, files, ffree;
5afbbe0d 28574+ aufs_bindex_t bbot, bindex, i;
1facf9fc 28575+ unsigned char shared;
7f207e10 28576+ struct path h_path;
1facf9fc 28577+ struct super_block *h_sb;
28578+
92d182d2
AM
28579+ err = 0;
28580+ bsize = LONG_MAX;
28581+ files = 0;
28582+ ffree = 0;
1facf9fc 28583+ blocks = 0;
28584+ bfree = 0;
28585+ bavail = 0;
5afbbe0d
AM
28586+ bbot = au_sbbot(sb);
28587+ for (bindex = 0; bindex <= bbot; bindex++) {
7f207e10
AM
28588+ h_path.mnt = au_sbr_mnt(sb, bindex);
28589+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 28590+ shared = 0;
92d182d2 28591+ for (i = 0; !shared && i < bindex; i++)
1facf9fc 28592+ shared = (au_sbr_sb(sb, i) == h_sb);
28593+ if (shared)
28594+ continue;
28595+
28596+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
28597+ h_path.dentry = h_path.mnt->mnt_root;
28598+ err = vfs_statfs(&h_path, buf);
1facf9fc 28599+ if (unlikely(err))
28600+ goto out;
28601+
92d182d2
AM
28602+ if (bsize > buf->f_bsize) {
28603+ /*
28604+ * we will reduce bsize, so we have to expand blocks
28605+ * etc. to match them again
28606+ */
28607+ factor = (bsize / buf->f_bsize);
28608+ blocks = au_mul_till_max(blocks, factor);
28609+ bfree = au_mul_till_max(bfree, factor);
28610+ bavail = au_mul_till_max(bavail, factor);
28611+ bsize = buf->f_bsize;
28612+ }
28613+
28614+ factor = (buf->f_bsize / bsize);
28615+ blocks = au_add_till_max(blocks,
28616+ au_mul_till_max(buf->f_blocks, factor));
28617+ bfree = au_add_till_max(bfree,
28618+ au_mul_till_max(buf->f_bfree, factor));
28619+ bavail = au_add_till_max(bavail,
28620+ au_mul_till_max(buf->f_bavail, factor));
1facf9fc 28621+ files = au_add_till_max(files, buf->f_files);
28622+ ffree = au_add_till_max(ffree, buf->f_ffree);
28623+ }
28624+
92d182d2 28625+ buf->f_bsize = bsize;
1facf9fc 28626+ buf->f_blocks = blocks;
28627+ buf->f_bfree = bfree;
28628+ buf->f_bavail = bavail;
28629+ buf->f_files = files;
28630+ buf->f_ffree = ffree;
92d182d2 28631+ buf->f_frsize = 0;
1facf9fc 28632+
4f0767ce 28633+out:
1facf9fc 28634+ return err;
28635+}
28636+
28637+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
28638+{
28639+ int err;
7f207e10 28640+ struct path h_path;
1facf9fc 28641+ struct super_block *sb;
28642+
28643+ /* lock free root dinfo */
28644+ sb = dentry->d_sb;
28645+ si_noflush_read_lock(sb);
7f207e10 28646+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 28647+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
28648+ h_path.mnt = au_sbr_mnt(sb, 0);
28649+ h_path.dentry = h_path.mnt->mnt_root;
28650+ err = vfs_statfs(&h_path, buf);
28651+ } else
1facf9fc 28652+ err = au_statfs_sum(sb, buf);
28653+ si_read_unlock(sb);
28654+
28655+ if (!err) {
28656+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 28657+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 28658+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
28659+ }
28660+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
28661+
28662+ return err;
28663+}
28664+
28665+/* ---------------------------------------------------------------------- */
28666+
537831f9
AM
28667+static int aufs_sync_fs(struct super_block *sb, int wait)
28668+{
28669+ int err, e;
5afbbe0d 28670+ aufs_bindex_t bbot, bindex;
537831f9
AM
28671+ struct au_branch *br;
28672+ struct super_block *h_sb;
28673+
28674+ err = 0;
28675+ si_noflush_read_lock(sb);
5afbbe0d
AM
28676+ bbot = au_sbbot(sb);
28677+ for (bindex = 0; bindex <= bbot; bindex++) {
537831f9
AM
28678+ br = au_sbr(sb, bindex);
28679+ if (!au_br_writable(br->br_perm))
28680+ continue;
28681+
28682+ h_sb = au_sbr_sb(sb, bindex);
ae9dfd79
AM
28683+ e = vfsub_sync_filesystem(h_sb, wait);
28684+ if (unlikely(e && !err))
28685+ err = e;
28686+ /* go on even if an error happens */
537831f9
AM
28687+ }
28688+ si_read_unlock(sb);
28689+
28690+ return err;
28691+}
28692+
28693+/* ---------------------------------------------------------------------- */
28694+
1facf9fc 28695+/* final actions when unmounting a file system */
28696+static void aufs_put_super(struct super_block *sb)
28697+{
28698+ struct au_sbinfo *sbinfo;
28699+
28700+ sbinfo = au_sbi(sb);
28701+ if (!sbinfo)
28702+ return;
28703+
1facf9fc 28704+ dbgaufs_si_fin(sbinfo);
28705+ kobject_put(&sbinfo->si_kobj);
28706+}
28707+
28708+/* ---------------------------------------------------------------------- */
28709+
79b8bda9
AM
28710+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
28711+ struct super_block *sb, void *arg)
7f207e10
AM
28712+{
28713+ void *array;
076b876e 28714+ unsigned long long n, sz;
7f207e10
AM
28715+
28716+ array = NULL;
28717+ n = 0;
28718+ if (!*hint)
28719+ goto out;
28720+
28721+ if (*hint > ULLONG_MAX / sizeof(array)) {
28722+ array = ERR_PTR(-EMFILE);
28723+ pr_err("hint %llu\n", *hint);
28724+ goto out;
28725+ }
28726+
076b876e
AM
28727+ sz = sizeof(array) * *hint;
28728+ array = kzalloc(sz, GFP_NOFS);
7f207e10 28729+ if (unlikely(!array))
076b876e 28730+ array = vzalloc(sz);
7f207e10
AM
28731+ if (unlikely(!array)) {
28732+ array = ERR_PTR(-ENOMEM);
28733+ goto out;
28734+ }
28735+
79b8bda9 28736+ n = cb(sb, array, *hint, arg);
7f207e10
AM
28737+ AuDebugOn(n > *hint);
28738+
28739+out:
28740+ *hint = n;
28741+ return array;
28742+}
28743+
79b8bda9 28744+static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
7f207e10
AM
28745+ unsigned long long max __maybe_unused,
28746+ void *arg)
28747+{
28748+ unsigned long long n;
28749+ struct inode **p, *inode;
28750+ struct list_head *head;
28751+
28752+ n = 0;
28753+ p = a;
28754+ head = arg;
79b8bda9 28755+ spin_lock(&sb->s_inode_list_lock);
7f207e10 28756+ list_for_each_entry(inode, head, i_sb_list) {
5afbbe0d
AM
28757+ if (!au_is_bad_inode(inode)
28758+ && au_ii(inode)->ii_btop >= 0) {
2cbb1c4b
JR
28759+ spin_lock(&inode->i_lock);
28760+ if (atomic_read(&inode->i_count)) {
28761+ au_igrab(inode);
28762+ *p++ = inode;
28763+ n++;
28764+ AuDebugOn(n > max);
28765+ }
28766+ spin_unlock(&inode->i_lock);
7f207e10
AM
28767+ }
28768+ }
79b8bda9 28769+ spin_unlock(&sb->s_inode_list_lock);
7f207e10
AM
28770+
28771+ return n;
28772+}
28773+
28774+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
28775+{
5afbbe0d 28776+ *max = au_ninodes(sb);
79b8bda9 28777+ return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
7f207e10
AM
28778+}
28779+
/*
 * Release the first @max inodes of @a with iput() and free the array
 * itself with kvfree().
 */
void au_iarray_free(struct inode **a, unsigned long long max)
{
	unsigned long long idx = 0;

	while (idx < max)
		iput(a[idx++]);
	kvfree(a);
}
28788+
28789+/* ---------------------------------------------------------------------- */
28790+
1facf9fc 28791+/*
28792+ * refresh dentry and inode at remount time.
28793+ */
027c5e7a
AM
28794+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
28795+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
28796+ struct dentry *parent)
1facf9fc 28797+{
28798+ int err;
1facf9fc 28799+
28800+ di_write_lock_child(dentry);
1facf9fc 28801+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
28802+ err = au_refresh_dentry(dentry, parent);
28803+ if (!err && dir_flags)
5527c038 28804+ au_hn_reset(d_inode(dentry), dir_flags);
1facf9fc 28805+ di_read_unlock(parent, AuLock_IR);
1facf9fc 28806+ di_write_unlock(dentry);
28807+
28808+ return err;
28809+}
28810+
027c5e7a
AM
28811+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
28812+ struct au_sbinfo *sbinfo,
b95c5147 28813+ const unsigned int dir_flags, unsigned int do_idop)
1facf9fc 28814+{
027c5e7a
AM
28815+ int err;
28816+ struct dentry *parent;
027c5e7a
AM
28817+
28818+ err = 0;
28819+ parent = dget_parent(dentry);
28820+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
5527c038
JR
28821+ if (d_really_is_positive(dentry)) {
28822+ if (!d_is_dir(dentry))
027c5e7a
AM
28823+ err = au_do_refresh(dentry, /*dir_flags*/0,
28824+ parent);
28825+ else {
28826+ err = au_do_refresh(dentry, dir_flags, parent);
28827+ if (unlikely(err))
28828+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
28829+ }
28830+ } else
28831+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
28832+ AuDbgDentry(dentry);
28833+ }
28834+ dput(parent);
28835+
79b8bda9 28836+ if (!err) {
b95c5147 28837+ if (do_idop)
79b8bda9
AM
28838+ au_refresh_dop(dentry, /*force_reval*/0);
28839+ } else
28840+ au_refresh_dop(dentry, /*force_reval*/1);
28841+
027c5e7a
AM
28842+ AuTraceErr(err);
28843+ return err;
1facf9fc 28844+}
28845+
b95c5147 28846+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
1facf9fc 28847+{
28848+ int err, i, j, ndentry, e;
027c5e7a 28849+ unsigned int sigen;
1facf9fc 28850+ struct au_dcsub_pages dpages;
28851+ struct au_dpage *dpage;
027c5e7a
AM
28852+ struct dentry **dentries, *d;
28853+ struct au_sbinfo *sbinfo;
28854+ struct dentry *root = sb->s_root;
5527c038 28855+ const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
1facf9fc 28856+
b95c5147 28857+ if (do_idop)
79b8bda9
AM
28858+ au_refresh_dop(root, /*force_reval*/0);
28859+
027c5e7a
AM
28860+ err = au_dpages_init(&dpages, GFP_NOFS);
28861+ if (unlikely(err))
1facf9fc 28862+ goto out;
027c5e7a
AM
28863+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
28864+ if (unlikely(err))
1facf9fc 28865+ goto out_dpages;
1facf9fc 28866+
027c5e7a
AM
28867+ sigen = au_sigen(sb);
28868+ sbinfo = au_sbi(sb);
28869+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 28870+ dpage = dpages.dpages + i;
28871+ dentries = dpage->dentries;
28872+ ndentry = dpage->ndentry;
027c5e7a 28873+ for (j = 0; j < ndentry; j++) {
1facf9fc 28874+ d = dentries[j];
79b8bda9 28875+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
b95c5147 28876+ do_idop);
027c5e7a
AM
28877+ if (unlikely(e && !err))
28878+ err = e;
28879+ /* go on even err */
1facf9fc 28880+ }
28881+ }
28882+
4f0767ce 28883+out_dpages:
1facf9fc 28884+ au_dpages_free(&dpages);
4f0767ce 28885+out:
1facf9fc 28886+ return err;
28887+}
28888+
b95c5147 28889+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
1facf9fc 28890+{
027c5e7a
AM
28891+ int err, e;
28892+ unsigned int sigen;
28893+ unsigned long long max, ull;
28894+ struct inode *inode, **array;
1facf9fc 28895+
027c5e7a
AM
28896+ array = au_iarray_alloc(sb, &max);
28897+ err = PTR_ERR(array);
28898+ if (IS_ERR(array))
28899+ goto out;
1facf9fc 28900+
28901+ err = 0;
027c5e7a
AM
28902+ sigen = au_sigen(sb);
28903+ for (ull = 0; ull < max; ull++) {
28904+ inode = array[ull];
076b876e
AM
28905+ if (unlikely(!inode))
28906+ break;
b95c5147
AM
28907+
28908+ e = 0;
28909+ ii_write_lock_child(inode);
537831f9 28910+ if (au_iigen(inode, NULL) != sigen) {
027c5e7a 28911+ e = au_refresh_hinode_self(inode);
1facf9fc 28912+ if (unlikely(e)) {
b95c5147 28913+ au_refresh_iop(inode, /*force_getattr*/1);
027c5e7a 28914+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 28915+ if (!err)
28916+ err = e;
28917+ /* go on even if err */
28918+ }
28919+ }
b95c5147
AM
28920+ if (!e && do_idop)
28921+ au_refresh_iop(inode, /*force_getattr*/0);
28922+ ii_write_unlock(inode);
1facf9fc 28923+ }
28924+
027c5e7a 28925+ au_iarray_free(array, max);
1facf9fc 28926+
4f0767ce 28927+out:
1facf9fc 28928+ return err;
28929+}
28930+
b95c5147 28931+static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
1facf9fc 28932+{
027c5e7a
AM
28933+ int err, e;
28934+ unsigned int udba;
5afbbe0d 28935+ aufs_bindex_t bindex, bbot;
1facf9fc 28936+ struct dentry *root;
28937+ struct inode *inode;
027c5e7a 28938+ struct au_branch *br;
79b8bda9 28939+ struct au_sbinfo *sbi;
1facf9fc 28940+
28941+ au_sigen_inc(sb);
79b8bda9
AM
28942+ sbi = au_sbi(sb);
28943+ au_fclr_si(sbi, FAILED_REFRESH_DIR);
1facf9fc 28944+
28945+ root = sb->s_root;
28946+ DiMustNoWaiters(root);
5527c038 28947+ inode = d_inode(root);
1facf9fc 28948+ IiMustNoWaiters(inode);
1facf9fc 28949+
027c5e7a 28950+ udba = au_opt_udba(sb);
5afbbe0d
AM
28951+ bbot = au_sbbot(sb);
28952+ for (bindex = 0; bindex <= bbot; bindex++) {
027c5e7a
AM
28953+ br = au_sbr(sb, bindex);
28954+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 28955+ if (unlikely(err))
027c5e7a
AM
28956+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
28957+ bindex, err);
28958+ /* go on even if err */
1facf9fc 28959+ }
027c5e7a 28960+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 28961+
b95c5147 28962+ if (do_idop) {
79b8bda9
AM
28963+ if (au_ftest_si(sbi, NO_DREVAL)) {
28964+ AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
28965+ sb->s_d_op = &aufs_dop_noreval;
b95c5147
AM
28966+ AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
28967+ sbi->si_iop_array = aufs_iop_nogetattr;
79b8bda9
AM
28968+ } else {
28969+ AuDebugOn(sb->s_d_op == &aufs_dop);
28970+ sb->s_d_op = &aufs_dop;
b95c5147
AM
28971+ AuDebugOn(sbi->si_iop_array == aufs_iop);
28972+ sbi->si_iop_array = aufs_iop;
79b8bda9 28973+ }
b95c5147
AM
28974+ pr_info("reset to %pf and %pf\n",
28975+ sb->s_d_op, sbi->si_iop_array);
79b8bda9
AM
28976+ }
28977+
027c5e7a 28978+ di_write_unlock(root);
b95c5147
AM
28979+ err = au_refresh_d(sb, do_idop);
28980+ e = au_refresh_i(sb, do_idop);
027c5e7a
AM
28981+ if (unlikely(e && !err))
28982+ err = e;
1facf9fc 28983+ /* aufs_write_lock() calls ..._child() */
28984+ di_write_lock_child(root);
027c5e7a
AM
28985+
28986+ au_cpup_attr_all(inode, /*force*/1);
28987+
28988+ if (unlikely(err))
28989+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 28990+}
28991+
/*
 * stop extra interpretation of errno in mount(8), and strange error messages:
 * -ENOENT, -ENOTDIR, -EEXIST and -EIO are all reported as -EINVAL, any other
 * value is passed through unchanged.
 */
static int cvt_err(int err)
{
	AuTraceErr(err);

	if (err == -ENOENT || err == -ENOTDIR
	    || err == -EEXIST || err == -EIO)
		return -EINVAL;
	return err;
}
29006+
29007+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
29008+{
4a4d8108
AM
29009+ int err, do_dx;
29010+ unsigned int mntflags;
be52b249
AM
29011+ struct au_opts opts = {
29012+ .opt = NULL
29013+ };
1facf9fc 29014+ struct dentry *root;
29015+ struct inode *inode;
29016+ struct au_sbinfo *sbinfo;
29017+
29018+ err = 0;
29019+ root = sb->s_root;
29020+ if (!data || !*data) {
e49829fe
JR
29021+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
29022+ if (!err) {
29023+ di_write_lock_child(root);
29024+ err = au_opts_verify(sb, *flags, /*pending*/0);
29025+ aufs_write_unlock(root);
29026+ }
1facf9fc 29027+ goto out;
29028+ }
29029+
29030+ err = -ENOMEM;
1facf9fc 29031+ opts.opt = (void *)__get_free_page(GFP_NOFS);
29032+ if (unlikely(!opts.opt))
29033+ goto out;
29034+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
29035+ opts.flags = AuOpts_REMOUNT;
29036+ opts.sb_flags = *flags;
29037+
29038+ /* parse it before aufs lock */
29039+ err = au_opts_parse(sb, data, &opts);
29040+ if (unlikely(err))
29041+ goto out_opts;
29042+
29043+ sbinfo = au_sbi(sb);
5527c038 29044+ inode = d_inode(root);
febd17d6 29045+ inode_lock(inode);
e49829fe
JR
29046+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
29047+ if (unlikely(err))
29048+ goto out_mtx;
29049+ di_write_lock_child(root);
1facf9fc 29050+
29051+ /* au_opts_remount() may return an error */
29052+ err = au_opts_remount(sb, &opts);
29053+ au_opts_free(&opts);
29054+
027c5e7a 29055+ if (au_ftest_opts(opts.flags, REFRESH))
b95c5147 29056+ au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
1facf9fc 29057+
4a4d8108
AM
29058+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
29059+ mntflags = au_mntflags(sb);
29060+ do_dx = !!au_opt_test(mntflags, DIO);
29061+ au_dy_arefresh(do_dx);
29062+ }
29063+
076b876e 29064+ au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
1facf9fc 29065+ aufs_write_unlock(root);
953406b4 29066+
e49829fe 29067+out_mtx:
febd17d6 29068+ inode_unlock(inode);
4f0767ce 29069+out_opts:
ae9dfd79 29070+ free_page((unsigned long)opts.opt);
4f0767ce 29071+out:
1facf9fc 29072+ err = cvt_err(err);
29073+ AuTraceErr(err);
29074+ return err;
29075+}
29076+
4a4d8108 29077+static const struct super_operations aufs_sop = {
1facf9fc 29078+ .alloc_inode = aufs_alloc_inode,
29079+ .destroy_inode = aufs_destroy_inode,
b752ccd1 29080+ /* always deleting, no clearing */
1facf9fc 29081+ .drop_inode = generic_delete_inode,
29082+ .show_options = aufs_show_options,
29083+ .statfs = aufs_statfs,
29084+ .put_super = aufs_put_super,
537831f9 29085+ .sync_fs = aufs_sync_fs,
e8791d4f
AM
29086+ .remount_fs = aufs_remount_fs,
29087+#ifdef CONFIG_AUFS_BDEV_LOOP
29088+ .real_loop = aufs_real_loop
29089+#endif
1facf9fc 29090+};
29091+
29092+/* ---------------------------------------------------------------------- */
29093+
29094+static int alloc_root(struct super_block *sb)
29095+{
29096+ int err;
29097+ struct inode *inode;
29098+ struct dentry *root;
29099+
29100+ err = -ENOMEM;
29101+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
29102+ err = PTR_ERR(inode);
29103+ if (IS_ERR(inode))
29104+ goto out;
29105+
b95c5147 29106+ inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
1facf9fc 29107+ inode->i_fop = &aufs_dir_fop;
29108+ inode->i_mode = S_IFDIR;
9dbd164d 29109+ set_nlink(inode, 2);
1facf9fc 29110+ unlock_new_inode(inode);
29111+
92d182d2 29112+ root = d_make_root(inode);
1facf9fc 29113+ if (unlikely(!root))
92d182d2 29114+ goto out;
1facf9fc 29115+ err = PTR_ERR(root);
29116+ if (IS_ERR(root))
92d182d2 29117+ goto out;
1facf9fc 29118+
4a4d8108 29119+ err = au_di_init(root);
1facf9fc 29120+ if (!err) {
29121+ sb->s_root = root;
29122+ return 0; /* success */
29123+ }
29124+ dput(root);
1facf9fc 29125+
4f0767ce 29126+out:
1facf9fc 29127+ return err;
1facf9fc 29128+}
29129+
29130+static int aufs_fill_super(struct super_block *sb, void *raw_data,
29131+ int silent __maybe_unused)
29132+{
29133+ int err;
be52b249
AM
29134+ struct au_opts opts = {
29135+ .opt = NULL
29136+ };
79b8bda9 29137+ struct au_sbinfo *sbinfo;
1facf9fc 29138+ struct dentry *root;
29139+ struct inode *inode;
29140+ char *arg = raw_data;
29141+
29142+ if (unlikely(!arg || !*arg)) {
29143+ err = -EINVAL;
4a4d8108 29144+ pr_err("no arg\n");
1facf9fc 29145+ goto out;
29146+ }
29147+
29148+ err = -ENOMEM;
1facf9fc 29149+ opts.opt = (void *)__get_free_page(GFP_NOFS);
29150+ if (unlikely(!opts.opt))
29151+ goto out;
29152+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
29153+ opts.sb_flags = sb->s_flags;
29154+
29155+ err = au_si_alloc(sb);
29156+ if (unlikely(err))
29157+ goto out_opts;
79b8bda9 29158+ sbinfo = au_sbi(sb);
1facf9fc 29159+
29160+ /* all timestamps always follow the ones on the branch */
29161+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
29162+ sb->s_op = &aufs_sop;
027c5e7a 29163+ sb->s_d_op = &aufs_dop;
1facf9fc 29164+ sb->s_magic = AUFS_SUPER_MAGIC;
29165+ sb->s_maxbytes = 0;
c1595e42 29166+ sb->s_stack_depth = 1;
1facf9fc 29167+ au_export_init(sb);
f2c43d5f 29168+ au_xattr_init(sb);
1facf9fc 29169+
29170+ err = alloc_root(sb);
29171+ if (unlikely(err)) {
29172+ si_write_unlock(sb);
29173+ goto out_info;
29174+ }
29175+ root = sb->s_root;
5527c038 29176+ inode = d_inode(root);
1facf9fc 29177+
29178+ /*
29179+ * actually we can parse options regardless aufs lock here.
29180+ * but at remount time, parsing must be done before aufs lock.
29181+ * so we follow the same rule.
29182+ */
29183+ ii_write_lock_parent(inode);
29184+ aufs_write_unlock(root);
29185+ err = au_opts_parse(sb, arg, &opts);
29186+ if (unlikely(err))
29187+ goto out_root;
29188+
29189+ /* lock vfs_inode first, then aufs. */
febd17d6 29190+ inode_lock(inode);
1facf9fc 29191+ aufs_write_lock(root);
29192+ err = au_opts_mount(sb, &opts);
29193+ au_opts_free(&opts);
79b8bda9
AM
29194+ if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
29195+ sb->s_d_op = &aufs_dop_noreval;
29196+ pr_info("%pf\n", sb->s_d_op);
29197+ au_refresh_dop(root, /*force_reval*/0);
b95c5147
AM
29198+ sbinfo->si_iop_array = aufs_iop_nogetattr;
29199+ au_refresh_iop(inode, /*force_getattr*/0);
79b8bda9 29200+ }
1facf9fc 29201+ aufs_write_unlock(root);
febd17d6 29202+ inode_unlock(inode);
4a4d8108
AM
29203+ if (!err)
29204+ goto out_opts; /* success */
1facf9fc 29205+
4f0767ce 29206+out_root:
1facf9fc 29207+ dput(root);
29208+ sb->s_root = NULL;
4f0767ce 29209+out_info:
79b8bda9
AM
29210+ dbgaufs_si_fin(sbinfo);
29211+ kobject_put(&sbinfo->si_kobj);
1facf9fc 29212+ sb->s_fs_info = NULL;
4f0767ce 29213+out_opts:
ae9dfd79 29214+ free_page((unsigned long)opts.opt);
4f0767ce 29215+out:
1facf9fc 29216+ AuTraceErr(err);
29217+ err = cvt_err(err);
29218+ AuTraceErr(err);
29219+ return err;
29220+}
29221+
29222+/* ---------------------------------------------------------------------- */
29223+
027c5e7a
AM
29224+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
29225+ const char *dev_name __maybe_unused,
29226+ void *raw_data)
1facf9fc 29227+{
027c5e7a 29228+ struct dentry *root;
1facf9fc 29229+ struct super_block *sb;
29230+
29231+ /* all timestamps always follow the ones on the branch */
29232+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
29233+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
29234+ if (IS_ERR(root))
29235+ goto out;
29236+
29237+ sb = root->d_sb;
29238+ si_write_lock(sb, !AuLock_FLUSH);
29239+ sysaufs_brs_add(sb, 0);
29240+ si_write_unlock(sb);
29241+ au_sbilist_add(sb);
29242+
29243+out:
29244+ return root;
1facf9fc 29245+}
29246+
e49829fe
JR
29247+static void aufs_kill_sb(struct super_block *sb)
29248+{
29249+ struct au_sbinfo *sbinfo;
29250+
29251+ sbinfo = au_sbi(sb);
29252+ if (sbinfo) {
29253+ au_sbilist_del(sb);
29254+ aufs_write_lock(sb->s_root);
076b876e 29255+ au_fhsm_fin(sb);
e49829fe
JR
29256+ if (sbinfo->si_wbr_create_ops->fin)
29257+ sbinfo->si_wbr_create_ops->fin(sb);
29258+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
29259+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
b95c5147 29260+ au_remount_refresh(sb, /*do_idop*/0);
e49829fe
JR
29261+ }
29262+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
29263+ au_plink_put(sb, /*verbose*/1);
29264+ au_xino_clr(sb);
ae9dfd79 29265+ au_dr_opt_flush(sb);
1e00d052 29266+ sbinfo->si_sb = NULL;
e49829fe 29267+ aufs_write_unlock(sb->s_root);
e49829fe
JR
29268+ au_nwt_flush(&sbinfo->si_nowait);
29269+ }
98d9a5b1 29270+ kill_anon_super(sb);
e49829fe
JR
29271+}
29272+
1facf9fc 29273+struct file_system_type aufs_fs_type = {
29274+ .name = AUFS_FSTYPE,
c06a8ce3
AM
29275+ /* a race between rename and others */
29276+ .fs_flags = FS_RENAME_DOES_D_MOVE,
027c5e7a 29277+ .mount = aufs_mount,
e49829fe 29278+ .kill_sb = aufs_kill_sb,
1facf9fc 29279+ /* no need to __module_get() and module_put(). */
29280+ .owner = THIS_MODULE,
29281+};
e8791d4f
AM
29282diff -urNp -x '*.orig' linux-4.9/fs/aufs/super.h linux-4.9/fs/aufs/super.h
29283--- linux-4.9/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
29284+++ linux-4.9/fs/aufs/super.h 2021-02-24 16:15:09.534907296 +0100
ae9dfd79 29285@@ -0,0 +1,617 @@
1facf9fc 29286+/*
ae9dfd79 29287+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 29288+ *
29289+ * This program, aufs is free software; you can redistribute it and/or modify
29290+ * it under the terms of the GNU General Public License as published by
29291+ * the Free Software Foundation; either version 2 of the License, or
29292+ * (at your option) any later version.
dece6358
AM
29293+ *
29294+ * This program is distributed in the hope that it will be useful,
29295+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29296+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29297+ * GNU General Public License for more details.
29298+ *
29299+ * You should have received a copy of the GNU General Public License
523b37e3 29300+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29301+ */
29302+
29303+/*
29304+ * super_block operations
29305+ */
29306+
29307+#ifndef __AUFS_SUPER_H__
29308+#define __AUFS_SUPER_H__
29309+
29310+#ifdef __KERNEL__
29311+
29312+#include <linux/fs.h>
5527c038 29313+#include <linux/kobject.h>
ae9dfd79 29314+#include "hbl.h"
1facf9fc 29315+#include "rwsem.h"
1facf9fc 29316+#include "wkq.h"
29317+
/*
 * policies to select one among multiple writable branches:
 * the copy-up side, a single callback taking the target dentry.
 */
struct au_wbr_copyup_operations {
	int	(*copyup)(struct dentry *dentry);
};
29322+
392086de
AM
/* flags handed to the ->create() callback of a wbr create policy */
#define AuWbr_DIR	1		/* target is a dir */
#define AuWbr_PARENT	(1 << 1)	/* always require a parent */

/*
 * test/set/clear one AuWbr_ flag.
 * set and clear are wrapped in do { } while (0) so that they behave as a
 * single statement (e.g. in an unbraced if/else), like the other flag
 * helpers in this header.
 */
#define au_ftest_wbr(flags, name)	((flags) & AuWbr_##name)
#define au_fset_wbr(flags, name) \
	do { (flags) |= AuWbr_##name; } while (0)
#define au_fclr_wbr(flags, name) \
	do { (flags) &= ~AuWbr_##name; } while (0)
29329+
/*
 * the create side of the writable-branch policy.
 * ->create() receives the AuWbr_ flags; ->fin() may be left NULL
 * (aufs_kill_sb() tests it before calling).
 */
struct au_wbr_create_operations {
	int	(*create)(struct dentry *dentry, unsigned int flags);
	int	(*init)(struct super_block *sb);
	int	(*fin)(struct super_block *sb);
};
29335+
29336+struct au_wbr_mfs {
29337+ struct mutex mfs_lock; /* protect this structure */
29338+ unsigned long mfs_jiffy;
29339+ unsigned long mfs_expire;
29340+ aufs_bindex_t mfs_bindex;
29341+
29342+ unsigned long long mfsrr_bytes;
29343+ unsigned long long mfsrr_watermark;
29344+};
29345+
86dc4139
AM
/* number of buckets of the pseudo-link hash table (si_plink[]) */
#define AuPlink_NHASH 100

/* map an inode number to its bucket index, 0 .. AuPlink_NHASH - 1 */
static inline int au_plink_hash(ino_t ino)
{
	int bucket = ino % AuPlink_NHASH;

	return bucket;
}
29351+
076b876e
AM
/*
 * File-based Hierarchical Storage Management.
 * The structure is empty unless CONFIG_AUFS_FHSM is enabled.
 */
struct au_fhsm {
#ifdef CONFIG_AUFS_FHSM
	/* allow only one process who can receive the notification */
	spinlock_t		fhsm_spin;
	pid_t			fhsm_pid;
	wait_queue_head_t	fhsm_wqh;
	atomic_t		fhsm_readable;

	/* these are protected by si_rwsem */
	unsigned long		fhsm_expire;
	aufs_bindex_t		fhsm_bottom;
#endif
};
29366+
1facf9fc 29367+struct au_branch;
29368+struct au_sbinfo {
29369+ /* nowait tasks in the system-wide workqueue */
29370+ struct au_nowait_tasks si_nowait;
29371+
b752ccd1
AM
29372+ /*
29373+ * tried sb->s_umount, but failed due to the dependecy between i_mutex.
29374+ * rwsem for au_sbinfo is necessary.
29375+ */
dece6358 29376+ struct au_rwsem si_rwsem;
1facf9fc 29377+
7f207e10 29378+ /*
523b37e3
AM
29379+ * dirty approach to protect sb->sb_inodes and ->s_files (gone) from
29380+ * remount.
7f207e10 29381+ */
5afbbe0d 29382+ struct percpu_counter si_ninodes, si_nfiles;
7f207e10 29383+
1facf9fc 29384+ /* branch management */
29385+ unsigned int si_generation;
29386+
2000de60 29387+ /* see AuSi_ flags */
1facf9fc 29388+ unsigned char au_si_status;
29389+
5afbbe0d 29390+ aufs_bindex_t si_bbot;
7f207e10
AM
29391+
29392+ /* dirty trick to keep br_id plus */
29393+ unsigned int si_last_br_id :
29394+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 29395+ struct au_branch **si_branch;
29396+
29397+ /* policy to select a writable branch */
29398+ unsigned char si_wbr_copyup;
29399+ unsigned char si_wbr_create;
29400+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
29401+ struct au_wbr_create_operations *si_wbr_create_ops;
29402+
29403+ /* round robin */
29404+ atomic_t si_wbr_rr_next;
29405+
29406+ /* most free space */
29407+ struct au_wbr_mfs si_wbr_mfs;
29408+
076b876e
AM
29409+ /* File-based Hierarchical Storage Management */
29410+ struct au_fhsm si_fhsm;
29411+
1facf9fc 29412+ /* mount flags */
29413+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
29414+ unsigned int si_mntflags;
29415+
29416+ /* external inode number (bitmap and translation table) */
5527c038
JR
29417+ vfs_readf_t si_xread;
29418+ vfs_writef_t si_xwrite;
1facf9fc 29419+ struct file *si_xib;
29420+ struct mutex si_xib_mtx; /* protect xib members */
29421+ unsigned long *si_xib_buf;
29422+ unsigned long si_xib_last_pindex;
29423+ int si_xib_next_bit;
29424+ aufs_bindex_t si_xino_brid;
392086de
AM
29425+ unsigned long si_xino_jiffy;
29426+ unsigned long si_xino_expire;
1facf9fc 29427+ /* reserved for future use */
29428+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
29429+
29430+#ifdef CONFIG_AUFS_EXPORT
29431+ /* i_generation */
29432+ struct file *si_xigen;
29433+ atomic_t si_xigen_next;
29434+#endif
29435+
b912730e 29436+ /* dirty trick to suppoer atomic_open */
ae9dfd79 29437+ struct hlist_bl_head si_aopen;
b912730e 29438+
1facf9fc 29439+ /* vdir parameters */
e49829fe 29440+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 29441+ unsigned int si_rdblk; /* deblk size */
29442+ unsigned int si_rdhash; /* hash size */
29443+
29444+ /*
29445+ * If the number of whiteouts are larger than si_dirwh, leave all of
29446+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
29447+ * future fsck.aufs or kernel thread will remove them later.
29448+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
29449+ */
29450+ unsigned int si_dirwh;
29451+
1facf9fc 29452+ /* pseudo_link list */
ae9dfd79 29453+ struct hlist_bl_head si_plink[AuPlink_NHASH];
1facf9fc 29454+ wait_queue_head_t si_plink_wq;
4a4d8108 29455+ spinlock_t si_plink_maint_lock;
e49829fe 29456+ pid_t si_plink_maint_pid;
1facf9fc 29457+
523b37e3 29458+ /* file list */
ae9dfd79 29459+ struct hlist_bl_head si_files;
523b37e3 29460+
b95c5147
AM
29461+ /* with/without getattr, brother of sb->s_d_op */
29462+ struct inode_operations *si_iop_array;
29463+
1facf9fc 29464+ /*
29465+ * sysfs and lifetime management.
29466+ * this is not a small structure and it may be a waste of memory in case
29467+ * of sysfs is disabled, particulary when many aufs-es are mounted.
29468+ * but using sysfs is majority.
29469+ */
29470+ struct kobject si_kobj;
29471+#ifdef CONFIG_DEBUG_FS
86dc4139
AM
29472+ struct dentry *si_dbgaufs;
29473+ struct dentry *si_dbgaufs_plink;
29474+ struct dentry *si_dbgaufs_xib;
1facf9fc 29475+#ifdef CONFIG_AUFS_EXPORT
29476+ struct dentry *si_dbgaufs_xigen;
29477+#endif
29478+#endif
29479+
e49829fe 29480+#ifdef CONFIG_AUFS_SBILIST
ae9dfd79 29481+ struct hlist_bl_node si_list;
e49829fe
JR
29482+#endif
29483+
1facf9fc 29484+ /* dirty, necessary for unmounting, sysfs and sysrq */
29485+ struct super_block *si_sb;
29486+};
29487+
dece6358
AM
29488+/* sbinfo status flags */
29489+/*
29490+ * set true when refresh_dirs() failed at remount time.
29491+ * then try refreshing dirs at access time again.
29492+ * if it is false, refreshing dirs at access time is unnecesary
29493+ */
027c5e7a 29494+#define AuSi_FAILED_REFRESH_DIR 1
076b876e 29495+#define AuSi_FHSM (1 << 1) /* fhsm is active now */
79b8bda9 29496+#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
076b876e
AM
29497+
29498+#ifndef CONFIG_AUFS_FHSM
29499+#undef AuSi_FHSM
29500+#define AuSi_FHSM 0
29501+#endif
29502+
dece6358
AM
29503+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
29504+ unsigned int flag)
29505+{
29506+ AuRwMustAnyLock(&sbi->si_rwsem);
29507+ return sbi->au_si_status & flag;
29508+}
29509+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
29510+#define au_fset_si(sbinfo, name) do { \
29511+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
29512+ (sbinfo)->au_si_status |= AuSi_##name; \
29513+} while (0)
29514+#define au_fclr_si(sbinfo, name) do { \
29515+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
29516+ (sbinfo)->au_si_status &= ~AuSi_##name; \
29517+} while (0)
29518+
1facf9fc 29519+/* ---------------------------------------------------------------------- */
29520+
/*
 * policy to select one among writable branches:
 * call the copyup/create callback of the policy installed in sbinfo,
 * passing the remaining arguments through.
 */
#define AuWbrCopyup(sbinfo, ...) \
	((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
#define AuWbrCreate(sbinfo, ...) \
	((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 29526+
/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
#define AuLock_DW		1		/* write-lock dentry */
#define AuLock_IR		(1 << 1)	/* read-lock inode */
#define AuLock_IW		(1 << 2)	/* write-lock inode */
#define AuLock_FLUSH		(1 << 3)	/* wait for 'nowait' tasks */
#define AuLock_DIRS		(1 << 4)	/* target is a pair of dirs */
						/* except RENAME_EXCHANGE */
#define AuLock_NOPLM		(1 << 5)	/* return err in plm mode */
#define AuLock_NOPLMW		(1 << 6)	/* wait for plm mode ends */
#define AuLock_GEN		(1 << 7)	/* test digen/iigen */

/* test/set/clear one AuLock_ flag in a flags word */
#define au_ftest_lock(flags, name)	((flags) & AuLock_##name)
#define au_fset_lock(flags, name) \
	do { (flags) |= AuLock_##name; } while (0)
#define au_fclr_lock(flags, name) \
	do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 29542+
29543+/* ---------------------------------------------------------------------- */
29544+
29545+/* super.c */
29546+extern struct file_system_type aufs_fs_type;
29547+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
79b8bda9
AM
29548+typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
29549+ unsigned long long max, void *arg);
79b8bda9
AM
29550+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
29551+ struct super_block *sb, void *arg);
7f207e10
AM
29552+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
29553+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 29554+
29555+/* sbinfo.c */
29556+void au_si_free(struct kobject *kobj);
29557+int au_si_alloc(struct super_block *sb);
e2f27e51 29558+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink);
1facf9fc 29559+
29560+unsigned int au_sigen_inc(struct super_block *sb);
29561+aufs_bindex_t au_new_br_id(struct super_block *sb);
29562+
e49829fe
JR
29563+int si_read_lock(struct super_block *sb, int flags);
29564+int si_write_lock(struct super_block *sb, int flags);
29565+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 29566+void aufs_read_unlock(struct dentry *dentry, int flags);
29567+void aufs_write_lock(struct dentry *dentry);
29568+void aufs_write_unlock(struct dentry *dentry);
e49829fe 29569+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 29570+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
29571+
29572+/* wbr_policy.c */
29573+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
29574+extern struct au_wbr_create_operations au_wbr_create_ops[];
29575+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
c2b27bf2 29576+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
5afbbe0d 29577+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop);
c2b27bf2
AM
29578+
29579+/* mvdown.c */
29580+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
1facf9fc 29581+
076b876e
AM
29582+#ifdef CONFIG_AUFS_FHSM
29583+/* fhsm.c */
29584+
29585+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
29586+{
29587+ pid_t pid;
29588+
29589+ spin_lock(&fhsm->fhsm_spin);
29590+ pid = fhsm->fhsm_pid;
29591+ spin_unlock(&fhsm->fhsm_spin);
29592+
29593+ return pid;
29594+}
29595+
29596+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
29597+void au_fhsm_wrote_all(struct super_block *sb, int force);
29598+int au_fhsm_fd(struct super_block *sb, int oflags);
29599+int au_fhsm_br_alloc(struct au_branch *br);
c1595e42 29600+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
076b876e
AM
29601+void au_fhsm_fin(struct super_block *sb);
29602+void au_fhsm_init(struct au_sbinfo *sbinfo);
29603+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
29604+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
29605+#else
29606+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
29607+ int force)
29608+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
29609+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
c1595e42
JR
29610+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
29611+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
29612+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
076b876e
AM
29613+AuStubVoid(au_fhsm_fin, struct super_block *sb)
29614+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
29615+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
29616+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
29617+#endif
29618+
1facf9fc 29619+/* ---------------------------------------------------------------------- */
29620+
29621+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
29622+{
29623+ return sb->s_fs_info;
29624+}
29625+
29626+/* ---------------------------------------------------------------------- */
29627+
29628+#ifdef CONFIG_AUFS_EXPORT
a2a7ad62 29629+int au_test_nfsd(void);
1facf9fc 29630+void au_export_init(struct super_block *sb);
b752ccd1 29631+void au_xigen_inc(struct inode *inode);
1facf9fc 29632+int au_xigen_new(struct inode *inode);
29633+int au_xigen_set(struct super_block *sb, struct file *base);
29634+void au_xigen_clr(struct super_block *sb);
29635+
29636+static inline int au_busy_or_stale(void)
29637+{
b752ccd1 29638+ if (!au_test_nfsd())
1facf9fc 29639+ return -EBUSY;
29640+ return -ESTALE;
29641+}
29642+#else
b752ccd1 29643+AuStubInt0(au_test_nfsd, void)
a2a7ad62 29644+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1 29645+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
29646+AuStubInt0(au_xigen_new, struct inode *inode)
29647+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
29648+AuStubVoid(au_xigen_clr, struct super_block *sb)
c1595e42 29649+AuStub(int, au_busy_or_stale, return -EBUSY, void)
1facf9fc 29650+#endif /* CONFIG_AUFS_EXPORT */
29651+
29652+/* ---------------------------------------------------------------------- */
29653+
e49829fe
JR
29654+#ifdef CONFIG_AUFS_SBILIST
29655+/* module.c */
ae9dfd79 29656+extern struct hlist_bl_head au_sbilist;
e49829fe
JR
29657+
29658+static inline void au_sbilist_init(void)
29659+{
ae9dfd79 29660+ INIT_HLIST_BL_HEAD(&au_sbilist);
e49829fe
JR
29661+}
29662+
29663+static inline void au_sbilist_add(struct super_block *sb)
29664+{
ae9dfd79 29665+ au_hbl_add(&au_sbi(sb)->si_list, &au_sbilist);
e49829fe
JR
29666+}
29667+
29668+static inline void au_sbilist_del(struct super_block *sb)
29669+{
ae9dfd79 29670+ au_hbl_del(&au_sbi(sb)->si_list, &au_sbilist);
e49829fe 29671+}
53392da6
AM
29672+
29673+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
29674+static inline void au_sbilist_lock(void)
29675+{
ae9dfd79 29676+ hlist_bl_lock(&au_sbilist);
53392da6
AM
29677+}
29678+
29679+static inline void au_sbilist_unlock(void)
29680+{
ae9dfd79 29681+ hlist_bl_unlock(&au_sbilist);
53392da6
AM
29682+}
29683+#define AuGFP_SBILIST GFP_ATOMIC
29684+#else
29685+AuStubVoid(au_sbilist_lock, void)
29686+AuStubVoid(au_sbilist_unlock, void)
29687+#define AuGFP_SBILIST GFP_NOFS
29688+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
29689+#else
29690+AuStubVoid(au_sbilist_init, void)
c1595e42
JR
29691+AuStubVoid(au_sbilist_add, struct super_block *sb)
29692+AuStubVoid(au_sbilist_del, struct super_block *sb)
53392da6
AM
29693+AuStubVoid(au_sbilist_lock, void)
29694+AuStubVoid(au_sbilist_unlock, void)
29695+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
29696+#endif
29697+
29698+/* ---------------------------------------------------------------------- */
29699+
1facf9fc 29700+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
29701+{
dece6358 29702+ /*
c1595e42 29703+ * This function is a dynamic '__init' function actually,
dece6358
AM
29704+ * so the tiny check for si_rwsem is unnecessary.
29705+ */
29706+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 29707+#ifdef CONFIG_DEBUG_FS
29708+ sbinfo->si_dbgaufs = NULL;
86dc4139 29709+ sbinfo->si_dbgaufs_plink = NULL;
1facf9fc 29710+ sbinfo->si_dbgaufs_xib = NULL;
29711+#ifdef CONFIG_AUFS_EXPORT
29712+ sbinfo->si_dbgaufs_xigen = NULL;
29713+#endif
29714+#endif
29715+}
29716+
29717+/* ---------------------------------------------------------------------- */
29718+
ae9dfd79
AM
29719+/* current->atomic_flags */
29720+/* this value should never corrupt the ones defined in linux/sched.h */
29721+#define PFA_AUFS 7
29722+
29723+TASK_PFA_TEST(AUFS, test_aufs) /* task_test_aufs */
29724+TASK_PFA_SET(AUFS, aufs) /* task_set_aufs */
29725+TASK_PFA_CLEAR(AUFS, aufs) /* task_clear_aufs */
b752ccd1
AM
29726+
29727+static inline int si_pid_test(struct super_block *sb)
29728+{
ae9dfd79 29729+ return !!task_test_aufs(current);
b752ccd1
AM
29730+}
29731+
29732+static inline void si_pid_clr(struct super_block *sb)
29733+{
ae9dfd79
AM
29734+ AuDebugOn(!task_test_aufs(current));
29735+ task_clear_aufs(current);
b752ccd1
AM
29736+}
29737+
ae9dfd79
AM
29738+static inline void si_pid_set(struct super_block *sb)
29739+{
29740+ AuDebugOn(task_test_aufs(current));
29741+ task_set_aufs(current);
29742+}
febd17d6 29743+
b752ccd1
AM
29744+/* ---------------------------------------------------------------------- */
29745+
1facf9fc 29746+/* lock superblock. mainly for entry point functions */
29747+/*
b752ccd1
AM
29748+ * __si_read_lock, __si_write_lock,
29749+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 29750+ */
b752ccd1 29751+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 29752+
dece6358
AM
29753+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
29754+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
29755+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
29756+
b752ccd1
AM
29757+static inline void si_noflush_read_lock(struct super_block *sb)
29758+{
29759+ __si_read_lock(sb);
29760+ si_pid_set(sb);
29761+}
29762+
29763+static inline int si_noflush_read_trylock(struct super_block *sb)
29764+{
076b876e
AM
29765+ int locked;
29766+
29767+ locked = __si_read_trylock(sb);
b752ccd1
AM
29768+ if (locked)
29769+ si_pid_set(sb);
29770+ return locked;
29771+}
29772+
29773+static inline void si_noflush_write_lock(struct super_block *sb)
29774+{
29775+ __si_write_lock(sb);
29776+ si_pid_set(sb);
29777+}
29778+
29779+static inline int si_noflush_write_trylock(struct super_block *sb)
29780+{
076b876e
AM
29781+ int locked;
29782+
29783+ locked = __si_write_trylock(sb);
b752ccd1
AM
29784+ if (locked)
29785+ si_pid_set(sb);
29786+ return locked;
29787+}
29788+
7e9cd9fe 29789+#if 0 /* reserved */
1facf9fc 29790+static inline int si_read_trylock(struct super_block *sb, int flags)
29791+{
29792+ if (au_ftest_lock(flags, FLUSH))
29793+ au_nwt_flush(&au_sbi(sb)->si_nowait);
29794+ return si_noflush_read_trylock(sb);
29795+}
e49829fe 29796+#endif
1facf9fc 29797+
b752ccd1
AM
29798+static inline void si_read_unlock(struct super_block *sb)
29799+{
29800+ si_pid_clr(sb);
29801+ __si_read_unlock(sb);
29802+}
29803+
7e9cd9fe 29804+#if 0 /* reserved */
1facf9fc 29805+static inline int si_write_trylock(struct super_block *sb, int flags)
29806+{
29807+ if (au_ftest_lock(flags, FLUSH))
29808+ au_nwt_flush(&au_sbi(sb)->si_nowait);
29809+ return si_noflush_write_trylock(sb);
29810+}
b752ccd1
AM
29811+#endif
29812+
29813+static inline void si_write_unlock(struct super_block *sb)
29814+{
29815+ si_pid_clr(sb);
29816+ __si_write_unlock(sb);
29817+}
29818+
7e9cd9fe 29819+#if 0 /* reserved */
b752ccd1
AM
29820+static inline void si_downgrade_lock(struct super_block *sb)
29821+{
29822+ __si_downgrade_lock(sb);
29823+}
29824+#endif
1facf9fc 29825+
29826+/* ---------------------------------------------------------------------- */
29827+
5afbbe0d 29828+static inline aufs_bindex_t au_sbbot(struct super_block *sb)
1facf9fc 29829+{
dece6358 29830+ SiMustAnyLock(sb);
5afbbe0d 29831+ return au_sbi(sb)->si_bbot;
1facf9fc 29832+}
29833+
29834+static inline unsigned int au_mntflags(struct super_block *sb)
29835+{
dece6358 29836+ SiMustAnyLock(sb);
1facf9fc 29837+ return au_sbi(sb)->si_mntflags;
29838+}
29839+
29840+static inline unsigned int au_sigen(struct super_block *sb)
29841+{
dece6358 29842+ SiMustAnyLock(sb);
1facf9fc 29843+ return au_sbi(sb)->si_generation;
29844+}
29845+
5afbbe0d
AM
29846+static inline unsigned long long au_ninodes(struct super_block *sb)
29847+{
29848+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_ninodes);
29849+
29850+ BUG_ON(n < 0);
29851+ return n;
29852+}
29853+
7f207e10
AM
29854+static inline void au_ninodes_inc(struct super_block *sb)
29855+{
5afbbe0d 29856+ percpu_counter_inc(&au_sbi(sb)->si_ninodes);
7f207e10
AM
29857+}
29858+
29859+static inline void au_ninodes_dec(struct super_block *sb)
29860+{
5afbbe0d
AM
29861+ percpu_counter_dec(&au_sbi(sb)->si_ninodes);
29862+}
29863+
29864+static inline unsigned long long au_nfiles(struct super_block *sb)
29865+{
29866+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_nfiles);
29867+
29868+ BUG_ON(n < 0);
29869+ return n;
7f207e10
AM
29870+}
29871+
29872+static inline void au_nfiles_inc(struct super_block *sb)
29873+{
5afbbe0d 29874+ percpu_counter_inc(&au_sbi(sb)->si_nfiles);
7f207e10
AM
29875+}
29876+
29877+static inline void au_nfiles_dec(struct super_block *sb)
29878+{
5afbbe0d 29879+ percpu_counter_dec(&au_sbi(sb)->si_nfiles);
7f207e10
AM
29880+}
29881+
1facf9fc 29882+static inline struct au_branch *au_sbr(struct super_block *sb,
29883+ aufs_bindex_t bindex)
29884+{
dece6358 29885+ SiMustAnyLock(sb);
1facf9fc 29886+ return au_sbi(sb)->si_branch[0 + bindex];
29887+}
29888+
29889+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
29890+{
dece6358 29891+ SiMustWriteLock(sb);
1facf9fc 29892+ au_sbi(sb)->si_xino_brid = brid;
29893+}
29894+
29895+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
29896+{
dece6358 29897+ SiMustAnyLock(sb);
1facf9fc 29898+ return au_sbi(sb)->si_xino_brid;
29899+}
29900+
29901+#endif /* __KERNEL__ */
29902+#endif /* __AUFS_SUPER_H__ */
e8791d4f
AM
29903diff -urNp -x '*.orig' linux-4.9/fs/aufs/sysaufs.c linux-4.9/fs/aufs/sysaufs.c
29904--- linux-4.9/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
29905+++ linux-4.9/fs/aufs/sysaufs.c 2021-02-24 16:15:09.534907296 +0100
523b37e3 29906@@ -0,0 +1,104 @@
1facf9fc 29907+/*
ae9dfd79 29908+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 29909+ *
29910+ * This program, aufs is free software; you can redistribute it and/or modify
29911+ * it under the terms of the GNU General Public License as published by
29912+ * the Free Software Foundation; either version 2 of the License, or
29913+ * (at your option) any later version.
dece6358
AM
29914+ *
29915+ * This program is distributed in the hope that it will be useful,
29916+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29917+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29918+ * GNU General Public License for more details.
29919+ *
29920+ * You should have received a copy of the GNU General Public License
523b37e3 29921+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29922+ */
29923+
29924+/*
29925+ * sysfs interface and lifetime management
29926+ * they are necessary regardless of whether sysfs is disabled.
29927+ */
29928+
1facf9fc 29929+#include <linux/random.h>
1facf9fc 29930+#include "aufs.h"
29931+
29932+unsigned long sysaufs_si_mask;
e49829fe 29933+struct kset *sysaufs_kset;
1facf9fc 29934+
29935+#define AuSiAttr(_name) { \
29936+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
29937+ .show = sysaufs_si_##_name, \
29938+}
29939+
29940+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
29941+struct attribute *sysaufs_si_attrs[] = {
29942+ &sysaufs_si_attr_xi_path.attr,
29943+ NULL,
29944+};
29945+
4a4d8108 29946+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 29947+ .show = sysaufs_si_show
29948+};
29949+
29950+static struct kobj_type au_sbi_ktype = {
29951+ .release = au_si_free,
29952+ .sysfs_ops = &au_sbi_ops,
29953+ .default_attrs = sysaufs_si_attrs
29954+};
29955+
29956+/* ---------------------------------------------------------------------- */
29957+
29958+int sysaufs_si_init(struct au_sbinfo *sbinfo)
29959+{
29960+ int err;
29961+
e49829fe 29962+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 29963+ /* cf. sysaufs_name() */
29964+ err = kobject_init_and_add
e49829fe 29965+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 29966+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
29967+
29968+ dbgaufs_si_null(sbinfo);
29969+ if (!err) {
29970+ err = dbgaufs_si_init(sbinfo);
29971+ if (unlikely(err))
29972+ kobject_put(&sbinfo->si_kobj);
29973+ }
29974+ return err;
29975+}
29976+
29977+void sysaufs_fin(void)
29978+{
29979+ dbgaufs_fin();
e49829fe
JR
29980+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
29981+ kset_unregister(sysaufs_kset);
1facf9fc 29982+}
29983+
29984+int __init sysaufs_init(void)
29985+{
29986+ int err;
29987+
29988+ do {
29989+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
29990+ } while (!sysaufs_si_mask);
29991+
4a4d8108 29992+ err = -EINVAL;
e49829fe
JR
29993+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
29994+ if (unlikely(!sysaufs_kset))
4a4d8108 29995+ goto out;
e49829fe
JR
29996+ err = PTR_ERR(sysaufs_kset);
29997+ if (IS_ERR(sysaufs_kset))
1facf9fc 29998+ goto out;
e49829fe 29999+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 30000+ if (unlikely(err)) {
e49829fe 30001+ kset_unregister(sysaufs_kset);
1facf9fc 30002+ goto out;
30003+ }
30004+
30005+ err = dbgaufs_init();
30006+ if (unlikely(err))
30007+ sysaufs_fin();
4f0767ce 30008+out:
1facf9fc 30009+ return err;
30010+}
e8791d4f
AM
30011diff -urNp -x '*.orig' linux-4.9/fs/aufs/sysaufs.h linux-4.9/fs/aufs/sysaufs.h
30012--- linux-4.9/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
30013+++ linux-4.9/fs/aufs/sysaufs.h 2021-02-24 16:15:09.534907296 +0100
c1595e42 30014@@ -0,0 +1,101 @@
1facf9fc 30015+/*
ae9dfd79 30016+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 30017+ *
30018+ * This program, aufs is free software; you can redistribute it and/or modify
30019+ * it under the terms of the GNU General Public License as published by
30020+ * the Free Software Foundation; either version 2 of the License, or
30021+ * (at your option) any later version.
dece6358
AM
30022+ *
30023+ * This program is distributed in the hope that it will be useful,
30024+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30025+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30026+ * GNU General Public License for more details.
30027+ *
30028+ * You should have received a copy of the GNU General Public License
523b37e3 30029+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30030+ */
30031+
30032+/*
30033+ * sysfs interface and mount lifetime management
30034+ */
30035+
30036+#ifndef __SYSAUFS_H__
30037+#define __SYSAUFS_H__
30038+
30039+#ifdef __KERNEL__
30040+
1facf9fc 30041+#include <linux/sysfs.h>
1facf9fc 30042+#include "module.h"
30043+
dece6358
AM
30044+struct super_block;
30045+struct au_sbinfo;
30046+
1facf9fc 30047+struct sysaufs_si_attr {
30048+ struct attribute attr;
30049+ int (*show)(struct seq_file *seq, struct super_block *sb);
30050+};
30051+
30052+/* ---------------------------------------------------------------------- */
30053+
30054+/* sysaufs.c */
30055+extern unsigned long sysaufs_si_mask;
e49829fe 30056+extern struct kset *sysaufs_kset;
1facf9fc 30057+extern struct attribute *sysaufs_si_attrs[];
30058+int sysaufs_si_init(struct au_sbinfo *sbinfo);
30059+int __init sysaufs_init(void);
30060+void sysaufs_fin(void);
30061+
30062+/* ---------------------------------------------------------------------- */
30063+
30064+/* some people don't like to show a pointer in kernel */
30065+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
30066+{
30067+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
30068+}
30069+
30070+#define SysaufsSiNamePrefix "si_"
30071+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
30072+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
30073+{
30074+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
30075+ sysaufs_si_id(sbinfo));
30076+}
30077+
30078+struct au_branch;
30079+#ifdef CONFIG_SYSFS
30080+/* sysfs.c */
30081+extern struct attribute_group *sysaufs_attr_group;
30082+
30083+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
30084+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
30085+ char *buf);
076b876e
AM
30086+long au_brinfo_ioctl(struct file *file, unsigned long arg);
30087+#ifdef CONFIG_COMPAT
30088+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
30089+#endif
1facf9fc 30090+
30091+void sysaufs_br_init(struct au_branch *br);
30092+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
30093+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
30094+
30095+#define sysaufs_brs_init() do {} while (0)
30096+
30097+#else
30098+#define sysaufs_attr_group NULL
30099+
4a4d8108 30100+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
c1595e42
JR
30101+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
30102+ struct attribute *attr, char *buf)
4a4d8108
AM
30103+AuStubVoid(sysaufs_br_init, struct au_branch *br)
30104+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
30105+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 30106+
30107+static inline void sysaufs_brs_init(void)
30108+{
30109+ sysaufs_brs = 0;
30110+}
30111+
30112+#endif /* CONFIG_SYSFS */
30113+
30114+#endif /* __KERNEL__ */
30115+#endif /* __SYSAUFS_H__ */
e8791d4f
AM
30116diff -urNp -x '*.orig' linux-4.9/fs/aufs/sysfs.c linux-4.9/fs/aufs/sysfs.c
30117--- linux-4.9/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
30118+++ linux-4.9/fs/aufs/sysfs.c 2021-02-24 16:15:09.534907296 +0100
79b8bda9 30119@@ -0,0 +1,376 @@
1facf9fc 30120+/*
ae9dfd79 30121+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 30122+ *
30123+ * This program, aufs is free software; you can redistribute it and/or modify
30124+ * it under the terms of the GNU General Public License as published by
30125+ * the Free Software Foundation; either version 2 of the License, or
30126+ * (at your option) any later version.
dece6358
AM
30127+ *
30128+ * This program is distributed in the hope that it will be useful,
30129+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30130+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30131+ * GNU General Public License for more details.
30132+ *
30133+ * You should have received a copy of the GNU General Public License
523b37e3 30134+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30135+ */
30136+
30137+/*
30138+ * sysfs interface
30139+ */
30140+
076b876e 30141+#include <linux/compat.h>
1facf9fc 30142+#include <linux/seq_file.h>
1facf9fc 30143+#include "aufs.h"
30144+
4a4d8108
AM
30145+#ifdef CONFIG_AUFS_FS_MODULE
30146+/* this entry violates the "one line per file" policy of sysfs */
30147+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
30148+ char *buf)
30149+{
30150+ ssize_t err;
30151+ static char *conf =
30152+/* this file is generated at compiling */
30153+#include "conf.str"
30154+ ;
30155+
30156+ err = snprintf(buf, PAGE_SIZE, conf);
30157+ if (unlikely(err >= PAGE_SIZE))
30158+ err = -EFBIG;
30159+ return err;
30160+}
30161+
30162+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
30163+#endif
30164+
1facf9fc 30165+static struct attribute *au_attr[] = {
4a4d8108
AM
30166+#ifdef CONFIG_AUFS_FS_MODULE
30167+ &au_config_attr.attr,
30168+#endif
1facf9fc 30169+ NULL, /* need to NULL terminate the list of attributes */
30170+};
30171+
30172+static struct attribute_group sysaufs_attr_group_body = {
30173+ .attrs = au_attr
30174+};
30175+
30176+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
30177+
30178+/* ---------------------------------------------------------------------- */
30179+
30180+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
30181+{
30182+ int err;
30183+
dece6358
AM
30184+ SiMustAnyLock(sb);
30185+
1facf9fc 30186+ err = 0;
30187+ if (au_opt_test(au_mntflags(sb), XINO)) {
30188+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
30189+ seq_putc(seq, '\n');
30190+ }
30191+ return err;
30192+}
30193+
30194+/*
30195+ * the lifetime of branch is independent from the entry under sysfs.
30196+ * sysfs handles the lifetime of the entry, and never call ->show() after it is
30197+ * unlinked.
30198+ */
30199+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
392086de 30200+ aufs_bindex_t bindex, int idx)
1facf9fc 30201+{
1e00d052 30202+ int err;
1facf9fc 30203+ struct path path;
30204+ struct dentry *root;
30205+ struct au_branch *br;
076b876e 30206+ au_br_perm_str_t perm;
1facf9fc 30207+
30208+ AuDbg("b%d\n", bindex);
30209+
1e00d052 30210+ err = 0;
1facf9fc 30211+ root = sb->s_root;
30212+ di_read_lock_parent(root, !AuLock_IR);
30213+ br = au_sbr(sb, bindex);
392086de
AM
30214+
30215+ switch (idx) {
30216+ case AuBrSysfs_BR:
30217+ path.mnt = au_br_mnt(br);
30218+ path.dentry = au_h_dptr(root, bindex);
79b8bda9
AM
30219+ err = au_seq_path(seq, &path);
30220+ if (!err) {
30221+ au_optstr_br_perm(&perm, br->br_perm);
30222+ seq_printf(seq, "=%s\n", perm.a);
30223+ }
392086de
AM
30224+ break;
30225+ case AuBrSysfs_BRID:
79b8bda9 30226+ seq_printf(seq, "%d\n", br->br_id);
392086de
AM
30227+ break;
30228+ }
076b876e 30229+ di_read_unlock(root, !AuLock_IR);
79b8bda9 30230+ if (unlikely(err || seq_has_overflowed(seq)))
076b876e 30231+ err = -E2BIG;
392086de 30232+
1e00d052 30233+ return err;
1facf9fc 30234+}
30235+
30236+/* ---------------------------------------------------------------------- */
30237+
30238+static struct seq_file *au_seq(char *p, ssize_t len)
30239+{
30240+ struct seq_file *seq;
30241+
30242+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
30243+ if (seq) {
30244+ /* mutex_init(&seq.lock); */
30245+ seq->buf = p;
30246+ seq->size = len;
30247+ return seq; /* success */
30248+ }
30249+
30250+ seq = ERR_PTR(-ENOMEM);
30251+ return seq;
30252+}
30253+
392086de
AM
30254+#define SysaufsBr_PREFIX "br"
30255+#define SysaufsBrid_PREFIX "brid"
1facf9fc 30256+
30257+/* todo: file size may exceed PAGE_SIZE */
30258+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 30259+ char *buf)
1facf9fc 30260+{
30261+ ssize_t err;
392086de 30262+ int idx;
1facf9fc 30263+ long l;
5afbbe0d 30264+ aufs_bindex_t bbot;
1facf9fc 30265+ struct au_sbinfo *sbinfo;
30266+ struct super_block *sb;
30267+ struct seq_file *seq;
30268+ char *name;
30269+ struct attribute **cattr;
30270+
30271+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
30272+ sb = sbinfo->si_sb;
1308ab2a 30273+
30274+ /*
30275+ * prevent a race condition between sysfs and aufs.
30276+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
30277+ * prohibits maintaining the sysfs entries.
30278+ * here we acquire read lock after sysfs_get_active_two().
30279+ * on the other hand, the remount process may maintain the sysfs/aufs
30280+ * entries after acquiring write lock.
30281+ * it can cause a deadlock.
30282+ * simply we gave up processing read here.
30283+ */
30284+ err = -EBUSY;
30285+ if (unlikely(!si_noflush_read_trylock(sb)))
30286+ goto out;
1facf9fc 30287+
30288+ seq = au_seq(buf, PAGE_SIZE);
30289+ err = PTR_ERR(seq);
30290+ if (IS_ERR(seq))
1308ab2a 30291+ goto out_unlock;
1facf9fc 30292+
30293+ name = (void *)attr->name;
30294+ cattr = sysaufs_si_attrs;
30295+ while (*cattr) {
30296+ if (!strcmp(name, (*cattr)->name)) {
30297+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
30298+ ->show(seq, sb);
30299+ goto out_seq;
30300+ }
30301+ cattr++;
30302+ }
30303+
392086de
AM
30304+ if (!strncmp(name, SysaufsBrid_PREFIX,
30305+ sizeof(SysaufsBrid_PREFIX) - 1)) {
30306+ idx = AuBrSysfs_BRID;
30307+ name += sizeof(SysaufsBrid_PREFIX) - 1;
30308+ } else if (!strncmp(name, SysaufsBr_PREFIX,
30309+ sizeof(SysaufsBr_PREFIX) - 1)) {
30310+ idx = AuBrSysfs_BR;
1facf9fc 30311+ name += sizeof(SysaufsBr_PREFIX) - 1;
392086de
AM
30312+ } else
30313+ BUG();
30314+
30315+ err = kstrtol(name, 10, &l);
30316+ if (!err) {
5afbbe0d
AM
30317+ bbot = au_sbbot(sb);
30318+ if (l <= bbot)
392086de
AM
30319+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
30320+ else
30321+ err = -ENOENT;
1facf9fc 30322+ }
1facf9fc 30323+
4f0767ce 30324+out_seq:
1facf9fc 30325+ if (!err) {
30326+ err = seq->count;
30327+ /* sysfs limit */
30328+ if (unlikely(err == PAGE_SIZE))
30329+ err = -EFBIG;
30330+ }
ae9dfd79 30331+ kfree(seq);
4f0767ce 30332+out_unlock:
1facf9fc 30333+ si_read_unlock(sb);
4f0767ce 30334+out:
1facf9fc 30335+ return err;
30336+}
30337+
30338+/* ---------------------------------------------------------------------- */
30339+
076b876e
AM
30340+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
30341+{
30342+ int err;
30343+ int16_t brid;
5afbbe0d 30344+ aufs_bindex_t bindex, bbot;
076b876e
AM
30345+ size_t sz;
30346+ char *buf;
30347+ struct seq_file *seq;
30348+ struct au_branch *br;
30349+
30350+ si_read_lock(sb, AuLock_FLUSH);
5afbbe0d
AM
30351+ bbot = au_sbbot(sb);
30352+ err = bbot + 1;
076b876e
AM
30353+ if (!arg)
30354+ goto out;
30355+
30356+ err = -ENOMEM;
30357+ buf = (void *)__get_free_page(GFP_NOFS);
30358+ if (unlikely(!buf))
30359+ goto out;
30360+
30361+ seq = au_seq(buf, PAGE_SIZE);
30362+ err = PTR_ERR(seq);
30363+ if (IS_ERR(seq))
30364+ goto out_buf;
30365+
30366+ sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
5afbbe0d 30367+ for (bindex = 0; bindex <= bbot; bindex++, arg++) {
076b876e
AM
30368+ err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
30369+ if (unlikely(err))
30370+ break;
30371+
30372+ br = au_sbr(sb, bindex);
30373+ brid = br->br_id;
30374+ BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
30375+ err = __put_user(brid, &arg->id);
30376+ if (unlikely(err))
30377+ break;
30378+
30379+ BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
30380+ err = __put_user(br->br_perm, &arg->perm);
30381+ if (unlikely(err))
30382+ break;
30383+
79b8bda9
AM
30384+ err = au_seq_path(seq, &br->br_path);
30385+ if (unlikely(err))
30386+ break;
30387+ seq_putc(seq, '\0');
30388+ if (!seq_has_overflowed(seq)) {
076b876e
AM
30389+ err = copy_to_user(arg->path, seq->buf, seq->count);
30390+ seq->count = 0;
30391+ if (unlikely(err))
30392+ break;
30393+ } else {
30394+ err = -E2BIG;
30395+ goto out_seq;
30396+ }
30397+ }
30398+ if (unlikely(err))
30399+ err = -EFAULT;
30400+
30401+out_seq:
ae9dfd79 30402+ kfree(seq);
076b876e 30403+out_buf:
ae9dfd79 30404+ free_page((unsigned long)buf);
076b876e
AM
30405+out:
30406+ si_read_unlock(sb);
30407+ return err;
30408+}
30409+
30410+long au_brinfo_ioctl(struct file *file, unsigned long arg)
30411+{
2000de60 30412+ return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
076b876e
AM
30413+}
30414+
30415+#ifdef CONFIG_COMPAT
30416+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
30417+{
2000de60 30418+ return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
076b876e
AM
30419+}
30420+#endif
30421+
30422+/* ---------------------------------------------------------------------- */
30423+
1facf9fc 30424+void sysaufs_br_init(struct au_branch *br)
30425+{
392086de
AM
30426+ int i;
30427+ struct au_brsysfs *br_sysfs;
30428+ struct attribute *attr;
4a4d8108 30429+
392086de
AM
30430+ br_sysfs = br->br_sysfs;
30431+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
30432+ attr = &br_sysfs->attr;
30433+ sysfs_attr_init(attr);
30434+ attr->name = br_sysfs->name;
30435+ attr->mode = S_IRUGO;
30436+ br_sysfs++;
30437+ }
1facf9fc 30438+}
30439+
30440+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
30441+{
30442+ struct au_branch *br;
30443+ struct kobject *kobj;
392086de
AM
30444+ struct au_brsysfs *br_sysfs;
30445+ int i;
5afbbe0d 30446+ aufs_bindex_t bbot;
1facf9fc 30447+
30448+ dbgaufs_brs_del(sb, bindex);
30449+
30450+ if (!sysaufs_brs)
30451+ return;
30452+
30453+ kobj = &au_sbi(sb)->si_kobj;
5afbbe0d
AM
30454+ bbot = au_sbbot(sb);
30455+ for (; bindex <= bbot; bindex++) {
1facf9fc 30456+ br = au_sbr(sb, bindex);
392086de
AM
30457+ br_sysfs = br->br_sysfs;
30458+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
30459+ sysfs_remove_file(kobj, &br_sysfs->attr);
30460+ br_sysfs++;
30461+ }
1facf9fc 30462+ }
30463+}
30464+
30465+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
30466+{
392086de 30467+ int err, i;
5afbbe0d 30468+ aufs_bindex_t bbot;
1facf9fc 30469+ struct kobject *kobj;
30470+ struct au_branch *br;
392086de 30471+ struct au_brsysfs *br_sysfs;
1facf9fc 30472+
30473+ dbgaufs_brs_add(sb, bindex);
30474+
30475+ if (!sysaufs_brs)
30476+ return;
30477+
30478+ kobj = &au_sbi(sb)->si_kobj;
5afbbe0d
AM
30479+ bbot = au_sbbot(sb);
30480+ for (; bindex <= bbot; bindex++) {
1facf9fc 30481+ br = au_sbr(sb, bindex);
392086de
AM
30482+ br_sysfs = br->br_sysfs;
30483+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
30484+ SysaufsBr_PREFIX "%d", bindex);
30485+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
30486+ SysaufsBrid_PREFIX "%d", bindex);
30487+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
30488+ err = sysfs_create_file(kobj, &br_sysfs->attr);
30489+ if (unlikely(err))
30490+ pr_warn("failed %s under sysfs(%d)\n",
30491+ br_sysfs->name, err);
30492+ br_sysfs++;
30493+ }
1facf9fc 30494+ }
30495+}
e8791d4f
AM
30496diff -urNp -x '*.orig' linux-4.9/fs/aufs/sysrq.c linux-4.9/fs/aufs/sysrq.c
30497--- linux-4.9/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
30498+++ linux-4.9/fs/aufs/sysrq.c 2021-02-24 16:15:09.534907296 +0100
ae9dfd79 30499@@ -0,0 +1,159 @@
1facf9fc 30500+/*
ae9dfd79 30501+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 30502+ *
30503+ * This program, aufs is free software; you can redistribute it and/or modify
30504+ * it under the terms of the GNU General Public License as published by
30505+ * the Free Software Foundation; either version 2 of the License, or
30506+ * (at your option) any later version.
dece6358
AM
30507+ *
30508+ * This program is distributed in the hope that it will be useful,
30509+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30510+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30511+ * GNU General Public License for more details.
30512+ *
30513+ * You should have received a copy of the GNU General Public License
523b37e3 30514+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30515+ */
30516+
30517+/*
30518+ * magic sysrq handler
30519+ */
30520+
1facf9fc 30521+/* #include <linux/sysrq.h> */
027c5e7a 30522+#include <linux/writeback.h>
1facf9fc 30523+#include "aufs.h"
30524+
30525+/* ---------------------------------------------------------------------- */
30526+
30527+static void sysrq_sb(struct super_block *sb)
30528+{
30529+ char *plevel;
30530+ struct au_sbinfo *sbinfo;
30531+ struct file *file;
ae9dfd79
AM
30532+ struct hlist_bl_head *files;
30533+ struct hlist_bl_node *pos;
523b37e3 30534+ struct au_finfo *finfo;
1facf9fc 30535+
30536+ plevel = au_plevel;
30537+ au_plevel = KERN_WARNING;
1facf9fc 30538+
4a4d8108 30539+ /* since we define pr_fmt, call printk directly */
c06a8ce3
AM
30540+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
30541+
30542+ sbinfo = au_sbi(sb);
4a4d8108 30543+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
c06a8ce3 30544+ pr("superblock\n");
1facf9fc 30545+ au_dpri_sb(sb);
027c5e7a
AM
30546+
30547+#if 0
c06a8ce3 30548+ pr("root dentry\n");
1facf9fc 30549+ au_dpri_dentry(sb->s_root);
c06a8ce3 30550+ pr("root inode\n");
5527c038 30551+ au_dpri_inode(d_inode(sb->s_root));
027c5e7a
AM
30552+#endif
30553+
1facf9fc 30554+#if 0
027c5e7a
AM
30555+ do {
30556+ int err, i, j, ndentry;
30557+ struct au_dcsub_pages dpages;
30558+ struct au_dpage *dpage;
30559+
30560+ err = au_dpages_init(&dpages, GFP_ATOMIC);
30561+ if (unlikely(err))
30562+ break;
30563+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
30564+ if (!err)
30565+ for (i = 0; i < dpages.ndpage; i++) {
30566+ dpage = dpages.dpages + i;
30567+ ndentry = dpage->ndentry;
30568+ for (j = 0; j < ndentry; j++)
30569+ au_dpri_dentry(dpage->dentries[j]);
30570+ }
30571+ au_dpages_free(&dpages);
30572+ } while (0);
30573+#endif
30574+
30575+#if 1
30576+ {
30577+ struct inode *i;
076b876e 30578+
c06a8ce3 30579+ pr("isolated inode\n");
79b8bda9 30580+ spin_lock(&sb->s_inode_list_lock);
2cbb1c4b
JR
30581+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
30582+ spin_lock(&i->i_lock);
b4510431 30583+ if (1 || hlist_empty(&i->i_dentry))
027c5e7a 30584+ au_dpri_inode(i);
2cbb1c4b
JR
30585+ spin_unlock(&i->i_lock);
30586+ }
79b8bda9 30587+ spin_unlock(&sb->s_inode_list_lock);
027c5e7a 30588+ }
1facf9fc 30589+#endif
c06a8ce3 30590+ pr("files\n");
523b37e3 30591+ files = &au_sbi(sb)->si_files;
ae9dfd79
AM
30592+ hlist_bl_lock(files);
30593+ hlist_bl_for_each_entry(finfo, pos, files, fi_hlist) {
4a4d8108 30594+ umode_t mode;
076b876e 30595+
523b37e3 30596+ file = finfo->fi_file;
c06a8ce3 30597+ mode = file_inode(file)->i_mode;
38d290e6 30598+ if (!special_file(mode))
1facf9fc 30599+ au_dpri_file(file);
523b37e3 30600+ }
ae9dfd79 30601+ hlist_bl_unlock(files);
c06a8ce3 30602+ pr("done\n");
1facf9fc 30603+
c06a8ce3 30604+#undef pr
1facf9fc 30605+ au_plevel = plevel;
1facf9fc 30606+}
30607+
30608+/* ---------------------------------------------------------------------- */
30609+
30610+/* module parameter */
30611+static char *aufs_sysrq_key = "a";
30612+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
30613+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
30614+
0c5527e5 30615+static void au_sysrq(int key __maybe_unused)
1facf9fc 30616+{
1facf9fc 30617+ struct au_sbinfo *sbinfo;
ae9dfd79 30618+ struct hlist_bl_node *pos;
1facf9fc 30619+
027c5e7a 30620+ lockdep_off();
53392da6 30621+ au_sbilist_lock();
ae9dfd79 30622+ hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
1facf9fc 30623+ sysrq_sb(sbinfo->si_sb);
53392da6 30624+ au_sbilist_unlock();
027c5e7a 30625+ lockdep_on();
1facf9fc 30626+}
30627+
30628+static struct sysrq_key_op au_sysrq_op = {
30629+ .handler = au_sysrq,
30630+ .help_msg = "Aufs",
30631+ .action_msg = "Aufs",
30632+ .enable_mask = SYSRQ_ENABLE_DUMP
30633+};
30634+
30635+/* ---------------------------------------------------------------------- */
30636+
30637+int __init au_sysrq_init(void)
30638+{
30639+ int err;
30640+ char key;
30641+
30642+ err = -1;
30643+ key = *aufs_sysrq_key;
30644+ if ('a' <= key && key <= 'z')
30645+ err = register_sysrq_key(key, &au_sysrq_op);
30646+ if (unlikely(err))
4a4d8108 30647+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 30648+ return err;
30649+}
30650+
30651+void au_sysrq_fin(void)
30652+{
30653+ int err;
076b876e 30654+
1facf9fc 30655+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
30656+ if (unlikely(err))
4a4d8108 30657+ pr_err("err %d (ignored)\n", err);
1facf9fc 30658+}
e8791d4f
AM
30659diff -urNp -x '*.orig' linux-4.9/fs/aufs/vdir.c linux-4.9/fs/aufs/vdir.c
30660--- linux-4.9/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
30661+++ linux-4.9/fs/aufs/vdir.c 2021-02-24 16:15:09.538240738 +0100
ae9dfd79 30662@@ -0,0 +1,893 @@
1facf9fc 30663+/*
ae9dfd79 30664+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 30665+ *
30666+ * This program, aufs is free software; you can redistribute it and/or modify
30667+ * it under the terms of the GNU General Public License as published by
30668+ * the Free Software Foundation; either version 2 of the License, or
30669+ * (at your option) any later version.
dece6358
AM
30670+ *
30671+ * This program is distributed in the hope that it will be useful,
30672+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30673+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30674+ * GNU General Public License for more details.
30675+ *
30676+ * You should have received a copy of the GNU General Public License
523b37e3 30677+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30678+ */
30679+
30680+/*
30681+ * virtual or vertical directory
30682+ */
30683+
30684+#include "aufs.h"
30685+
dece6358 30686+static unsigned int calc_size(int nlen)
1facf9fc 30687+{
dece6358 30688+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 30689+}
30690+
30691+static int set_deblk_end(union au_vdir_deblk_p *p,
30692+ union au_vdir_deblk_p *deblk_end)
30693+{
30694+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
30695+ p->de->de_str.len = 0;
30696+ /* smp_mb(); */
30697+ return 0;
30698+ }
30699+ return -1; /* error */
30700+}
30701+
30702+/* returns true or false */
30703+static int is_deblk_end(union au_vdir_deblk_p *p,
30704+ union au_vdir_deblk_p *deblk_end)
30705+{
30706+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
30707+ return !p->de->de_str.len;
30708+ return 1;
30709+}
30710+
30711+static unsigned char *last_deblk(struct au_vdir *vdir)
30712+{
30713+ return vdir->vd_deblk[vdir->vd_nblk - 1];
30714+}
30715+
30716+/* ---------------------------------------------------------------------- */
30717+
79b8bda9 30718+/* estimate the appropriate size for name hash table */
1308ab2a 30719+unsigned int au_rdhash_est(loff_t sz)
30720+{
30721+ unsigned int n;
30722+
30723+ n = UINT_MAX;
30724+ sz >>= 10;
30725+ if (sz < n)
30726+ n = sz;
30727+ if (sz < AUFS_RDHASH_DEF)
30728+ n = AUFS_RDHASH_DEF;
4a4d8108 30729+ /* pr_info("n %u\n", n); */
1308ab2a 30730+ return n;
30731+}
30732+
1facf9fc 30733+/*
30734+ * the allocated memory has to be freed by
dece6358 30735+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 30736+ */
dece6358 30737+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 30738+{
1facf9fc 30739+ struct hlist_head *head;
dece6358 30740+ unsigned int u;
076b876e 30741+ size_t sz;
1facf9fc 30742+
076b876e
AM
30743+ sz = sizeof(*nhash->nh_head) * num_hash;
30744+ head = kmalloc(sz, gfp);
dece6358
AM
30745+ if (head) {
30746+ nhash->nh_num = num_hash;
30747+ nhash->nh_head = head;
30748+ for (u = 0; u < num_hash; u++)
1facf9fc 30749+ INIT_HLIST_HEAD(head++);
dece6358 30750+ return 0; /* success */
1facf9fc 30751+ }
1facf9fc 30752+
dece6358 30753+ return -ENOMEM;
1facf9fc 30754+}
30755+
dece6358
AM
30756+static void nhash_count(struct hlist_head *head)
30757+{
30758+#if 0
30759+ unsigned long n;
30760+ struct hlist_node *pos;
30761+
30762+ n = 0;
30763+ hlist_for_each(pos, head)
30764+ n++;
4a4d8108 30765+ pr_info("%lu\n", n);
dece6358
AM
30766+#endif
30767+}
30768+
30769+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 30770+{
c06a8ce3
AM
30771+ struct au_vdir_wh *pos;
30772+ struct hlist_node *node;
1facf9fc 30773+
c06a8ce3 30774+ hlist_for_each_entry_safe(pos, node, head, wh_hash)
ae9dfd79 30775+ kfree(pos);
1facf9fc 30776+}
30777+
dece6358 30778+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 30779+{
c06a8ce3
AM
30780+ struct au_vdir_dehstr *pos;
30781+ struct hlist_node *node;
1facf9fc 30782+
c06a8ce3 30783+ hlist_for_each_entry_safe(pos, node, head, hash)
ae9dfd79 30784+ au_cache_free_vdir_dehstr(pos);
1facf9fc 30785+}
30786+
dece6358
AM
30787+static void au_nhash_do_free(struct au_nhash *nhash,
30788+ void (*free)(struct hlist_head *head))
1facf9fc 30789+{
1308ab2a 30790+ unsigned int n;
1facf9fc 30791+ struct hlist_head *head;
1facf9fc 30792+
dece6358 30793+ n = nhash->nh_num;
1308ab2a 30794+ if (!n)
30795+ return;
30796+
dece6358 30797+ head = nhash->nh_head;
1308ab2a 30798+ while (n-- > 0) {
dece6358
AM
30799+ nhash_count(head);
30800+ free(head++);
1facf9fc 30801+ }
ae9dfd79 30802+ kfree(nhash->nh_head);
1facf9fc 30803+}
30804+
dece6358 30805+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 30806+{
dece6358
AM
30807+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
30808+}
1facf9fc 30809+
dece6358
AM
30810+static void au_nhash_de_free(struct au_nhash *delist)
30811+{
30812+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 30813+}
30814+
30815+/* ---------------------------------------------------------------------- */
30816+
30817+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
30818+ int limit)
30819+{
30820+ int num;
30821+ unsigned int u, n;
30822+ struct hlist_head *head;
c06a8ce3 30823+ struct au_vdir_wh *pos;
1facf9fc 30824+
30825+ num = 0;
30826+ n = whlist->nh_num;
30827+ head = whlist->nh_head;
1308ab2a 30828+ for (u = 0; u < n; u++, head++)
c06a8ce3
AM
30829+ hlist_for_each_entry(pos, head, wh_hash)
30830+ if (pos->wh_bindex == btgt && ++num > limit)
1facf9fc 30831+ return 1;
1facf9fc 30832+ return 0;
30833+}
30834+
30835+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 30836+ unsigned char *name,
1facf9fc 30837+ unsigned int len)
30838+{
dece6358
AM
30839+ unsigned int v;
30840+ /* const unsigned int magic_bit = 12; */
30841+
1308ab2a 30842+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
30843+
dece6358 30844+ v = 0;
f0c0a007
AM
30845+ if (len > 8)
30846+ len = 8;
dece6358
AM
30847+ while (len--)
30848+ v += *name++;
30849+ /* v = hash_long(v, magic_bit); */
30850+ v %= nhash->nh_num;
30851+ return nhash->nh_head + v;
30852+}
30853+
30854+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
30855+ int nlen)
30856+{
30857+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 30858+}
30859+
30860+/* returns found or not */
dece6358 30861+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 30862+{
30863+ struct hlist_head *head;
c06a8ce3 30864+ struct au_vdir_wh *pos;
1facf9fc 30865+ struct au_vdir_destr *str;
30866+
dece6358 30867+ head = au_name_hash(whlist, name, nlen);
c06a8ce3
AM
30868+ hlist_for_each_entry(pos, head, wh_hash) {
30869+ str = &pos->wh_str;
1facf9fc 30870+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
30871+ if (au_nhash_test_name(str, name, nlen))
30872+ return 1;
30873+ }
30874+ return 0;
30875+}
30876+
30877+/* returns found(true) or not */
30878+static int test_known(struct au_nhash *delist, char *name, int nlen)
30879+{
30880+ struct hlist_head *head;
c06a8ce3 30881+ struct au_vdir_dehstr *pos;
dece6358
AM
30882+ struct au_vdir_destr *str;
30883+
30884+ head = au_name_hash(delist, name, nlen);
c06a8ce3
AM
30885+ hlist_for_each_entry(pos, head, hash) {
30886+ str = pos->str;
dece6358
AM
30887+ AuDbg("%.*s\n", str->len, str->name);
30888+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 30889+ return 1;
30890+ }
30891+ return 0;
30892+}
30893+
dece6358
AM
30894+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
30895+ unsigned char d_type)
30896+{
30897+#ifdef CONFIG_AUFS_SHWH
30898+ wh->wh_ino = ino;
30899+ wh->wh_type = d_type;
30900+#endif
30901+}
30902+
30903+/* ---------------------------------------------------------------------- */
30904+
30905+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
30906+ unsigned int d_type, aufs_bindex_t bindex,
30907+ unsigned char shwh)
1facf9fc 30908+{
30909+ int err;
30910+ struct au_vdir_destr *str;
30911+ struct au_vdir_wh *wh;
30912+
dece6358 30913+ AuDbg("%.*s\n", nlen, name);
1308ab2a 30914+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
30915+
1facf9fc 30916+ err = -ENOMEM;
dece6358 30917+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 30918+ if (unlikely(!wh))
30919+ goto out;
30920+
30921+ err = 0;
30922+ wh->wh_bindex = bindex;
dece6358
AM
30923+ if (shwh)
30924+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 30925+ str = &wh->wh_str;
dece6358
AM
30926+ str->len = nlen;
30927+ memcpy(str->name, name, nlen);
30928+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 30929+ /* smp_mb(); */
30930+
4f0767ce 30931+out:
1facf9fc 30932+ return err;
30933+}
30934+
1facf9fc 30935+static int append_deblk(struct au_vdir *vdir)
30936+{
30937+ int err;
dece6358 30938+ unsigned long ul;
1facf9fc 30939+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
30940+ union au_vdir_deblk_p p, deblk_end;
30941+ unsigned char **o;
30942+
30943+ err = -ENOMEM;
e2f27e51
AM
30944+ o = au_krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
30945+ GFP_NOFS, /*may_shrink*/0);
1facf9fc 30946+ if (unlikely(!o))
30947+ goto out;
30948+
30949+ vdir->vd_deblk = o;
30950+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
30951+ if (p.deblk) {
30952+ ul = vdir->vd_nblk++;
30953+ vdir->vd_deblk[ul] = p.deblk;
30954+ vdir->vd_last.ul = ul;
30955+ vdir->vd_last.p.deblk = p.deblk;
30956+ deblk_end.deblk = p.deblk + deblk_sz;
30957+ err = set_deblk_end(&p, &deblk_end);
30958+ }
30959+
4f0767ce 30960+out:
1facf9fc 30961+ return err;
30962+}
30963+
dece6358
AM
30964+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
30965+ unsigned int d_type, struct au_nhash *delist)
30966+{
30967+ int err;
30968+ unsigned int sz;
30969+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
30970+ union au_vdir_deblk_p p, *room, deblk_end;
30971+ struct au_vdir_dehstr *dehstr;
30972+
30973+ p.deblk = last_deblk(vdir);
30974+ deblk_end.deblk = p.deblk + deblk_sz;
30975+ room = &vdir->vd_last.p;
30976+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
30977+ || !is_deblk_end(room, &deblk_end));
30978+
30979+ sz = calc_size(nlen);
30980+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
30981+ err = append_deblk(vdir);
30982+ if (unlikely(err))
30983+ goto out;
30984+
30985+ p.deblk = last_deblk(vdir);
30986+ deblk_end.deblk = p.deblk + deblk_sz;
30987+ /* smp_mb(); */
30988+ AuDebugOn(room->deblk != p.deblk);
30989+ }
30990+
30991+ err = -ENOMEM;
4a4d8108 30992+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
30993+ if (unlikely(!dehstr))
30994+ goto out;
30995+
30996+ dehstr->str = &room->de->de_str;
30997+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
30998+ room->de->de_ino = ino;
30999+ room->de->de_type = d_type;
31000+ room->de->de_str.len = nlen;
31001+ memcpy(room->de->de_str.name, name, nlen);
31002+
31003+ err = 0;
31004+ room->deblk += sz;
31005+ if (unlikely(set_deblk_end(room, &deblk_end)))
31006+ err = append_deblk(vdir);
31007+ /* smp_mb(); */
31008+
4f0767ce 31009+out:
dece6358
AM
31010+ return err;
31011+}
31012+
31013+/* ---------------------------------------------------------------------- */
31014+
ae9dfd79 31015+void au_vdir_free(struct au_vdir *vdir)
dece6358
AM
31016+{
31017+ unsigned char **deblk;
31018+
31019+ deblk = vdir->vd_deblk;
ae9dfd79
AM
31020+ while (vdir->vd_nblk--)
31021+ kfree(*deblk++);
31022+ kfree(vdir->vd_deblk);
31023+ au_cache_free_vdir(vdir);
dece6358
AM
31024+}
31025+
1308ab2a 31026+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 31027+{
31028+ struct au_vdir *vdir;
1308ab2a 31029+ struct super_block *sb;
1facf9fc 31030+ int err;
31031+
2000de60 31032+ sb = file->f_path.dentry->d_sb;
dece6358
AM
31033+ SiMustAnyLock(sb);
31034+
1facf9fc 31035+ err = -ENOMEM;
31036+ vdir = au_cache_alloc_vdir();
31037+ if (unlikely(!vdir))
31038+ goto out;
31039+
31040+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
31041+ if (unlikely(!vdir->vd_deblk))
31042+ goto out_free;
31043+
31044+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 31045+ if (!vdir->vd_deblk_sz) {
79b8bda9 31046+ /* estimate the appropriate size for deblk */
1308ab2a 31047+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 31048+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 31049+ }
1facf9fc 31050+ vdir->vd_nblk = 0;
31051+ vdir->vd_version = 0;
31052+ vdir->vd_jiffy = 0;
31053+ err = append_deblk(vdir);
31054+ if (!err)
31055+ return vdir; /* success */
31056+
ae9dfd79 31057+ kfree(vdir->vd_deblk);
1facf9fc 31058+
4f0767ce 31059+out_free:
ae9dfd79 31060+ au_cache_free_vdir(vdir);
4f0767ce 31061+out:
1facf9fc 31062+ vdir = ERR_PTR(err);
31063+ return vdir;
31064+}
31065+
31066+static int reinit_vdir(struct au_vdir *vdir)
31067+{
31068+ int err;
31069+ union au_vdir_deblk_p p, deblk_end;
31070+
31071+ while (vdir->vd_nblk > 1) {
ae9dfd79 31072+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
1facf9fc 31073+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
31074+ vdir->vd_nblk--;
31075+ }
31076+ p.deblk = vdir->vd_deblk[0];
31077+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
31078+ err = set_deblk_end(&p, &deblk_end);
31079+ /* keep vd_deblk_sz; block 0 is kept and marked empty above, the extra blocks were freed */
31080+ vdir->vd_last.ul = 0;
31081+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
31082+ vdir->vd_version = 0;
31083+ vdir->vd_jiffy = 0;
31084+ /* smp_mb(); */
31085+ return err;
31086+}
31087+
31088+/* ---------------------------------------------------------------------- */
31089+
1facf9fc 31090+#define AuFillVdir_CALLED 1
31091+#define AuFillVdir_WHABLE (1 << 1)
dece6358 31092+#define AuFillVdir_SHWH (1 << 2)
1facf9fc 31093+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
7f207e10
AM
31094+#define au_fset_fillvdir(flags, name) \
31095+ do { (flags) |= AuFillVdir_##name; } while (0)
31096+#define au_fclr_fillvdir(flags, name) \
31097+ do { (flags) &= ~AuFillVdir_##name; } while (0)
1facf9fc 31098+
dece6358
AM
31099+#ifndef CONFIG_AUFS_SHWH
31100+#undef AuFillVdir_SHWH
31101+#define AuFillVdir_SHWH 0
31102+#endif
31103+
1facf9fc 31104+struct fillvdir_arg {
392086de 31105+ struct dir_context ctx;
1facf9fc 31106+ struct file *file;
31107+ struct au_vdir *vdir;
dece6358
AM
31108+ struct au_nhash delist;
31109+ struct au_nhash whlist;
1facf9fc 31110+ aufs_bindex_t bindex;
31111+ unsigned int flags;
31112+ int err;
31113+};
31114+
392086de 31115+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
1facf9fc 31116+ loff_t offset __maybe_unused, u64 h_ino,
31117+ unsigned int d_type)
31118+{
392086de 31119+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
1facf9fc 31120+ char *name = (void *)__name;
31121+ struct super_block *sb;
1facf9fc 31122+ ino_t ino;
dece6358 31123+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 31124+
1facf9fc 31125+ arg->err = 0;
2000de60 31126+ sb = arg->file->f_path.dentry->d_sb;
1facf9fc 31127+ au_fset_fillvdir(arg->flags, CALLED);
31128+ /* smp_mb(); */
dece6358 31129+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 31130+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
31131+ if (test_known(&arg->delist, name, nlen)
31132+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
31133+ goto out; /* already exists or whiteouted */
1facf9fc 31134+
dece6358 31135+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
31136+ if (!arg->err) {
31137+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
31138+ d_type = DT_UNKNOWN;
dece6358
AM
31139+ arg->err = append_de(arg->vdir, name, nlen, ino,
31140+ d_type, &arg->delist);
4a4d8108 31141+ }
1facf9fc 31142+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
31143+ name += AUFS_WH_PFX_LEN;
dece6358
AM
31144+ nlen -= AUFS_WH_PFX_LEN;
31145+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
31146+ goto out; /* already whiteouted */
1facf9fc 31147+
dece6358
AM
31148+ if (shwh)
31149+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
31150+ &ino);
4a4d8108
AM
31151+ if (!arg->err) {
31152+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
31153+ d_type = DT_UNKNOWN;
1facf9fc 31154+ arg->err = au_nhash_append_wh
dece6358
AM
31155+ (&arg->whlist, name, nlen, ino, d_type,
31156+ arg->bindex, shwh);
4a4d8108 31157+ }
1facf9fc 31158+ }
31159+
4f0767ce 31160+out:
1facf9fc 31161+ if (!arg->err)
31162+ arg->vdir->vd_jiffy = jiffies;
31163+ /* smp_mb(); */
31164+ AuTraceErr(arg->err);
31165+ return arg->err;
31166+}
31167+
dece6358
AM
31168+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
31169+ struct au_nhash *whlist, struct au_nhash *delist)
31170+{
31171+#ifdef CONFIG_AUFS_SHWH
31172+ int err;
31173+ unsigned int nh, u;
31174+ struct hlist_head *head;
c06a8ce3
AM
31175+ struct au_vdir_wh *pos;
31176+ struct hlist_node *n;
dece6358
AM
31177+ char *p, *o;
31178+ struct au_vdir_destr *destr;
31179+
31180+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
31181+
31182+ err = -ENOMEM;
537831f9 31183+ o = p = (void *)__get_free_page(GFP_NOFS);
dece6358
AM
31184+ if (unlikely(!p))
31185+ goto out;
31186+
31187+ err = 0;
31188+ nh = whlist->nh_num;
31189+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
31190+ p += AUFS_WH_PFX_LEN;
31191+ for (u = 0; u < nh; u++) {
31192+ head = whlist->nh_head + u;
c06a8ce3
AM
31193+ hlist_for_each_entry_safe(pos, n, head, wh_hash) {
31194+ destr = &pos->wh_str;
dece6358
AM
31195+ memcpy(p, destr->name, destr->len);
31196+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
c06a8ce3 31197+ pos->wh_ino, pos->wh_type, delist);
dece6358
AM
31198+ if (unlikely(err))
31199+ break;
31200+ }
31201+ }
31202+
ae9dfd79 31203+ free_page((unsigned long)o);
dece6358 31204+
4f0767ce 31205+out:
dece6358
AM
31206+ AuTraceErr(err);
31207+ return err;
31208+#else
31209+ return 0;
31210+#endif
31211+}
31212+
1facf9fc 31213+static int au_do_read_vdir(struct fillvdir_arg *arg)
31214+{
31215+ int err;
dece6358 31216+ unsigned int rdhash;
1facf9fc 31217+ loff_t offset;
5afbbe0d 31218+ aufs_bindex_t bbot, bindex, btop;
dece6358 31219+ unsigned char shwh;
1facf9fc 31220+ struct file *hf, *file;
31221+ struct super_block *sb;
31222+
1facf9fc 31223+ file = arg->file;
2000de60 31224+ sb = file->f_path.dentry->d_sb;
dece6358
AM
31225+ SiMustAnyLock(sb);
31226+
31227+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 31228+ if (!rdhash)
31229+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
31230+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
31231+ if (unlikely(err))
1facf9fc 31232+ goto out;
dece6358
AM
31233+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
31234+ if (unlikely(err))
1facf9fc 31235+ goto out_delist;
31236+
31237+ err = 0;
31238+ arg->flags = 0;
dece6358
AM
31239+ shwh = 0;
31240+ if (au_opt_test(au_mntflags(sb), SHWH)) {
31241+ shwh = 1;
31242+ au_fset_fillvdir(arg->flags, SHWH);
31243+ }
5afbbe0d
AM
31244+ btop = au_fbtop(file);
31245+ bbot = au_fbbot_dir(file);
31246+ for (bindex = btop; !err && bindex <= bbot; bindex++) {
4a4d8108 31247+ hf = au_hf_dir(file, bindex);
1facf9fc 31248+ if (!hf)
31249+ continue;
31250+
31251+ offset = vfsub_llseek(hf, 0, SEEK_SET);
31252+ err = offset;
31253+ if (unlikely(offset))
31254+ break;
31255+
31256+ arg->bindex = bindex;
31257+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358 31258+ if (shwh
5afbbe0d 31259+ || (bindex != bbot
dece6358 31260+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 31261+ au_fset_fillvdir(arg->flags, WHABLE);
31262+ do {
31263+ arg->err = 0;
31264+ au_fclr_fillvdir(arg->flags, CALLED);
31265+ /* smp_mb(); */
392086de 31266+ err = vfsub_iterate_dir(hf, &arg->ctx);
1facf9fc 31267+ if (err >= 0)
31268+ err = arg->err;
31269+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
392086de
AM
31270+
31271+ /*
31272+ * dir_relax() may be good for concurrency, but aufs should not
31273+ * use it since it will cause a lockdep problem.
31274+ */
1facf9fc 31275+ }
dece6358
AM
31276+
31277+ if (!err && shwh)
31278+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
31279+
31280+ au_nhash_wh_free(&arg->whlist);
1facf9fc 31281+
4f0767ce 31282+out_delist:
dece6358 31283+ au_nhash_de_free(&arg->delist);
4f0767ce 31284+out:
1facf9fc 31285+ return err;
31286+}
31287+
31288+static int read_vdir(struct file *file, int may_read)
31289+{
31290+ int err;
31291+ unsigned long expire;
31292+ unsigned char do_read;
392086de
AM
31293+ struct fillvdir_arg arg = {
31294+ .ctx = {
2000de60 31295+ .actor = fillvdir
392086de
AM
31296+ }
31297+ };
1facf9fc 31298+ struct inode *inode;
31299+ struct au_vdir *vdir, *allocated;
31300+
31301+ err = 0;
c06a8ce3 31302+ inode = file_inode(file);
1facf9fc 31303+ IMustLock(inode);
5afbbe0d 31304+ IiMustWriteLock(inode);
dece6358
AM
31305+ SiMustAnyLock(inode->i_sb);
31306+
1facf9fc 31307+ allocated = NULL;
31308+ do_read = 0;
31309+ expire = au_sbi(inode->i_sb)->si_rdcache;
31310+ vdir = au_ivdir(inode);
31311+ if (!vdir) {
31312+ do_read = 1;
1308ab2a 31313+ vdir = alloc_vdir(file);
1facf9fc 31314+ err = PTR_ERR(vdir);
31315+ if (IS_ERR(vdir))
31316+ goto out;
31317+ err = 0;
31318+ allocated = vdir;
31319+ } else if (may_read
31320+ && (inode->i_version != vdir->vd_version
31321+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
31322+ do_read = 1;
31323+ err = reinit_vdir(vdir);
31324+ if (unlikely(err))
31325+ goto out;
31326+ }
31327+
31328+ if (!do_read)
31329+ return 0; /* success */
31330+
31331+ arg.file = file;
31332+ arg.vdir = vdir;
31333+ err = au_do_read_vdir(&arg);
31334+ if (!err) {
392086de 31335+ /* file->f_pos = 0; */ /* todo: ctx->pos? */
1facf9fc 31336+ vdir->vd_version = inode->i_version;
31337+ vdir->vd_last.ul = 0;
31338+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
31339+ if (allocated)
31340+ au_set_ivdir(inode, allocated);
31341+ } else if (allocated)
ae9dfd79 31342+ au_vdir_free(allocated);
1facf9fc 31343+
4f0767ce 31344+out:
1facf9fc 31345+ return err;
31346+}
31347+
31348+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
31349+{
31350+ int err, rerr;
31351+ unsigned long ul, n;
31352+ const unsigned int deblk_sz = src->vd_deblk_sz;
31353+
31354+ AuDebugOn(tgt->vd_nblk != 1);
31355+
31356+ err = -ENOMEM;
31357+ if (tgt->vd_nblk < src->vd_nblk) {
31358+ unsigned char **p;
31359+
e2f27e51
AM
31360+ p = au_krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
31361+ GFP_NOFS, /*may_shrink*/0);
1facf9fc 31362+ if (unlikely(!p))
31363+ goto out;
31364+ tgt->vd_deblk = p;
31365+ }
31366+
1308ab2a 31367+ if (tgt->vd_deblk_sz != deblk_sz) {
31368+ unsigned char *p;
31369+
31370+ tgt->vd_deblk_sz = deblk_sz;
e2f27e51
AM
31371+ p = au_krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS,
31372+ /*may_shrink*/1);
1308ab2a 31373+ if (unlikely(!p))
31374+ goto out;
31375+ tgt->vd_deblk[0] = p;
31376+ }
1facf9fc 31377+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 31378+ tgt->vd_version = src->vd_version;
31379+ tgt->vd_jiffy = src->vd_jiffy;
31380+
31381+ n = src->vd_nblk;
31382+ for (ul = 1; ul < n; ul++) {
dece6358
AM
31383+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
31384+ GFP_NOFS);
31385+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 31386+ goto out;
1308ab2a 31387+ tgt->vd_nblk++;
1facf9fc 31388+ }
1308ab2a 31389+ tgt->vd_nblk = n;
31390+ tgt->vd_last.ul = src->vd_last.ul; /* same block index as src, so the offset below lands in the matching block */
31391+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
31392+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
31393+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 31394+ /* smp_mb(); */
31395+ return 0; /* success */
31396+
4f0767ce 31397+out:
1facf9fc 31398+ rerr = reinit_vdir(tgt); /* frees the blocks beyond the first and resets tgt's cursor */
31399+ BUG_ON(rerr);
31400+ return err;
31401+}
31402+
31403+int au_vdir_init(struct file *file)
31404+{
31405+ int err;
31406+ struct inode *inode;
31407+ struct au_vdir *vdir_cache, *allocated;
31408+
392086de 31409+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 31410+ err = read_vdir(file, !file->f_pos);
31411+ if (unlikely(err))
31412+ goto out;
31413+
31414+ allocated = NULL;
31415+ vdir_cache = au_fvdir_cache(file);
31416+ if (!vdir_cache) {
1308ab2a 31417+ vdir_cache = alloc_vdir(file);
1facf9fc 31418+ err = PTR_ERR(vdir_cache);
31419+ if (IS_ERR(vdir_cache))
31420+ goto out;
31421+ allocated = vdir_cache;
31422+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
392086de 31423+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 31424+ err = reinit_vdir(vdir_cache);
31425+ if (unlikely(err))
31426+ goto out;
31427+ } else
31428+ return 0; /* success */
31429+
c06a8ce3 31430+ inode = file_inode(file);
1facf9fc 31431+ err = copy_vdir(vdir_cache, au_ivdir(inode));
31432+ if (!err) {
31433+ file->f_version = inode->i_version;
31434+ if (allocated)
31435+ au_set_fvdir_cache(file, allocated);
31436+ } else if (allocated)
ae9dfd79 31437+ au_vdir_free(allocated);
1facf9fc 31438+
4f0767ce 31439+out:
1facf9fc 31440+ return err;
31441+}
31442+
31443+static loff_t calc_offset(struct au_vdir *vdir)
31444+{
31445+ loff_t offset;
31446+ union au_vdir_deblk_p p;
31447+
31448+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
31449+ offset = vdir->vd_last.p.deblk - p.deblk;
31450+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
31451+ return offset;
31452+}
31453+
31454+/* returns true or false */
392086de 31455+static int seek_vdir(struct file *file, struct dir_context *ctx)
1facf9fc 31456+{
31457+ int valid;
31458+ unsigned int deblk_sz;
31459+ unsigned long ul, n;
31460+ loff_t offset;
31461+ union au_vdir_deblk_p p, deblk_end;
31462+ struct au_vdir *vdir_cache;
31463+
31464+ valid = 1;
31465+ vdir_cache = au_fvdir_cache(file);
31466+ offset = calc_offset(vdir_cache);
31467+ AuDbg("offset %lld\n", offset);
392086de 31468+ if (ctx->pos == offset)
1facf9fc 31469+ goto out;
31470+
31471+ vdir_cache->vd_last.ul = 0;
31472+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
392086de 31473+ if (!ctx->pos)
1facf9fc 31474+ goto out;
31475+
31476+ valid = 0;
31477+ deblk_sz = vdir_cache->vd_deblk_sz;
392086de 31478+ ul = div64_u64(ctx->pos, deblk_sz);
1facf9fc 31479+ AuDbg("ul %lu\n", ul);
31480+ if (ul >= vdir_cache->vd_nblk)
31481+ goto out;
31482+
31483+ n = vdir_cache->vd_nblk;
31484+ for (; ul < n; ul++) {
31485+ p.deblk = vdir_cache->vd_deblk[ul];
31486+ deblk_end.deblk = p.deblk + deblk_sz;
31487+ offset = ul;
31488+ offset *= deblk_sz;
392086de 31489+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
1facf9fc 31490+ unsigned int l;
31491+
31492+ l = calc_size(p.de->de_str.len);
31493+ offset += l;
31494+ p.deblk += l;
31495+ }
31496+ if (!is_deblk_end(&p, &deblk_end)) {
31497+ valid = 1;
31498+ vdir_cache->vd_last.ul = ul;
31499+ vdir_cache->vd_last.p = p;
31500+ break;
31501+ }
31502+ }
31503+
4f0767ce 31504+out:
1facf9fc 31505+ /* smp_mb(); */
ae9dfd79
AM
31506+ if (!valid)
31507+ AuDbg("valid %d\n", !valid);
1facf9fc 31508+ return valid;
31509+}
31510+
392086de 31511+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
1facf9fc 31512+{
1facf9fc 31513+ unsigned int l, deblk_sz;
31514+ union au_vdir_deblk_p deblk_end;
31515+ struct au_vdir *vdir_cache;
31516+ struct au_vdir_de *de;
31517+
31518+ vdir_cache = au_fvdir_cache(file);
392086de 31519+ if (!seek_vdir(file, ctx))
1facf9fc 31520+ return 0;
31521+
31522+ deblk_sz = vdir_cache->vd_deblk_sz;
31523+ while (1) {
31524+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
31525+ deblk_end.deblk += deblk_sz;
31526+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
31527+ de = vdir_cache->vd_last.p.de;
31528+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
392086de 31529+ de->de_str.len, de->de_str.name, ctx->pos,
1facf9fc 31530+ (unsigned long)de->de_ino, de->de_type);
392086de
AM
31531+ if (unlikely(!dir_emit(ctx, de->de_str.name,
31532+ de->de_str.len, de->de_ino,
31533+ de->de_type))) {
1facf9fc 31534+ /* todo: ignore the error caused by udba? */
31535+ /* return err; */
31536+ return 0;
31537+ }
31538+
31539+ l = calc_size(de->de_str.len);
31540+ vdir_cache->vd_last.p.deblk += l;
392086de 31541+ ctx->pos += l;
1facf9fc 31542+ }
31543+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
31544+ vdir_cache->vd_last.ul++;
31545+ vdir_cache->vd_last.p.deblk
31546+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
392086de 31547+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
1facf9fc 31548+ continue;
31549+ }
31550+ break;
31551+ }
31552+
31553+ /* smp_mb(); */
31554+ return 0;
31555+}
e8791d4f
AM
31556diff -urNp -x '*.orig' linux-4.9/fs/aufs/vfsub.c linux-4.9/fs/aufs/vfsub.c
31557--- linux-4.9/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
31558+++ linux-4.9/fs/aufs/vfsub.c 2021-02-24 16:15:09.538240738 +0100
ae9dfd79 31559@@ -0,0 +1,894 @@
1facf9fc 31560+/*
ae9dfd79 31561+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 31562+ *
31563+ * This program, aufs is free software; you can redistribute it and/or modify
31564+ * it under the terms of the GNU General Public License as published by
31565+ * the Free Software Foundation; either version 2 of the License, or
31566+ * (at your option) any later version.
dece6358
AM
31567+ *
31568+ * This program is distributed in the hope that it will be useful,
31569+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31570+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31571+ * GNU General Public License for more details.
31572+ *
31573+ * You should have received a copy of the GNU General Public License
523b37e3 31574+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31575+ */
31576+
31577+/*
31578+ * sub-routines for VFS
31579+ */
31580+
ae9dfd79 31581+#include <linux/mnt_namespace.h>
dece6358 31582+#include <linux/namei.h>
8cdd5066 31583+#include <linux/nsproxy.h>
dece6358
AM
31584+#include <linux/security.h>
31585+#include <linux/splice.h>
1facf9fc 31586+#include "aufs.h"
31587+
8cdd5066
JR
31588+#ifdef CONFIG_AUFS_BR_FUSE
31589+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
31590+{
8cdd5066
JR
31591+ if (!au_test_fuse(h_sb) || !au_userns)
31592+ return 0;
31593+
ae9dfd79 31594+ return is_current_mnt_ns(mnt) ? 0 : -EACCES;
8cdd5066
JR
31595+}
31596+#endif
31597+
ae9dfd79
AM
31598+int vfsub_sync_filesystem(struct super_block *h_sb, int wait)
31599+{
31600+ int err;
31601+
31602+ lockdep_off();
31603+ down_read(&h_sb->s_umount);
31604+ err = __sync_filesystem(h_sb, wait);
31605+ up_read(&h_sb->s_umount);
31606+ lockdep_on();
31607+
31608+ return err;
31609+}
31610+
8cdd5066
JR
31611+/* ---------------------------------------------------------------------- */
31612+
1facf9fc 31613+int vfsub_update_h_iattr(struct path *h_path, int *did)
31614+{
31615+ int err;
31616+ struct kstat st;
31617+ struct super_block *h_sb;
31618+
31619+ /* for remote fs, leave work for its getattr or d_revalidate */
31620+ /* for bad i_attr fs, handle them in aufs_getattr() */
31621+ /* still some fs may acquire i_mutex. we need to skip them */
31622+ err = 0;
31623+ if (!did)
31624+ did = &err;
31625+ h_sb = h_path->dentry->d_sb;
31626+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
31627+ if (*did)
c06a8ce3 31628+ err = vfs_getattr(h_path, &st);
1facf9fc 31629+
31630+ return err;
31631+}
31632+
31633+/* ---------------------------------------------------------------------- */
31634+
4a4d8108 31635+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 31636+{
31637+ struct file *file;
31638+
b4510431 31639+ file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
7f207e10 31640+ current_cred());
2cbb1c4b
JR
31641+ if (!IS_ERR_OR_NULL(file)
31642+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
5527c038 31643+ i_readcount_inc(d_inode(path->dentry));
4a4d8108 31644+
1308ab2a 31645+ return file;
31646+}
31647+
1facf9fc 31648+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
31649+{
31650+ struct file *file;
31651+
2cbb1c4b 31652+ lockdep_off();
7f207e10 31653+ file = filp_open(path,
2cbb1c4b 31654+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 31655+ mode);
2cbb1c4b 31656+ lockdep_on();
1facf9fc 31657+ if (IS_ERR(file))
31658+ goto out;
31659+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
31660+
4f0767ce 31661+out:
1facf9fc 31662+ return file;
31663+}
31664+
b912730e
AM
31665+/*
31666+ * Ideally this function should call VFS:do_last() in order to keep all its
31667+ * checkings. But it is very hard for aufs to regenerate several VFS internal
31668+ * structure such as nameidata. This is a second (or third) best approach.
31669+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
31670+ */
31671+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
31672+ struct vfsub_aopen_args *args, struct au_branch *br)
31673+{
31674+ int err;
31675+ struct file *file = args->file;
31676+ /* copied from linux/fs/namei.c:atomic_open() */
31677+ struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
31678+
31679+ IMustLock(dir);
31680+ AuDebugOn(!dir->i_op->atomic_open);
31681+
31682+ err = au_br_test_oflag(args->open_flag, br);
31683+ if (unlikely(err))
31684+ goto out;
31685+
31686+ args->file->f_path.dentry = DENTRY_NOT_SET;
31687+ args->file->f_path.mnt = au_br_mnt(br);
31688+ err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
31689+ args->create_mode, args->opened);
31690+ if (err >= 0) {
31691+ /* some filesystems don't set FILE_CREATED while succeeded? */
31692+ if (*args->opened & FILE_CREATED)
31693+ fsnotify_create(dir, dentry);
31694+ } else
31695+ goto out;
31696+
31697+
31698+ if (!err) {
31699+ /* todo: call VFS:may_open() here */
31700+ err = open_check_o_direct(file);
31701+ /* todo: ima_file_check() too? */
31702+ if (!err && (args->open_flag & __FMODE_EXEC))
31703+ err = deny_write_access(file);
31704+ if (unlikely(err))
31705+ /* note that the file is created and still opened */
31706+ goto out;
31707+ }
31708+
5afbbe0d 31709+ au_br_get(br);
b912730e
AM
31710+ fsnotify_open(file);
31711+
31712+out:
31713+ return err;
31714+}
31715+
1facf9fc 31716+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
31717+{
31718+ int err;
31719+
1facf9fc 31720+ err = kern_path(name, flags, path);
5527c038 31721+ if (!err && d_is_positive(path->dentry))
1facf9fc 31722+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
31723+ return err;
31724+}
31725+
febd17d6
JR
31726+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
31727+ struct dentry *parent, int len)
31728+{
31729+ struct path path = {
31730+ .mnt = NULL
31731+ };
31732+
31733+ path.dentry = lookup_one_len_unlocked(name, parent, len);
31734+ if (IS_ERR(path.dentry))
31735+ goto out;
31736+ if (d_is_positive(path.dentry))
31737+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
31738+
31739+out:
31740+ AuTraceErrPtr(path.dentry);
31741+ return path.dentry;
31742+}
31743+
1facf9fc 31744+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
31745+ int len)
31746+{
31747+ struct path path = {
31748+ .mnt = NULL
31749+ };
31750+
1308ab2a 31751+ /* VFS checks it too, but by WARN_ON_ONCE() */
5527c038 31752+ IMustLock(d_inode(parent));
1facf9fc 31753+
31754+ path.dentry = lookup_one_len(name, parent, len);
31755+ if (IS_ERR(path.dentry))
31756+ goto out;
5527c038 31757+ if (d_is_positive(path.dentry))
1facf9fc 31758+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
31759+
4f0767ce 31760+out:
4a4d8108 31761+ AuTraceErrPtr(path.dentry);
1facf9fc 31762+ return path.dentry;
31763+}
31764+
b4510431 31765+void vfsub_call_lkup_one(void *args)
2cbb1c4b 31766+{
b4510431
AM
31767+ struct vfsub_lkup_one_args *a = args;
31768+ *a->errp = vfsub_lkup_one(a->name, a->parent);
2cbb1c4b
JR
31769+}
31770+
1facf9fc 31771+/* ---------------------------------------------------------------------- */
31772+
31773+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
31774+ struct dentry *d2, struct au_hinode *hdir2)
31775+{
31776+ struct dentry *d;
31777+
2cbb1c4b 31778+ lockdep_off();
1facf9fc 31779+ d = lock_rename(d1, d2);
2cbb1c4b 31780+ lockdep_on();
4a4d8108 31781+ au_hn_suspend(hdir1);
1facf9fc 31782+ if (hdir1 != hdir2)
4a4d8108 31783+ au_hn_suspend(hdir2);
1facf9fc 31784+
31785+ return d;
31786+}
31787+
31788+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
31789+ struct dentry *d2, struct au_hinode *hdir2)
31790+{
4a4d8108 31791+ au_hn_resume(hdir1);
1facf9fc 31792+ if (hdir1 != hdir2)
4a4d8108 31793+ au_hn_resume(hdir2);
2cbb1c4b 31794+ lockdep_off();
1facf9fc 31795+ unlock_rename(d1, d2);
2cbb1c4b 31796+ lockdep_on();
1facf9fc 31797+}
31798+
31799+/* ---------------------------------------------------------------------- */
31800+
b4510431 31801+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
1facf9fc 31802+{
31803+ int err;
31804+ struct dentry *d;
31805+
31806+ IMustLock(dir);
31807+
31808+ d = path->dentry;
31809+ path->dentry = d->d_parent;
b752ccd1 31810+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 31811+ path->dentry = d;
31812+ if (unlikely(err))
31813+ goto out;
31814+
c1595e42 31815+ lockdep_off();
b4510431 31816+ err = vfs_create(dir, path->dentry, mode, want_excl);
c1595e42 31817+ lockdep_on();
1facf9fc 31818+ if (!err) {
31819+ struct path tmp = *path;
31820+ int did;
31821+
31822+ vfsub_update_h_iattr(&tmp, &did);
31823+ if (did) {
31824+ tmp.dentry = path->dentry->d_parent;
31825+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31826+ }
31827+ /*ignore*/
31828+ }
31829+
4f0767ce 31830+out:
1facf9fc 31831+ return err;
31832+}
31833+
31834+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
31835+{
31836+ int err;
31837+ struct dentry *d;
31838+
31839+ IMustLock(dir);
31840+
31841+ d = path->dentry;
31842+ path->dentry = d->d_parent;
b752ccd1 31843+ err = security_path_symlink(path, d, symname);
1facf9fc 31844+ path->dentry = d;
31845+ if (unlikely(err))
31846+ goto out;
31847+
c1595e42 31848+ lockdep_off();
1facf9fc 31849+ err = vfs_symlink(dir, path->dentry, symname);
c1595e42 31850+ lockdep_on();
1facf9fc 31851+ if (!err) {
31852+ struct path tmp = *path;
31853+ int did;
31854+
31855+ vfsub_update_h_iattr(&tmp, &did);
31856+ if (did) {
31857+ tmp.dentry = path->dentry->d_parent;
31858+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31859+ }
31860+ /*ignore*/
31861+ }
31862+
4f0767ce 31863+out:
1facf9fc 31864+ return err;
31865+}
31866+
31867+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
31868+{
31869+ int err;
31870+ struct dentry *d;
31871+
31872+ IMustLock(dir);
31873+
31874+ d = path->dentry;
31875+ path->dentry = d->d_parent;
027c5e7a 31876+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 31877+ path->dentry = d;
31878+ if (unlikely(err))
31879+ goto out;
31880+
c1595e42 31881+ lockdep_off();
1facf9fc 31882+ err = vfs_mknod(dir, path->dentry, mode, dev);
c1595e42 31883+ lockdep_on();
1facf9fc 31884+ if (!err) {
31885+ struct path tmp = *path;
31886+ int did;
31887+
31888+ vfsub_update_h_iattr(&tmp, &did);
31889+ if (did) {
31890+ tmp.dentry = path->dentry->d_parent;
31891+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31892+ }
31893+ /*ignore*/
31894+ }
31895+
4f0767ce 31896+out:
1facf9fc 31897+ return err;
31898+}
31899+
31900+static int au_test_nlink(struct inode *inode)
31901+{
31902+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
31903+
31904+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
31905+ || inode->i_nlink < link_max)
31906+ return 0;
31907+ return -EMLINK;
31908+}
31909+
523b37e3
AM
31910+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
31911+ struct inode **delegated_inode)
1facf9fc 31912+{
31913+ int err;
31914+ struct dentry *d;
31915+
31916+ IMustLock(dir);
31917+
5527c038 31918+ err = au_test_nlink(d_inode(src_dentry));
1facf9fc 31919+ if (unlikely(err))
31920+ return err;
31921+
b4510431 31922+ /* we don't call may_linkat() */
1facf9fc 31923+ d = path->dentry;
31924+ path->dentry = d->d_parent;
b752ccd1 31925+ err = security_path_link(src_dentry, path, d);
1facf9fc 31926+ path->dentry = d;
31927+ if (unlikely(err))
31928+ goto out;
31929+
2cbb1c4b 31930+ lockdep_off();
523b37e3 31931+ err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
2cbb1c4b 31932+ lockdep_on();
1facf9fc 31933+ if (!err) {
31934+ struct path tmp = *path;
31935+ int did;
31936+
31937+ /* fuse has different memory inode for the same inumber */
31938+ vfsub_update_h_iattr(&tmp, &did);
31939+ if (did) {
31940+ tmp.dentry = path->dentry->d_parent;
31941+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31942+ tmp.dentry = src_dentry;
31943+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31944+ }
31945+ /*ignore*/
31946+ }
31947+
4f0767ce 31948+out:
1facf9fc 31949+ return err;
31950+}
31951+
31952+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
523b37e3 31953+ struct inode *dir, struct path *path,
f2c43d5f 31954+ struct inode **delegated_inode, unsigned int flags)
1facf9fc 31955+{
31956+ int err;
31957+ struct path tmp = {
31958+ .mnt = path->mnt
31959+ };
31960+ struct dentry *d;
31961+
31962+ IMustLock(dir);
31963+ IMustLock(src_dir);
31964+
31965+ d = path->dentry;
31966+ path->dentry = d->d_parent;
31967+ tmp.dentry = src_dentry->d_parent;
38d290e6 31968+ err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
1facf9fc 31969+ path->dentry = d;
31970+ if (unlikely(err))
31971+ goto out;
31972+
2cbb1c4b 31973+ lockdep_off();
523b37e3 31974+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
f2c43d5f 31975+ delegated_inode, flags);
2cbb1c4b 31976+ lockdep_on();
1facf9fc 31977+ if (!err) {
31978+ int did;
31979+
31980+ tmp.dentry = d->d_parent;
31981+ vfsub_update_h_iattr(&tmp, &did);
31982+ if (did) {
31983+ tmp.dentry = src_dentry;
31984+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31985+ tmp.dentry = src_dentry->d_parent;
31986+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31987+ }
31988+ /*ignore*/
31989+ }
31990+
4f0767ce 31991+out:
1facf9fc 31992+ return err;
31993+}
31994+
31995+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
31996+{
31997+ int err;
31998+ struct dentry *d;
31999+
32000+ IMustLock(dir);
32001+
32002+ d = path->dentry;
32003+ path->dentry = d->d_parent;
b752ccd1 32004+ err = security_path_mkdir(path, d, mode);
1facf9fc 32005+ path->dentry = d;
32006+ if (unlikely(err))
32007+ goto out;
32008+
c1595e42 32009+ lockdep_off();
1facf9fc 32010+ err = vfs_mkdir(dir, path->dentry, mode);
c1595e42 32011+ lockdep_on();
1facf9fc 32012+ if (!err) {
32013+ struct path tmp = *path;
32014+ int did;
32015+
32016+ vfsub_update_h_iattr(&tmp, &did);
32017+ if (did) {
32018+ tmp.dentry = path->dentry->d_parent;
32019+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
32020+ }
32021+ /*ignore*/
32022+ }
32023+
4f0767ce 32024+out:
1facf9fc 32025+ return err;
32026+}
32027+
32028+int vfsub_rmdir(struct inode *dir, struct path *path)
32029+{
32030+ int err;
32031+ struct dentry *d;
32032+
32033+ IMustLock(dir);
32034+
32035+ d = path->dentry;
32036+ path->dentry = d->d_parent;
b752ccd1 32037+ err = security_path_rmdir(path, d);
1facf9fc 32038+ path->dentry = d;
32039+ if (unlikely(err))
32040+ goto out;
32041+
2cbb1c4b 32042+ lockdep_off();
1facf9fc 32043+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 32044+ lockdep_on();
1facf9fc 32045+ if (!err) {
32046+ struct path tmp = {
32047+ .dentry = path->dentry->d_parent,
32048+ .mnt = path->mnt
32049+ };
32050+
32051+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
32052+ }
32053+
4f0767ce 32054+out:
1facf9fc 32055+ return err;
32056+}
32057+
32058+/* ---------------------------------------------------------------------- */
32059+
9dbd164d 32060+/* todo: support mmap_sem? */
1facf9fc 32061+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
32062+ loff_t *ppos)
32063+{
32064+ ssize_t err;
32065+
2cbb1c4b 32066+ lockdep_off();
1facf9fc 32067+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 32068+ lockdep_on();
1facf9fc 32069+ if (err >= 0)
32070+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
32071+ return err;
32072+}
32073+
32074+/* todo: kernel_read()? */
32075+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
32076+ loff_t *ppos)
32077+{
32078+ ssize_t err;
32079+ mm_segment_t oldfs;
b752ccd1
AM
32080+ union {
32081+ void *k;
32082+ char __user *u;
32083+ } buf;
1facf9fc 32084+
b752ccd1 32085+ buf.k = kbuf;
1facf9fc 32086+ oldfs = get_fs();
32087+ set_fs(KERNEL_DS);
b752ccd1 32088+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 32089+ set_fs(oldfs);
32090+ return err;
32091+}
32092+
32093+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
32094+ loff_t *ppos)
32095+{
32096+ ssize_t err;
32097+
2cbb1c4b 32098+ lockdep_off();
1facf9fc 32099+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 32100+ lockdep_on();
1facf9fc 32101+ if (err >= 0)
32102+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
32103+ return err;
32104+}
32105+
32106+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
32107+{
32108+ ssize_t err;
32109+ mm_segment_t oldfs;
b752ccd1
AM
32110+ union {
32111+ void *k;
32112+ const char __user *u;
32113+ } buf;
1facf9fc 32114+
b752ccd1 32115+ buf.k = kbuf;
1facf9fc 32116+ oldfs = get_fs();
32117+ set_fs(KERNEL_DS);
b752ccd1 32118+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 32119+ set_fs(oldfs);
32120+ return err;
32121+}
32122+
4a4d8108
AM
32123+int vfsub_flush(struct file *file, fl_owner_t id)
32124+{
32125+ int err;
32126+
32127+ err = 0;
523b37e3 32128+ if (file->f_op->flush) {
2000de60 32129+ if (!au_test_nfs(file->f_path.dentry->d_sb))
2cbb1c4b
JR
32130+ err = file->f_op->flush(file, id);
32131+ else {
32132+ lockdep_off();
32133+ err = file->f_op->flush(file, id);
32134+ lockdep_on();
32135+ }
4a4d8108
AM
32136+ if (!err)
32137+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
32138+ /*ignore*/
32139+ }
32140+ return err;
32141+}
32142+
392086de 32143+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
1facf9fc 32144+{
32145+ int err;
32146+
523b37e3 32147+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 32148+
2cbb1c4b 32149+ lockdep_off();
392086de 32150+ err = iterate_dir(file, ctx);
2cbb1c4b 32151+ lockdep_on();
1facf9fc 32152+ if (err >= 0)
32153+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
ae9dfd79 32154+
1facf9fc 32155+ return err;
32156+}
32157+
32158+long vfsub_splice_to(struct file *in, loff_t *ppos,
32159+ struct pipe_inode_info *pipe, size_t len,
32160+ unsigned int flags)
32161+{
32162+ long err;
32163+
2cbb1c4b 32164+ lockdep_off();
0fc653ad 32165+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 32166+ lockdep_on();
4a4d8108 32167+ file_accessed(in);
1facf9fc 32168+ if (err >= 0)
32169+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
32170+ return err;
32171+}
32172+
32173+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
32174+ loff_t *ppos, size_t len, unsigned int flags)
32175+{
32176+ long err;
32177+
2cbb1c4b 32178+ lockdep_off();
0fc653ad 32179+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 32180+ lockdep_on();
1facf9fc 32181+ if (err >= 0)
32182+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
32183+ return err;
32184+}
32185+
53392da6
AM
32186+int vfsub_fsync(struct file *file, struct path *path, int datasync)
32187+{
32188+ int err;
32189+
32190+ /* file can be NULL */
32191+ lockdep_off();
32192+ err = vfs_fsync(file, datasync);
32193+ lockdep_on();
32194+ if (!err) {
32195+ if (!path) {
32196+ AuDebugOn(!file);
32197+ path = &file->f_path;
32198+ }
32199+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
32200+ }
32201+ return err;
32202+}
32203+
1facf9fc 32204+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
32205+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
32206+ struct file *h_file)
32207+{
32208+ int err;
32209+ struct inode *h_inode;
c06a8ce3 32210+ struct super_block *h_sb;
1facf9fc 32211+
1facf9fc 32212+ if (!h_file) {
c06a8ce3
AM
32213+ err = vfsub_truncate(h_path, length);
32214+ goto out;
1facf9fc 32215+ }
32216+
5527c038 32217+ h_inode = d_inode(h_path->dentry);
c06a8ce3
AM
32218+ h_sb = h_inode->i_sb;
32219+ lockdep_off();
32220+ sb_start_write(h_sb);
32221+ lockdep_on();
1facf9fc 32222+ err = locks_verify_truncate(h_inode, h_file, length);
32223+ if (!err)
953406b4 32224+ err = security_path_truncate(h_path);
2cbb1c4b
JR
32225+ if (!err) {
32226+ lockdep_off();
1facf9fc 32227+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
32228+ lockdep_on();
32229+ }
c06a8ce3
AM
32230+ lockdep_off();
32231+ sb_end_write(h_sb);
32232+ lockdep_on();
1facf9fc 32233+
4f0767ce 32234+out:
1facf9fc 32235+ return err;
32236+}
32237+
32238+/* ---------------------------------------------------------------------- */
32239+
32240+struct au_vfsub_mkdir_args {
32241+ int *errp;
32242+ struct inode *dir;
32243+ struct path *path;
32244+ int mode;
32245+};
32246+
32247+static void au_call_vfsub_mkdir(void *args)
32248+{
32249+ struct au_vfsub_mkdir_args *a = args;
32250+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
32251+}
32252+
32253+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
32254+{
32255+ int err, do_sio, wkq_err;
32256+
32257+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
32258+ if (!do_sio) {
32259+ lockdep_off();
1facf9fc 32260+ err = vfsub_mkdir(dir, path, mode);
c1595e42
JR
32261+ lockdep_on();
32262+ } else {
1facf9fc 32263+ struct au_vfsub_mkdir_args args = {
32264+ .errp = &err,
32265+ .dir = dir,
32266+ .path = path,
32267+ .mode = mode
32268+ };
32269+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
32270+ if (unlikely(wkq_err))
32271+ err = wkq_err;
32272+ }
32273+
32274+ return err;
32275+}
32276+
32277+struct au_vfsub_rmdir_args {
32278+ int *errp;
32279+ struct inode *dir;
32280+ struct path *path;
32281+};
32282+
32283+static void au_call_vfsub_rmdir(void *args)
32284+{
32285+ struct au_vfsub_rmdir_args *a = args;
32286+ *a->errp = vfsub_rmdir(a->dir, a->path);
32287+}
32288+
32289+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
32290+{
32291+ int err, do_sio, wkq_err;
32292+
32293+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
32294+ if (!do_sio) {
32295+ lockdep_off();
1facf9fc 32296+ err = vfsub_rmdir(dir, path);
c1595e42
JR
32297+ lockdep_on();
32298+ } else {
1facf9fc 32299+ struct au_vfsub_rmdir_args args = {
32300+ .errp = &err,
32301+ .dir = dir,
32302+ .path = path
32303+ };
32304+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
32305+ if (unlikely(wkq_err))
32306+ err = wkq_err;
32307+ }
32308+
32309+ return err;
32310+}
32311+
32312+/* ---------------------------------------------------------------------- */
32313+
32314+struct notify_change_args {
32315+ int *errp;
32316+ struct path *path;
32317+ struct iattr *ia;
523b37e3 32318+ struct inode **delegated_inode;
1facf9fc 32319+};
32320+
32321+static void call_notify_change(void *args)
32322+{
32323+ struct notify_change_args *a = args;
32324+ struct inode *h_inode;
32325+
5527c038 32326+ h_inode = d_inode(a->path->dentry);
1facf9fc 32327+ IMustLock(h_inode);
32328+
32329+ *a->errp = -EPERM;
32330+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
c1595e42 32331+ lockdep_off();
523b37e3
AM
32332+ *a->errp = notify_change(a->path->dentry, a->ia,
32333+ a->delegated_inode);
c1595e42 32334+ lockdep_on();
1facf9fc 32335+ if (!*a->errp)
32336+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
32337+ }
32338+ AuTraceErr(*a->errp);
32339+}
32340+
523b37e3
AM
32341+int vfsub_notify_change(struct path *path, struct iattr *ia,
32342+ struct inode **delegated_inode)
1facf9fc 32343+{
32344+ int err;
32345+ struct notify_change_args args = {
523b37e3
AM
32346+ .errp = &err,
32347+ .path = path,
32348+ .ia = ia,
32349+ .delegated_inode = delegated_inode
1facf9fc 32350+ };
32351+
32352+ call_notify_change(&args);
32353+
32354+ return err;
32355+}
32356+
523b37e3
AM
32357+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
32358+ struct inode **delegated_inode)
1facf9fc 32359+{
32360+ int err, wkq_err;
32361+ struct notify_change_args args = {
523b37e3
AM
32362+ .errp = &err,
32363+ .path = path,
32364+ .ia = ia,
32365+ .delegated_inode = delegated_inode
1facf9fc 32366+ };
32367+
32368+ wkq_err = au_wkq_wait(call_notify_change, &args);
32369+ if (unlikely(wkq_err))
32370+ err = wkq_err;
32371+
32372+ return err;
32373+}
32374+
32375+/* ---------------------------------------------------------------------- */
32376+
32377+struct unlink_args {
32378+ int *errp;
32379+ struct inode *dir;
32380+ struct path *path;
523b37e3 32381+ struct inode **delegated_inode;
1facf9fc 32382+};
32383+
32384+static void call_unlink(void *args)
32385+{
32386+ struct unlink_args *a = args;
32387+ struct dentry *d = a->path->dentry;
32388+ struct inode *h_inode;
32389+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
c1595e42 32390+ && au_dcount(d) == 1);
1facf9fc 32391+
32392+ IMustLock(a->dir);
32393+
32394+ a->path->dentry = d->d_parent;
32395+ *a->errp = security_path_unlink(a->path, d);
32396+ a->path->dentry = d;
32397+ if (unlikely(*a->errp))
32398+ return;
32399+
32400+ if (!stop_sillyrename)
32401+ dget(d);
5527c038
JR
32402+ h_inode = NULL;
32403+ if (d_is_positive(d)) {
32404+ h_inode = d_inode(d);
027c5e7a 32405+ ihold(h_inode);
5527c038 32406+ }
1facf9fc 32407+
2cbb1c4b 32408+ lockdep_off();
523b37e3 32409+ *a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
2cbb1c4b 32410+ lockdep_on();
1facf9fc 32411+ if (!*a->errp) {
32412+ struct path tmp = {
32413+ .dentry = d->d_parent,
32414+ .mnt = a->path->mnt
32415+ };
32416+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
32417+ }
32418+
32419+ if (!stop_sillyrename)
32420+ dput(d);
32421+ if (h_inode)
32422+ iput(h_inode);
32423+
32424+ AuTraceErr(*a->errp);
32425+}
32426+
32427+/*
32428+ * @dir: must be locked.
32429+ * @dentry: target dentry.
32430+ */
523b37e3
AM
32431+int vfsub_unlink(struct inode *dir, struct path *path,
32432+ struct inode **delegated_inode, int force)
1facf9fc 32433+{
32434+ int err;
32435+ struct unlink_args args = {
523b37e3
AM
32436+ .errp = &err,
32437+ .dir = dir,
32438+ .path = path,
32439+ .delegated_inode = delegated_inode
1facf9fc 32440+ };
32441+
32442+ if (!force)
32443+ call_unlink(&args);
32444+ else {
32445+ int wkq_err;
32446+
32447+ wkq_err = au_wkq_wait(call_unlink, &args);
32448+ if (unlikely(wkq_err))
32449+ err = wkq_err;
32450+ }
32451+
32452+ return err;
32453+}
e8791d4f
AM
32454diff -urNp -x '*.orig' linux-4.9/fs/aufs/vfsub.h linux-4.9/fs/aufs/vfsub.h
32455--- linux-4.9/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
32456+++ linux-4.9/fs/aufs/vfsub.h 2021-02-24 16:15:09.538240738 +0100
ae9dfd79 32457@@ -0,0 +1,355 @@
1facf9fc 32458+/*
ae9dfd79 32459+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 32460+ *
32461+ * This program, aufs is free software; you can redistribute it and/or modify
32462+ * it under the terms of the GNU General Public License as published by
32463+ * the Free Software Foundation; either version 2 of the License, or
32464+ * (at your option) any later version.
dece6358
AM
32465+ *
32466+ * This program is distributed in the hope that it will be useful,
32467+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32468+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32469+ * GNU General Public License for more details.
32470+ *
32471+ * You should have received a copy of the GNU General Public License
523b37e3 32472+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 32473+ */
32474+
32475+/*
32476+ * sub-routines for VFS
32477+ */
32478+
32479+#ifndef __AUFS_VFSUB_H__
32480+#define __AUFS_VFSUB_H__
32481+
32482+#ifdef __KERNEL__
32483+
32484+#include <linux/fs.h>
b4510431 32485+#include <linux/mount.h>
8cdd5066 32486+#include <linux/posix_acl.h>
c1595e42 32487+#include <linux/xattr.h>
7f207e10 32488+#include "debug.h"
1facf9fc 32489+
7f207e10 32490+/* copied from linux/fs/internal.h */
2cbb1c4b 32491+/* todo: BAD approach!! */
c06a8ce3 32492+extern void __mnt_drop_write(struct vfsmount *);
b912730e 32493+extern int open_check_o_direct(struct file *f);
7f207e10
AM
32494+
32495+/* ---------------------------------------------------------------------- */
1facf9fc 32496+
32497+/* lock subclass for lower inode */
32498+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
32499+/* reduce? gave up. */
32500+enum {
c1595e42 32501+ AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
1facf9fc 32502+ AuLsc_I_PARENT, /* lower inode, parent first */
32503+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 32504+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 32505+ AuLsc_I_CHILD,
32506+ AuLsc_I_CHILD2,
32507+ AuLsc_I_End
32508+};
32509+
32510+/* to debug easier, do not make them inlined functions */
32511+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
febd17d6 32512+#define IMustLock(i) AuDebugOn(!inode_is_locked(i))
1facf9fc 32513+
ae9dfd79
AM
32514+/* why VFS doesn't define it? */
32515+static inline
32516+void vfsub_inode_lock_shared_nested(struct inode *inode, unsigned int sc)
32517+{
32518+ down_read_nested(&inode->i_rwsem, sc);
32519+}
32520+
1facf9fc 32521+/* ---------------------------------------------------------------------- */
32522+
7f207e10
AM
32523+static inline void vfsub_drop_nlink(struct inode *inode)
32524+{
32525+ AuDebugOn(!inode->i_nlink);
32526+ drop_nlink(inode);
32527+}
32528+
027c5e7a
AM
32529+static inline void vfsub_dead_dir(struct inode *inode)
32530+{
32531+ AuDebugOn(!S_ISDIR(inode->i_mode));
32532+ inode->i_flags |= S_DEAD;
32533+ clear_nlink(inode);
32534+}
32535+
392086de
AM
32536+static inline int vfsub_native_ro(struct inode *inode)
32537+{
32538+ return (inode->i_sb->s_flags & MS_RDONLY)
32539+ || IS_RDONLY(inode)
32540+ /* || IS_APPEND(inode) */
32541+ || IS_IMMUTABLE(inode);
32542+}
32543+
8cdd5066
JR
32544+#ifdef CONFIG_AUFS_BR_FUSE
32545+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
32546+#else
32547+AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
32548+#endif
32549+
ae9dfd79
AM
32550+int vfsub_sync_filesystem(struct super_block *h_sb, int wait);
32551+
7f207e10
AM
32552+/* ---------------------------------------------------------------------- */
32553+
32554+int vfsub_update_h_iattr(struct path *h_path, int *did);
32555+struct file *vfsub_dentry_open(struct path *path, int flags);
32556+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
b912730e
AM
32557+struct vfsub_aopen_args {
32558+ struct file *file;
32559+ unsigned int open_flag;
32560+ umode_t create_mode;
32561+ int *opened;
32562+};
32563+struct au_branch;
32564+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
32565+ struct vfsub_aopen_args *args, struct au_branch *br);
1facf9fc 32566+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
b4510431 32567+
febd17d6
JR
32568+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
32569+ struct dentry *parent, int len);
1facf9fc 32570+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
32571+ int len);
b4510431
AM
32572+
32573+struct vfsub_lkup_one_args {
32574+ struct dentry **errp;
32575+ struct qstr *name;
32576+ struct dentry *parent;
32577+};
32578+
32579+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
32580+ struct dentry *parent)
32581+{
32582+ return vfsub_lookup_one_len(name->name, parent, name->len);
32583+}
32584+
32585+void vfsub_call_lkup_one(void *args);
32586+
32587+/* ---------------------------------------------------------------------- */
32588+
32589+static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
32590+{
32591+ int err;
076b876e 32592+
b4510431
AM
32593+ lockdep_off();
32594+ err = mnt_want_write(mnt);
32595+ lockdep_on();
32596+ return err;
32597+}
32598+
32599+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
32600+{
32601+ lockdep_off();
32602+ mnt_drop_write(mnt);
32603+ lockdep_on();
32604+}
1facf9fc 32605+
7e9cd9fe 32606+#if 0 /* reserved */
c06a8ce3
AM
32607+static inline void vfsub_mnt_drop_write_file(struct file *file)
32608+{
32609+ lockdep_off();
32610+ mnt_drop_write_file(file);
32611+ lockdep_on();
32612+}
7e9cd9fe 32613+#endif
c06a8ce3 32614+
1facf9fc 32615+/* ---------------------------------------------------------------------- */
32616+
32617+struct au_hinode;
32618+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
32619+ struct dentry *d2, struct au_hinode *hdir2);
32620+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
32621+ struct dentry *d2, struct au_hinode *hdir2);
32622+
537831f9
AM
32623+int vfsub_create(struct inode *dir, struct path *path, int mode,
32624+ bool want_excl);
1facf9fc 32625+int vfsub_symlink(struct inode *dir, struct path *path,
32626+ const char *symname);
32627+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
32628+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
523b37e3 32629+ struct path *path, struct inode **delegated_inode);
1facf9fc 32630+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
523b37e3 32631+ struct inode *hdir, struct path *path,
f2c43d5f 32632+ struct inode **delegated_inode, unsigned int flags);
1facf9fc 32633+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
32634+int vfsub_rmdir(struct inode *dir, struct path *path);
32635+
32636+/* ---------------------------------------------------------------------- */
32637+
32638+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
32639+ loff_t *ppos);
32640+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
32641+ loff_t *ppos);
32642+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
32643+ loff_t *ppos);
32644+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
32645+ loff_t *ppos);
4a4d8108 32646+int vfsub_flush(struct file *file, fl_owner_t id);
392086de
AM
32647+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
32648+
c06a8ce3
AM
32649+static inline loff_t vfsub_f_size_read(struct file *file)
32650+{
32651+ return i_size_read(file_inode(file));
32652+}
32653+
4a4d8108
AM
32654+static inline unsigned int vfsub_file_flags(struct file *file)
32655+{
32656+ unsigned int flags;
32657+
32658+ spin_lock(&file->f_lock);
32659+ flags = file->f_flags;
32660+ spin_unlock(&file->f_lock);
32661+
32662+ return flags;
32663+}
1308ab2a 32664+
f0c0a007
AM
32665+static inline int vfsub_file_execed(struct file *file)
32666+{
32667+ /* todo: direct access f_flags */
32668+ return !!(vfsub_file_flags(file) & __FMODE_EXEC);
32669+}
32670+
7e9cd9fe 32671+#if 0 /* reserved */
1facf9fc 32672+static inline void vfsub_file_accessed(struct file *h_file)
32673+{
32674+ file_accessed(h_file);
32675+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
32676+}
7e9cd9fe 32677+#endif
1facf9fc 32678+
79b8bda9 32679+#if 0 /* reserved */
1facf9fc 32680+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
32681+ struct dentry *h_dentry)
32682+{
32683+ struct path h_path = {
32684+ .dentry = h_dentry,
32685+ .mnt = h_mnt
32686+ };
92d182d2 32687+ touch_atime(&h_path);
1facf9fc 32688+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
32689+}
79b8bda9 32690+#endif
1facf9fc 32691+
0c3ec466
AM
32692+static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
32693+ int flags)
32694+{
5afbbe0d 32695+ return update_time(h_inode, ts, flags);
0c3ec466
AM
32696+ /* no vfsub_update_h_iattr() since we don't have struct path */
32697+}
32698+
8cdd5066
JR
32699+#ifdef CONFIG_FS_POSIX_ACL
32700+static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
32701+{
32702+ int err;
32703+
32704+ err = posix_acl_chmod(h_inode, h_mode);
32705+ if (err == -EOPNOTSUPP)
32706+ err = 0;
32707+ return err;
32708+}
32709+#else
32710+AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
32711+#endif
32712+
4a4d8108
AM
32713+long vfsub_splice_to(struct file *in, loff_t *ppos,
32714+ struct pipe_inode_info *pipe, size_t len,
32715+ unsigned int flags);
32716+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
32717+ loff_t *ppos, size_t len, unsigned int flags);
c06a8ce3
AM
32718+
32719+static inline long vfsub_truncate(struct path *path, loff_t length)
32720+{
32721+ long err;
076b876e 32722+
c06a8ce3
AM
32723+ lockdep_off();
32724+ err = vfs_truncate(path, length);
32725+ lockdep_on();
32726+ return err;
32727+}
32728+
4a4d8108
AM
32729+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
32730+ struct file *h_file);
53392da6 32731+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 32732+
ae9dfd79
AM
32733+/*
32734+ * re-use branch fs's ioctl(FICLONE) while aufs itself doesn't support such
32735+ * ioctl.
32736+ */
32737+static inline int vfsub_clone_file_range(struct file *src, struct file *dst,
32738+ u64 len)
32739+{
32740+ int err;
32741+
32742+ lockdep_off();
32743+ err = vfs_clone_file_range(src, 0, dst, 0, len);
32744+ lockdep_on();
32745+
32746+ return err;
32747+}
32748+
32749+/* copy_file_range(2) is a systemcall */
32750+static inline ssize_t vfsub_copy_file_range(struct file *src, loff_t src_pos,
32751+ struct file *dst, loff_t dst_pos,
32752+ size_t len, unsigned int flags)
32753+{
32754+ ssize_t ssz;
32755+
32756+ lockdep_off();
32757+ ssz = vfs_copy_file_range(src, src_pos, dst, dst_pos, len, flags);
32758+ lockdep_on();
32759+
32760+ return ssz;
32761+}
32762+
1facf9fc 32763+/* ---------------------------------------------------------------------- */
32764+
32765+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
32766+{
32767+ loff_t err;
32768+
2cbb1c4b 32769+ lockdep_off();
1facf9fc 32770+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 32771+ lockdep_on();
1facf9fc 32772+ return err;
32773+}
32774+
32775+/* ---------------------------------------------------------------------- */
32776+
4a4d8108
AM
32777+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
32778+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
523b37e3
AM
32779+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
32780+ struct inode **delegated_inode);
32781+int vfsub_notify_change(struct path *path, struct iattr *ia,
32782+ struct inode **delegated_inode);
32783+int vfsub_unlink(struct inode *dir, struct path *path,
32784+ struct inode **delegated_inode, int force);
4a4d8108 32785+
c1595e42
JR
32786+/* ---------------------------------------------------------------------- */
32787+
32788+static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
32789+ const void *value, size_t size, int flags)
32790+{
32791+ int err;
32792+
32793+ lockdep_off();
32794+ err = vfs_setxattr(dentry, name, value, size, flags);
32795+ lockdep_on();
32796+
32797+ return err;
32798+}
32799+
32800+static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
32801+{
32802+ int err;
32803+
32804+ lockdep_off();
32805+ err = vfs_removexattr(dentry, name);
32806+ lockdep_on();
32807+
32808+ return err;
32809+}
32810+
1facf9fc 32811+#endif /* __KERNEL__ */
32812+#endif /* __AUFS_VFSUB_H__ */
e8791d4f
AM
32813diff -urNp -x '*.orig' linux-4.9/fs/aufs/wbr_policy.c linux-4.9/fs/aufs/wbr_policy.c
32814--- linux-4.9/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
32815+++ linux-4.9/fs/aufs/wbr_policy.c 2021-02-24 16:15:09.538240738 +0100
f2c43d5f 32816@@ -0,0 +1,830 @@
1facf9fc 32817+/*
ae9dfd79 32818+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 32819+ *
32820+ * This program, aufs is free software; you can redistribute it and/or modify
32821+ * it under the terms of the GNU General Public License as published by
32822+ * the Free Software Foundation; either version 2 of the License, or
32823+ * (at your option) any later version.
dece6358
AM
32824+ *
32825+ * This program is distributed in the hope that it will be useful,
32826+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32827+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32828+ * GNU General Public License for more details.
32829+ *
32830+ * You should have received a copy of the GNU General Public License
523b37e3 32831+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 32832+ */
32833+
32834+/*
32835+ * policies for selecting one among multiple writable branches
32836+ */
32837+
32838+#include <linux/statfs.h>
32839+#include "aufs.h"
32840+
32841+/* subset of cpup_attr() */
32842+static noinline_for_stack
32843+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
32844+{
32845+ int err, sbits;
32846+ struct iattr ia;
32847+ struct inode *h_isrc;
32848+
5527c038 32849+ h_isrc = d_inode(h_src);
1facf9fc 32850+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
32851+ ia.ia_mode = h_isrc->i_mode;
32852+ ia.ia_uid = h_isrc->i_uid;
32853+ ia.ia_gid = h_isrc->i_gid;
32854+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
5527c038 32855+ au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
523b37e3
AM
32856+ /* no delegation since it is just created */
32857+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 32858+
32859+ /* is this nfs only? */
32860+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
32861+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
32862+ ia.ia_mode = h_isrc->i_mode;
523b37e3 32863+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 32864+ }
32865+
32866+ return err;
32867+}
32868+
32869+#define AuCpdown_PARENT_OPQ 1
32870+#define AuCpdown_WHED (1 << 1)
32871+#define AuCpdown_MADE_DIR (1 << 2)
32872+#define AuCpdown_DIROPQ (1 << 3)
32873+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
7f207e10
AM
32874+#define au_fset_cpdown(flags, name) \
32875+ do { (flags) |= AuCpdown_##name; } while (0)
32876+#define au_fclr_cpdown(flags, name) \
32877+ do { (flags) &= ~AuCpdown_##name; } while (0)
1facf9fc 32878+
1facf9fc 32879+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
c2b27bf2 32880+ unsigned int *flags)
1facf9fc 32881+{
32882+ int err;
32883+ struct dentry *opq_dentry;
32884+
32885+ opq_dentry = au_diropq_create(dentry, bdst);
32886+ err = PTR_ERR(opq_dentry);
32887+ if (IS_ERR(opq_dentry))
32888+ goto out;
32889+ dput(opq_dentry);
c2b27bf2 32890+ au_fset_cpdown(*flags, DIROPQ);
1facf9fc 32891+
4f0767ce 32892+out:
1facf9fc 32893+ return err;
32894+}
32895+
32896+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
32897+ struct inode *dir, aufs_bindex_t bdst)
32898+{
32899+ int err;
32900+ struct path h_path;
32901+ struct au_branch *br;
32902+
32903+ br = au_sbr(dentry->d_sb, bdst);
32904+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
32905+ err = PTR_ERR(h_path.dentry);
32906+ if (IS_ERR(h_path.dentry))
32907+ goto out;
32908+
32909+ err = 0;
5527c038 32910+ if (d_is_positive(h_path.dentry)) {
86dc4139 32911+ h_path.mnt = au_br_mnt(br);
1facf9fc 32912+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
32913+ dentry);
32914+ }
32915+ dput(h_path.dentry);
32916+
4f0767ce 32917+out:
1facf9fc 32918+ return err;
32919+}
32920+
32921+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 32922+ struct au_pin *pin,
1facf9fc 32923+ struct dentry *h_parent, void *arg)
32924+{
32925+ int err, rerr;
5afbbe0d 32926+ aufs_bindex_t bopq, btop;
1facf9fc 32927+ struct path h_path;
32928+ struct dentry *parent;
32929+ struct inode *h_dir, *h_inode, *inode, *dir;
c2b27bf2 32930+ unsigned int *flags = arg;
1facf9fc 32931+
5afbbe0d 32932+ btop = au_dbtop(dentry);
1facf9fc 32933+ /* dentry is di-locked */
32934+ parent = dget_parent(dentry);
5527c038
JR
32935+ dir = d_inode(parent);
32936+ h_dir = d_inode(h_parent);
1facf9fc 32937+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
32938+ IMustLock(h_dir);
32939+
86dc4139 32940+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
1facf9fc 32941+ if (unlikely(err < 0))
32942+ goto out;
32943+ h_path.dentry = au_h_dptr(dentry, bdst);
32944+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
32945+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
32946+ S_IRWXU | S_IRUGO | S_IXUGO);
32947+ if (unlikely(err))
32948+ goto out_put;
c2b27bf2 32949+ au_fset_cpdown(*flags, MADE_DIR);
1facf9fc 32950+
1facf9fc 32951+ bopq = au_dbdiropq(dentry);
c2b27bf2
AM
32952+ au_fclr_cpdown(*flags, WHED);
32953+ au_fclr_cpdown(*flags, DIROPQ);
1facf9fc 32954+ if (au_dbwh(dentry) == bdst)
c2b27bf2
AM
32955+ au_fset_cpdown(*flags, WHED);
32956+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
32957+ au_fset_cpdown(*flags, PARENT_OPQ);
5527c038 32958+ h_inode = d_inode(h_path.dentry);
febd17d6 32959+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
c2b27bf2
AM
32960+ if (au_ftest_cpdown(*flags, WHED)) {
32961+ err = au_cpdown_dir_opq(dentry, bdst, flags);
1facf9fc 32962+ if (unlikely(err)) {
febd17d6 32963+ inode_unlock(h_inode);
1facf9fc 32964+ goto out_dir;
32965+ }
32966+ }
32967+
5afbbe0d 32968+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop));
febd17d6 32969+ inode_unlock(h_inode);
1facf9fc 32970+ if (unlikely(err))
32971+ goto out_opq;
32972+
c2b27bf2 32973+ if (au_ftest_cpdown(*flags, WHED)) {
1facf9fc 32974+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
32975+ if (unlikely(err))
32976+ goto out_opq;
32977+ }
32978+
5527c038 32979+ inode = d_inode(dentry);
5afbbe0d
AM
32980+ if (au_ibbot(inode) < bdst)
32981+ au_set_ibbot(inode, bdst);
1facf9fc 32982+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
32983+ au_hi_flags(inode, /*isdir*/1));
076b876e 32984+ au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
1facf9fc 32985+ goto out; /* success */
32986+
32987+ /* revert */
4f0767ce 32988+out_opq:
c2b27bf2 32989+ if (au_ftest_cpdown(*flags, DIROPQ)) {
febd17d6 32990+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
1facf9fc 32991+ rerr = au_diropq_remove(dentry, bdst);
febd17d6 32992+ inode_unlock(h_inode);
1facf9fc 32993+ if (unlikely(rerr)) {
523b37e3
AM
32994+ AuIOErr("failed removing diropq for %pd b%d (%d)\n",
32995+ dentry, bdst, rerr);
1facf9fc 32996+ err = -EIO;
32997+ goto out;
32998+ }
32999+ }
4f0767ce 33000+out_dir:
c2b27bf2 33001+ if (au_ftest_cpdown(*flags, MADE_DIR)) {
1facf9fc 33002+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
33003+ if (unlikely(rerr)) {
523b37e3
AM
33004+ AuIOErr("failed removing %pd b%d (%d)\n",
33005+ dentry, bdst, rerr);
1facf9fc 33006+ err = -EIO;
33007+ }
33008+ }
4f0767ce 33009+out_put:
1facf9fc 33010+ au_set_h_dptr(dentry, bdst, NULL);
5afbbe0d
AM
33011+ if (au_dbbot(dentry) == bdst)
33012+ au_update_dbbot(dentry);
4f0767ce 33013+out:
1facf9fc 33014+ dput(parent);
33015+ return err;
33016+}
33017+
33018+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
33019+{
33020+ int err;
c2b27bf2 33021+ unsigned int flags;
1facf9fc 33022+
c2b27bf2
AM
33023+ flags = 0;
33024+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
1facf9fc 33025+
33026+ return err;
33027+}
33028+
33029+/* ---------------------------------------------------------------------- */
33030+
33031+/* policies for create */
33032+
c2b27bf2 33033+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
4a4d8108
AM
33034+{
33035+ int err, i, j, ndentry;
33036+ aufs_bindex_t bopq;
33037+ struct au_dcsub_pages dpages;
33038+ struct au_dpage *dpage;
33039+ struct dentry **dentries, *parent, *d;
33040+
33041+ err = au_dpages_init(&dpages, GFP_NOFS);
33042+ if (unlikely(err))
33043+ goto out;
33044+ parent = dget_parent(dentry);
027c5e7a 33045+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
33046+ if (unlikely(err))
33047+ goto out_free;
33048+
33049+ err = bindex;
33050+ for (i = 0; i < dpages.ndpage; i++) {
33051+ dpage = dpages.dpages + i;
33052+ dentries = dpage->dentries;
33053+ ndentry = dpage->ndentry;
33054+ for (j = 0; j < ndentry; j++) {
33055+ d = dentries[j];
33056+ di_read_lock_parent2(d, !AuLock_IR);
33057+ bopq = au_dbdiropq(d);
33058+ di_read_unlock(d, !AuLock_IR);
33059+ if (bopq >= 0 && bopq < err)
33060+ err = bopq;
33061+ }
33062+ }
33063+
33064+out_free:
33065+ dput(parent);
33066+ au_dpages_free(&dpages);
33067+out:
33068+ return err;
33069+}
33070+
1facf9fc 33071+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
33072+{
33073+ for (; bindex >= 0; bindex--)
33074+ if (!au_br_rdonly(au_sbr(sb, bindex)))
33075+ return bindex;
33076+ return -EROFS;
33077+}
33078+
33079+/* top down parent */
392086de
AM
33080+static int au_wbr_create_tdp(struct dentry *dentry,
33081+ unsigned int flags __maybe_unused)
1facf9fc 33082+{
33083+ int err;
5afbbe0d 33084+ aufs_bindex_t btop, bindex;
1facf9fc 33085+ struct super_block *sb;
33086+ struct dentry *parent, *h_parent;
33087+
33088+ sb = dentry->d_sb;
5afbbe0d
AM
33089+ btop = au_dbtop(dentry);
33090+ err = btop;
33091+ if (!au_br_rdonly(au_sbr(sb, btop)))
1facf9fc 33092+ goto out;
33093+
33094+ err = -EROFS;
33095+ parent = dget_parent(dentry);
5afbbe0d 33096+ for (bindex = au_dbtop(parent); bindex < btop; bindex++) {
1facf9fc 33097+ h_parent = au_h_dptr(parent, bindex);
5527c038 33098+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 33099+ continue;
33100+
33101+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
33102+ err = bindex;
33103+ break;
33104+ }
33105+ }
33106+ dput(parent);
33107+
33108+ /* bottom up here */
4a4d8108 33109+ if (unlikely(err < 0)) {
5afbbe0d 33110+ err = au_wbr_bu(sb, btop - 1);
4a4d8108
AM
33111+ if (err >= 0)
33112+ err = au_wbr_nonopq(dentry, err);
33113+ }
1facf9fc 33114+
4f0767ce 33115+out:
1facf9fc 33116+ AuDbg("b%d\n", err);
33117+ return err;
33118+}
33119+
33120+/* ---------------------------------------------------------------------- */
33121+
33122+/* an exception for the policy other than tdp */
33123+static int au_wbr_create_exp(struct dentry *dentry)
33124+{
33125+ int err;
33126+ aufs_bindex_t bwh, bdiropq;
33127+ struct dentry *parent;
33128+
33129+ err = -1;
33130+ bwh = au_dbwh(dentry);
33131+ parent = dget_parent(dentry);
33132+ bdiropq = au_dbdiropq(parent);
33133+ if (bwh >= 0) {
33134+ if (bdiropq >= 0)
33135+ err = min(bdiropq, bwh);
33136+ else
33137+ err = bwh;
33138+ AuDbg("%d\n", err);
33139+ } else if (bdiropq >= 0) {
33140+ err = bdiropq;
33141+ AuDbg("%d\n", err);
33142+ }
33143+ dput(parent);
33144+
4a4d8108
AM
33145+ if (err >= 0)
33146+ err = au_wbr_nonopq(dentry, err);
33147+
1facf9fc 33148+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
33149+ err = -1;
33150+
33151+ AuDbg("%d\n", err);
33152+ return err;
33153+}
33154+
33155+/* ---------------------------------------------------------------------- */
33156+
33157+/* round robin */
33158+static int au_wbr_create_init_rr(struct super_block *sb)
33159+{
33160+ int err;
33161+
5afbbe0d 33162+ err = au_wbr_bu(sb, au_sbbot(sb));
1facf9fc 33163+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 33164+ /* smp_mb(); */
1facf9fc 33165+
33166+ AuDbg("b%d\n", err);
33167+ return err;
33168+}
33169+
392086de 33170+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
1facf9fc 33171+{
33172+ int err, nbr;
33173+ unsigned int u;
5afbbe0d 33174+ aufs_bindex_t bindex, bbot;
1facf9fc 33175+ struct super_block *sb;
33176+ atomic_t *next;
33177+
33178+ err = au_wbr_create_exp(dentry);
33179+ if (err >= 0)
33180+ goto out;
33181+
33182+ sb = dentry->d_sb;
33183+ next = &au_sbi(sb)->si_wbr_rr_next;
5afbbe0d
AM
33184+ bbot = au_sbbot(sb);
33185+ nbr = bbot + 1;
33186+ for (bindex = 0; bindex <= bbot; bindex++) {
392086de 33187+ if (!au_ftest_wbr(flags, DIR)) {
1facf9fc 33188+ err = atomic_dec_return(next) + 1;
33189+ /* modulo for 0 is meaningless */
33190+ if (unlikely(!err))
33191+ err = atomic_dec_return(next) + 1;
33192+ } else
33193+ err = atomic_read(next);
33194+ AuDbg("%d\n", err);
33195+ u = err;
33196+ err = u % nbr;
33197+ AuDbg("%d\n", err);
33198+ if (!au_br_rdonly(au_sbr(sb, err)))
33199+ break;
33200+ err = -EROFS;
33201+ }
33202+
4a4d8108
AM
33203+ if (err >= 0)
33204+ err = au_wbr_nonopq(dentry, err);
33205+
4f0767ce 33206+out:
1facf9fc 33207+ AuDbg("%d\n", err);
33208+ return err;
33209+}
33210+
33211+/* ---------------------------------------------------------------------- */
33212+
33213+/* most free space */
392086de 33214+static void au_mfs(struct dentry *dentry, struct dentry *parent)
1facf9fc 33215+{
33216+ struct super_block *sb;
33217+ struct au_branch *br;
33218+ struct au_wbr_mfs *mfs;
392086de 33219+ struct dentry *h_parent;
5afbbe0d 33220+ aufs_bindex_t bindex, bbot;
1facf9fc 33221+ int err;
33222+ unsigned long long b, bavail;
7f207e10 33223+ struct path h_path;
1facf9fc 33224+ /* reduce the stack usage */
33225+ struct kstatfs *st;
33226+
33227+ st = kmalloc(sizeof(*st), GFP_NOFS);
33228+ if (unlikely(!st)) {
33229+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
33230+ return;
33231+ }
33232+
33233+ bavail = 0;
33234+ sb = dentry->d_sb;
33235+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 33236+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 33237+ mfs->mfs_bindex = -EROFS;
33238+ mfs->mfsrr_bytes = 0;
392086de
AM
33239+ if (!parent) {
33240+ bindex = 0;
5afbbe0d 33241+ bbot = au_sbbot(sb);
392086de 33242+ } else {
5afbbe0d
AM
33243+ bindex = au_dbtop(parent);
33244+ bbot = au_dbtaildir(parent);
392086de
AM
33245+ }
33246+
5afbbe0d 33247+ for (; bindex <= bbot; bindex++) {
392086de
AM
33248+ if (parent) {
33249+ h_parent = au_h_dptr(parent, bindex);
5527c038 33250+ if (!h_parent || d_is_negative(h_parent))
392086de
AM
33251+ continue;
33252+ }
1facf9fc 33253+ br = au_sbr(sb, bindex);
33254+ if (au_br_rdonly(br))
33255+ continue;
33256+
33257+ /* sb->s_root for NFS is unreliable */
86dc4139 33258+ h_path.mnt = au_br_mnt(br);
7f207e10
AM
33259+ h_path.dentry = h_path.mnt->mnt_root;
33260+ err = vfs_statfs(&h_path, st);
1facf9fc 33261+ if (unlikely(err)) {
33262+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
33263+ continue;
33264+ }
33265+
33266+ /* when the available size is equal, select the lower one */
33267+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
33268+ || sizeof(b) < sizeof(st->f_bsize));
33269+ b = st->f_bavail * st->f_bsize;
33270+ br->br_wbr->wbr_bytes = b;
33271+ if (b >= bavail) {
33272+ bavail = b;
33273+ mfs->mfs_bindex = bindex;
33274+ mfs->mfs_jiffy = jiffies;
33275+ }
33276+ }
33277+
33278+ mfs->mfsrr_bytes = bavail;
33279+ AuDbg("b%d\n", mfs->mfs_bindex);
ae9dfd79 33280+ kfree(st);
1facf9fc 33281+}
33282+
392086de 33283+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
1facf9fc 33284+{
33285+ int err;
392086de 33286+ struct dentry *parent;
1facf9fc 33287+ struct super_block *sb;
33288+ struct au_wbr_mfs *mfs;
33289+
33290+ err = au_wbr_create_exp(dentry);
33291+ if (err >= 0)
33292+ goto out;
33293+
33294+ sb = dentry->d_sb;
392086de
AM
33295+ parent = NULL;
33296+ if (au_ftest_wbr(flags, PARENT))
33297+ parent = dget_parent(dentry);
1facf9fc 33298+ mfs = &au_sbi(sb)->si_wbr_mfs;
33299+ mutex_lock(&mfs->mfs_lock);
33300+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
33301+ || mfs->mfs_bindex < 0
33302+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
392086de 33303+ au_mfs(dentry, parent);
1facf9fc 33304+ mutex_unlock(&mfs->mfs_lock);
33305+ err = mfs->mfs_bindex;
392086de 33306+ dput(parent);
1facf9fc 33307+
4a4d8108
AM
33308+ if (err >= 0)
33309+ err = au_wbr_nonopq(dentry, err);
33310+
4f0767ce 33311+out:
1facf9fc 33312+ AuDbg("b%d\n", err);
33313+ return err;
33314+}
33315+
33316+static int au_wbr_create_init_mfs(struct super_block *sb)
33317+{
33318+ struct au_wbr_mfs *mfs;
33319+
33320+ mfs = &au_sbi(sb)->si_wbr_mfs;
33321+ mutex_init(&mfs->mfs_lock);
33322+ mfs->mfs_jiffy = 0;
33323+ mfs->mfs_bindex = -EROFS;
33324+
33325+ return 0;
33326+}
33327+
33328+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
33329+{
33330+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
33331+ return 0;
33332+}
33333+
33334+/* ---------------------------------------------------------------------- */
33335+
f2c43d5f
AM
33336+/* top down regardless parent, and then mfs */
33337+static int au_wbr_create_tdmfs(struct dentry *dentry,
33338+ unsigned int flags __maybe_unused)
33339+{
33340+ int err;
33341+ aufs_bindex_t bwh, btail, bindex, bfound, bmfs;
33342+ unsigned long long watermark;
33343+ struct super_block *sb;
33344+ struct au_wbr_mfs *mfs;
33345+ struct au_branch *br;
33346+ struct dentry *parent;
33347+
33348+ sb = dentry->d_sb;
33349+ mfs = &au_sbi(sb)->si_wbr_mfs;
33350+ mutex_lock(&mfs->mfs_lock);
33351+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
33352+ || mfs->mfs_bindex < 0)
33353+ au_mfs(dentry, /*parent*/NULL);
33354+ watermark = mfs->mfsrr_watermark;
33355+ bmfs = mfs->mfs_bindex;
33356+ mutex_unlock(&mfs->mfs_lock);
33357+
33358+ /* another style of au_wbr_create_exp() */
33359+ bwh = au_dbwh(dentry);
33360+ parent = dget_parent(dentry);
33361+ btail = au_dbtaildir(parent);
33362+ if (bwh >= 0 && bwh < btail)
33363+ btail = bwh;
33364+
33365+ err = au_wbr_nonopq(dentry, btail);
33366+ if (unlikely(err < 0))
33367+ goto out;
33368+ btail = err;
33369+ bfound = -1;
33370+ for (bindex = 0; bindex <= btail; bindex++) {
33371+ br = au_sbr(sb, bindex);
33372+ if (au_br_rdonly(br))
33373+ continue;
33374+ if (br->br_wbr->wbr_bytes > watermark) {
33375+ bfound = bindex;
33376+ break;
33377+ }
33378+ }
33379+ err = bfound;
33380+ if (err < 0)
33381+ err = bmfs;
33382+
33383+out:
33384+ dput(parent);
33385+ AuDbg("b%d\n", err);
33386+ return err;
33387+}
33388+
33389+/* ---------------------------------------------------------------------- */
33390+
1facf9fc 33391+/* most free space and then round robin */
392086de 33392+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
1facf9fc 33393+{
33394+ int err;
33395+ struct au_wbr_mfs *mfs;
33396+
392086de 33397+ err = au_wbr_create_mfs(dentry, flags);
1facf9fc 33398+ if (err >= 0) {
33399+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 33400+ mutex_lock(&mfs->mfs_lock);
1facf9fc 33401+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
392086de 33402+ err = au_wbr_create_rr(dentry, flags);
dece6358 33403+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 33404+ }
33405+
33406+ AuDbg("b%d\n", err);
33407+ return err;
33408+}
33409+
33410+static int au_wbr_create_init_mfsrr(struct super_block *sb)
33411+{
33412+ int err;
33413+
33414+ au_wbr_create_init_mfs(sb); /* ignore */
33415+ err = au_wbr_create_init_rr(sb);
33416+
33417+ return err;
33418+}
33419+
33420+/* ---------------------------------------------------------------------- */
33421+
33422+/* top down parent and most free space */
392086de 33423+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
1facf9fc 33424+{
33425+ int err, e2;
33426+ unsigned long long b;
5afbbe0d 33427+ aufs_bindex_t bindex, btop, bbot;
1facf9fc 33428+ struct super_block *sb;
33429+ struct dentry *parent, *h_parent;
33430+ struct au_branch *br;
33431+
392086de 33432+ err = au_wbr_create_tdp(dentry, flags);
1facf9fc 33433+ if (unlikely(err < 0))
33434+ goto out;
33435+ parent = dget_parent(dentry);
5afbbe0d
AM
33436+ btop = au_dbtop(parent);
33437+ bbot = au_dbtaildir(parent);
33438+ if (btop == bbot)
1facf9fc 33439+ goto out_parent; /* success */
33440+
392086de 33441+ e2 = au_wbr_create_mfs(dentry, flags);
1facf9fc 33442+ if (e2 < 0)
33443+ goto out_parent; /* success */
33444+
33445+ /* when the available size is equal, select upper one */
33446+ sb = dentry->d_sb;
33447+ br = au_sbr(sb, err);
33448+ b = br->br_wbr->wbr_bytes;
33449+ AuDbg("b%d, %llu\n", err, b);
33450+
5afbbe0d 33451+ for (bindex = btop; bindex <= bbot; bindex++) {
1facf9fc 33452+ h_parent = au_h_dptr(parent, bindex);
5527c038 33453+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 33454+ continue;
33455+
33456+ br = au_sbr(sb, bindex);
33457+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
33458+ b = br->br_wbr->wbr_bytes;
33459+ err = bindex;
33460+ AuDbg("b%d, %llu\n", err, b);
33461+ }
33462+ }
33463+
4a4d8108
AM
33464+ if (err >= 0)
33465+ err = au_wbr_nonopq(dentry, err);
33466+
4f0767ce 33467+out_parent:
1facf9fc 33468+ dput(parent);
4f0767ce 33469+out:
1facf9fc 33470+ AuDbg("b%d\n", err);
33471+ return err;
33472+}
33473+
33474+/* ---------------------------------------------------------------------- */
33475+
392086de
AM
33476+/*
33477+ * - top down parent
33478+ * - most free space with parent
33479+ * - most free space round-robin regardless parent
33480+ */
33481+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
33482+{
33483+ int err;
33484+ unsigned long long watermark;
33485+ struct super_block *sb;
33486+ struct au_branch *br;
33487+ struct au_wbr_mfs *mfs;
33488+
33489+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
33490+ if (unlikely(err < 0))
33491+ goto out;
33492+
33493+ sb = dentry->d_sb;
33494+ br = au_sbr(sb, err);
33495+ mfs = &au_sbi(sb)->si_wbr_mfs;
33496+ mutex_lock(&mfs->mfs_lock);
33497+ watermark = mfs->mfsrr_watermark;
33498+ mutex_unlock(&mfs->mfs_lock);
33499+ if (br->br_wbr->wbr_bytes < watermark)
33500+ /* regardless the parent dir */
33501+ err = au_wbr_create_mfsrr(dentry, flags);
33502+
33503+out:
33504+ AuDbg("b%d\n", err);
33505+ return err;
33506+}
33507+
33508+/* ---------------------------------------------------------------------- */
33509+
1facf9fc 33510+/* policies for copyup */
33511+
33512+/* top down parent */
33513+static int au_wbr_copyup_tdp(struct dentry *dentry)
33514+{
392086de 33515+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
1facf9fc 33516+}
33517+
33518+/* bottom up parent */
33519+static int au_wbr_copyup_bup(struct dentry *dentry)
33520+{
33521+ int err;
5afbbe0d 33522+ aufs_bindex_t bindex, btop;
1facf9fc 33523+ struct dentry *parent, *h_parent;
33524+ struct super_block *sb;
33525+
33526+ err = -EROFS;
33527+ sb = dentry->d_sb;
33528+ parent = dget_parent(dentry);
5afbbe0d
AM
33529+ btop = au_dbtop(parent);
33530+ for (bindex = au_dbtop(dentry); bindex >= btop; bindex--) {
1facf9fc 33531+ h_parent = au_h_dptr(parent, bindex);
5527c038 33532+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 33533+ continue;
33534+
33535+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
33536+ err = bindex;
33537+ break;
33538+ }
33539+ }
33540+ dput(parent);
33541+
33542+ /* bottom up here */
33543+ if (unlikely(err < 0))
5afbbe0d 33544+ err = au_wbr_bu(sb, btop - 1);
1facf9fc 33545+
33546+ AuDbg("b%d\n", err);
33547+ return err;
33548+}
33549+
33550+/* bottom up */
5afbbe0d 33551+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop)
1facf9fc 33552+{
33553+ int err;
33554+
5afbbe0d 33555+ err = au_wbr_bu(dentry->d_sb, btop);
4a4d8108 33556+ AuDbg("b%d\n", err);
5afbbe0d 33557+ if (err > btop)
4a4d8108 33558+ err = au_wbr_nonopq(dentry, err);
1facf9fc 33559+
33560+ AuDbg("b%d\n", err);
33561+ return err;
33562+}
33563+
076b876e
AM
33564+static int au_wbr_copyup_bu(struct dentry *dentry)
33565+{
33566+ int err;
5afbbe0d 33567+ aufs_bindex_t btop;
076b876e 33568+
5afbbe0d
AM
33569+ btop = au_dbtop(dentry);
33570+ err = au_wbr_do_copyup_bu(dentry, btop);
076b876e
AM
33571+ return err;
33572+}
33573+
1facf9fc 33574+/* ---------------------------------------------------------------------- */
33575+
33576+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
33577+ [AuWbrCopyup_TDP] = {
33578+ .copyup = au_wbr_copyup_tdp
33579+ },
33580+ [AuWbrCopyup_BUP] = {
33581+ .copyup = au_wbr_copyup_bup
33582+ },
33583+ [AuWbrCopyup_BU] = {
33584+ .copyup = au_wbr_copyup_bu
33585+ }
33586+};
33587+
33588+struct au_wbr_create_operations au_wbr_create_ops[] = {
33589+ [AuWbrCreate_TDP] = {
33590+ .create = au_wbr_create_tdp
33591+ },
33592+ [AuWbrCreate_RR] = {
33593+ .create = au_wbr_create_rr,
33594+ .init = au_wbr_create_init_rr
33595+ },
33596+ [AuWbrCreate_MFS] = {
33597+ .create = au_wbr_create_mfs,
33598+ .init = au_wbr_create_init_mfs,
33599+ .fin = au_wbr_create_fin_mfs
33600+ },
33601+ [AuWbrCreate_MFSV] = {
33602+ .create = au_wbr_create_mfs,
33603+ .init = au_wbr_create_init_mfs,
33604+ .fin = au_wbr_create_fin_mfs
33605+ },
33606+ [AuWbrCreate_MFSRR] = {
33607+ .create = au_wbr_create_mfsrr,
33608+ .init = au_wbr_create_init_mfsrr,
33609+ .fin = au_wbr_create_fin_mfs
33610+ },
33611+ [AuWbrCreate_MFSRRV] = {
33612+ .create = au_wbr_create_mfsrr,
33613+ .init = au_wbr_create_init_mfsrr,
33614+ .fin = au_wbr_create_fin_mfs
33615+ },
f2c43d5f
AM
33616+ [AuWbrCreate_TDMFS] = {
33617+ .create = au_wbr_create_tdmfs,
33618+ .init = au_wbr_create_init_mfs,
33619+ .fin = au_wbr_create_fin_mfs
33620+ },
33621+ [AuWbrCreate_TDMFSV] = {
33622+ .create = au_wbr_create_tdmfs,
33623+ .init = au_wbr_create_init_mfs,
33624+ .fin = au_wbr_create_fin_mfs
33625+ },
1facf9fc 33626+ [AuWbrCreate_PMFS] = {
33627+ .create = au_wbr_create_pmfs,
33628+ .init = au_wbr_create_init_mfs,
33629+ .fin = au_wbr_create_fin_mfs
33630+ },
33631+ [AuWbrCreate_PMFSV] = {
33632+ .create = au_wbr_create_pmfs,
33633+ .init = au_wbr_create_init_mfs,
33634+ .fin = au_wbr_create_fin_mfs
392086de
AM
33635+ },
33636+ [AuWbrCreate_PMFSRR] = {
33637+ .create = au_wbr_create_pmfsrr,
33638+ .init = au_wbr_create_init_mfsrr,
33639+ .fin = au_wbr_create_fin_mfs
33640+ },
33641+ [AuWbrCreate_PMFSRRV] = {
33642+ .create = au_wbr_create_pmfsrr,
33643+ .init = au_wbr_create_init_mfsrr,
33644+ .fin = au_wbr_create_fin_mfs
1facf9fc 33645+ }
33646+};
e8791d4f
AM
33647diff -urNp -x '*.orig' linux-4.9/fs/aufs/whout.c linux-4.9/fs/aufs/whout.c
33648--- linux-4.9/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
33649+++ linux-4.9/fs/aufs/whout.c 2021-02-24 16:15:09.538240738 +0100
f2c43d5f 33650@@ -0,0 +1,1061 @@
1facf9fc 33651+/*
ae9dfd79 33652+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 33653+ *
33654+ * This program, aufs is free software; you can redistribute it and/or modify
33655+ * it under the terms of the GNU General Public License as published by
33656+ * the Free Software Foundation; either version 2 of the License, or
33657+ * (at your option) any later version.
dece6358
AM
33658+ *
33659+ * This program is distributed in the hope that it will be useful,
33660+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33661+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33662+ * GNU General Public License for more details.
33663+ *
33664+ * You should have received a copy of the GNU General Public License
523b37e3 33665+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33666+ */
33667+
33668+/*
33669+ * whiteout for logical deletion and opaque directory
33670+ */
33671+
1facf9fc 33672+#include "aufs.h"
33673+
33674+#define WH_MASK S_IRUGO
33675+
33676+/*
33677+ * If a directory contains this file, then it is opaque. We start with the
33678+ * .wh. flag so that it is blocked by lookup.
33679+ */
0c3ec466
AM
33680+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
33681+ sizeof(AUFS_WH_DIROPQ) - 1);
1facf9fc 33682+
33683+/*
33684+ * generate whiteout name, which is NOT terminated by NUL.
33685+ * @name: original d_name; @name->name is the string to be prefixed
33686+ * and @name->len is its length
33687+ * @wh: whiteout qstr
33688+ * returns zero when succeeds, otherwise error.
33689+ * succeeded value as wh->name should be freed by kfree().
33690+ */
33691+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
33692+{
33693+ char *p;
33694+
33695+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
33696+ return -ENAMETOOLONG;
33697+
33698+ wh->len = name->len + AUFS_WH_PFX_LEN;
33699+ p = kmalloc(wh->len, GFP_NOFS);
33700+ wh->name = p;
33701+ if (p) {
33702+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
33703+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
33704+ /* smp_mb(); */
33705+ return 0;
33706+ }
33707+ return -ENOMEM;
33708+}
33709+
33710+/* ---------------------------------------------------------------------- */
33711+
33712+/*
33713+ * test if the @wh_name exists under @h_parent.
33714+ * @try_sio specifies whether super-io is necessary.
33715+ */
076b876e 33716+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
1facf9fc 33717+{
33718+ int err;
33719+ struct dentry *wh_dentry;
1facf9fc 33720+
1facf9fc 33721+ if (!try_sio)
b4510431 33722+ wh_dentry = vfsub_lkup_one(wh_name, h_parent);
1facf9fc 33723+ else
076b876e 33724+ wh_dentry = au_sio_lkup_one(wh_name, h_parent);
1facf9fc 33725+ err = PTR_ERR(wh_dentry);
2000de60
JR
33726+ if (IS_ERR(wh_dentry)) {
33727+ if (err == -ENAMETOOLONG)
33728+ err = 0;
1facf9fc 33729+ goto out;
2000de60 33730+ }
1facf9fc 33731+
33732+ err = 0;
5527c038 33733+ if (d_is_negative(wh_dentry))
1facf9fc 33734+ goto out_wh; /* success */
33735+
33736+ err = 1;
7e9cd9fe 33737+ if (d_is_reg(wh_dentry))
1facf9fc 33738+ goto out_wh; /* success */
33739+
33740+ err = -EIO;
523b37e3 33741+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
5527c038 33742+ wh_dentry, d_inode(wh_dentry)->i_mode);
1facf9fc 33743+
4f0767ce 33744+out_wh:
1facf9fc 33745+ dput(wh_dentry);
4f0767ce 33746+out:
1facf9fc 33747+ return err;
33748+}
33749+
33750+/*
33751+ * test if the @h_dentry sets opaque or not.
33752+ */
076b876e 33753+int au_diropq_test(struct dentry *h_dentry)
1facf9fc 33754+{
33755+ int err;
33756+ struct inode *h_dir;
33757+
5527c038 33758+ h_dir = d_inode(h_dentry);
076b876e 33759+ err = au_wh_test(h_dentry, &diropq_name,
1facf9fc 33760+ au_test_h_perm_sio(h_dir, MAY_EXEC));
33761+ return err;
33762+}
33763+
33764+/*
33765+ * returns a negative dentry whose name is unique and temporary.
33766+ */
33767+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
33768+ struct qstr *prefix)
33769+{
1facf9fc 33770+ struct dentry *dentry;
33771+ int i;
027c5e7a 33772+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 33773+ *name, *p;
027c5e7a 33774+ /* strict atomic_t is unnecessary here */
1facf9fc 33775+ static unsigned short cnt;
33776+ struct qstr qs;
33777+
4a4d8108
AM
33778+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
33779+
1facf9fc 33780+ name = defname;
027c5e7a
AM
33781+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
33782+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 33783+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 33784+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 33785+ goto out;
33786+ dentry = ERR_PTR(-ENOMEM);
33787+ name = kmalloc(qs.len + 1, GFP_NOFS);
33788+ if (unlikely(!name))
33789+ goto out;
33790+ }
33791+
33792+ /* doubly whiteout-ed */
33793+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
33794+ p = name + AUFS_WH_PFX_LEN * 2;
33795+ memcpy(p, prefix->name, prefix->len);
33796+ p += prefix->len;
33797+ *p++ = '.';
4a4d8108 33798+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 33799+
33800+ qs.name = name;
33801+ for (i = 0; i < 3; i++) {
b752ccd1 33802+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
076b876e 33803+ dentry = au_sio_lkup_one(&qs, h_parent);
5527c038 33804+ if (IS_ERR(dentry) || d_is_negative(dentry))
1facf9fc 33805+ goto out_name;
33806+ dput(dentry);
33807+ }
0c3ec466 33808+ /* pr_warn("could not get random name\n"); */
1facf9fc 33809+ dentry = ERR_PTR(-EEXIST);
33810+ AuDbg("%.*s\n", AuLNPair(&qs));
33811+ BUG();
33812+
4f0767ce 33813+out_name:
1facf9fc 33814+ if (name != defname)
ae9dfd79 33815+ kfree(name);
4f0767ce 33816+out:
4a4d8108 33817+ AuTraceErrPtr(dentry);
1facf9fc 33818+ return dentry;
1facf9fc 33819+}
33820+
33821+/*
33822+ * rename the @h_dentry on @br to the whiteouted temporary name.
33823+ */
33824+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
33825+{
33826+ int err;
33827+ struct path h_path = {
86dc4139 33828+ .mnt = au_br_mnt(br)
1facf9fc 33829+ };
523b37e3 33830+ struct inode *h_dir, *delegated;
1facf9fc 33831+ struct dentry *h_parent;
33832+
33833+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 33834+ h_dir = d_inode(h_parent);
1facf9fc 33835+ IMustLock(h_dir);
33836+
33837+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
33838+ err = PTR_ERR(h_path.dentry);
33839+ if (IS_ERR(h_path.dentry))
33840+ goto out;
33841+
33842+ /* under the same dir, no need to lock_rename() */
523b37e3 33843+ delegated = NULL;
f2c43d5f
AM
33844+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated,
33845+ /*flags*/0);
1facf9fc 33846+ AuTraceErr(err);
523b37e3
AM
33847+ if (unlikely(err == -EWOULDBLOCK)) {
33848+ pr_warn("cannot retry for NFSv4 delegation"
33849+ " for an internal rename\n");
33850+ iput(delegated);
33851+ }
1facf9fc 33852+ dput(h_path.dentry);
33853+
4f0767ce 33854+out:
4a4d8108 33855+ AuTraceErr(err);
1facf9fc 33856+ return err;
33857+}
33858+
33859+/* ---------------------------------------------------------------------- */
33860+/*
33861+ * functions for removing a whiteout
33862+ */
33863+
33864+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
33865+{
523b37e3
AM
33866+ int err, force;
33867+ struct inode *delegated;
1facf9fc 33868+
33869+ /*
33870+ * forces superio when the dir has a sticky bit.
33871+ * this may be a violation of unix fs semantics.
33872+ */
33873+ force = (h_dir->i_mode & S_ISVTX)
5527c038 33874+ && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
523b37e3
AM
33875+ delegated = NULL;
33876+ err = vfsub_unlink(h_dir, h_path, &delegated, force);
33877+ if (unlikely(err == -EWOULDBLOCK)) {
33878+ pr_warn("cannot retry for NFSv4 delegation"
33879+ " for an internal unlink\n");
33880+ iput(delegated);
33881+ }
33882+ return err;
1facf9fc 33883+}
33884+
33885+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
33886+ struct dentry *dentry)
33887+{
33888+ int err;
33889+
33890+ err = do_unlink_wh(h_dir, h_path);
33891+ if (!err && dentry)
33892+ au_set_dbwh(dentry, -1);
33893+
33894+ return err;
33895+}
33896+
33897+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
33898+ struct au_branch *br)
33899+{
33900+ int err;
33901+ struct path h_path = {
86dc4139 33902+ .mnt = au_br_mnt(br)
1facf9fc 33903+ };
33904+
33905+ err = 0;
b4510431 33906+ h_path.dentry = vfsub_lkup_one(wh, h_parent);
1facf9fc 33907+ if (IS_ERR(h_path.dentry))
33908+ err = PTR_ERR(h_path.dentry);
33909+ else {
5527c038
JR
33910+ if (d_is_reg(h_path.dentry))
33911+ err = do_unlink_wh(d_inode(h_parent), &h_path);
1facf9fc 33912+ dput(h_path.dentry);
33913+ }
33914+
33915+ return err;
33916+}
33917+
33918+/* ---------------------------------------------------------------------- */
33919+/*
33920+ * initialize/clean whiteout for a branch
33921+ */
33922+
33923+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
33924+ const int isdir)
33925+{
33926+ int err;
523b37e3 33927+ struct inode *delegated;
1facf9fc 33928+
5527c038 33929+ if (d_is_negative(whpath->dentry))
1facf9fc 33930+ return;
33931+
86dc4139
AM
33932+ if (isdir)
33933+ err = vfsub_rmdir(h_dir, whpath);
523b37e3
AM
33934+ else {
33935+ delegated = NULL;
33936+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
33937+ if (unlikely(err == -EWOULDBLOCK)) {
33938+ pr_warn("cannot retry for NFSv4 delegation"
33939+ " for an internal unlink\n");
33940+ iput(delegated);
33941+ }
33942+ }
1facf9fc 33943+ if (unlikely(err))
523b37e3
AM
33944+ pr_warn("failed removing %pd (%d), ignored.\n",
33945+ whpath->dentry, err);
1facf9fc 33946+}
33947+
33948+static int test_linkable(struct dentry *h_root)
33949+{
5527c038 33950+ struct inode *h_dir = d_inode(h_root);
1facf9fc 33951+
33952+ if (h_dir->i_op->link)
33953+ return 0;
33954+
523b37e3
AM
33955+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
33956+ h_root, au_sbtype(h_root->d_sb));
1facf9fc 33957+ return -ENOSYS;
33958+}
33959+
33960+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
33961+static int au_whdir(struct inode *h_dir, struct path *path)
33962+{
33963+ int err;
33964+
33965+ err = -EEXIST;
5527c038 33966+ if (d_is_negative(path->dentry)) {
1facf9fc 33967+ int mode = S_IRWXU;
33968+
33969+ if (au_test_nfs(path->dentry->d_sb))
33970+ mode |= S_IXUGO;
86dc4139 33971+ err = vfsub_mkdir(h_dir, path, mode);
2000de60 33972+ } else if (d_is_dir(path->dentry))
1facf9fc 33973+ err = 0;
33974+ else
523b37e3 33975+ pr_err("unknown %pd exists\n", path->dentry);
1facf9fc 33976+
33977+ return err;
33978+}
33979+
33980+struct au_wh_base {
33981+ const struct qstr *name;
33982+ struct dentry *dentry;
33983+};
33984+
33985+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
33986+ struct path *h_path)
33987+{
33988+ h_path->dentry = base[AuBrWh_BASE].dentry;
33989+ au_wh_clean(h_dir, h_path, /*isdir*/0);
33990+ h_path->dentry = base[AuBrWh_PLINK].dentry;
33991+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33992+ h_path->dentry = base[AuBrWh_ORPH].dentry;
33993+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33994+}
33995+
33996+/*
33997+ * returns tri-state,
c1595e42 33998+ * minus: error, caller should print the message
1facf9fc 33999+ * zero: success
c1595e42 34000+ * plus: error, caller should NOT print the message
1facf9fc 34001+ */
34002+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
34003+ int do_plink, struct au_wh_base base[],
34004+ struct path *h_path)
34005+{
34006+ int err;
34007+ struct inode *h_dir;
34008+
5527c038 34009+ h_dir = d_inode(h_root);
1facf9fc 34010+ h_path->dentry = base[AuBrWh_BASE].dentry;
34011+ au_wh_clean(h_dir, h_path, /*isdir*/0);
34012+ h_path->dentry = base[AuBrWh_PLINK].dentry;
34013+ if (do_plink) {
34014+ err = test_linkable(h_root);
34015+ if (unlikely(err)) {
34016+ err = 1;
34017+ goto out;
34018+ }
34019+
34020+ err = au_whdir(h_dir, h_path);
34021+ if (unlikely(err))
34022+ goto out;
34023+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
34024+ } else
34025+ au_wh_clean(h_dir, h_path, /*isdir*/1);
34026+ h_path->dentry = base[AuBrWh_ORPH].dentry;
34027+ err = au_whdir(h_dir, h_path);
34028+ if (unlikely(err))
34029+ goto out;
34030+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
34031+
4f0767ce 34032+out:
1facf9fc 34033+ return err;
34034+}
34035+
34036+/*
34037+ * for the moment, aufs supports the branch filesystem which does not support
34038+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
34039+ * copyup failed. finally, such filesystem will not be used as the writable
34040+ * branch.
34041+ *
34042+ * returns tri-state, see above.
34043+ */
34044+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
34045+ int do_plink, struct au_wh_base base[],
34046+ struct path *h_path)
34047+{
34048+ int err;
34049+ struct inode *h_dir;
34050+
1308ab2a 34051+ WbrWhMustWriteLock(wbr);
34052+
1facf9fc 34053+ err = test_linkable(h_root);
34054+ if (unlikely(err)) {
34055+ err = 1;
34056+ goto out;
34057+ }
34058+
34059+ /*
34060+ * todo: should this create be done in /sbin/mount.aufs helper?
34061+ */
34062+ err = -EEXIST;
5527c038
JR
34063+ h_dir = d_inode(h_root);
34064+ if (d_is_negative(base[AuBrWh_BASE].dentry)) {
86dc4139
AM
34065+ h_path->dentry = base[AuBrWh_BASE].dentry;
34066+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
7e9cd9fe 34067+ } else if (d_is_reg(base[AuBrWh_BASE].dentry))
1facf9fc 34068+ err = 0;
34069+ else
523b37e3 34070+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
1facf9fc 34071+ if (unlikely(err))
34072+ goto out;
34073+
34074+ h_path->dentry = base[AuBrWh_PLINK].dentry;
34075+ if (do_plink) {
34076+ err = au_whdir(h_dir, h_path);
34077+ if (unlikely(err))
34078+ goto out;
34079+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
34080+ } else
34081+ au_wh_clean(h_dir, h_path, /*isdir*/1);
34082+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
34083+
34084+ h_path->dentry = base[AuBrWh_ORPH].dentry;
34085+ err = au_whdir(h_dir, h_path);
34086+ if (unlikely(err))
34087+ goto out;
34088+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
34089+
4f0767ce 34090+out:
1facf9fc 34091+ return err;
34092+}
34093+
34094+/*
34095+ * initialize the whiteout base file/dir for @br.
34096+ */
86dc4139 34097+int au_wh_init(struct au_branch *br, struct super_block *sb)
1facf9fc 34098+{
34099+ int err, i;
34100+ const unsigned char do_plink
34101+ = !!au_opt_test(au_mntflags(sb), PLINK);
1facf9fc 34102+ struct inode *h_dir;
86dc4139
AM
34103+ struct path path = br->br_path;
34104+ struct dentry *h_root = path.dentry;
1facf9fc 34105+ struct au_wbr *wbr = br->br_wbr;
34106+ static const struct qstr base_name[] = {
0c3ec466
AM
34107+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
34108+ sizeof(AUFS_BASE_NAME) - 1),
34109+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
34110+ sizeof(AUFS_PLINKDIR_NAME) - 1),
34111+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
34112+ sizeof(AUFS_ORPHDIR_NAME) - 1)
1facf9fc 34113+ };
34114+ struct au_wh_base base[] = {
34115+ [AuBrWh_BASE] = {
34116+ .name = base_name + AuBrWh_BASE,
34117+ .dentry = NULL
34118+ },
34119+ [AuBrWh_PLINK] = {
34120+ .name = base_name + AuBrWh_PLINK,
34121+ .dentry = NULL
34122+ },
34123+ [AuBrWh_ORPH] = {
34124+ .name = base_name + AuBrWh_ORPH,
34125+ .dentry = NULL
34126+ }
34127+ };
34128+
1308ab2a 34129+ if (wbr)
34130+ WbrWhMustWriteLock(wbr);
1facf9fc 34131+
1facf9fc 34132+ for (i = 0; i < AuBrWh_Last; i++) {
34133+ /* doubly whiteouted */
34134+ struct dentry *d;
34135+
34136+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
34137+ err = PTR_ERR(d);
34138+ if (IS_ERR(d))
34139+ goto out;
34140+
34141+ base[i].dentry = d;
34142+ AuDebugOn(wbr
34143+ && wbr->wbr_wh[i]
34144+ && wbr->wbr_wh[i] != base[i].dentry);
34145+ }
34146+
34147+ if (wbr)
34148+ for (i = 0; i < AuBrWh_Last; i++) {
34149+ dput(wbr->wbr_wh[i]);
34150+ wbr->wbr_wh[i] = NULL;
34151+ }
34152+
34153+ err = 0;
1e00d052 34154+ if (!au_br_writable(br->br_perm)) {
5527c038 34155+ h_dir = d_inode(h_root);
1facf9fc 34156+ au_wh_init_ro(h_dir, base, &path);
1e00d052 34157+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 34158+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
34159+ if (err > 0)
34160+ goto out;
34161+ else if (err)
34162+ goto out_err;
1e00d052 34163+ } else {
1facf9fc 34164+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
34165+ if (err > 0)
34166+ goto out;
34167+ else if (err)
34168+ goto out_err;
1facf9fc 34169+ }
34170+ goto out; /* success */
34171+
4f0767ce 34172+out_err:
523b37e3
AM
34173+ pr_err("an error(%d) on the writable branch %pd(%s)\n",
34174+ err, h_root, au_sbtype(h_root->d_sb));
4f0767ce 34175+out:
1facf9fc 34176+ for (i = 0; i < AuBrWh_Last; i++)
34177+ dput(base[i].dentry);
34178+ return err;
34179+}
34180+
34181+/* ---------------------------------------------------------------------- */
34182+/*
34183+ * whiteouts are all hard-linked usually.
34184+ * when its link count reaches a ceiling, we create a new whiteout base
34185+ * asynchronously.
34186+ */
34187+
34188+struct reinit_br_wh {
34189+ struct super_block *sb;
34190+ struct au_branch *br;
34191+};
34192+
34193+static void reinit_br_wh(void *arg)
34194+{
34195+ int err;
34196+ aufs_bindex_t bindex;
34197+ struct path h_path;
34198+ struct reinit_br_wh *a = arg;
34199+ struct au_wbr *wbr;
523b37e3 34200+ struct inode *dir, *delegated;
1facf9fc 34201+ struct dentry *h_root;
34202+ struct au_hinode *hdir;
34203+
34204+ err = 0;
34205+ wbr = a->br->br_wbr;
34206+ /* big aufs lock */
34207+ si_noflush_write_lock(a->sb);
34208+ if (!au_br_writable(a->br->br_perm))
34209+ goto out;
34210+ bindex = au_br_index(a->sb, a->br->br_id);
34211+ if (unlikely(bindex < 0))
34212+ goto out;
34213+
1308ab2a 34214+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
5527c038 34215+ dir = d_inode(a->sb->s_root);
1facf9fc 34216+ hdir = au_hi(dir, bindex);
34217+ h_root = au_h_dptr(a->sb->s_root, bindex);
86dc4139 34218+ AuDebugOn(h_root != au_br_dentry(a->br));
1facf9fc 34219+
5afbbe0d 34220+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 34221+ wbr_wh_write_lock(wbr);
34222+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
34223+ h_root, a->br);
34224+ if (!err) {
86dc4139
AM
34225+ h_path.dentry = wbr->wbr_whbase;
34226+ h_path.mnt = au_br_mnt(a->br);
523b37e3
AM
34227+ delegated = NULL;
34228+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
34229+ /*force*/0);
34230+ if (unlikely(err == -EWOULDBLOCK)) {
34231+ pr_warn("cannot retry for NFSv4 delegation"
34232+ " for an internal unlink\n");
34233+ iput(delegated);
34234+ }
1facf9fc 34235+ } else {
523b37e3 34236+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
1facf9fc 34237+ err = 0;
34238+ }
34239+ dput(wbr->wbr_whbase);
34240+ wbr->wbr_whbase = NULL;
34241+ if (!err)
86dc4139 34242+ err = au_wh_init(a->br, a->sb);
1facf9fc 34243+ wbr_wh_write_unlock(wbr);
5afbbe0d 34244+ au_hn_inode_unlock(hdir);
1308ab2a 34245+ di_read_unlock(a->sb->s_root, AuLock_IR);
076b876e
AM
34246+ if (!err)
34247+ au_fhsm_wrote(a->sb, bindex, /*force*/0);
1facf9fc 34248+
4f0767ce 34249+out:
1facf9fc 34250+ if (wbr)
34251+ atomic_dec(&wbr->wbr_wh_running);
5afbbe0d 34252+ au_br_put(a->br);
1facf9fc 34253+ si_write_unlock(a->sb);
027c5e7a 34254+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
ae9dfd79 34255+ kfree(arg);
1facf9fc 34256+ if (unlikely(err))
34257+ AuIOErr("err %d\n", err);
34258+}
34259+
34260+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
34261+{
34262+ int do_dec, wkq_err;
34263+ struct reinit_br_wh *arg;
34264+
34265+ do_dec = 1;
34266+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
34267+ goto out;
34268+
34269+ /* ignore ENOMEM */
34270+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
34271+ if (arg) {
34272+ /*
34273+ * dec(wh_running), kfree(arg) and dec(br_count)
34274+ * in reinit function
34275+ */
34276+ arg->sb = sb;
34277+ arg->br = br;
5afbbe0d 34278+ au_br_get(br);
53392da6 34279+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 34280+ if (unlikely(wkq_err)) {
34281+ atomic_dec(&br->br_wbr->wbr_wh_running);
5afbbe0d 34282+ au_br_put(br);
ae9dfd79 34283+ kfree(arg);
1facf9fc 34284+ }
34285+ do_dec = 0;
34286+ }
34287+
4f0767ce 34288+out:
1facf9fc 34289+ if (do_dec)
34290+ atomic_dec(&br->br_wbr->wbr_wh_running);
34291+}
34292+
34293+/* ---------------------------------------------------------------------- */
34294+
34295+/*
34296+ * create the whiteout @wh.
34297+ */
34298+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
34299+ struct dentry *wh)
34300+{
34301+ int err;
34302+ struct path h_path = {
34303+ .dentry = wh
34304+ };
34305+ struct au_branch *br;
34306+ struct au_wbr *wbr;
34307+ struct dentry *h_parent;
523b37e3 34308+ struct inode *h_dir, *delegated;
1facf9fc 34309+
34310+ h_parent = wh->d_parent; /* dir inode is locked */
5527c038 34311+ h_dir = d_inode(h_parent);
1facf9fc 34312+ IMustLock(h_dir);
34313+
34314+ br = au_sbr(sb, bindex);
86dc4139 34315+ h_path.mnt = au_br_mnt(br);
1facf9fc 34316+ wbr = br->br_wbr;
34317+ wbr_wh_read_lock(wbr);
34318+ if (wbr->wbr_whbase) {
523b37e3
AM
34319+ delegated = NULL;
34320+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
34321+ if (unlikely(err == -EWOULDBLOCK)) {
34322+ pr_warn("cannot retry for NFSv4 delegation"
34323+ " for an internal link\n");
34324+ iput(delegated);
34325+ }
1facf9fc 34326+ if (!err || err != -EMLINK)
34327+ goto out;
34328+
34329+ /* link count full. re-initialize br_whbase. */
34330+ kick_reinit_br_wh(sb, br);
34331+ }
34332+
34333+ /* return this error in this context */
b4510431 34334+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
076b876e
AM
34335+ if (!err)
34336+ au_fhsm_wrote(sb, bindex, /*force*/0);
1facf9fc 34337+
4f0767ce 34338+out:
1facf9fc 34339+ wbr_wh_read_unlock(wbr);
34340+ return err;
34341+}
34342+
34343+/* ---------------------------------------------------------------------- */
34344+
34345+/*
34346+ * create or remove the diropq.
34347+ */
34348+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
34349+ unsigned int flags)
34350+{
34351+ struct dentry *opq_dentry, *h_dentry;
34352+ struct super_block *sb;
34353+ struct au_branch *br;
34354+ int err;
34355+
34356+ sb = dentry->d_sb;
34357+ br = au_sbr(sb, bindex);
34358+ h_dentry = au_h_dptr(dentry, bindex);
b4510431 34359+ opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
1facf9fc 34360+ if (IS_ERR(opq_dentry))
34361+ goto out;
34362+
34363+ if (au_ftest_diropq(flags, CREATE)) {
34364+ err = link_or_create_wh(sb, bindex, opq_dentry);
34365+ if (!err) {
34366+ au_set_dbdiropq(dentry, bindex);
34367+ goto out; /* success */
34368+ }
34369+ } else {
34370+ struct path tmp = {
34371+ .dentry = opq_dentry,
86dc4139 34372+ .mnt = au_br_mnt(br)
1facf9fc 34373+ };
5527c038 34374+ err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
1facf9fc 34375+ if (!err)
34376+ au_set_dbdiropq(dentry, -1);
34377+ }
34378+ dput(opq_dentry);
34379+ opq_dentry = ERR_PTR(err);
34380+
4f0767ce 34381+out:
1facf9fc 34382+ return opq_dentry;
34383+}
34384+
34385+struct do_diropq_args {
34386+ struct dentry **errp;
34387+ struct dentry *dentry;
34388+ aufs_bindex_t bindex;
34389+ unsigned int flags;
34390+};
34391+
34392+static void call_do_diropq(void *args)
34393+{
34394+ struct do_diropq_args *a = args;
34395+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
34396+}
34397+
34398+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
34399+ unsigned int flags)
34400+{
34401+ struct dentry *diropq, *h_dentry;
34402+
34403+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 34404+ if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
1facf9fc 34405+ diropq = do_diropq(dentry, bindex, flags);
34406+ else {
34407+ int wkq_err;
34408+ struct do_diropq_args args = {
34409+ .errp = &diropq,
34410+ .dentry = dentry,
34411+ .bindex = bindex,
34412+ .flags = flags
34413+ };
34414+
34415+ wkq_err = au_wkq_wait(call_do_diropq, &args);
34416+ if (unlikely(wkq_err))
34417+ diropq = ERR_PTR(wkq_err);
34418+ }
34419+
34420+ return diropq;
34421+}
34422+
34423+/* ---------------------------------------------------------------------- */
34424+
34425+/*
34426+ * lookup whiteout dentry.
34427+ * @h_parent: lower parent dentry which must exist and be locked
34428+ * @base_name: name of dentry which will be whiteouted
34429+ * returns dentry for whiteout.
34430+ */
34431+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
34432+ struct au_branch *br)
34433+{
34434+ int err;
34435+ struct qstr wh_name;
34436+ struct dentry *wh_dentry;
34437+
34438+ err = au_wh_name_alloc(&wh_name, base_name);
34439+ wh_dentry = ERR_PTR(err);
34440+ if (!err) {
b4510431 34441+ wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
ae9dfd79 34442+ kfree(wh_name.name);
1facf9fc 34443+ }
34444+ return wh_dentry;
34445+}
34446+
34447+/*
34448+ * link/create a whiteout for @dentry on @bindex.
34449+ */
34450+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
34451+ struct dentry *h_parent)
34452+{
34453+ struct dentry *wh_dentry;
34454+ struct super_block *sb;
34455+ int err;
34456+
34457+ sb = dentry->d_sb;
34458+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
5527c038 34459+ if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) {
1facf9fc 34460+ err = link_or_create_wh(sb, bindex, wh_dentry);
076b876e 34461+ if (!err) {
1facf9fc 34462+ au_set_dbwh(dentry, bindex);
076b876e
AM
34463+ au_fhsm_wrote(sb, bindex, /*force*/0);
34464+ } else {
1facf9fc 34465+ dput(wh_dentry);
34466+ wh_dentry = ERR_PTR(err);
34467+ }
34468+ }
34469+
34470+ return wh_dentry;
34471+}
34472+
34473+/* ---------------------------------------------------------------------- */
34474+
34475+/* Delete all whiteouts in this directory on branch bindex. */
34476+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
34477+ aufs_bindex_t bindex, struct au_branch *br)
34478+{
34479+ int err;
34480+ unsigned long ul, n;
34481+ struct qstr wh_name;
34482+ char *p;
34483+ struct hlist_head *head;
c06a8ce3 34484+ struct au_vdir_wh *pos;
1facf9fc 34485+ struct au_vdir_destr *str;
34486+
34487+ err = -ENOMEM;
537831f9 34488+ p = (void *)__get_free_page(GFP_NOFS);
1facf9fc 34489+ wh_name.name = p;
34490+ if (unlikely(!wh_name.name))
34491+ goto out;
34492+
34493+ err = 0;
34494+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
34495+ p += AUFS_WH_PFX_LEN;
34496+ n = whlist->nh_num;
34497+ head = whlist->nh_head;
34498+ for (ul = 0; !err && ul < n; ul++, head++) {
c06a8ce3
AM
34499+ hlist_for_each_entry(pos, head, wh_hash) {
34500+ if (pos->wh_bindex != bindex)
1facf9fc 34501+ continue;
34502+
c06a8ce3 34503+ str = &pos->wh_str;
1facf9fc 34504+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
34505+ memcpy(p, str->name, str->len);
34506+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
34507+ err = unlink_wh_name(h_dentry, &wh_name, br);
34508+ if (!err)
34509+ continue;
34510+ break;
34511+ }
34512+ AuIOErr("whiteout name too long %.*s\n",
34513+ str->len, str->name);
34514+ err = -EIO;
34515+ break;
34516+ }
34517+ }
ae9dfd79 34518+ free_page((unsigned long)wh_name.name);
1facf9fc 34519+
4f0767ce 34520+out:
1facf9fc 34521+ return err;
34522+}
34523+
34524+struct del_wh_children_args {
34525+ int *errp;
34526+ struct dentry *h_dentry;
1308ab2a 34527+ struct au_nhash *whlist;
1facf9fc 34528+ aufs_bindex_t bindex;
34529+ struct au_branch *br;
34530+};
34531+
34532+static void call_del_wh_children(void *args)
34533+{
34534+ struct del_wh_children_args *a = args;
1308ab2a 34535+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 34536+}
34537+
34538+/* ---------------------------------------------------------------------- */
34539+
34540+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
34541+{
34542+ struct au_whtmp_rmdir *whtmp;
dece6358 34543+ int err;
1308ab2a 34544+ unsigned int rdhash;
dece6358
AM
34545+
34546+ SiMustAnyLock(sb);
1facf9fc 34547+
be52b249 34548+ whtmp = kzalloc(sizeof(*whtmp), gfp);
dece6358
AM
34549+ if (unlikely(!whtmp)) {
34550+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 34551+ goto out;
dece6358 34552+ }
1facf9fc 34553+
1308ab2a 34554+ /* no estimation for dir size */
34555+ rdhash = au_sbi(sb)->si_rdhash;
34556+ if (!rdhash)
34557+ rdhash = AUFS_RDHASH_DEF;
34558+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
34559+ if (unlikely(err)) {
ae9dfd79 34560+ kfree(whtmp);
1308ab2a 34561+ whtmp = ERR_PTR(err);
34562+ }
dece6358 34563+
4f0767ce 34564+out:
dece6358 34565+ return whtmp;
1facf9fc 34566+}
34567+
34568+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
34569+{
027c5e7a 34570+ if (whtmp->br)
5afbbe0d 34571+ au_br_put(whtmp->br);
1facf9fc 34572+ dput(whtmp->wh_dentry);
34573+ iput(whtmp->dir);
dece6358 34574+ au_nhash_wh_free(&whtmp->whlist);
ae9dfd79 34575+ kfree(whtmp);
1facf9fc 34576+}
34577+
34578+/*
34579+ * rmdir the whiteouted temporary named dir @h_dentry.
34580+ * @whlist: whiteouted children.
34581+ */
34582+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
34583+ struct dentry *wh_dentry, struct au_nhash *whlist)
34584+{
34585+ int err;
2000de60 34586+ unsigned int h_nlink;
1facf9fc 34587+ struct path h_tmp;
34588+ struct inode *wh_inode, *h_dir;
34589+ struct au_branch *br;
34590+
5527c038 34591+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
1facf9fc 34592+ IMustLock(h_dir);
34593+
34594+ br = au_sbr(dir->i_sb, bindex);
5527c038 34595+ wh_inode = d_inode(wh_dentry);
febd17d6 34596+ inode_lock_nested(wh_inode, AuLsc_I_CHILD);
1facf9fc 34597+
34598+ /*
34599+ * someone else might change some whiteouts while we were sleeping.
34600+ * it means this whlist may have an obsolete entry.
34601+ */
34602+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
34603+ err = del_wh_children(wh_dentry, whlist, bindex, br);
34604+ else {
34605+ int wkq_err;
34606+ struct del_wh_children_args args = {
34607+ .errp = &err,
34608+ .h_dentry = wh_dentry,
1308ab2a 34609+ .whlist = whlist,
1facf9fc 34610+ .bindex = bindex,
34611+ .br = br
34612+ };
34613+
34614+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
34615+ if (unlikely(wkq_err))
34616+ err = wkq_err;
34617+ }
febd17d6 34618+ inode_unlock(wh_inode);
1facf9fc 34619+
34620+ if (!err) {
34621+ h_tmp.dentry = wh_dentry;
86dc4139 34622+ h_tmp.mnt = au_br_mnt(br);
2000de60 34623+ h_nlink = h_dir->i_nlink;
1facf9fc 34624+ err = vfsub_rmdir(h_dir, &h_tmp);
2000de60
JR
34625+ /* some fs doesn't change the parent nlink in some cases */
34626+ h_nlink -= h_dir->i_nlink;
1facf9fc 34627+ }
34628+
34629+ if (!err) {
5afbbe0d 34630+ if (au_ibtop(dir) == bindex) {
7f207e10 34631+ /* todo: dir->i_mutex is necessary */
1facf9fc 34632+ au_cpup_attr_timesizes(dir);
2000de60
JR
34633+ if (h_nlink)
34634+ vfsub_drop_nlink(dir);
1facf9fc 34635+ }
34636+ return 0; /* success */
34637+ }
34638+
523b37e3 34639+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
1facf9fc 34640+ return err;
34641+}
34642+
34643+static void call_rmdir_whtmp(void *args)
34644+{
34645+ int err;
e49829fe 34646+ aufs_bindex_t bindex;
1facf9fc 34647+ struct au_whtmp_rmdir *a = args;
34648+ struct super_block *sb;
34649+ struct dentry *h_parent;
34650+ struct inode *h_dir;
1facf9fc 34651+ struct au_hinode *hdir;
34652+
34653+ /* rmdir by nfsd may cause deadlock with this i_mutex */
febd17d6 34654+ /* inode_lock(a->dir); */
e49829fe 34655+ err = -EROFS;
1facf9fc 34656+ sb = a->dir->i_sb;
e49829fe
JR
34657+ si_read_lock(sb, !AuLock_FLUSH);
34658+ if (!au_br_writable(a->br->br_perm))
34659+ goto out;
34660+ bindex = au_br_index(sb, a->br->br_id);
34661+ if (unlikely(bindex < 0))
1facf9fc 34662+ goto out;
34663+
34664+ err = -EIO;
1facf9fc 34665+ ii_write_lock_parent(a->dir);
34666+ h_parent = dget_parent(a->wh_dentry);
5527c038 34667+ h_dir = d_inode(h_parent);
e49829fe 34668+ hdir = au_hi(a->dir, bindex);
86dc4139
AM
34669+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
34670+ if (unlikely(err))
34671+ goto out_mnt;
5afbbe0d 34672+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
34673+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
34674+ a->br);
86dc4139
AM
34675+ if (!err)
34676+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
5afbbe0d 34677+ au_hn_inode_unlock(hdir);
86dc4139
AM
34678+ vfsub_mnt_drop_write(au_br_mnt(a->br));
34679+
34680+out_mnt:
1facf9fc 34681+ dput(h_parent);
34682+ ii_write_unlock(a->dir);
4f0767ce 34683+out:
febd17d6 34684+ /* inode_unlock(a->dir); */
1facf9fc 34685+ au_whtmp_rmdir_free(a);
027c5e7a
AM
34686+ si_read_unlock(sb);
34687+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 34688+ if (unlikely(err))
34689+ AuIOErr("err %d\n", err);
34690+}
34691+
34692+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
34693+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
34694+{
34695+ int wkq_err;
e49829fe 34696+ struct super_block *sb;
1facf9fc 34697+
34698+ IMustLock(dir);
34699+
34700+ /* all post-process will be done in call_rmdir_whtmp(). */
e49829fe 34701+ sb = dir->i_sb;
1facf9fc 34702+ args->dir = au_igrab(dir);
e49829fe 34703+ args->br = au_sbr(sb, bindex);
5afbbe0d 34704+ au_br_get(args->br);
1facf9fc 34705+ args->wh_dentry = dget(wh_dentry);
53392da6 34706+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 34707+ if (unlikely(wkq_err)) {
523b37e3 34708+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
1facf9fc 34709+ au_whtmp_rmdir_free(args);
34710+ }
34711+}
e8791d4f
AM
34712diff -urNp -x '*.orig' linux-4.9/fs/aufs/whout.h linux-4.9/fs/aufs/whout.h
34713--- linux-4.9/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
34714+++ linux-4.9/fs/aufs/whout.h 2021-02-24 16:15:09.538240738 +0100
076b876e 34715@@ -0,0 +1,85 @@
1facf9fc 34716+/*
ae9dfd79 34717+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 34718+ *
34719+ * This program, aufs is free software; you can redistribute it and/or modify
34720+ * it under the terms of the GNU General Public License as published by
34721+ * the Free Software Foundation; either version 2 of the License, or
34722+ * (at your option) any later version.
dece6358
AM
34723+ *
34724+ * This program is distributed in the hope that it will be useful,
34725+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34726+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34727+ * GNU General Public License for more details.
34728+ *
34729+ * You should have received a copy of the GNU General Public License
523b37e3 34730+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 34731+ */
34732+
34733+/*
34734+ * whiteout for logical deletion and opaque directory
34735+ */
34736+
34737+#ifndef __AUFS_WHOUT_H__
34738+#define __AUFS_WHOUT_H__
34739+
34740+#ifdef __KERNEL__
34741+
1facf9fc 34742+#include "dir.h"
34743+
34744+/* whout.c */
34745+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
076b876e
AM
34746+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
34747+int au_diropq_test(struct dentry *h_dentry);
7e9cd9fe 34748+struct au_branch;
1facf9fc 34749+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
34750+ struct qstr *prefix);
34751+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
34752+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
34753+ struct dentry *dentry);
86dc4139 34754+int au_wh_init(struct au_branch *br, struct super_block *sb);
1facf9fc 34755+
34756+/* diropq flags */
34757+#define AuDiropq_CREATE 1
34758+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
34759+#define au_fset_diropq(flags, name) \
34760+ do { (flags) |= AuDiropq_##name; } while (0)
34761+#define au_fclr_diropq(flags, name) \
34762+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 34763+
34764+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
34765+ unsigned int flags);
34766+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
34767+ struct au_branch *br);
34768+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
34769+ struct dentry *h_parent);
34770+
34771+/* real rmdir for the whiteout-ed dir */
34772+struct au_whtmp_rmdir {
34773+ struct inode *dir;
e49829fe 34774+ struct au_branch *br;
1facf9fc 34775+ struct dentry *wh_dentry;
dece6358 34776+ struct au_nhash whlist;
1facf9fc 34777+};
34778+
34779+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
34780+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
34781+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
34782+ struct dentry *wh_dentry, struct au_nhash *whlist);
34783+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
34784+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
34785+
34786+/* ---------------------------------------------------------------------- */
34787+
34788+static inline struct dentry *au_diropq_create(struct dentry *dentry,
34789+ aufs_bindex_t bindex)
34790+{
34791+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
34792+}
34793+
34794+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
34795+{
34796+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
34797+}
34798+
34799+#endif /* __KERNEL__ */
34800+#endif /* __AUFS_WHOUT_H__ */
e8791d4f
AM
34801diff -urNp -x '*.orig' linux-4.9/fs/aufs/wkq.c linux-4.9/fs/aufs/wkq.c
34802--- linux-4.9/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
34803+++ linux-4.9/fs/aufs/wkq.c 2021-02-24 16:15:09.538240738 +0100
ae9dfd79 34804@@ -0,0 +1,212 @@
1facf9fc 34805+/*
ae9dfd79 34806+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 34807+ *
34808+ * This program, aufs is free software; you can redistribute it and/or modify
34809+ * it under the terms of the GNU General Public License as published by
34810+ * the Free Software Foundation; either version 2 of the License, or
34811+ * (at your option) any later version.
dece6358
AM
34812+ *
34813+ * This program is distributed in the hope that it will be useful,
34814+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34815+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34816+ * GNU General Public License for more details.
34817+ *
34818+ * You should have received a copy of the GNU General Public License
523b37e3 34819+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 34820+ */
34821+
34822+/*
34823+ * workqueue for asynchronous/super-io operations
34824+ * todo: try new credential scheme
34825+ */
34826+
dece6358 34827+#include <linux/module.h>
1facf9fc 34828+#include "aufs.h"
34829+
9dbd164d 34830+/* internal workqueue named AUFS_WKQ_NAME */
b752ccd1 34831+
9dbd164d 34832+static struct workqueue_struct *au_wkq;
1facf9fc 34833+
34834+struct au_wkinfo {
34835+ struct work_struct wk;
7f207e10 34836+ struct kobject *kobj;
1facf9fc 34837+
34838+ unsigned int flags; /* see wkq.h */
34839+
34840+ au_wkq_func_t func;
34841+ void *args;
34842+
1facf9fc 34843+ struct completion *comp;
34844+};
34845+
34846+/* ---------------------------------------------------------------------- */
34847+
1facf9fc 34848+static void wkq_func(struct work_struct *wk)
34849+{
34850+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
34851+
2dfbb274 34852+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
7f207e10
AM
34853+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
34854+
1facf9fc 34855+ wkinfo->func(wkinfo->args);
1facf9fc 34856+ if (au_ftest_wkq(wkinfo->flags, WAIT))
34857+ complete(wkinfo->comp);
34858+ else {
7f207e10 34859+ kobject_put(wkinfo->kobj);
9dbd164d 34860+ module_put(THIS_MODULE); /* todo: ?? */
ae9dfd79 34861+ kfree(wkinfo);
1facf9fc 34862+ }
34863+}
34864+
34865+/*
34866+ * Since struct completion is large, try allocating it dynamically.
34867+ */
c2b27bf2 34868+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
1facf9fc 34869+#define AuWkqCompDeclare(name) struct completion *comp = NULL
34870+
34871+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
34872+{
34873+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
34874+ if (*comp) {
34875+ init_completion(*comp);
34876+ wkinfo->comp = *comp;
34877+ return 0;
34878+ }
34879+ return -ENOMEM;
34880+}
34881+
34882+static void au_wkq_comp_free(struct completion *comp)
34883+{
ae9dfd79 34884+ kfree(comp);
1facf9fc 34885+}
34886+
34887+#else
34888+
34889+/* no braces */
34890+#define AuWkqCompDeclare(name) \
34891+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
34892+ struct completion *comp = &_ ## name
34893+
34894+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
34895+{
34896+ wkinfo->comp = *comp;
34897+ return 0;
34898+}
34899+
34900+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
34901+{
34902+ /* empty */
34903+}
34904+#endif /* 4KSTACKS */
34905+
53392da6 34906+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 34907+{
53392da6
AM
34908+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
34909+ if (au_wkq_test()) {
38d290e6
JR
34910+ AuWarn1("wkq from wkq, unless silly-rename on NFS,"
34911+ " due to a dead dir by UDBA?\n");
53392da6
AM
34912+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
34913+ }
34914+ } else
34915+ au_dbg_verify_kthread();
34916+
34917+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 34918+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
9dbd164d 34919+ queue_work(au_wkq, &wkinfo->wk);
4a4d8108
AM
34920+ } else {
34921+ INIT_WORK(&wkinfo->wk, wkq_func);
34922+ schedule_work(&wkinfo->wk);
34923+ }
1facf9fc 34924+}
34925+
7f207e10
AM
34926+/*
34927+ * Be careful. It is easy to make deadlock happen.
34928+ * processA: lock, wkq and wait
34929+ * processB: wkq and wait, lock in wkq
34930+ * --> deadlock
34931+ */
b752ccd1 34932+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 34933+{
34934+ int err;
34935+ AuWkqCompDeclare(comp);
34936+ struct au_wkinfo wkinfo = {
b752ccd1 34937+ .flags = flags,
1facf9fc 34938+ .func = func,
34939+ .args = args
34940+ };
34941+
34942+ err = au_wkq_comp_alloc(&wkinfo, &comp);
34943+ if (!err) {
53392da6 34944+ au_wkq_run(&wkinfo);
1facf9fc 34945+ /* no timeout, no interrupt */
34946+ wait_for_completion(wkinfo.comp);
34947+ au_wkq_comp_free(comp);
34948+ }
34949+
ae9dfd79 34950+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 34951+ return err;
1facf9fc 34952+}
34953+
027c5e7a
AM
34954+/*
34955+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
34956+ * problem in a concurrent umounting.
34957+ */
53392da6
AM
34958+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
34959+ unsigned int flags)
1facf9fc 34960+{
34961+ int err;
34962+ struct au_wkinfo *wkinfo;
34963+
f0c0a007 34964+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
1facf9fc 34965+
34966+ /*
34967+ * wkq_func() must free this wkinfo.
34968+ * it highly depends upon the implementation of workqueue.
34969+ */
34970+ err = 0;
34971+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
34972+ if (wkinfo) {
7f207e10 34973+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 34974+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 34975+ wkinfo->func = func;
34976+ wkinfo->args = args;
34977+ wkinfo->comp = NULL;
7f207e10 34978+ kobject_get(wkinfo->kobj);
9dbd164d 34979+ __module_get(THIS_MODULE); /* todo: ?? */
1facf9fc 34980+
53392da6 34981+ au_wkq_run(wkinfo);
1facf9fc 34982+ } else {
34983+ err = -ENOMEM;
e49829fe 34984+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 34985+ }
34986+
34987+ return err;
34988+}
34989+
34990+/* ---------------------------------------------------------------------- */
34991+
34992+void au_nwt_init(struct au_nowait_tasks *nwt)
34993+{
f0c0a007
AM
34994+ atomic_set(&nwt->nw_len, 0);
34995+ /* smp_mb(); */ /* atomic_set */
1facf9fc 34996+ init_waitqueue_head(&nwt->nw_wq);
34997+}
34998+
34999+void au_wkq_fin(void)
35000+{
9dbd164d 35001+ destroy_workqueue(au_wkq);
1facf9fc 35002+}
35003+
35004+int __init au_wkq_init(void)
35005+{
9dbd164d 35006+ int err;
b752ccd1
AM
35007+
35008+ err = 0;
86dc4139 35009+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
9dbd164d
AM
35010+ if (IS_ERR(au_wkq))
35011+ err = PTR_ERR(au_wkq);
35012+ else if (!au_wkq)
35013+ err = -ENOMEM;
b752ccd1
AM
35014+
35015+ return err;
1facf9fc 35016+}
e8791d4f
AM
35017diff -urNp -x '*.orig' linux-4.9/fs/aufs/wkq.h linux-4.9/fs/aufs/wkq.h
35018--- linux-4.9/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
35019+++ linux-4.9/fs/aufs/wkq.h 2021-02-24 16:15:09.538240738 +0100
f0c0a007 35020@@ -0,0 +1,93 @@
1facf9fc 35021+/*
ae9dfd79 35022+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 35023+ *
35024+ * This program, aufs is free software; you can redistribute it and/or modify
35025+ * it under the terms of the GNU General Public License as published by
35026+ * the Free Software Foundation; either version 2 of the License, or
35027+ * (at your option) any later version.
dece6358
AM
35028+ *
35029+ * This program is distributed in the hope that it will be useful,
35030+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35031+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35032+ * GNU General Public License for more details.
35033+ *
35034+ * You should have received a copy of the GNU General Public License
523b37e3 35035+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 35036+ */
35037+
35038+/*
35039+ * workqueue for asynchronous/super-io operations
35040+ * todo: try new credentials management scheme
35041+ */
35042+
35043+#ifndef __AUFS_WKQ_H__
35044+#define __AUFS_WKQ_H__
35045+
35046+#ifdef __KERNEL__
35047+
ae9dfd79 35048+#include <linux/wait.h>
5afbbe0d 35049+
dece6358
AM
35050+struct super_block;
35051+
1facf9fc 35052+/* ---------------------------------------------------------------------- */
35053+
35054+/*
35055+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
35056+ */
35057+struct au_nowait_tasks {
f0c0a007 35058+ atomic_t nw_len;
1facf9fc 35059+ wait_queue_head_t nw_wq;
35060+};
35061+
35062+/* ---------------------------------------------------------------------- */
35063+
35064+typedef void (*au_wkq_func_t)(void *args);
35065+
35066+/* wkq flags */
35067+#define AuWkq_WAIT 1
9dbd164d 35068+#define AuWkq_NEST (1 << 1)
1facf9fc 35069+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
35070+#define au_fset_wkq(flags, name) \
35071+ do { (flags) |= AuWkq_##name; } while (0)
35072+#define au_fclr_wkq(flags, name) \
35073+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 35074+
9dbd164d
AM
35075+#ifndef CONFIG_AUFS_HNOTIFY
35076+#undef AuWkq_NEST
35077+#define AuWkq_NEST 0
35078+#endif
35079+
1facf9fc 35080+/* wkq.c */
b752ccd1 35081+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
35082+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
35083+ unsigned int flags);
1facf9fc 35084+void au_nwt_init(struct au_nowait_tasks *nwt);
35085+int __init au_wkq_init(void);
35086+void au_wkq_fin(void);
35087+
35088+/* ---------------------------------------------------------------------- */
35089+
53392da6
AM
35090+static inline int au_wkq_test(void)
35091+{
35092+ return current->flags & PF_WQ_WORKER;
35093+}
35094+
b752ccd1 35095+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 35096+{
b752ccd1 35097+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 35098+}
35099+
35100+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
35101+{
f0c0a007 35102+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 35103+ wake_up_all(&nwt->nw_wq);
35104+}
35105+
35106+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
35107+{
f0c0a007 35108+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
1facf9fc 35109+ return 0;
35110+}
35111+
35112+#endif /* __KERNEL__ */
35113+#endif /* __AUFS_WKQ_H__ */
e8791d4f
AM
35114diff -urNp -x '*.orig' linux-4.9/fs/aufs/xattr.c linux-4.9/fs/aufs/xattr.c
35115--- linux-4.9/fs/aufs/xattr.c 1970-01-01 01:00:00.000000000 +0100
35116+++ linux-4.9/fs/aufs/xattr.c 2021-02-24 16:15:09.538240738 +0100
ae9dfd79 35117@@ -0,0 +1,355 @@
c1595e42 35118+/*
ae9dfd79 35119+ * Copyright (C) 2014-2018 Junjiro R. Okajima
c1595e42
JR
35120+ *
35121+ * This program, aufs is free software; you can redistribute it and/or modify
35122+ * it under the terms of the GNU General Public License as published by
35123+ * the Free Software Foundation; either version 2 of the License, or
35124+ * (at your option) any later version.
35125+ *
35126+ * This program is distributed in the hope that it will be useful,
35127+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35128+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35129+ * GNU General Public License for more details.
35130+ *
35131+ * You should have received a copy of the GNU General Public License
35132+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
35133+ */
35134+
35135+/*
35136+ * handling xattr functions
35137+ */
35138+
ae9dfd79
AM
35139+#include <linux/fs.h>
35140+#include <linux/posix_acl_xattr.h>
c1595e42
JR
35141+#include <linux/xattr.h>
35142+#include "aufs.h"
35143+
35144+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
35145+{
35146+ if (!ignore_flags)
35147+ goto out;
35148+ switch (err) {
35149+ case -ENOMEM:
35150+ case -EDQUOT:
35151+ goto out;
35152+ }
35153+
35154+ if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
35155+ err = 0;
35156+ goto out;
35157+ }
35158+
35159+#define cmp(brattr, prefix) do { \
35160+ if (!strncmp(name, XATTR_##prefix##_PREFIX, \
35161+ XATTR_##prefix##_PREFIX_LEN)) { \
35162+ if (ignore_flags & AuBrAttr_ICEX_##brattr) \
35163+ err = 0; \
35164+ goto out; \
35165+ } \
35166+ } while (0)
35167+
35168+ cmp(SEC, SECURITY);
35169+ cmp(SYS, SYSTEM);
35170+ cmp(TR, TRUSTED);
35171+ cmp(USR, USER);
35172+#undef cmp
35173+
35174+ if (ignore_flags & AuBrAttr_ICEX_OTH)
35175+ err = 0;
35176+
35177+out:
35178+ return err;
35179+}
35180+
35181+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
35182+
35183+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe
AM
35184+ char *name, char **buf, unsigned int ignore_flags,
35185+ unsigned int verbose)
c1595e42
JR
35186+{
35187+ int err;
35188+ ssize_t ssz;
35189+ struct inode *h_idst;
35190+
35191+ ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
35192+ err = ssz;
35193+ if (unlikely(err <= 0)) {
c1595e42
JR
35194+ if (err == -ENODATA
35195+ || (err == -EOPNOTSUPP
b912730e 35196+ && ((ignore_flags & au_xattr_out_of_list)
5527c038 35197+ || (au_test_nfs_noacl(d_inode(h_src))
b912730e
AM
35198+ && (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
35199+ || !strcmp(name,
35200+ XATTR_NAME_POSIX_ACL_DEFAULT))))
35201+ ))
c1595e42 35202+ err = 0;
b912730e
AM
35203+ if (err && (verbose || au_debug_test()))
35204+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
35205+ goto out;
35206+ }
35207+
35208+ /* unlock it temporarily */
5527c038 35209+ h_idst = d_inode(h_dst);
febd17d6 35210+ inode_unlock(h_idst);
c1595e42 35211+ err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
febd17d6 35212+ inode_lock_nested(h_idst, AuLsc_I_CHILD2);
c1595e42 35213+ if (unlikely(err)) {
7e9cd9fe
AM
35214+ if (verbose || au_debug_test())
35215+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
35216+ err = au_xattr_ignore(err, name, ignore_flags);
35217+ }
35218+
35219+out:
35220+ return err;
35221+}
35222+
7e9cd9fe
AM
35223+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
35224+ unsigned int verbose)
c1595e42
JR
35225+{
35226+ int err, unlocked, acl_access, acl_default;
35227+ ssize_t ssz;
35228+ struct inode *h_isrc, *h_idst;
35229+ char *value, *p, *o, *e;
35230+
35231+ /* try stopping to update the source inode while we are referencing */
7e9cd9fe 35232+ /* there should not be the parent-child relationship between them */
5527c038
JR
35233+ h_isrc = d_inode(h_src);
35234+ h_idst = d_inode(h_dst);
febd17d6 35235+ inode_unlock(h_idst);
ae9dfd79 35236+ vfsub_inode_lock_shared_nested(h_isrc, AuLsc_I_CHILD);
febd17d6 35237+ inode_lock_nested(h_idst, AuLsc_I_CHILD2);
c1595e42
JR
35238+ unlocked = 0;
35239+
35240+ /* some filesystems don't list POSIX ACL, for example tmpfs */
35241+ ssz = vfs_listxattr(h_src, NULL, 0);
35242+ err = ssz;
35243+ if (unlikely(err < 0)) {
35244+ AuTraceErr(err);
35245+ if (err == -ENODATA
35246+ || err == -EOPNOTSUPP)
35247+ err = 0; /* ignore */
35248+ goto out;
35249+ }
35250+
35251+ err = 0;
35252+ p = NULL;
35253+ o = NULL;
35254+ if (ssz) {
35255+ err = -ENOMEM;
35256+ p = kmalloc(ssz, GFP_NOFS);
35257+ o = p;
35258+ if (unlikely(!p))
35259+ goto out;
35260+ err = vfs_listxattr(h_src, p, ssz);
35261+ }
ae9dfd79 35262+ inode_unlock_shared(h_isrc);
c1595e42
JR
35263+ unlocked = 1;
35264+ AuDbg("err %d, ssz %zd\n", err, ssz);
35265+ if (unlikely(err < 0))
35266+ goto out_free;
35267+
35268+ err = 0;
35269+ e = p + ssz;
35270+ value = NULL;
35271+ acl_access = 0;
35272+ acl_default = 0;
35273+ while (!err && p < e) {
35274+ acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
35275+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
35276+ acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
35277+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
35278+ - 1);
7e9cd9fe
AM
35279+ err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
35280+ verbose);
c1595e42
JR
35281+ p += strlen(p) + 1;
35282+ }
35283+ AuTraceErr(err);
35284+ ignore_flags |= au_xattr_out_of_list;
35285+ if (!err && !acl_access) {
35286+ err = au_do_cpup_xattr(h_dst, h_src,
35287+ XATTR_NAME_POSIX_ACL_ACCESS, &value,
7e9cd9fe 35288+ ignore_flags, verbose);
c1595e42
JR
35289+ AuTraceErr(err);
35290+ }
35291+ if (!err && !acl_default) {
35292+ err = au_do_cpup_xattr(h_dst, h_src,
35293+ XATTR_NAME_POSIX_ACL_DEFAULT, &value,
7e9cd9fe 35294+ ignore_flags, verbose);
c1595e42
JR
35295+ AuTraceErr(err);
35296+ }
35297+
ae9dfd79 35298+ kfree(value);
c1595e42
JR
35299+
35300+out_free:
ae9dfd79 35301+ kfree(o);
c1595e42
JR
35302+out:
35303+ if (!unlocked)
ae9dfd79 35304+ inode_unlock_shared(h_isrc);
c1595e42
JR
35305+ AuTraceErr(err);
35306+ return err;
35307+}
35308+
35309+/* ---------------------------------------------------------------------- */
35310+
ae9dfd79
AM
35311+static int au_smack_reentering(struct super_block *sb)
35312+{
35313+#if IS_ENABLED(CONFIG_SECURITY_SMACK)
35314+ /*
35315+ * as a part of lookup, smack_d_instantiate() is called, and it calls
35316+ * i_op->getxattr(). ouch.
35317+ */
35318+ return si_pid_test(sb);
35319+#else
35320+ return 0;
35321+#endif
35322+}
35323+
c1595e42
JR
35324+enum {
35325+ AU_XATTR_LIST,
35326+ AU_XATTR_GET
35327+};
35328+
35329+struct au_lgxattr {
35330+ int type;
35331+ union {
35332+ struct {
35333+ char *list;
35334+ size_t size;
35335+ } list;
35336+ struct {
35337+ const char *name;
35338+ void *value;
35339+ size_t size;
35340+ } get;
35341+ } u;
35342+};
35343+
35344+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
35345+{
35346+ ssize_t err;
ae9dfd79 35347+ int reenter;
c1595e42
JR
35348+ struct path h_path;
35349+ struct super_block *sb;
35350+
35351+ sb = dentry->d_sb;
ae9dfd79
AM
35352+ reenter = au_smack_reentering(sb);
35353+ if (!reenter) {
35354+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
35355+ if (unlikely(err))
35356+ goto out;
35357+ }
35358+ err = au_h_path_getattr(dentry, /*force*/1, &h_path, reenter);
c1595e42
JR
35359+ if (unlikely(err))
35360+ goto out_si;
35361+ if (unlikely(!h_path.dentry))
35362+ /* illegally overlapped or something */
35363+ goto out_di; /* pretending success */
35364+
35365+ /* always topmost entry only */
35366+ switch (arg->type) {
35367+ case AU_XATTR_LIST:
35368+ err = vfs_listxattr(h_path.dentry,
35369+ arg->u.list.list, arg->u.list.size);
35370+ break;
35371+ case AU_XATTR_GET:
5afbbe0d 35372+ AuDebugOn(d_is_negative(h_path.dentry));
c1595e42
JR
35373+ err = vfs_getxattr(h_path.dentry,
35374+ arg->u.get.name, arg->u.get.value,
35375+ arg->u.get.size);
35376+ break;
35377+ }
35378+
35379+out_di:
ae9dfd79
AM
35380+ if (!reenter)
35381+ di_read_unlock(dentry, AuLock_IR);
c1595e42 35382+out_si:
ae9dfd79
AM
35383+ if (!reenter)
35384+ si_read_unlock(sb);
c1595e42
JR
35385+out:
35386+ AuTraceErr(err);
35387+ return err;
35388+}
35389+
35390+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
35391+{
35392+ struct au_lgxattr arg = {
35393+ .type = AU_XATTR_LIST,
35394+ .u.list = {
35395+ .list = list,
35396+ .size = size
35397+ },
35398+ };
35399+
35400+ return au_lgxattr(dentry, &arg);
35401+}
35402+
f2c43d5f
AM
35403+static ssize_t au_getxattr(struct dentry *dentry,
35404+ struct inode *inode __maybe_unused,
35405+ const char *name, void *value, size_t size)
c1595e42
JR
35406+{
35407+ struct au_lgxattr arg = {
35408+ .type = AU_XATTR_GET,
35409+ .u.get = {
35410+ .name = name,
35411+ .value = value,
35412+ .size = size
35413+ },
35414+ };
35415+
35416+ return au_lgxattr(dentry, &arg);
35417+}
35418+
f2c43d5f
AM
35419+static int au_setxattr(struct dentry *dentry, struct inode *inode,
35420+ const char *name, const void *value, size_t size,
35421+ int flags)
c1595e42 35422+{
f2c43d5f 35423+ struct au_sxattr arg = {
c1595e42
JR
35424+ .type = AU_XATTR_SET,
35425+ .u.set = {
35426+ .name = name,
35427+ .value = value,
35428+ .size = size,
35429+ .flags = flags
35430+ },
35431+ };
35432+
f2c43d5f 35433+ return au_sxattr(dentry, inode, &arg);
c1595e42
JR
35434+}
35435+
35436+/* ---------------------------------------------------------------------- */
35437+
f2c43d5f
AM
35438+static int au_xattr_get(const struct xattr_handler *handler,
35439+ struct dentry *dentry, struct inode *inode,
35440+ const char *name, void *buffer, size_t size)
c1595e42 35441+{
f2c43d5f 35442+ return au_getxattr(dentry, inode, name, buffer, size);
c1595e42
JR
35443+}
35444+
f2c43d5f
AM
35445+static int au_xattr_set(const struct xattr_handler *handler,
35446+ struct dentry *dentry, struct inode *inode,
35447+ const char *name, const void *value, size_t size,
35448+ int flags)
c1595e42 35449+{
f2c43d5f 35450+ return au_setxattr(dentry, inode, name, value, size, flags);
c1595e42
JR
35451+}
35452+
35453+static const struct xattr_handler au_xattr_handler = {
f2c43d5f
AM
35454+ .name = "",
35455+ .prefix = "",
c1595e42
JR
35456+ .get = au_xattr_get,
35457+ .set = au_xattr_set
c1595e42
JR
35458+};
35459+
35460+static const struct xattr_handler *au_xattr_handlers[] = {
ae9dfd79
AM
35461+#ifdef CONFIG_FS_POSIX_ACL
35462+ &posix_acl_access_xattr_handler,
35463+ &posix_acl_default_xattr_handler,
35464+#endif
35465+ &au_xattr_handler, /* must be last */
f2c43d5f 35466+ NULL
c1595e42
JR
35467+};
35468+
35469+void au_xattr_init(struct super_block *sb)
35470+{
f2c43d5f 35471+ sb->s_xattr = au_xattr_handlers;
c1595e42 35472+}
e8791d4f
AM
35473diff -urNp -x '*.orig' linux-4.9/fs/aufs/xino.c linux-4.9/fs/aufs/xino.c
35474--- linux-4.9/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
35475+++ linux-4.9/fs/aufs/xino.c 2021-02-24 16:15:09.538240738 +0100
ae9dfd79 35476@@ -0,0 +1,1415 @@
1facf9fc 35477+/*
ae9dfd79 35478+ * Copyright (C) 2005-2018 Junjiro R. Okajima
1facf9fc 35479+ *
35480+ * This program, aufs is free software; you can redistribute it and/or modify
35481+ * it under the terms of the GNU General Public License as published by
35482+ * the Free Software Foundation; either version 2 of the License, or
35483+ * (at your option) any later version.
dece6358
AM
35484+ *
35485+ * This program is distributed in the hope that it will be useful,
35486+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35487+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35488+ * GNU General Public License for more details.
35489+ *
35490+ * You should have received a copy of the GNU General Public License
523b37e3 35491+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 35492+ */
35493+
35494+/*
35495+ * external inode number translation table and bitmap
35496+ */
35497+
35498+#include <linux/seq_file.h>
392086de 35499+#include <linux/statfs.h>
1facf9fc 35500+#include "aufs.h"
35501+
9dbd164d 35502+/* todo: unnecessary to support mmap_sem since kernel-space? */
5527c038 35503+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 35504+ loff_t *pos)
35505+{
35506+ ssize_t err;
35507+ mm_segment_t oldfs;
b752ccd1
AM
35508+ union {
35509+ void *k;
35510+ char __user *u;
35511+ } buf;
1facf9fc 35512+
b752ccd1 35513+ buf.k = kbuf;
1facf9fc 35514+ oldfs = get_fs();
35515+ set_fs(KERNEL_DS);
35516+ do {
35517+ /* todo: signal_pending? */
b752ccd1 35518+ err = func(file, buf.u, size, pos);
1facf9fc 35519+ } while (err == -EAGAIN || err == -EINTR);
35520+ set_fs(oldfs);
35521+
35522+#if 0 /* reserved for future use */
35523+ if (err > 0)
2000de60 35524+ fsnotify_access(file->f_path.dentry);
1facf9fc 35525+#endif
35526+
35527+ return err;
35528+}
35529+
35530+/* ---------------------------------------------------------------------- */
35531+
be52b249
AM
35532+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
35533+ size_t size, loff_t *pos);
35534+
5527c038 35535+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
1facf9fc 35536+ size_t size, loff_t *pos)
35537+{
35538+ ssize_t err;
35539+ mm_segment_t oldfs;
b752ccd1
AM
35540+ union {
35541+ void *k;
35542+ const char __user *u;
35543+ } buf;
be52b249
AM
35544+ int i;
35545+ const int prevent_endless = 10;
1facf9fc 35546+
be52b249 35547+ i = 0;
b752ccd1 35548+ buf.k = kbuf;
1facf9fc 35549+ oldfs = get_fs();
35550+ set_fs(KERNEL_DS);
1facf9fc 35551+ do {
b752ccd1 35552+ err = func(file, buf.u, size, pos);
be52b249
AM
35553+ if (err == -EINTR
35554+ && !au_wkq_test()
35555+ && fatal_signal_pending(current)) {
35556+ set_fs(oldfs);
35557+ err = xino_fwrite_wkq(func, file, kbuf, size, pos);
35558+ BUG_ON(err == -EINTR);
35559+ oldfs = get_fs();
35560+ set_fs(KERNEL_DS);
35561+ }
35562+ } while (i++ < prevent_endless
35563+ && (err == -EAGAIN || err == -EINTR));
1facf9fc 35564+ set_fs(oldfs);
35565+
35566+#if 0 /* reserved for future use */
35567+ if (err > 0)
2000de60 35568+ fsnotify_modify(file->f_path.dentry);
1facf9fc 35569+#endif
35570+
35571+ return err;
35572+}
35573+
35574+struct do_xino_fwrite_args {
35575+ ssize_t *errp;
5527c038 35576+ vfs_writef_t func;
1facf9fc 35577+ struct file *file;
35578+ void *buf;
35579+ size_t size;
35580+ loff_t *pos;
35581+};
35582+
35583+static void call_do_xino_fwrite(void *args)
35584+{
35585+ struct do_xino_fwrite_args *a = args;
35586+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
35587+}
35588+
be52b249
AM
35589+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
35590+ size_t size, loff_t *pos)
35591+{
35592+ ssize_t err;
35593+ int wkq_err;
35594+ struct do_xino_fwrite_args args = {
35595+ .errp = &err,
35596+ .func = func,
35597+ .file = file,
35598+ .buf = buf,
35599+ .size = size,
35600+ .pos = pos
35601+ };
35602+
35603+ /*
35604+ * it breaks RLIMIT_FSIZE and normal user's limit,
35605+ * users should care about quota and real 'filesystem full.'
35606+ */
35607+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
35608+ if (unlikely(wkq_err))
35609+ err = wkq_err;
35610+
35611+ return err;
35612+}
35613+
5527c038
JR
35614+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
35615+ size_t size, loff_t *pos)
1facf9fc 35616+{
35617+ ssize_t err;
35618+
b752ccd1
AM
35619+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
35620+ lockdep_off();
35621+ err = do_xino_fwrite(func, file, buf, size, pos);
35622+ lockdep_on();
be52b249
AM
35623+ } else
35624+ err = xino_fwrite_wkq(func, file, buf, size, pos);
1facf9fc 35625+
35626+ return err;
35627+}
35628+
35629+/* ---------------------------------------------------------------------- */
35630+
35631+/*
35632+ * create a new xinofile at the same place/path as @base_file.
35633+ */
35634+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
35635+{
35636+ struct file *file;
4a4d8108 35637+ struct dentry *base, *parent;
523b37e3 35638+ struct inode *dir, *delegated;
1facf9fc 35639+ struct qstr *name;
1308ab2a 35640+ struct path path;
4a4d8108 35641+ int err;
1facf9fc 35642+
2000de60 35643+ base = base_file->f_path.dentry;
1facf9fc 35644+ parent = base->d_parent; /* dir inode is locked */
5527c038 35645+ dir = d_inode(parent);
1facf9fc 35646+ IMustLock(dir);
35647+
35648+ file = ERR_PTR(-EINVAL);
35649+ name = &base->d_name;
4a4d8108
AM
35650+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
35651+ if (IS_ERR(path.dentry)) {
35652+ file = (void *)path.dentry;
523b37e3
AM
35653+ pr_err("%pd lookup err %ld\n",
35654+ base, PTR_ERR(path.dentry));
1facf9fc 35655+ goto out;
35656+ }
35657+
35658+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 35659+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 35660+ if (unlikely(err)) {
35661+ file = ERR_PTR(err);
523b37e3 35662+ pr_err("%pd create err %d\n", base, err);
1facf9fc 35663+ goto out_dput;
35664+ }
35665+
c06a8ce3 35666+ path.mnt = base_file->f_path.mnt;
4a4d8108 35667+ file = vfsub_dentry_open(&path,
7f207e10 35668+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 35669+ /* | __FMODE_NONOTIFY */);
1facf9fc 35670+ if (IS_ERR(file)) {
523b37e3 35671+ pr_err("%pd open err %ld\n", base, PTR_ERR(file));
1facf9fc 35672+ goto out_dput;
35673+ }
35674+
523b37e3
AM
35675+ delegated = NULL;
35676+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
35677+ if (unlikely(err == -EWOULDBLOCK)) {
35678+ pr_warn("cannot retry for NFSv4 delegation"
35679+ " for an internal unlink\n");
35680+ iput(delegated);
35681+ }
1facf9fc 35682+ if (unlikely(err)) {
523b37e3 35683+ pr_err("%pd unlink err %d\n", base, err);
1facf9fc 35684+ goto out_fput;
35685+ }
35686+
35687+ if (copy_src) {
35688+ /* no one can touch copy_src xino */
c06a8ce3 35689+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
1facf9fc 35690+ if (unlikely(err)) {
523b37e3 35691+ pr_err("%pd copy err %d\n", base, err);
1facf9fc 35692+ goto out_fput;
35693+ }
35694+ }
35695+ goto out_dput; /* success */
35696+
4f0767ce 35697+out_fput:
1facf9fc 35698+ fput(file);
35699+ file = ERR_PTR(err);
4f0767ce 35700+out_dput:
4a4d8108 35701+ dput(path.dentry);
4f0767ce 35702+out:
1facf9fc 35703+ return file;
35704+}
35705+
35706+struct au_xino_lock_dir {
35707+ struct au_hinode *hdir;
35708+ struct dentry *parent;
febd17d6 35709+ struct inode *dir;
1facf9fc 35710+};
35711+
35712+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
35713+ struct au_xino_lock_dir *ldir)
35714+{
35715+ aufs_bindex_t brid, bindex;
35716+
35717+ ldir->hdir = NULL;
35718+ bindex = -1;
35719+ brid = au_xino_brid(sb);
35720+ if (brid >= 0)
35721+ bindex = au_br_index(sb, brid);
35722+ if (bindex >= 0) {
5527c038 35723+ ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
5afbbe0d 35724+ au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 35725+ } else {
2000de60 35726+ ldir->parent = dget_parent(xino->f_path.dentry);
febd17d6
JR
35727+ ldir->dir = d_inode(ldir->parent);
35728+ inode_lock_nested(ldir->dir, AuLsc_I_PARENT);
1facf9fc 35729+ }
35730+}
35731+
35732+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
35733+{
35734+ if (ldir->hdir)
5afbbe0d 35735+ au_hn_inode_unlock(ldir->hdir);
1facf9fc 35736+ else {
febd17d6 35737+ inode_unlock(ldir->dir);
1facf9fc 35738+ dput(ldir->parent);
35739+ }
35740+}
35741+
35742+/* ---------------------------------------------------------------------- */
35743+
35744+/* truncate xino files asynchronously */
35745+
35746+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
35747+{
35748+ int err;
392086de
AM
35749+ unsigned long jiffy;
35750+ blkcnt_t blocks;
5afbbe0d 35751+ aufs_bindex_t bi, bbot;
392086de 35752+ struct kstatfs *st;
1facf9fc 35753+ struct au_branch *br;
35754+ struct file *new_xino, *file;
35755+ struct super_block *h_sb;
35756+ struct au_xino_lock_dir ldir;
35757+
392086de 35758+ err = -ENOMEM;
be52b249 35759+ st = kmalloc(sizeof(*st), GFP_NOFS);
392086de
AM
35760+ if (unlikely(!st))
35761+ goto out;
35762+
1facf9fc 35763+ err = -EINVAL;
5afbbe0d
AM
35764+ bbot = au_sbbot(sb);
35765+ if (unlikely(bindex < 0 || bbot < bindex))
392086de 35766+ goto out_st;
1facf9fc 35767+ br = au_sbr(sb, bindex);
35768+ file = br->br_xino.xi_file;
35769+ if (!file)
392086de
AM
35770+ goto out_st;
35771+
35772+ err = vfs_statfs(&file->f_path, st);
35773+ if (unlikely(err))
35774+ AuErr1("statfs err %d, ignored\n", err);
35775+ jiffy = jiffies;
35776+ blocks = file_inode(file)->i_blocks;
35777+ pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
35778+ bindex, (u64)blocks, st->f_bfree, st->f_blocks);
1facf9fc 35779+
35780+ au_xino_lock_dir(sb, file, &ldir);
35781+ /* mnt_want_write() is unnecessary here */
35782+ new_xino = au_xino_create2(file, file);
35783+ au_xino_unlock_dir(&ldir);
35784+ err = PTR_ERR(new_xino);
392086de
AM
35785+ if (IS_ERR(new_xino)) {
35786+ pr_err("err %d, ignored\n", err);
35787+ goto out_st;
35788+ }
1facf9fc 35789+ err = 0;
35790+ fput(file);
35791+ br->br_xino.xi_file = new_xino;
35792+
86dc4139 35793+ h_sb = au_br_sb(br);
5afbbe0d 35794+ for (bi = 0; bi <= bbot; bi++) {
1facf9fc 35795+ if (unlikely(bi == bindex))
35796+ continue;
35797+ br = au_sbr(sb, bi);
86dc4139 35798+ if (au_br_sb(br) != h_sb)
1facf9fc 35799+ continue;
35800+
35801+ fput(br->br_xino.xi_file);
35802+ br->br_xino.xi_file = new_xino;
35803+ get_file(new_xino);
35804+ }
35805+
392086de
AM
35806+ err = vfs_statfs(&new_xino->f_path, st);
35807+ if (!err) {
35808+ pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
35809+ bindex, (u64)file_inode(new_xino)->i_blocks,
35810+ st->f_bfree, st->f_blocks);
35811+ if (file_inode(new_xino)->i_blocks < blocks)
35812+ au_sbi(sb)->si_xino_jiffy = jiffy;
35813+ } else
35814+ AuErr1("statfs err %d, ignored\n", err);
35815+
35816+out_st:
ae9dfd79 35817+ kfree(st);
4f0767ce 35818+out:
1facf9fc 35819+ return err;
35820+}
35821+
35822+struct xino_do_trunc_args {
35823+ struct super_block *sb;
35824+ struct au_branch *br;
35825+};
35826+
35827+static void xino_do_trunc(void *_args)
35828+{
35829+ struct xino_do_trunc_args *args = _args;
35830+ struct super_block *sb;
35831+ struct au_branch *br;
35832+ struct inode *dir;
35833+ int err;
35834+ aufs_bindex_t bindex;
35835+
35836+ err = 0;
35837+ sb = args->sb;
5527c038 35838+ dir = d_inode(sb->s_root);
1facf9fc 35839+ br = args->br;
35840+
35841+ si_noflush_write_lock(sb);
35842+ ii_read_lock_parent(dir);
35843+ bindex = au_br_index(sb, br->br_id);
35844+ err = au_xino_trunc(sb, bindex);
1facf9fc 35845+ ii_read_unlock(dir);
35846+ if (unlikely(err))
392086de 35847+ pr_warn("err b%d, (%d)\n", bindex, err);
1facf9fc 35848+ atomic_dec(&br->br_xino_running);
5afbbe0d 35849+ au_br_put(br);
1facf9fc 35850+ si_write_unlock(sb);
027c5e7a 35851+ au_nwt_done(&au_sbi(sb)->si_nowait);
ae9dfd79 35852+ kfree(args);
1facf9fc 35853+}
35854+
392086de
AM
35855+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
35856+{
35857+ int err;
35858+ struct kstatfs st;
35859+ struct au_sbinfo *sbinfo;
35860+
35861+ /* todo: si_xino_expire and the ratio should be customizable */
35862+ sbinfo = au_sbi(sb);
35863+ if (time_before(jiffies,
35864+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
35865+ return 0;
35866+
35867+ /* truncation border */
35868+ err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
35869+ if (unlikely(err)) {
35870+ AuErr1("statfs err %d, ignored\n", err);
35871+ return 0;
35872+ }
35873+ if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
35874+ return 0;
35875+
35876+ return 1;
35877+}
35878+
1facf9fc 35879+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
35880+{
35881+ struct xino_do_trunc_args *args;
35882+ int wkq_err;
35883+
392086de 35884+ if (!xino_trunc_test(sb, br))
1facf9fc 35885+ return;
35886+
35887+ if (atomic_inc_return(&br->br_xino_running) > 1)
35888+ goto out;
35889+
35890+ /* lock and kfree() will be called in trunc_xino() */
35891+ args = kmalloc(sizeof(*args), GFP_NOFS);
35892+ if (unlikely(!args)) {
35893+ AuErr1("no memory\n");
f0c0a007 35894+ goto out;
1facf9fc 35895+ }
35896+
5afbbe0d 35897+ au_br_get(br);
1facf9fc 35898+ args->sb = sb;
35899+ args->br = br;
53392da6 35900+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 35901+ if (!wkq_err)
35902+ return; /* success */
35903+
4a4d8108 35904+ pr_err("wkq %d\n", wkq_err);
5afbbe0d 35905+ au_br_put(br);
ae9dfd79 35906+ kfree(args);
1facf9fc 35907+
4f0767ce 35908+out:
e49829fe 35909+ atomic_dec(&br->br_xino_running);
1facf9fc 35910+}
35911+
35912+/* ---------------------------------------------------------------------- */
35913+
5527c038 35914+static int au_xino_do_write(vfs_writef_t write, struct file *file,
1facf9fc 35915+ ino_t h_ino, ino_t ino)
35916+{
35917+ loff_t pos;
35918+ ssize_t sz;
35919+
35920+ pos = h_ino;
35921+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
35922+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
35923+ return -EFBIG;
35924+ }
35925+ pos *= sizeof(ino);
35926+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
35927+ if (sz == sizeof(ino))
35928+ return 0; /* success */
35929+
35930+ AuIOErr("write failed (%zd)\n", sz);
35931+ return -EIO;
35932+}
35933+
35934+/*
35935+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
35936+ * at the position of @h_ino.
35937+ * even if @ino is zero, it is written to the xinofile and means no entry.
35938+ * if the size of the xino file on a specific filesystem exceeds the watermark,
35939+ * try truncating it.
35940+ */
35941+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
35942+ ino_t ino)
35943+{
35944+ int err;
35945+ unsigned int mnt_flags;
35946+ struct au_branch *br;
35947+
35948+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
35949+ || ((loff_t)-1) > 0);
dece6358 35950+ SiMustAnyLock(sb);
1facf9fc 35951+
35952+ mnt_flags = au_mntflags(sb);
35953+ if (!au_opt_test(mnt_flags, XINO))
35954+ return 0;
35955+
35956+ br = au_sbr(sb, bindex);
35957+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
35958+ h_ino, ino);
35959+ if (!err) {
35960+ if (au_opt_test(mnt_flags, TRUNC_XINO)
86dc4139 35961+ && au_test_fs_trunc_xino(au_br_sb(br)))
1facf9fc 35962+ xino_try_trunc(sb, br);
35963+ return 0; /* success */
35964+ }
35965+
35966+ AuIOErr("write failed (%d)\n", err);
35967+ return -EIO;
35968+}
35969+
35970+/* ---------------------------------------------------------------------- */
35971+
35972+/* aufs inode number bitmap */
35973+
35974+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
35975+static ino_t xib_calc_ino(unsigned long pindex, int bit)
35976+{
35977+ ino_t ino;
35978+
35979+ AuDebugOn(bit < 0 || page_bits <= bit);
35980+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
35981+ return ino;
35982+}
35983+
35984+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
35985+{
35986+ AuDebugOn(ino < AUFS_FIRST_INO);
35987+ ino -= AUFS_FIRST_INO;
35988+ *pindex = ino / page_bits;
35989+ *bit = ino % page_bits;
35990+}
35991+
35992+static int xib_pindex(struct super_block *sb, unsigned long pindex)
35993+{
35994+ int err;
35995+ loff_t pos;
35996+ ssize_t sz;
35997+ struct au_sbinfo *sbinfo;
35998+ struct file *xib;
35999+ unsigned long *p;
36000+
36001+ sbinfo = au_sbi(sb);
36002+ MtxMustLock(&sbinfo->si_xib_mtx);
36003+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
36004+ || !au_opt_test(sbinfo->si_mntflags, XINO));
36005+
36006+ if (pindex == sbinfo->si_xib_last_pindex)
36007+ return 0;
36008+
36009+ xib = sbinfo->si_xib;
36010+ p = sbinfo->si_xib_buf;
36011+ pos = sbinfo->si_xib_last_pindex;
36012+ pos *= PAGE_SIZE;
36013+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
36014+ if (unlikely(sz != PAGE_SIZE))
36015+ goto out;
36016+
36017+ pos = pindex;
36018+ pos *= PAGE_SIZE;
c06a8ce3 36019+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
1facf9fc 36020+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
36021+ else {
36022+ memset(p, 0, PAGE_SIZE);
36023+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
36024+ }
36025+ if (sz == PAGE_SIZE) {
36026+ sbinfo->si_xib_last_pindex = pindex;
36027+ return 0; /* success */
36028+ }
36029+
4f0767ce 36030+out:
b752ccd1
AM
36031+ AuIOErr1("write failed (%zd)\n", sz);
36032+ err = sz;
36033+ if (sz >= 0)
36034+ err = -EIO;
36035+ return err;
36036+}
36037+
36038+/* ---------------------------------------------------------------------- */
36039+
36040+static void au_xib_clear_bit(struct inode *inode)
36041+{
36042+ int err, bit;
36043+ unsigned long pindex;
36044+ struct super_block *sb;
36045+ struct au_sbinfo *sbinfo;
36046+
36047+ AuDebugOn(inode->i_nlink);
36048+
36049+ sb = inode->i_sb;
36050+ xib_calc_bit(inode->i_ino, &pindex, &bit);
36051+ AuDebugOn(page_bits <= bit);
36052+ sbinfo = au_sbi(sb);
36053+ mutex_lock(&sbinfo->si_xib_mtx);
36054+ err = xib_pindex(sb, pindex);
36055+ if (!err) {
36056+ clear_bit(bit, sbinfo->si_xib_buf);
36057+ sbinfo->si_xib_next_bit = bit;
36058+ }
36059+ mutex_unlock(&sbinfo->si_xib_mtx);
36060+}
36061+
36062+/* for s_op->delete_inode() */
36063+void au_xino_delete_inode(struct inode *inode, const int unlinked)
36064+{
36065+ int err;
36066+ unsigned int mnt_flags;
5afbbe0d 36067+ aufs_bindex_t bindex, bbot, bi;
b752ccd1
AM
36068+ unsigned char try_trunc;
36069+ struct au_iinfo *iinfo;
36070+ struct super_block *sb;
36071+ struct au_hinode *hi;
36072+ struct inode *h_inode;
36073+ struct au_branch *br;
5527c038 36074+ vfs_writef_t xwrite;
b752ccd1 36075+
5afbbe0d
AM
36076+ AuDebugOn(au_is_bad_inode(inode));
36077+
b752ccd1
AM
36078+ sb = inode->i_sb;
36079+ mnt_flags = au_mntflags(sb);
36080+ if (!au_opt_test(mnt_flags, XINO)
36081+ || inode->i_ino == AUFS_ROOT_INO)
36082+ return;
36083+
36084+ if (unlinked) {
36085+ au_xigen_inc(inode);
36086+ au_xib_clear_bit(inode);
36087+ }
36088+
36089+ iinfo = au_ii(inode);
5afbbe0d 36090+ bindex = iinfo->ii_btop;
b752ccd1
AM
36091+ if (bindex < 0)
36092+ return;
1facf9fc 36093+
b752ccd1
AM
36094+ xwrite = au_sbi(sb)->si_xwrite;
36095+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
5afbbe0d
AM
36096+ hi = au_hinode(iinfo, bindex);
36097+ bbot = iinfo->ii_bbot;
36098+ for (; bindex <= bbot; bindex++, hi++) {
b752ccd1
AM
36099+ h_inode = hi->hi_inode;
36100+ if (!h_inode
36101+ || (!unlinked && h_inode->i_nlink))
36102+ continue;
1facf9fc 36103+
b752ccd1
AM
36104+ /* inode may not be revalidated */
36105+ bi = au_br_index(sb, hi->hi_id);
36106+ if (bi < 0)
36107+ continue;
1facf9fc 36108+
b752ccd1
AM
36109+ br = au_sbr(sb, bi);
36110+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
36111+ h_inode->i_ino, /*ino*/0);
36112+ if (!err && try_trunc
86dc4139 36113+ && au_test_fs_trunc_xino(au_br_sb(br)))
b752ccd1 36114+ xino_try_trunc(sb, br);
1facf9fc 36115+ }
1facf9fc 36116+}
36117+
36118+/* get an unused inode number from bitmap */
36119+ino_t au_xino_new_ino(struct super_block *sb)
36120+{
36121+ ino_t ino;
36122+ unsigned long *p, pindex, ul, pend;
36123+ struct au_sbinfo *sbinfo;
36124+ struct file *file;
36125+ int free_bit, err;
36126+
36127+ if (!au_opt_test(au_mntflags(sb), XINO))
36128+ return iunique(sb, AUFS_FIRST_INO);
36129+
36130+ sbinfo = au_sbi(sb);
36131+ mutex_lock(&sbinfo->si_xib_mtx);
36132+ p = sbinfo->si_xib_buf;
36133+ free_bit = sbinfo->si_xib_next_bit;
36134+ if (free_bit < page_bits && !test_bit(free_bit, p))
36135+ goto out; /* success */
36136+ free_bit = find_first_zero_bit(p, page_bits);
36137+ if (free_bit < page_bits)
36138+ goto out; /* success */
36139+
36140+ pindex = sbinfo->si_xib_last_pindex;
36141+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
36142+ err = xib_pindex(sb, ul);
36143+ if (unlikely(err))
36144+ goto out_err;
36145+ free_bit = find_first_zero_bit(p, page_bits);
36146+ if (free_bit < page_bits)
36147+ goto out; /* success */
36148+ }
36149+
36150+ file = sbinfo->si_xib;
c06a8ce3 36151+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
1facf9fc 36152+ for (ul = pindex + 1; ul <= pend; ul++) {
36153+ err = xib_pindex(sb, ul);
36154+ if (unlikely(err))
36155+ goto out_err;
36156+ free_bit = find_first_zero_bit(p, page_bits);
36157+ if (free_bit < page_bits)
36158+ goto out; /* success */
36159+ }
36160+ BUG();
36161+
4f0767ce 36162+out:
1facf9fc 36163+ set_bit(free_bit, p);
7f207e10 36164+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 36165+ pindex = sbinfo->si_xib_last_pindex;
36166+ mutex_unlock(&sbinfo->si_xib_mtx);
36167+ ino = xib_calc_ino(pindex, free_bit);
36168+ AuDbg("i%lu\n", (unsigned long)ino);
36169+ return ino;
4f0767ce 36170+out_err:
1facf9fc 36171+ mutex_unlock(&sbinfo->si_xib_mtx);
36172+ AuDbg("i0\n");
36173+ return 0;
36174+}
36175+
36176+/*
36177+ * read @ino from xinofile for the specified branch{@sb, @bindex}
36178+ * at the position of @h_ino.
36179+ * if @ino does not exist and @do_new is true, get new one.
36180+ */
36181+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
36182+ ino_t *ino)
36183+{
36184+ int err;
36185+ ssize_t sz;
36186+ loff_t pos;
36187+ struct file *file;
36188+ struct au_sbinfo *sbinfo;
36189+
36190+ *ino = 0;
36191+ if (!au_opt_test(au_mntflags(sb), XINO))
36192+ return 0; /* no xino */
36193+
36194+ err = 0;
36195+ sbinfo = au_sbi(sb);
36196+ pos = h_ino;
36197+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
36198+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
36199+ return -EFBIG;
36200+ }
36201+ pos *= sizeof(*ino);
36202+
36203+ file = au_sbr(sb, bindex)->br_xino.xi_file;
c06a8ce3 36204+ if (vfsub_f_size_read(file) < pos + sizeof(*ino))
1facf9fc 36205+ return 0; /* no ino */
36206+
36207+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
36208+ if (sz == sizeof(*ino))
36209+ return 0; /* success */
36210+
36211+ err = sz;
36212+ if (unlikely(sz >= 0)) {
36213+ err = -EIO;
36214+ AuIOErr("xino read error (%zd)\n", sz);
36215+ }
36216+
36217+ return err;
36218+}
36219+
36220+/* ---------------------------------------------------------------------- */
36221+
36222+/* create and set a new xino file */
36223+
36224+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
36225+{
36226+ struct file *file;
36227+ struct dentry *h_parent, *d;
b912730e 36228+ struct inode *h_dir, *inode;
1facf9fc 36229+ int err;
36230+
36231+ /*
36232+ * at mount-time, and the xino file is the default path,
4a4d8108 36233+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 36234+ * when a user specified the xino, we cannot get au_hdir to be ignored.
36235+ */
7f207e10 36236+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 36237+ /* | __FMODE_NONOTIFY */,
1facf9fc 36238+ S_IRUGO | S_IWUGO);
36239+ if (IS_ERR(file)) {
36240+ if (!silent)
4a4d8108 36241+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 36242+ return file;
36243+ }
36244+
36245+ /* keep file count */
b912730e
AM
36246+ err = 0;
36247+ inode = file_inode(file);
2000de60 36248+ h_parent = dget_parent(file->f_path.dentry);
5527c038 36249+ h_dir = d_inode(h_parent);
febd17d6 36250+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
1facf9fc 36251+ /* mnt_want_write() is unnecessary here */
523b37e3 36252+ /* no delegation since it is just created */
b912730e
AM
36253+ if (inode->i_nlink)
36254+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
36255+ /*force*/0);
febd17d6 36256+ inode_unlock(h_dir);
1facf9fc 36257+ dput(h_parent);
36258+ if (unlikely(err)) {
36259+ if (!silent)
4a4d8108 36260+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 36261+ goto out;
36262+ }
36263+
36264+ err = -EINVAL;
2000de60 36265+ d = file->f_path.dentry;
1facf9fc 36266+ if (unlikely(sb == d->d_sb)) {
36267+ if (!silent)
4a4d8108 36268+ pr_err("%s must be outside\n", fname);
1facf9fc 36269+ goto out;
36270+ }
36271+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
36272+ if (!silent)
4a4d8108
AM
36273+ pr_err("xino doesn't support %s(%s)\n",
36274+ fname, au_sbtype(d->d_sb));
1facf9fc 36275+ goto out;
36276+ }
36277+ return file; /* success */
36278+
4f0767ce 36279+out:
1facf9fc 36280+ fput(file);
36281+ file = ERR_PTR(err);
36282+ return file;
36283+}
36284+
36285+/*
36286+ * find another branch who is on the same filesystem of the specified
5afbbe0d 36287+ * branch{@btgt}. search until @bbot.
1facf9fc 36288+ */
36289+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
5afbbe0d 36290+ aufs_bindex_t bbot)
1facf9fc 36291+{
36292+ aufs_bindex_t bindex;
36293+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
36294+
36295+ for (bindex = 0; bindex < btgt; bindex++)
36296+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
36297+ return bindex;
5afbbe0d 36298+ for (bindex++; bindex <= bbot; bindex++)
1facf9fc 36299+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
36300+ return bindex;
36301+ return -1;
36302+}
36303+
36304+/* ---------------------------------------------------------------------- */
36305+
36306+/*
36307+ * initialize the xinofile for the specified branch @br
36308+ * at the place/path where @base_file indicates.
36309+ * test whether another branch is on the same filesystem or not,
36310+ * if @do_test is true.
36311+ */
36312+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
36313+ struct file *base_file, int do_test)
36314+{
36315+ int err;
36316+ ino_t ino;
5afbbe0d 36317+ aufs_bindex_t bbot, bindex;
1facf9fc 36318+ struct au_branch *shared_br, *b;
36319+ struct file *file;
36320+ struct super_block *tgt_sb;
36321+
36322+ shared_br = NULL;
5afbbe0d 36323+ bbot = au_sbbot(sb);
1facf9fc 36324+ if (do_test) {
86dc4139 36325+ tgt_sb = au_br_sb(br);
5afbbe0d 36326+ for (bindex = 0; bindex <= bbot; bindex++) {
1facf9fc 36327+ b = au_sbr(sb, bindex);
86dc4139 36328+ if (tgt_sb == au_br_sb(b)) {
1facf9fc 36329+ shared_br = b;
36330+ break;
36331+ }
36332+ }
36333+ }
36334+
36335+ if (!shared_br || !shared_br->br_xino.xi_file) {
36336+ struct au_xino_lock_dir ldir;
36337+
36338+ au_xino_lock_dir(sb, base_file, &ldir);
36339+ /* mnt_want_write() is unnecessary here */
36340+ file = au_xino_create2(base_file, NULL);
36341+ au_xino_unlock_dir(&ldir);
36342+ err = PTR_ERR(file);
36343+ if (IS_ERR(file))
36344+ goto out;
36345+ br->br_xino.xi_file = file;
36346+ } else {
36347+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
36348+ get_file(br->br_xino.xi_file);
36349+ }
36350+
36351+ ino = AUFS_ROOT_INO;
36352+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
36353+ h_ino, ino);
b752ccd1
AM
36354+ if (unlikely(err)) {
36355+ fput(br->br_xino.xi_file);
36356+ br->br_xino.xi_file = NULL;
36357+ }
1facf9fc 36358+
4f0767ce 36359+out:
1facf9fc 36360+ return err;
36361+}
36362+
36363+/* ---------------------------------------------------------------------- */
36364+
36365+/* truncate a xino bitmap file */
36366+
36367+/* todo: slow */
36368+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
36369+{
36370+ int err, bit;
36371+ ssize_t sz;
36372+ unsigned long pindex;
36373+ loff_t pos, pend;
36374+ struct au_sbinfo *sbinfo;
5527c038 36375+ vfs_readf_t func;
1facf9fc 36376+ ino_t *ino;
36377+ unsigned long *p;
36378+
36379+ err = 0;
36380+ sbinfo = au_sbi(sb);
dece6358 36381+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 36382+ p = sbinfo->si_xib_buf;
36383+ func = sbinfo->si_xread;
c06a8ce3 36384+ pend = vfsub_f_size_read(file);
1facf9fc 36385+ pos = 0;
36386+ while (pos < pend) {
36387+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
36388+ err = sz;
36389+ if (unlikely(sz <= 0))
36390+ goto out;
36391+
36392+ err = 0;
36393+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
36394+ if (unlikely(*ino < AUFS_FIRST_INO))
36395+ continue;
36396+
36397+ xib_calc_bit(*ino, &pindex, &bit);
36398+ AuDebugOn(page_bits <= bit);
36399+ err = xib_pindex(sb, pindex);
36400+ if (!err)
36401+ set_bit(bit, p);
36402+ else
36403+ goto out;
36404+ }
36405+ }
36406+
4f0767ce 36407+out:
1facf9fc 36408+ return err;
36409+}
36410+
36411+static int xib_restore(struct super_block *sb)
36412+{
36413+ int err;
5afbbe0d 36414+ aufs_bindex_t bindex, bbot;
1facf9fc 36415+ void *page;
36416+
36417+ err = -ENOMEM;
36418+ page = (void *)__get_free_page(GFP_NOFS);
36419+ if (unlikely(!page))
36420+ goto out;
36421+
36422+ err = 0;
5afbbe0d
AM
36423+ bbot = au_sbbot(sb);
36424+ for (bindex = 0; !err && bindex <= bbot; bindex++)
1facf9fc 36425+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
36426+ err = do_xib_restore
36427+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
36428+ else
36429+ AuDbg("b%d\n", bindex);
ae9dfd79 36430+ free_page((unsigned long)page);
1facf9fc 36431+
4f0767ce 36432+out:
1facf9fc 36433+ return err;
36434+}
36435+
36436+int au_xib_trunc(struct super_block *sb)
36437+{
36438+ int err;
36439+ ssize_t sz;
36440+ loff_t pos;
36441+ struct au_xino_lock_dir ldir;
36442+ struct au_sbinfo *sbinfo;
36443+ unsigned long *p;
36444+ struct file *file;
36445+
dece6358
AM
36446+ SiMustWriteLock(sb);
36447+
1facf9fc 36448+ err = 0;
36449+ sbinfo = au_sbi(sb);
36450+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
36451+ goto out;
36452+
36453+ file = sbinfo->si_xib;
c06a8ce3 36454+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
1facf9fc 36455+ goto out;
36456+
36457+ au_xino_lock_dir(sb, file, &ldir);
36458+ /* mnt_want_write() is unnecessary here */
36459+ file = au_xino_create2(sbinfo->si_xib, NULL);
36460+ au_xino_unlock_dir(&ldir);
36461+ err = PTR_ERR(file);
36462+ if (IS_ERR(file))
36463+ goto out;
36464+ fput(sbinfo->si_xib);
36465+ sbinfo->si_xib = file;
36466+
36467+ p = sbinfo->si_xib_buf;
36468+ memset(p, 0, PAGE_SIZE);
36469+ pos = 0;
36470+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
36471+ if (unlikely(sz != PAGE_SIZE)) {
36472+ err = sz;
36473+ AuIOErr("err %d\n", err);
36474+ if (sz >= 0)
36475+ err = -EIO;
36476+ goto out;
36477+ }
36478+
36479+ mutex_lock(&sbinfo->si_xib_mtx);
36480+ /* mnt_want_write() is unnecessary here */
36481+ err = xib_restore(sb);
36482+ mutex_unlock(&sbinfo->si_xib_mtx);
36483+
36484+out:
36485+ return err;
36486+}
36487+
36488+/* ---------------------------------------------------------------------- */
36489+
36490+/*
36491+ * xino mount option handlers
36492+ */
1facf9fc 36493+
36494+/* xino bitmap */
36495+static void xino_clear_xib(struct super_block *sb)
36496+{
36497+ struct au_sbinfo *sbinfo;
36498+
dece6358
AM
36499+ SiMustWriteLock(sb);
36500+
1facf9fc 36501+ sbinfo = au_sbi(sb);
36502+ sbinfo->si_xread = NULL;
36503+ sbinfo->si_xwrite = NULL;
36504+ if (sbinfo->si_xib)
36505+ fput(sbinfo->si_xib);
36506+ sbinfo->si_xib = NULL;
f0c0a007 36507+ if (sbinfo->si_xib_buf)
ae9dfd79 36508+ free_page((unsigned long)sbinfo->si_xib_buf);
1facf9fc 36509+ sbinfo->si_xib_buf = NULL;
36510+}
36511+
36512+static int au_xino_set_xib(struct super_block *sb, struct file *base)
36513+{
36514+ int err;
36515+ loff_t pos;
36516+ struct au_sbinfo *sbinfo;
36517+ struct file *file;
36518+
dece6358
AM
36519+ SiMustWriteLock(sb);
36520+
1facf9fc 36521+ sbinfo = au_sbi(sb);
36522+ file = au_xino_create2(base, sbinfo->si_xib);
36523+ err = PTR_ERR(file);
36524+ if (IS_ERR(file))
36525+ goto out;
36526+ if (sbinfo->si_xib)
36527+ fput(sbinfo->si_xib);
36528+ sbinfo->si_xib = file;
5527c038
JR
36529+ sbinfo->si_xread = vfs_readf(file);
36530+ sbinfo->si_xwrite = vfs_writef(file);
1facf9fc 36531+
36532+ err = -ENOMEM;
36533+ if (!sbinfo->si_xib_buf)
36534+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
36535+ if (unlikely(!sbinfo->si_xib_buf))
36536+ goto out_unset;
36537+
36538+ sbinfo->si_xib_last_pindex = 0;
36539+ sbinfo->si_xib_next_bit = 0;
c06a8ce3 36540+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
1facf9fc 36541+ pos = 0;
36542+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
36543+ PAGE_SIZE, &pos);
36544+ if (unlikely(err != PAGE_SIZE))
36545+ goto out_free;
36546+ }
36547+ err = 0;
36548+ goto out; /* success */
36549+
4f0767ce 36550+out_free:
f0c0a007 36551+ if (sbinfo->si_xib_buf)
ae9dfd79 36552+ free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
36553+ sbinfo->si_xib_buf = NULL;
36554+ if (err >= 0)
36555+ err = -EIO;
4f0767ce 36556+out_unset:
b752ccd1
AM
36557+ fput(sbinfo->si_xib);
36558+ sbinfo->si_xib = NULL;
36559+ sbinfo->si_xread = NULL;
36560+ sbinfo->si_xwrite = NULL;
4f0767ce 36561+out:
b752ccd1 36562+ return err;
1facf9fc 36563+}
36564+
b752ccd1
AM
36565+/* xino for each branch */
36566+static void xino_clear_br(struct super_block *sb)
36567+{
5afbbe0d 36568+ aufs_bindex_t bindex, bbot;
b752ccd1 36569+ struct au_branch *br;
1facf9fc 36570+
5afbbe0d
AM
36571+ bbot = au_sbbot(sb);
36572+ for (bindex = 0; bindex <= bbot; bindex++) {
b752ccd1
AM
36573+ br = au_sbr(sb, bindex);
36574+ if (!br || !br->br_xino.xi_file)
36575+ continue;
36576+
36577+ fput(br->br_xino.xi_file);
36578+ br->br_xino.xi_file = NULL;
36579+ }
36580+}
36581+
36582+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 36583+{
36584+ int err;
b752ccd1 36585+ ino_t ino;
5afbbe0d 36586+ aufs_bindex_t bindex, bbot, bshared;
b752ccd1
AM
36587+ struct {
36588+ struct file *old, *new;
36589+ } *fpair, *p;
36590+ struct au_branch *br;
36591+ struct inode *inode;
5527c038 36592+ vfs_writef_t writef;
1facf9fc 36593+
b752ccd1
AM
36594+ SiMustWriteLock(sb);
36595+
36596+ err = -ENOMEM;
5afbbe0d
AM
36597+ bbot = au_sbbot(sb);
36598+ fpair = kcalloc(bbot + 1, sizeof(*fpair), GFP_NOFS);
b752ccd1 36599+ if (unlikely(!fpair))
1facf9fc 36600+ goto out;
36601+
5527c038 36602+ inode = d_inode(sb->s_root);
b752ccd1
AM
36603+ ino = AUFS_ROOT_INO;
36604+ writef = au_sbi(sb)->si_xwrite;
5afbbe0d 36605+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
b752ccd1
AM
36606+ bshared = is_sb_shared(sb, bindex, bindex - 1);
36607+ if (bshared >= 0) {
36608+ /* shared xino */
36609+ *p = fpair[bshared];
36610+ get_file(p->new);
36611+ }
36612+
36613+ if (!p->new) {
36614+ /* new xino */
5afbbe0d 36615+ br = au_sbr(sb, bindex);
b752ccd1
AM
36616+ p->old = br->br_xino.xi_file;
36617+ p->new = au_xino_create2(base, br->br_xino.xi_file);
36618+ err = PTR_ERR(p->new);
36619+ if (IS_ERR(p->new)) {
36620+ p->new = NULL;
36621+ goto out_pair;
36622+ }
36623+ }
36624+
36625+ err = au_xino_do_write(writef, p->new,
36626+ au_h_iptr(inode, bindex)->i_ino, ino);
36627+ if (unlikely(err))
36628+ goto out_pair;
36629+ }
36630+
5afbbe0d 36631+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
b752ccd1
AM
36632+ br = au_sbr(sb, bindex);
36633+ if (br->br_xino.xi_file)
36634+ fput(br->br_xino.xi_file);
36635+ get_file(p->new);
36636+ br->br_xino.xi_file = p->new;
36637+ }
1facf9fc 36638+
4f0767ce 36639+out_pair:
5afbbe0d 36640+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++)
b752ccd1
AM
36641+ if (p->new)
36642+ fput(p->new);
36643+ else
36644+ break;
ae9dfd79 36645+ kfree(fpair);
4f0767ce 36646+out:
1facf9fc 36647+ return err;
36648+}
b752ccd1
AM
36649+
36650+void au_xino_clr(struct super_block *sb)
36651+{
36652+ struct au_sbinfo *sbinfo;
36653+
36654+ au_xigen_clr(sb);
36655+ xino_clear_xib(sb);
36656+ xino_clear_br(sb);
36657+ sbinfo = au_sbi(sb);
36658+ /* lvalue, do not call au_mntflags() */
36659+ au_opt_clr(sbinfo->si_mntflags, XINO);
36660+}
36661+
36662+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
36663+{
36664+ int err, skip;
36665+ struct dentry *parent, *cur_parent;
36666+ struct qstr *dname, *cur_name;
36667+ struct file *cur_xino;
36668+ struct inode *dir;
36669+ struct au_sbinfo *sbinfo;
36670+
36671+ SiMustWriteLock(sb);
36672+
36673+ err = 0;
36674+ sbinfo = au_sbi(sb);
2000de60 36675+ parent = dget_parent(xino->file->f_path.dentry);
b752ccd1
AM
36676+ if (remount) {
36677+ skip = 0;
2000de60 36678+ dname = &xino->file->f_path.dentry->d_name;
b752ccd1
AM
36679+ cur_xino = sbinfo->si_xib;
36680+ if (cur_xino) {
2000de60
JR
36681+ cur_parent = dget_parent(cur_xino->f_path.dentry);
36682+ cur_name = &cur_xino->f_path.dentry->d_name;
b752ccd1 36683+ skip = (cur_parent == parent
38d290e6 36684+ && au_qstreq(dname, cur_name));
b752ccd1
AM
36685+ dput(cur_parent);
36686+ }
36687+ if (skip)
36688+ goto out;
36689+ }
36690+
36691+ au_opt_set(sbinfo->si_mntflags, XINO);
5527c038 36692+ dir = d_inode(parent);
febd17d6 36693+ inode_lock_nested(dir, AuLsc_I_PARENT);
b752ccd1
AM
36694+ /* mnt_want_write() is unnecessary here */
36695+ err = au_xino_set_xib(sb, xino->file);
36696+ if (!err)
36697+ err = au_xigen_set(sb, xino->file);
36698+ if (!err)
36699+ err = au_xino_set_br(sb, xino->file);
febd17d6 36700+ inode_unlock(dir);
b752ccd1
AM
36701+ if (!err)
36702+ goto out; /* success */
36703+
36704+ /* reset all */
36705+ AuIOErr("failed creating xino(%d).\n", err);
c1595e42
JR
36706+ au_xigen_clr(sb);
36707+ xino_clear_xib(sb);
b752ccd1 36708+
4f0767ce 36709+out:
b752ccd1
AM
36710+ dput(parent);
36711+ return err;
36712+}
36713+
36714+/* ---------------------------------------------------------------------- */
36715+
36716+/*
36717+ * create a xinofile at the default place/path.
36718+ */
36719+struct file *au_xino_def(struct super_block *sb)
36720+{
36721+ struct file *file;
36722+ char *page, *p;
36723+ struct au_branch *br;
36724+ struct super_block *h_sb;
36725+ struct path path;
5afbbe0d 36726+ aufs_bindex_t bbot, bindex, bwr;
b752ccd1
AM
36727+
36728+ br = NULL;
5afbbe0d 36729+ bbot = au_sbbot(sb);
b752ccd1 36730+ bwr = -1;
5afbbe0d 36731+ for (bindex = 0; bindex <= bbot; bindex++) {
b752ccd1
AM
36732+ br = au_sbr(sb, bindex);
36733+ if (au_br_writable(br->br_perm)
86dc4139 36734+ && !au_test_fs_bad_xino(au_br_sb(br))) {
b752ccd1
AM
36735+ bwr = bindex;
36736+ break;
36737+ }
36738+ }
36739+
7f207e10
AM
36740+ if (bwr >= 0) {
36741+ file = ERR_PTR(-ENOMEM);
537831f9 36742+ page = (void *)__get_free_page(GFP_NOFS);
7f207e10
AM
36743+ if (unlikely(!page))
36744+ goto out;
86dc4139 36745+ path.mnt = au_br_mnt(br);
7f207e10
AM
36746+ path.dentry = au_h_dptr(sb->s_root, bwr);
36747+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
36748+ file = (void *)p;
36749+ if (!IS_ERR(p)) {
36750+ strcat(p, "/" AUFS_XINO_FNAME);
36751+ AuDbg("%s\n", p);
36752+ file = au_xino_create(sb, p, /*silent*/0);
36753+ if (!IS_ERR(file))
36754+ au_xino_brid_set(sb, br->br_id);
36755+ }
ae9dfd79 36756+ free_page((unsigned long)page);
7f207e10
AM
36757+ } else {
36758+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
36759+ if (IS_ERR(file))
36760+ goto out;
2000de60 36761+ h_sb = file->f_path.dentry->d_sb;
7f207e10
AM
36762+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
36763+ pr_err("xino doesn't support %s(%s)\n",
36764+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
36765+ fput(file);
36766+ file = ERR_PTR(-EINVAL);
36767+ }
36768+ if (!IS_ERR(file))
36769+ au_xino_brid_set(sb, -1);
36770+ }
0c5527e5 36771+
7f207e10
AM
36772+out:
36773+ return file;
36774+}
36775+
36776+/* ---------------------------------------------------------------------- */
36777+
36778+int au_xino_path(struct seq_file *seq, struct file *file)
36779+{
36780+ int err;
36781+
36782+ err = au_seq_path(seq, &file->f_path);
79b8bda9 36783+ if (unlikely(err))
7f207e10
AM
36784+ goto out;
36785+
7f207e10
AM
36786+#define Deleted "\\040(deleted)"
36787+ seq->count -= sizeof(Deleted) - 1;
36788+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
36789+ sizeof(Deleted) - 1));
36790+#undef Deleted
36791+
36792+out:
36793+ return err;
36794+}
ae9dfd79
AM
36795+
36796+/* ---------------------------------------------------------------------- */
36797+
36798+void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
36799+ ino_t h_ino, int idx)
36800+{
36801+ struct au_xino_file *xino;
36802+
36803+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
36804+ xino = &au_sbr(sb, bindex)->br_xino;
36805+ AuDebugOn(idx < 0 || xino->xi_nondir.total <= idx);
36806+
36807+ spin_lock(&xino->xi_nondir.spin);
36808+ AuDebugOn(xino->xi_nondir.array[idx] != h_ino);
36809+ xino->xi_nondir.array[idx] = 0;
36810+ spin_unlock(&xino->xi_nondir.spin);
36811+ wake_up_all(&xino->xi_nondir.wqh);
36812+}
36813+
36814+static int au_xinondir_find(struct au_xino_file *xino, ino_t h_ino)
36815+{
36816+ int found, total, i;
36817+
36818+ found = -1;
36819+ total = xino->xi_nondir.total;
36820+ for (i = 0; i < total; i++) {
36821+ if (xino->xi_nondir.array[i] != h_ino)
36822+ continue;
36823+ found = i;
36824+ break;
36825+ }
36826+
36827+ return found;
36828+}
36829+
36830+static int au_xinondir_expand(struct au_xino_file *xino)
36831+{
36832+ int err, sz;
36833+ ino_t *p;
36834+
36835+ BUILD_BUG_ON(KMALLOC_MAX_SIZE > INT_MAX);
36836+
36837+ err = -ENOMEM;
36838+ sz = xino->xi_nondir.total * sizeof(ino_t);
36839+ if (unlikely(sz > KMALLOC_MAX_SIZE / 2))
36840+ goto out;
36841+ p = au_kzrealloc(xino->xi_nondir.array, sz, sz << 1, GFP_ATOMIC,
36842+ /*may_shrink*/0);
36843+ if (p) {
36844+ xino->xi_nondir.array = p;
36845+ xino->xi_nondir.total <<= 1;
36846+ AuDbg("xi_nondir.total %d\n", xino->xi_nondir.total);
36847+ err = 0;
36848+ }
36849+
36850+out:
36851+ return err;
36852+}
36853+
36854+int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
36855+ int *idx)
36856+{
36857+ int err, found, empty;
36858+ struct au_xino_file *xino;
36859+
36860+ err = 0;
36861+ *idx = -1;
36862+ if (!au_opt_test(au_mntflags(sb), XINO))
36863+ goto out; /* no xino */
36864+
36865+ xino = &au_sbr(sb, bindex)->br_xino;
36866+
36867+again:
36868+ spin_lock(&xino->xi_nondir.spin);
36869+ found = au_xinondir_find(xino, h_ino);
36870+ if (found == -1) {
36871+ empty = au_xinondir_find(xino, /*h_ino*/0);
36872+ if (empty == -1) {
36873+ empty = xino->xi_nondir.total;
36874+ err = au_xinondir_expand(xino);
36875+ if (unlikely(err))
36876+ goto out_unlock;
36877+ }
36878+ xino->xi_nondir.array[empty] = h_ino;
36879+ *idx = empty;
36880+ } else {
36881+ spin_unlock(&xino->xi_nondir.spin);
36882+ wait_event(xino->xi_nondir.wqh,
36883+ xino->xi_nondir.array[found] != h_ino);
36884+ goto again;
36885+ }
36886+
e8791d4f
AM
36887+out_unlock:
36888+ spin_unlock(&xino->xi_nondir.spin);
36889+out:
36890+ return err;
36891+}
36892diff -urNp -x '*.orig' linux-4.9/fs/dcache.c linux-4.9/fs/dcache.c
36893--- linux-4.9/fs/dcache.c 2021-02-24 16:14:57.291175760 +0100
36894+++ linux-4.9/fs/dcache.c 2021-02-24 16:15:09.514906646 +0100
36895@@ -1206,7 +1206,7 @@ enum d_walk_ret {
36896 *
36897 * The @enter() and @finish() callbacks are called with d_lock held.
36898 */
36899-static void d_walk(struct dentry *parent, void *data,
36900+void d_walk(struct dentry *parent, void *data,
36901 enum d_walk_ret (*enter)(void *, struct dentry *),
36902 void (*finish)(void *))
36903 {
36904@@ -1314,6 +1314,7 @@ rename_retry:
36905 seq = 1;
36906 goto again;
36907 }
36908+EXPORT_SYMBOL_GPL(d_walk);
36909
36910 /*
36911 * Search for at least 1 mount point in the dentry's subdirs.
36912@@ -2935,6 +2936,7 @@ void d_exchange(struct dentry *dentry1,
36913
36914 write_sequnlock(&rename_lock);
36915 }
36916+EXPORT_SYMBOL_GPL(d_exchange);
36917
36918 /**
36919 * d_ancestor - search for an ancestor
36920diff -urNp -x '*.orig' linux-4.9/fs/exec.c linux-4.9/fs/exec.c
36921--- linux-4.9/fs/exec.c 2021-02-24 16:14:56.427814341 +0100
36922+++ linux-4.9/fs/exec.c 2021-02-24 16:15:09.514906646 +0100
36923@@ -104,6 +104,7 @@ bool path_noexec(const struct path *path
36924 return (path->mnt->mnt_flags & MNT_NOEXEC) ||
36925 (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
36926 }
36927+EXPORT_SYMBOL_GPL(path_noexec);
36928
36929 #ifdef CONFIG_USELIB
36930 /*
36931diff -urNp -x '*.orig' linux-4.9/fs/fcntl.c linux-4.9/fs/fcntl.c
36932--- linux-4.9/fs/fcntl.c 2021-02-24 16:14:57.297842644 +0100
36933+++ linux-4.9/fs/fcntl.c 2021-02-24 16:15:09.514906646 +0100
36934@@ -30,7 +30,7 @@
36935
36936 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
36937
36938-static int setfl(int fd, struct file * filp, unsigned long arg)
36939+int setfl(int fd, struct file * filp, unsigned long arg)
36940 {
36941 struct inode * inode = file_inode(filp);
36942 int error = 0;
36943@@ -61,6 +61,8 @@ static int setfl(int fd, struct file * f
36944
36945 if (filp->f_op->check_flags)
36946 error = filp->f_op->check_flags(arg);
36947+ if (!error && filp->f_op->setfl)
36948+ error = filp->f_op->setfl(filp, arg);
36949 if (error)
36950 return error;
36951
36952@@ -81,6 +83,7 @@ static int setfl(int fd, struct file * f
36953 out:
36954 return error;
36955 }
36956+EXPORT_SYMBOL_GPL(setfl);
36957
36958 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
36959 int force)
36960diff -urNp -x '*.orig' linux-4.9/fs/file_table.c linux-4.9/fs/file_table.c
36961--- linux-4.9/fs/file_table.c 2021-02-24 16:14:57.297842644 +0100
36962+++ linux-4.9/fs/file_table.c 2021-02-24 16:15:09.514906646 +0100
36963@@ -151,6 +151,7 @@ over:
36964 }
36965 return ERR_PTR(-ENFILE);
36966 }
36967+EXPORT_SYMBOL_GPL(get_empty_filp);
36968
36969 /**
36970 * alloc_file - allocate and initialize a 'struct file'
36971@@ -264,6 +265,7 @@ void flush_delayed_fput(void)
36972 {
36973 delayed_fput(NULL);
36974 }
36975+EXPORT_SYMBOL_GPL(flush_delayed_fput);
36976
36977 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
36978
36979@@ -306,6 +308,7 @@ void __fput_sync(struct file *file)
36980 }
36981
36982 EXPORT_SYMBOL(fput);
36983+EXPORT_SYMBOL_GPL(__fput_sync);
36984
36985 void put_filp(struct file *file)
36986 {
36987@@ -316,6 +319,7 @@ void put_filp(struct file *file)
36988 file_free(file);
36989 }
36990 }
36991+EXPORT_SYMBOL_GPL(put_filp);
36992
36993 void __init files_init(void)
36994 {
36995diff -urNp -x '*.orig' linux-4.9/fs/inode.c linux-4.9/fs/inode.c
36996--- linux-4.9/fs/inode.c 2021-02-24 16:14:57.301176086 +0100
36997+++ linux-4.9/fs/inode.c 2021-02-24 16:15:09.514906646 +0100
36998@@ -1651,7 +1651,7 @@ EXPORT_SYMBOL(generic_update_time);
36999 * This does the actual work of updating an inodes time or version. Must have
37000 * had called mnt_want_write() before calling this.
37001 */
37002-static int update_time(struct inode *inode, struct timespec *time, int flags)
37003+int update_time(struct inode *inode, struct timespec *time, int flags)
37004 {
37005 int (*update_time)(struct inode *, struct timespec *, int);
37006
37007@@ -1660,6 +1660,7 @@ static int update_time(struct inode *ino
37008
37009 return update_time(inode, time, flags);
37010 }
37011+EXPORT_SYMBOL_GPL(update_time);
37012
37013 /**
37014 * touch_atime - update the access time
37015diff -urNp -x '*.orig' linux-4.9/fs/namespace.c linux-4.9/fs/namespace.c
37016--- linux-4.9/fs/namespace.c 2021-02-24 16:14:57.304509527 +0100
37017+++ linux-4.9/fs/namespace.c 2021-02-24 16:15:09.514906646 +0100
37018@@ -472,6 +472,7 @@ void __mnt_drop_write(struct vfsmount *m
37019 mnt_dec_writers(real_mount(mnt));
37020 preempt_enable();
37021 }
37022+EXPORT_SYMBOL_GPL(__mnt_drop_write);
37023
37024 /**
37025 * mnt_drop_write - give up write access to a mount
37026@@ -804,6 +805,13 @@ static inline int check_mnt(struct mount
37027 return mnt->mnt_ns == current->nsproxy->mnt_ns;
37028 }
37029
37030+/* for aufs, CONFIG_AUFS_BR_FUSE */
37031+int is_current_mnt_ns(struct vfsmount *mnt)
37032+{
37033+ return check_mnt(real_mount(mnt));
37034+}
37035+EXPORT_SYMBOL_GPL(is_current_mnt_ns);
37036+
37037 /*
37038 * vfsmount lock must be held for write
37039 */
37040@@ -1905,6 +1913,7 @@ int iterate_mounts(int (*f)(struct vfsmo
37041 }
37042 return 0;
37043 }
37044+EXPORT_SYMBOL_GPL(iterate_mounts);
37045
37046 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
37047 {
37048diff -urNp -x '*.orig' linux-4.9/fs/notify/group.c linux-4.9/fs/notify/group.c
37049--- linux-4.9/fs/notify/group.c 2016-12-11 20:17:54.000000000 +0100
37050+++ linux-4.9/fs/notify/group.c 2021-02-24 16:15:09.514906646 +0100
37051@@ -22,6 +22,7 @@
37052 #include <linux/srcu.h>
37053 #include <linux/rculist.h>
37054 #include <linux/wait.h>
37055+#include <linux/module.h>
37056
37057 #include <linux/fsnotify_backend.h>
37058 #include "fsnotify.h"
37059@@ -100,6 +101,7 @@ void fsnotify_get_group(struct fsnotify_
37060 {
37061 atomic_inc(&group->refcnt);
37062 }
37063+EXPORT_SYMBOL_GPL(fsnotify_get_group);
37064
37065 /*
37066 * Drop a reference to a group. Free it if it's through.
37067@@ -109,6 +111,7 @@ void fsnotify_put_group(struct fsnotify_
37068 if (atomic_dec_and_test(&group->refcnt))
37069 fsnotify_final_destroy_group(group);
37070 }
37071+EXPORT_SYMBOL_GPL(fsnotify_put_group);
37072
37073 /*
37074 * Create a new fsnotify_group and hold a reference for the group returned.
37075@@ -137,6 +140,7 @@ struct fsnotify_group *fsnotify_alloc_gr
37076
37077 return group;
37078 }
37079+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
37080
37081 int fsnotify_fasync(int fd, struct file *file, int on)
37082 {
37083diff -urNp -x '*.orig' linux-4.9/fs/notify/mark.c linux-4.9/fs/notify/mark.c
37084--- linux-4.9/fs/notify/mark.c 2016-12-11 20:17:54.000000000 +0100
37085+++ linux-4.9/fs/notify/mark.c 2021-02-24 16:15:09.514906646 +0100
37086@@ -113,6 +113,7 @@ void fsnotify_put_mark(struct fsnotify_m
37087 mark->free_mark(mark);
37088 }
37089 }
37090+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
37091
37092 /* Calculate mask of events for a list of marks */
37093 u32 fsnotify_recalc_mask(struct hlist_head *head)
37094@@ -230,6 +231,7 @@ void fsnotify_destroy_mark(struct fsnoti
37095 mutex_unlock(&group->mark_mutex);
37096 fsnotify_free_mark(mark);
37097 }
37098+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
37099
37100 void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
37101 {
37102@@ -415,6 +417,7 @@ err:
37103
37104 return ret;
37105 }
37106+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
37107
37108 int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
37109 struct inode *inode, struct vfsmount *mnt, int allow_dups)
37110@@ -533,6 +536,7 @@ void fsnotify_init_mark(struct fsnotify_
37111 atomic_set(&mark->refcnt, 1);
37112 mark->free_mark = free_mark;
37113 }
37114+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
37115
37116 /*
37117 * Destroy all marks in destroy_list, waits for SRCU period to finish before
37118diff -urNp -x '*.orig' linux-4.9/fs/open.c linux-4.9/fs/open.c
37119--- linux-4.9/fs/open.c 2021-02-24 16:14:57.311176411 +0100
37120+++ linux-4.9/fs/open.c 2021-02-24 16:15:09.514906646 +0100
37121@@ -69,6 +69,7 @@ int do_truncate(struct dentry *dentry, l
37122 inode_unlock(dentry->d_inode);
37123 return ret;
37124 }
37125+EXPORT_SYMBOL_GPL(do_truncate);
37126
37127 long vfs_truncate(struct path *path, loff_t length)
37128 {
37129@@ -737,6 +738,7 @@ int open_check_o_direct(struct file *f)
37130 }
37131 return 0;
37132 }
37133+EXPORT_SYMBOL_GPL(open_check_o_direct);
37134
37135 static int do_dentry_open(struct file *f,
37136 struct inode *inode,
37137diff -urNp -x '*.orig' linux-4.9/fs/proc/base.c linux-4.9/fs/proc/base.c
37138--- linux-4.9/fs/proc/base.c 2021-02-24 16:14:57.374511804 +0100
37139+++ linux-4.9/fs/proc/base.c 2021-02-24 16:15:09.508239763 +0100
37140@@ -2016,7 +2016,7 @@ static int map_files_get_link(struct den
37141 down_read(&mm->mmap_sem);
37142 vma = find_exact_vma(mm, vm_start, vm_end);
37143 if (vma && vma->vm_file) {
37144- *path = vma->vm_file->f_path;
37145+ *path = vma_pr_or_file(vma)->f_path;
37146 path_get(path);
37147 rc = 0;
37148 }
37149diff -urNp -x '*.orig' linux-4.9/fs/proc/nommu.c linux-4.9/fs/proc/nommu.c
37150--- linux-4.9/fs/proc/nommu.c 2016-12-11 20:17:54.000000000 +0100
37151+++ linux-4.9/fs/proc/nommu.c 2021-02-24 16:15:09.508239763 +0100
37152@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_
37153 file = region->vm_file;
37154
37155 if (file) {
37156- struct inode *inode = file_inode(region->vm_file);
37157+ struct inode *inode;
37158+
37159+ file = vmr_pr_or_file(region);
37160+ inode = file_inode(file);
37161 dev = inode->i_sb->s_dev;
37162 ino = inode->i_ino;
37163 }
37164diff -urNp -x '*.orig' linux-4.9/fs/proc/task_mmu.c linux-4.9/fs/proc/task_mmu.c
37165--- linux-4.9/fs/proc/task_mmu.c 2021-02-24 16:14:56.501150059 +0100
37166+++ linux-4.9/fs/proc/task_mmu.c 2021-02-24 16:15:09.508239763 +0100
37167@@ -291,7 +291,10 @@ show_map_vma(struct seq_file *m, struct
37168 const char *name = NULL;
37169
37170 if (file) {
37171- struct inode *inode = file_inode(vma->vm_file);
37172+ struct inode *inode;
37173+
37174+ file = vma_pr_or_file(vma);
37175+ inode = file_inode(file);
37176 dev = inode->i_sb->s_dev;
37177 ino = inode->i_ino;
37178 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
37179@@ -1648,7 +1651,7 @@ static int show_numa_map(struct seq_file
37180 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
37181 struct vm_area_struct *vma = v;
37182 struct numa_maps *md = &numa_priv->md;
37183- struct file *file = vma->vm_file;
37184+ struct file *file = vma_pr_or_file(vma);
37185 struct mm_struct *mm = vma->vm_mm;
37186 struct mm_walk walk = {
37187 .hugetlb_entry = gather_hugetlb_stats,
37188diff -urNp -x '*.orig' linux-4.9/fs/proc/task_nommu.c linux-4.9/fs/proc/task_nommu.c
37189--- linux-4.9/fs/proc/task_nommu.c 2016-12-11 20:17:54.000000000 +0100
37190+++ linux-4.9/fs/proc/task_nommu.c 2021-02-24 16:15:09.508239763 +0100
37191@@ -155,7 +155,10 @@ static int nommu_vma_show(struct seq_fil
37192 file = vma->vm_file;
37193
37194 if (file) {
37195- struct inode *inode = file_inode(vma->vm_file);
37196+ struct inode *inode;
37197+
37198+ file = vma_pr_or_file(vma);
37199+ inode = file_inode(file);
37200 dev = inode->i_sb->s_dev;
37201 ino = inode->i_ino;
37202 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
37203diff -urNp -x '*.orig' linux-4.9/fs/read_write.c linux-4.9/fs/read_write.c
37204--- linux-4.9/fs/read_write.c 2021-02-24 16:14:56.504483501 +0100
37205+++ linux-4.9/fs/read_write.c 2021-02-24 16:15:09.514906646 +0100
37206@@ -517,6 +517,30 @@ ssize_t __vfs_write(struct file *file, c
37207 }
37208 EXPORT_SYMBOL(__vfs_write);
37209
37210+vfs_readf_t vfs_readf(struct file *file)
37211+{
37212+ const struct file_operations *fop = file->f_op;
37213+
37214+ if (fop->read)
37215+ return fop->read;
37216+ if (fop->read_iter)
37217+ return new_sync_read;
37218+ return ERR_PTR(-ENOSYS);
37219+}
37220+EXPORT_SYMBOL_GPL(vfs_readf);
37221+
37222+vfs_writef_t vfs_writef(struct file *file)
37223+{
37224+ const struct file_operations *fop = file->f_op;
37225+
37226+ if (fop->write)
37227+ return fop->write;
37228+ if (fop->write_iter)
37229+ return new_sync_write;
37230+ return ERR_PTR(-ENOSYS);
37231+}
37232+EXPORT_SYMBOL_GPL(vfs_writef);
37233+
37234 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
37235 {
37236 mm_segment_t old_fs;
37237diff -urNp -x '*.orig' linux-4.9/fs/splice.c linux-4.9/fs/splice.c
37238--- linux-4.9/fs/splice.c 2021-02-24 16:14:56.507816943 +0100
37239+++ linux-4.9/fs/splice.c 2021-02-24 16:15:09.518240088 +0100
37240@@ -856,8 +856,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
37241 /*
37242 * Attempt to initiate a splice from pipe to file.
37243 */
37244-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
37245- loff_t *ppos, size_t len, unsigned int flags)
37246+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
37247+ loff_t *ppos, size_t len, unsigned int flags)
37248 {
37249 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
37250 loff_t *, size_t, unsigned int);
37251@@ -869,13 +869,14 @@ static long do_splice_from(struct pipe_i
37252
37253 return splice_write(pipe, out, ppos, len, flags);
37254 }
37255+EXPORT_SYMBOL_GPL(do_splice_from);
37256
37257 /*
37258 * Attempt to initiate a splice from a file to a pipe.
37259 */
37260-static long do_splice_to(struct file *in, loff_t *ppos,
37261- struct pipe_inode_info *pipe, size_t len,
37262- unsigned int flags)
37263+long do_splice_to(struct file *in, loff_t *ppos,
37264+ struct pipe_inode_info *pipe, size_t len,
37265+ unsigned int flags)
37266 {
37267 ssize_t (*splice_read)(struct file *, loff_t *,
37268 struct pipe_inode_info *, size_t, unsigned int);
37269@@ -898,6 +899,7 @@ static long do_splice_to(struct file *in
37270
37271 return splice_read(in, ppos, pipe, len, flags);
37272 }
37273+EXPORT_SYMBOL_GPL(do_splice_to);
37274
37275 /**
37276 * splice_direct_to_actor - splices data directly between two non-pipes
37277diff -urNp -x '*.orig' linux-4.9/fs/sync.c linux-4.9/fs/sync.c
37278--- linux-4.9/fs/sync.c 2016-12-11 20:17:54.000000000 +0100
37279+++ linux-4.9/fs/sync.c 2021-02-24 16:15:09.518240088 +0100
37280@@ -27,7 +27,7 @@
37281 * wait == 1 case since in that case write_inode() functions do
37282 * sync_dirty_buffer() and thus effectively write one block at a time.
37283 */
37284-static int __sync_filesystem(struct super_block *sb, int wait)
37285+int __sync_filesystem(struct super_block *sb, int wait)
37286 {
37287 if (wait)
37288 sync_inodes_sb(sb);
37289@@ -38,6 +38,7 @@ static int __sync_filesystem(struct supe
37290 sb->s_op->sync_fs(sb, wait);
37291 return __sync_blockdev(sb->s_bdev, wait);
37292 }
37293+EXPORT_SYMBOL_GPL(__sync_filesystem);
37294
37295 /*
37296 * Write out and wait upon all dirty data associated with this
37297diff -urNp -x '*.orig' linux-4.9/fs/xattr.c linux-4.9/fs/xattr.c
37298--- linux-4.9/fs/xattr.c 2021-02-24 16:14:57.317843295 +0100
37299+++ linux-4.9/fs/xattr.c 2021-02-24 16:15:09.518240088 +0100
37300@@ -334,6 +334,7 @@ vfs_getxattr_alloc(struct dentry *dentry
37301 *xattr_value = value;
37302 return error;
37303 }
37304+EXPORT_SYMBOL_GPL(vfs_getxattr_alloc);
37305
37306 ssize_t
37307 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
37308diff -urNp -x '*.orig' linux-4.9/include/linux/file.h linux-4.9/include/linux/file.h
37309--- linux-4.9/include/linux/file.h 2016-12-11 20:17:54.000000000 +0100
37310+++ linux-4.9/include/linux/file.h 2021-02-24 16:15:09.508239763 +0100
37311@@ -19,6 +19,7 @@ struct dentry;
37312 struct path;
37313 extern struct file *alloc_file(struct path *, fmode_t mode,
37314 const struct file_operations *fop);
37315+extern struct file *get_empty_filp(void);
37316
37317 static inline void fput_light(struct file *file, int fput_needed)
37318 {
37319diff -urNp -x '*.orig' linux-4.9/include/linux/fs.h linux-4.9/include/linux/fs.h
37320--- linux-4.9/include/linux/fs.h 2021-02-24 16:14:57.317843295 +0100
37321+++ linux-4.9/include/linux/fs.h 2021-02-24 16:15:09.541574180 +0100
37322@@ -1315,6 +1315,7 @@ extern void fasync_free(struct fasync_st
37323 /* can be called from interrupts */
37324 extern void kill_fasync(struct fasync_struct **, int, int);
37325
37326+extern int setfl(int fd, struct file * filp, unsigned long arg);
37327 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
37328 extern void f_setown(struct file *filp, unsigned long arg, int force);
37329 extern void f_delown(struct file *filp);
37330@@ -1745,6 +1746,7 @@ struct file_operations {
37331 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
37332 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
37333 int (*check_flags)(int);
37334+ int (*setfl)(struct file *, unsigned long);
37335 int (*flock) (struct file *, int, struct file_lock *);
37336 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
37337 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
37338@@ -1800,6 +1802,12 @@ ssize_t rw_copy_check_uvector(int type,
37339 struct iovec **ret_pointer);
37340 ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
37341
37342+typedef ssize_t (*vfs_readf_t)(struct file *, char __user *, size_t, loff_t *);
37343+typedef ssize_t (*vfs_writef_t)(struct file *, const char __user *, size_t,
37344+ loff_t *);
37345+vfs_readf_t vfs_readf(struct file *file);
37346+vfs_writef_t vfs_writef(struct file *file);
37347+
37348 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
37349 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
37350 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
37351@@ -1847,6 +1855,10 @@ struct super_operations {
37352 struct shrink_control *);
37353 long (*free_cached_objects)(struct super_block *,
37354 struct shrink_control *);
37355+#if defined(CONFIG_BLK_DEV_LOOP) || defined(CONFIG_BLK_DEV_LOOP_MODULE)
37356+ /* and aufs */
37357+ struct file *(*real_loop)(struct file *);
37358+#endif
37359 };
37360
37361 /*
37362@@ -2197,6 +2209,7 @@ extern int current_umask(void);
37363 extern void ihold(struct inode * inode);
37364 extern void iput(struct inode *);
37365 extern int generic_update_time(struct inode *, struct timespec *, int);
37366+extern int update_time(struct inode *, struct timespec *, int);
37367
37368 /* /sys/fs */
37369 extern struct kobject *fs_kobj;
37370@@ -2479,6 +2492,7 @@ static inline bool sb_is_blkdev_sb(struc
37371 return false;
37372 }
37373 #endif
37374+extern int __sync_filesystem(struct super_block *, int);
37375 extern int sync_filesystem(struct super_block *);
37376 extern const struct file_operations def_blk_fops;
37377 extern const struct file_operations def_chr_fops;
37378diff -urNp -x '*.orig' linux-4.9/include/linux/mm.h linux-4.9/include/linux/mm.h
37379--- linux-4.9/include/linux/mm.h 2021-02-24 16:14:56.574485779 +0100
37380+++ linux-4.9/include/linux/mm.h 2021-02-24 16:15:09.508239763 +0100
37381@@ -1314,6 +1314,28 @@ static inline int fixup_user_fault(struc
37382 }
37383 #endif
37384
37385+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
37386+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
37387+ int);
37388+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
37389+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
37390+
37391+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \
37392+ __LINE__)
37393+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \
37394+ __LINE__)
37395+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__)
37396+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__)
37397+
37398+#ifndef CONFIG_MMU
37399+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
37400+extern void vmr_do_fput(struct vm_region *, const char[], int);
37401+
37402+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \
37403+ __LINE__)
37404+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__)
37405+#endif /* !CONFIG_MMU */
37406+
37407 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
37408 unsigned int gup_flags);
37409 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
37410diff -urNp -x '*.orig' linux-4.9/include/linux/mm_types.h linux-4.9/include/linux/mm_types.h
37411--- linux-4.9/include/linux/mm_types.h 2021-02-24 16:14:56.574485779 +0100
37412+++ linux-4.9/include/linux/mm_types.h 2021-02-24 16:15:09.508239763 +0100
37413@@ -280,6 +280,7 @@ struct vm_region {
37414 unsigned long vm_top; /* region allocated to here */
37415 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
37416 struct file *vm_file; /* the backing file or NULL */
37417+ struct file *vm_prfile; /* the virtual backing file or NULL */
37418
37419 int vm_usage; /* region usage count (access under nommu_region_sem) */
37420 bool vm_icache_flushed : 1; /* true if the icache has been flushed for
37421@@ -354,6 +355,7 @@ struct vm_area_struct {
37422 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
37423 units */
37424 struct file * vm_file; /* File we map to (can be NULL). */
37425+ struct file *vm_prfile; /* shadow of vm_file */
37426 void * vm_private_data; /* was vm_pte (shared mem) */
37427
37428 #ifndef CONFIG_MMU
37429diff -urNp -x '*.orig' linux-4.9/include/linux/mnt_namespace.h linux-4.9/include/linux/mnt_namespace.h
37430--- linux-4.9/include/linux/mnt_namespace.h 2016-12-11 20:17:54.000000000 +0100
37431+++ linux-4.9/include/linux/mnt_namespace.h 2021-02-24 16:15:09.508239763 +0100
37432@@ -5,11 +5,14 @@
37433 struct mnt_namespace;
37434 struct fs_struct;
37435 struct user_namespace;
37436+struct vfsmount;
37437
37438 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
37439 struct user_namespace *, struct fs_struct *);
37440 extern void put_mnt_ns(struct mnt_namespace *ns);
37441
37442+extern int is_current_mnt_ns(struct vfsmount *mnt);
37443+
37444 extern const struct file_operations proc_mounts_operations;
37445 extern const struct file_operations proc_mountinfo_operations;
37446 extern const struct file_operations proc_mountstats_operations;
37447diff -urNp -x '*.orig' linux-4.9/include/linux/splice.h linux-4.9/include/linux/splice.h
37448--- linux-4.9/include/linux/splice.h 2016-12-11 20:17:54.000000000 +0100
37449+++ linux-4.9/include/linux/splice.h 2021-02-24 16:15:09.508239763 +0100
37450@@ -86,4 +86,10 @@ extern void spd_release_page(struct spli
37451
37452 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
37453 extern const struct pipe_buf_operations default_pipe_buf_ops;
37454+
37455+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
37456+ loff_t *ppos, size_t len, unsigned int flags);
37457+extern long do_splice_to(struct file *in, loff_t *ppos,
37458+ struct pipe_inode_info *pipe, size_t len,
37459+ unsigned int flags);
37460 #endif
37461diff -urNp -x '*.orig' linux-4.9/include/uapi/linux/Kbuild linux-4.9/include/uapi/linux/Kbuild
37462--- linux-4.9/include/uapi/linux/Kbuild 2021-02-24 16:14:56.617820522 +0100
37463+++ linux-4.9/include/uapi/linux/Kbuild 2021-02-24 16:15:09.501572879 +0100
37464@@ -59,6 +59,7 @@ header-y += atmsvc.h
37465 header-y += atm_tcp.h
37466 header-y += atm_zatm.h
37467 header-y += audit.h
37468+header-y += aufs_type.h
37469 header-y += auto_fs4.h
37470 header-y += auto_fs.h
37471 header-y += auxvec.h
37472diff -urNp -x '*.orig' linux-4.9/include/uapi/linux/aufs_type.h linux-4.9/include/uapi/linux/aufs_type.h
37473--- linux-4.9/include/uapi/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
37474+++ linux-4.9/include/uapi/linux/aufs_type.h 2021-02-24 16:15:09.538240738 +0100
ae9dfd79 37475@@ -0,0 +1,447 @@
7f207e10 37476+/*
ae9dfd79 37477+ * Copyright (C) 2005-2018 Junjiro R. Okajima
7f207e10
AM
37478+ *
37479+ * This program, aufs is free software; you can redistribute it and/or modify
37480+ * it under the terms of the GNU General Public License as published by
37481+ * the Free Software Foundation; either version 2 of the License, or
37482+ * (at your option) any later version.
37483+ *
37484+ * This program is distributed in the hope that it will be useful,
37485+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
37486+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37487+ * GNU General Public License for more details.
37488+ *
37489+ * You should have received a copy of the GNU General Public License
523b37e3 37490+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
37491+ */
37492+
37493+#ifndef __AUFS_TYPE_H__
37494+#define __AUFS_TYPE_H__
37495+
f6c5ef8b
AM
37496+#define AUFS_NAME "aufs"
37497+
9dbd164d 37498+#ifdef __KERNEL__
f6c5ef8b
AM
37499+/*
37500+ * define it before including all other headers.
37501+ * sched.h may use pr_* macros before defining "current", so define the
37502+ * no-current version first, and re-define later.
37503+ */
37504+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
37505+#include <linux/sched.h>
37506+#undef pr_fmt
a2a7ad62
AM
37507+#define pr_fmt(fmt) \
37508+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
37509+ (int)sizeof(current->comm), current->comm, current->pid
9dbd164d
AM
37510+#else
37511+#include <stdint.h>
37512+#include <sys/types.h>
f6c5ef8b 37513+#endif /* __KERNEL__ */
7f207e10 37514+
f6c5ef8b
AM
37515+#include <linux/limits.h>
37516+
ae9dfd79 37517+#define AUFS_VERSION "4.9-20180409"
7f207e10
AM
37518+
37519+/* todo? move this to linux-2.6.19/include/magic.h */
37520+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
37521+
37522+/* ---------------------------------------------------------------------- */
37523+
37524+#ifdef CONFIG_AUFS_BRANCH_MAX_127
9dbd164d 37525+typedef int8_t aufs_bindex_t;
7f207e10
AM
37526+#define AUFS_BRANCH_MAX 127
37527+#else
9dbd164d 37528+typedef int16_t aufs_bindex_t;
7f207e10
AM
37529+#ifdef CONFIG_AUFS_BRANCH_MAX_511
37530+#define AUFS_BRANCH_MAX 511
37531+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
37532+#define AUFS_BRANCH_MAX 1023
37533+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
37534+#define AUFS_BRANCH_MAX 32767
37535+#endif
37536+#endif
37537+
37538+#ifdef __KERNEL__
37539+#ifndef AUFS_BRANCH_MAX
37540+#error unknown CONFIG_AUFS_BRANCH_MAX value
37541+#endif
37542+#endif /* __KERNEL__ */
37543+
37544+/* ---------------------------------------------------------------------- */
37545+
7f207e10
AM
37546+#define AUFS_FSTYPE AUFS_NAME
37547+
37548+#define AUFS_ROOT_INO 2
37549+#define AUFS_FIRST_INO 11
37550+
37551+#define AUFS_WH_PFX ".wh."
37552+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
37553+#define AUFS_WH_TMP_LEN 4
86dc4139 37554+/* a limit for rmdir/rename a dir and copyup */
7f207e10
AM
37555+#define AUFS_MAX_NAMELEN (NAME_MAX \
37556+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
37557+ - 1 /* dot */\
37558+ - AUFS_WH_TMP_LEN) /* hex */
37559+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
37560+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
392086de
AM
37561+#define AUFS_XINO_DEF_SEC 30 /* seconds */
37562+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
7f207e10
AM
37563+#define AUFS_DIRWH_DEF 3
37564+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 37565+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
37566+#define AUFS_RDBLK_DEF 512 /* bytes */
37567+#define AUFS_RDHASH_DEF 32
37568+#define AUFS_WKQ_NAME AUFS_NAME "d"
027c5e7a
AM
37569+#define AUFS_MFS_DEF_SEC 30 /* seconds */
37570+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
076b876e 37571+#define AUFS_FHSM_CACHE_DEF_SEC 30 /* seconds */
86dc4139 37572+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
7f207e10
AM
37573+
37574+/* pseudo-link maintenance under /proc */
37575+#define AUFS_PLINK_MAINT_NAME "plink_maint"
37576+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
37577+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
37578+
ae9dfd79
AM
37579+/* dirren, renamed dir */
37580+#define AUFS_DR_INFO_PFX AUFS_WH_PFX ".dr."
37581+#define AUFS_DR_BRHINO_NAME AUFS_WH_PFX "hino"
37582+/* whiteouted doubly */
37583+#define AUFS_WH_DR_INFO_PFX AUFS_WH_PFX AUFS_DR_INFO_PFX
37584+#define AUFS_WH_DR_BRHINO AUFS_WH_PFX AUFS_DR_BRHINO_NAME
37585+
7f207e10
AM
37586+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
37587+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
37588+
37589+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
37590+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
37591+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
37592+
37593+/* doubly whiteouted */
37594+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
37595+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
37596+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
37597+
1e00d052 37598+/* branch permissions and attributes */
7f207e10
AM
37599+#define AUFS_BRPERM_RW "rw"
37600+#define AUFS_BRPERM_RO "ro"
37601+#define AUFS_BRPERM_RR "rr"
076b876e
AM
37602+#define AUFS_BRATTR_COO_REG "coo_reg"
37603+#define AUFS_BRATTR_COO_ALL "coo_all"
37604+#define AUFS_BRATTR_FHSM "fhsm"
37605+#define AUFS_BRATTR_UNPIN "unpin"
c1595e42
JR
37606+#define AUFS_BRATTR_ICEX "icex"
37607+#define AUFS_BRATTR_ICEX_SEC "icexsec"
37608+#define AUFS_BRATTR_ICEX_SYS "icexsys"
37609+#define AUFS_BRATTR_ICEX_TR "icextr"
37610+#define AUFS_BRATTR_ICEX_USR "icexusr"
37611+#define AUFS_BRATTR_ICEX_OTH "icexoth"
1e00d052
AM
37612+#define AUFS_BRRATTR_WH "wh"
37613+#define AUFS_BRWATTR_NLWH "nolwh"
076b876e
AM
37614+#define AUFS_BRWATTR_MOO "moo"
37615+
37616+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
37617+#define AuBrPerm_RO (1 << 1) /* readonly */
37618+#define AuBrPerm_RR (1 << 2) /* natively readonly */
37619+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
37620+
37621+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */
37622+#define AuBrAttr_COO_ALL (1 << 4)
37623+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
37624+
37625+#define AuBrAttr_FHSM (1 << 5) /* file-based hsm */
37626+#define AuBrAttr_UNPIN (1 << 6) /* rename-able top dir of
c1595e42
JR
37627+ branch. meaningless since
37628+ linux-3.18-rc1 */
37629+
37630+/* ignore error in copying XATTR */
37631+#define AuBrAttr_ICEX_SEC (1 << 7)
37632+#define AuBrAttr_ICEX_SYS (1 << 8)
37633+#define AuBrAttr_ICEX_TR (1 << 9)
37634+#define AuBrAttr_ICEX_USR (1 << 10)
37635+#define AuBrAttr_ICEX_OTH (1 << 11)
37636+#define AuBrAttr_ICEX (AuBrAttr_ICEX_SEC \
37637+ | AuBrAttr_ICEX_SYS \
37638+ | AuBrAttr_ICEX_TR \
37639+ | AuBrAttr_ICEX_USR \
37640+ | AuBrAttr_ICEX_OTH)
37641+
37642+#define AuBrRAttr_WH (1 << 12) /* whiteout-able */
076b876e
AM
37643+#define AuBrRAttr_Mask AuBrRAttr_WH
37644+
c1595e42
JR
37645+#define AuBrWAttr_NoLinkWH (1 << 13) /* un-hardlinkable whiteouts */
37646+#define AuBrWAttr_MOO (1 << 14) /* move-up on open */
076b876e
AM
37647+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
37648+
37649+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | AuBrWAttr_MOO)
37650+
c1595e42 37651+/* #warning test userspace */
076b876e
AM
37652+#ifdef __KERNEL__
37653+#ifndef CONFIG_AUFS_FHSM
37654+#undef AuBrAttr_FHSM
37655+#define AuBrAttr_FHSM 0
37656+#endif
c1595e42
JR
37657+#ifndef CONFIG_AUFS_XATTR
37658+#undef AuBrAttr_ICEX
37659+#define AuBrAttr_ICEX 0
37660+#undef AuBrAttr_ICEX_SEC
37661+#define AuBrAttr_ICEX_SEC 0
37662+#undef AuBrAttr_ICEX_SYS
37663+#define AuBrAttr_ICEX_SYS 0
37664+#undef AuBrAttr_ICEX_TR
37665+#define AuBrAttr_ICEX_TR 0
37666+#undef AuBrAttr_ICEX_USR
37667+#define AuBrAttr_ICEX_USR 0
37668+#undef AuBrAttr_ICEX_OTH
37669+#define AuBrAttr_ICEX_OTH 0
37670+#endif
076b876e
AM
37671+#endif
37672+
37673+/* the longest combination */
c1595e42
JR
37674+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR don't affect here */
37675+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \
37676+ "+" AUFS_BRATTR_COO_REG \
37677+ "+" AUFS_BRATTR_FHSM \
37678+ "+" AUFS_BRATTR_UNPIN \
7e9cd9fe
AM
37679+ "+" AUFS_BRATTR_ICEX_SEC \
37680+ "+" AUFS_BRATTR_ICEX_SYS \
37681+ "+" AUFS_BRATTR_ICEX_USR \
37682+ "+" AUFS_BRATTR_ICEX_OTH \
076b876e
AM
37683+ "+" AUFS_BRWATTR_NLWH)
37684+
37685+typedef struct {
37686+ char a[AuBrPermStrSz];
37687+} au_br_perm_str_t;
37688+
37689+static inline int au_br_writable(int brperm)
37690+{
37691+ return brperm & AuBrPerm_RW;
37692+}
37693+
37694+static inline int au_br_whable(int brperm)
37695+{
37696+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
37697+}
37698+
37699+static inline int au_br_wh_linkable(int brperm)
37700+{
37701+ return !(brperm & AuBrWAttr_NoLinkWH);
37702+}
37703+
37704+static inline int au_br_cmoo(int brperm)
37705+{
37706+ return brperm & AuBrAttr_CMOO_Mask;
37707+}
37708+
37709+static inline int au_br_fhsm(int brperm)
37710+{
37711+ return brperm & AuBrAttr_FHSM;
37712+}
7f207e10
AM
37713+
37714+/* ---------------------------------------------------------------------- */
37715+
37716+/* ioctl */
37717+enum {
37718+ /* readdir in userspace */
37719+ AuCtl_RDU,
37720+ AuCtl_RDU_INO,
37721+
076b876e
AM
37722+ AuCtl_WBR_FD, /* pathconf wrapper */
37723+ AuCtl_IBUSY, /* busy inode */
37724+ AuCtl_MVDOWN, /* move-down */
37725+ AuCtl_BR, /* info about branches */
37726+ AuCtl_FHSM_FD /* connection for fhsm */
7f207e10
AM
37727+};
37728+
37729+/* borrowed from linux/include/linux/kernel.h */
37730+#ifndef ALIGN
37731+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
37732+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
37733+#endif
37734+
37735+/* borrowed from linux/include/linux/compiler-gcc3.h */
37736+#ifndef __aligned
37737+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
37738+#endif
37739+
37740+#ifdef __KERNEL__
37741+#ifndef __packed
7f207e10
AM
37742+#define __packed __attribute__((packed))
37743+#endif
53392da6 37744+#endif
7f207e10
AM
37745+
37746+struct au_rdu_cookie {
9dbd164d
AM
37747+ uint64_t h_pos;
37748+ int16_t bindex;
37749+ uint8_t flags;
37750+ uint8_t pad;
37751+ uint32_t generation;
7f207e10
AM
37752+} __aligned(8);
37753+
37754+struct au_rdu_ent {
9dbd164d
AM
37755+ uint64_t ino;
37756+ int16_t bindex;
37757+ uint8_t type;
37758+ uint8_t nlen;
37759+ uint8_t wh;
7f207e10
AM
37760+ char name[0];
37761+} __aligned(8);
37762+
37763+static inline int au_rdu_len(int nlen)
37764+{
37765+ /* include the terminating NUL byte */
37766+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
9dbd164d 37767+ sizeof(uint64_t));
7f207e10
AM
37768+}
37769+
37770+union au_rdu_ent_ul {
37771+ struct au_rdu_ent __user *e;
9dbd164d 37772+ uint64_t ul;
7f207e10
AM
37773+};
37774+
37775+enum {
37776+ AufsCtlRduV_SZ,
37777+ AufsCtlRduV_End
37778+};
37779+
37780+struct aufs_rdu {
37781+ /* input */
37782+ union {
9dbd164d
AM
37783+ uint64_t sz; /* AuCtl_RDU */
37784+ uint64_t nent; /* AuCtl_RDU_INO */
7f207e10
AM
37785+ };
37786+ union au_rdu_ent_ul ent;
9dbd164d 37787+ uint16_t verify[AufsCtlRduV_End];
7f207e10
AM
37788+
37789+ /* input/output */
9dbd164d 37790+ uint32_t blk;
7f207e10
AM
37791+
37792+ /* output */
37793+ union au_rdu_ent_ul tail;
37794+ /* number of entries which were added in a single call */
9dbd164d
AM
37795+ uint64_t rent;
37796+ uint8_t full;
37797+ uint8_t shwh;
7f207e10
AM
37798+
37799+ struct au_rdu_cookie cookie;
37800+} __aligned(8);
37801+
1e00d052
AM
37802+/* ---------------------------------------------------------------------- */
37803+
ae9dfd79
AM
37804+/* dirren. the branch is identified by the name of the containing file */
37805+struct au_drinfo {
37806+ uint64_t ino;
37807+ union {
37808+ uint8_t oldnamelen;
37809+ uint64_t _padding;
37810+ };
37811+ uint8_t oldname[0];
37812+} __aligned(8);
37813+
37814+struct au_drinfo_fdata {
37815+ uint32_t magic;
37816+ struct au_drinfo drinfo;
37817+} __aligned(8);
37818+
37819+#define AUFS_DRINFO_MAGIC_V1 ('a' << 24 | 'd' << 16 | 'r' << 8 | 0x01)
37820+/* future */
37821+#define AUFS_DRINFO_MAGIC_V2 ('a' << 24 | 'd' << 16 | 'r' << 8 | 0x02)
37822+
37823+/* ---------------------------------------------------------------------- */
37824+
1e00d052 37825+struct aufs_wbr_fd {
9dbd164d
AM
37826+ uint32_t oflags;
37827+ int16_t brid;
1e00d052
AM
37828+} __aligned(8);
37829+
37830+/* ---------------------------------------------------------------------- */
37831+
027c5e7a 37832+struct aufs_ibusy {
9dbd164d
AM
37833+ uint64_t ino, h_ino;
37834+ int16_t bindex;
027c5e7a
AM
37835+} __aligned(8);
37836+
1e00d052
AM
37837+/* ---------------------------------------------------------------------- */
37838+
392086de
AM
37839+/* error code for move-down */
37840+/* the actual message strings are implemented in aufs-util.git */
37841+enum {
37842+ EAU_MVDOWN_OPAQUE = 1,
37843+ EAU_MVDOWN_WHITEOUT,
37844+ EAU_MVDOWN_UPPER,
37845+ EAU_MVDOWN_BOTTOM,
37846+ EAU_MVDOWN_NOUPPER,
37847+ EAU_MVDOWN_NOLOWERBR,
37848+ EAU_Last
37849+};
37850+
c2b27bf2 37851+/* flags for move-down */
392086de
AM
37852+#define AUFS_MVDOWN_DMSG 1
37853+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */
37854+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */
37855+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */
37856+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */
37857+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */
37858+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */
37859+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */
37860+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */
076b876e
AM
37861+#define AUFS_MVDOWN_FHSM_LOWER (1 << 9) /* find fhsm attr for lower */
37862+#define AUFS_MVDOWN_STFS (1 << 10) /* req. stfs */
37863+#define AUFS_MVDOWN_STFS_FAILED (1 << 11) /* output: stfs is unusable */
37864+#define AUFS_MVDOWN_BOTTOM (1 << 12) /* output: no more lowers */
c2b27bf2 37865+
076b876e 37866+/* index for move-down */
392086de
AM
37867+enum {
37868+ AUFS_MVDOWN_UPPER,
37869+ AUFS_MVDOWN_LOWER,
37870+ AUFS_MVDOWN_NARRAY
37871+};
37872+
076b876e
AM
37873+/*
37874+ * additional info of move-down
37875+ * number of free blocks and inodes.
37876+ * subset of struct kstatfs, but smaller and always 64bit.
37877+ */
37878+struct aufs_stfs {
37879+ uint64_t f_blocks;
37880+ uint64_t f_bavail;
37881+ uint64_t f_files;
37882+ uint64_t f_ffree;
37883+};
37884+
37885+struct aufs_stbr {
37886+ int16_t brid; /* optional input */
37887+ int16_t bindex; /* output */
37888+ struct aufs_stfs stfs; /* output when AUFS_MVDOWN_STFS set */
37889+} __aligned(8);
37890+
c2b27bf2 37891+struct aufs_mvdown {
076b876e
AM
37892+ uint32_t flags; /* input/output */
37893+ struct aufs_stbr stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
37894+ int8_t au_errno; /* output */
37895+} __aligned(8);
37896+
37897+/* ---------------------------------------------------------------------- */
37898+
37899+union aufs_brinfo {
37900+ /* PATH_MAX may differ between kernel-space and user-space */
37901+ char _spacer[4096];
392086de 37902+ struct {
076b876e
AM
37903+ int16_t id;
37904+ int perm;
37905+ char path[0];
37906+ };
c2b27bf2
AM
37907+} __aligned(8);
37908+
37909+/* ---------------------------------------------------------------------- */
37910+
7f207e10
AM
37911+#define AuCtlType 'A'
37912+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
37913+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1e00d052
AM
37914+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
37915+ struct aufs_wbr_fd)
027c5e7a 37916+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
392086de
AM
37917+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \
37918+ struct aufs_mvdown)
076b876e
AM
37919+#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
37920+#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int)
7f207e10
AM
37921+
37922+#endif /* __AUFS_TYPE_H__ */
e8791d4f
AM
37923diff -urNp -x '*.orig' linux-4.9/kernel/fork.c linux-4.9/kernel/fork.c
37924--- linux-4.9/kernel/fork.c 2021-02-24 16:14:57.337843945 +0100
37925+++ linux-4.9/kernel/fork.c 2021-02-24 16:15:09.508239763 +0100
37926@@ -638,7 +638,7 @@ static __latent_entropy int dup_mmap(str
37927 struct inode *inode = file_inode(file);
37928 struct address_space *mapping = file->f_mapping;
37929
37930- get_file(file);
37931+ vma_get_file(tmp);
37932 if (tmp->vm_flags & VM_DENYWRITE)
37933 atomic_dec(&inode->i_writecount);
37934 i_mmap_lock_write(mapping);
37935diff -urNp -x '*.orig' linux-4.9/kernel/task_work.c linux-4.9/kernel/task_work.c
37936--- linux-4.9/kernel/task_work.c 2016-12-11 20:17:54.000000000 +0100
37937+++ linux-4.9/kernel/task_work.c 2021-02-24 16:15:09.518240088 +0100
37938@@ -119,3 +119,4 @@ void task_work_run(void)
37939 } while (work);
37940 }
5527c038 37941 }
e8791d4f
AM
37942+EXPORT_SYMBOL_GPL(task_work_run);
37943diff -urNp -x '*.orig' linux-4.9/mm/Makefile linux-4.9/mm/Makefile
37944--- linux-4.9/mm/Makefile 2016-12-11 20:17:54.000000000 +0100
37945+++ linux-4.9/mm/Makefile 2021-02-24 16:15:09.511573204 +0100
37946@@ -37,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.
37947 mm_init.o mmu_context.o percpu.o slab_common.o \
37948 compaction.o vmacache.o \
37949 interval_tree.o list_lru.o workingset.o \
37950- debug.o $(mmu-y)
37951+ prfile.o debug.o $(mmu-y)
5527c038 37952
e8791d4f 37953 obj-y += init-mm.o
5527c038 37954
e8791d4f
AM
37955diff -urNp -x '*.orig' linux-4.9/mm/filemap.c linux-4.9/mm/filemap.c
37956--- linux-4.9/mm/filemap.c 2021-02-24 16:14:56.697823124 +0100
37957+++ linux-4.9/mm/filemap.c 2021-02-24 16:15:09.511573204 +0100
37958@@ -2312,7 +2312,7 @@ int filemap_page_mkwrite(struct vm_area_
37959 int ret = VM_FAULT_LOCKED;
5527c038 37960
e8791d4f
AM
37961 sb_start_pagefault(inode->i_sb);
37962- file_update_time(vma->vm_file);
37963+ vma_file_update_time(vma);
37964 lock_page(page);
37965 if (page->mapping != inode->i_mapping) {
37966 unlock_page(page);
37967diff -urNp -x '*.orig' linux-4.9/mm/memory.c linux-4.9/mm/memory.c
37968--- linux-4.9/mm/memory.c 2021-02-24 16:14:56.704490008 +0100
37969+++ linux-4.9/mm/memory.c 2021-02-24 16:15:09.511573204 +0100
37970@@ -2124,7 +2124,7 @@ static inline int wp_page_reuse(struct f
37971 }
5527c038 37972
e8791d4f
AM
37973 if (!page_mkwrite)
37974- file_update_time(vma->vm_file);
37975+ vma_file_update_time(vma);
37976 }
5527c038 37977
e8791d4f
AM
37978 return VM_FAULT_WRITE;
37979diff -urNp -x '*.orig' linux-4.9/mm/mmap.c linux-4.9/mm/mmap.c
37980--- linux-4.9/mm/mmap.c 2021-02-24 16:14:56.707823450 +0100
37981+++ linux-4.9/mm/mmap.c 2021-02-24 16:15:09.511573204 +0100
37982@@ -164,7 +164,7 @@ static struct vm_area_struct *remove_vma
37983 if (vma->vm_ops && vma->vm_ops->close)
37984 vma->vm_ops->close(vma);
37985 if (vma->vm_file)
37986- fput(vma->vm_file);
37987+ vma_fput(vma);
37988 mpol_put(vma_policy(vma));
37989 kmem_cache_free(vm_area_cachep, vma);
37990 return next;
37991@@ -887,7 +887,7 @@ again:
37992 if (remove_next) {
37993 if (file) {
37994 uprobe_munmap(next, next->vm_start, next->vm_end);
37995- fput(file);
37996+ vma_fput(vma);
37997 }
37998 if (next->anon_vma)
37999 anon_vma_merge(vma, next);
38000@@ -1767,8 +1767,8 @@ out:
38001 return addr;
5527c038 38002
e8791d4f
AM
38003 unmap_and_free_vma:
38004+ vma_fput(vma);
38005 vma->vm_file = NULL;
38006- fput(file);
5527c038 38007
e8791d4f
AM
38008 /* Undo any partial mapping done by a device driver. */
38009 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
38010@@ -2602,7 +2602,7 @@ static int __split_vma(struct mm_struct
38011 goto out_free_mpol;
50b859bf 38012
e8791d4f
AM
38013 if (new->vm_file)
38014- get_file(new->vm_file);
38015+ vma_get_file(new);
5527c038 38016
e8791d4f
AM
38017 if (new->vm_ops && new->vm_ops->open)
38018 new->vm_ops->open(new);
38019@@ -2621,7 +2621,7 @@ static int __split_vma(struct mm_struct
38020 if (new->vm_ops && new->vm_ops->close)
38021 new->vm_ops->close(new);
38022 if (new->vm_file)
38023- fput(new->vm_file);
38024+ vma_fput(new);
38025 unlink_anon_vmas(new);
38026 out_free_mpol:
38027 mpol_put(vma_policy(new));
38028@@ -2772,7 +2772,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsign
38029 struct vm_area_struct *vma;
38030 unsigned long populate = 0;
38031 unsigned long ret = -EINVAL;
38032- struct file *file;
38033+ struct file *file, *prfile;
5527c038 38034
e8791d4f
AM
38035 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
38036 current->comm, current->pid);
38037@@ -2847,10 +2847,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsign
38038 }
38039 }
5527c038 38040
e8791d4f
AM
38041- file = get_file(vma->vm_file);
38042+ vma_get_file(vma);
38043+ file = vma->vm_file;
38044+ prfile = vma->vm_prfile;
38045 ret = do_mmap_pgoff(vma->vm_file, start, size,
38046 prot, flags, pgoff, &populate);
38047+ if (!IS_ERR_VALUE(ret) && file && prfile) {
38048+ struct vm_area_struct *new_vma;
38049+
38050+ new_vma = find_vma(mm, ret);
38051+ if (!new_vma->vm_prfile)
38052+ new_vma->vm_prfile = prfile;
38053+ if (new_vma != vma)
38054+ get_file(prfile);
38055+ }
38056+ /*
38057+ * two fput()s instead of vma_fput(vma),
38058+ * coz vma may not be available anymore.
38059+ */
38060 fput(file);
38061+ if (prfile)
38062+ fput(prfile);
38063 out:
38064 up_write(&mm->mmap_sem);
38065 if (populate)
38066@@ -3127,7 +3144,7 @@ struct vm_area_struct *copy_vma(struct v
38067 if (anon_vma_clone(new_vma, vma))
38068 goto out_free_mempol;
38069 if (new_vma->vm_file)
38070- get_file(new_vma->vm_file);
38071+ vma_get_file(new_vma);
38072 if (new_vma->vm_ops && new_vma->vm_ops->open)
38073 new_vma->vm_ops->open(new_vma);
38074 vma_link(mm, new_vma, prev, rb_link, rb_parent);
38075diff -urNp -x '*.orig' linux-4.9/mm/nommu.c linux-4.9/mm/nommu.c
38076--- linux-4.9/mm/nommu.c 2021-02-24 16:14:56.707823450 +0100
38077+++ linux-4.9/mm/nommu.c 2021-02-24 16:15:09.511573204 +0100
38078@@ -640,7 +640,7 @@ static void __put_nommu_region(struct vm
38079 up_write(&nommu_region_sem);
5527c038 38080
e8791d4f
AM
38081 if (region->vm_file)
38082- fput(region->vm_file);
38083+ vmr_fput(region);
5527c038 38084
e8791d4f
AM
38085 /* IO memory and memory shared directly out of the pagecache
38086 * from ramfs/tmpfs mustn't be released here */
38087@@ -798,7 +798,7 @@ static void delete_vma(struct mm_struct
38088 if (vma->vm_ops && vma->vm_ops->close)
38089 vma->vm_ops->close(vma);
38090 if (vma->vm_file)
38091- fput(vma->vm_file);
38092+ vma_fput(vma);
38093 put_nommu_region(vma->vm_region);
38094 kmem_cache_free(vm_area_cachep, vma);
5527c038 38095 }
e8791d4f
AM
38096@@ -1324,7 +1324,7 @@ unsigned long do_mmap(struct file *file,
38097 goto error_just_free;
38098 }
38099 }
38100- fput(region->vm_file);
38101+ vmr_fput(region);
38102 kmem_cache_free(vm_region_jar, region);
38103 region = pregion;
38104 result = start;
38105@@ -1399,10 +1399,10 @@ error_just_free:
38106 up_write(&nommu_region_sem);
38107 error:
38108 if (region->vm_file)
38109- fput(region->vm_file);
38110+ vmr_fput(region);
38111 kmem_cache_free(vm_region_jar, region);
38112 if (vma->vm_file)
38113- fput(vma->vm_file);
38114+ vma_fput(vma);
38115 kmem_cache_free(vm_area_cachep, vma);
38116 return ret;
38117
38118diff -urNp -x '*.orig' linux-4.9/mm/prfile.c linux-4.9/mm/prfile.c
38119--- linux-4.9/mm/prfile.c 1970-01-01 01:00:00.000000000 +0100
38120+++ linux-4.9/mm/prfile.c 2021-02-24 16:15:09.511573204 +0100
38121@@ -0,0 +1,85 @@
38122+/*
38123+ * Mainly for aufs, which mmap(2)s a different file and wants to print a
38124+ * different path in /proc/PID/maps.
38125+ * Call these functions via macros defined in linux/mm.h.
38126+ *
38127+ * See Documentation/filesystems/aufs/design/06mmap.txt
38128+ *
38129+ * Copyright (c) 2014-2018 Junjiro R. Okajima
38130+ * Copyright (c) 2014 Ian Campbell
38131+ */
38132+
38133+#include <linux/mm.h>
38134+#include <linux/file.h>
38135+#include <linux/fs.h>
38136+
38137+/* #define PRFILE_TRACE */
38138+static inline void prfile_trace(struct file *f, struct file *pr,
38139+ const char func[], int line, const char func2[])
38140+{
38141+#ifdef PRFILE_TRACE
38142+ if (pr)
38143+ pr_info("%s:%d: %s, %pD2\n", func, line, func2, f);
38144+#endif
38145+}
38146+
38147+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
38148+ int line)
38149+{
38150+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
38151+
38152+ prfile_trace(f, pr, func, line, __func__);
38153+ file_update_time(f);
38154+ if (f && pr)
38155+ file_update_time(pr);
38156+}
38157+
38158+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
38159+ int line)
38160+{
38161+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
38162+
38163+ prfile_trace(f, pr, func, line, __func__);
38164+ return (f && pr) ? pr : f;
38165+}
38166+
38167+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
38168+{
38169+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
38170+
38171+ prfile_trace(f, pr, func, line, __func__);
38172+ get_file(f);
38173+ if (f && pr)
38174+ get_file(pr);
38175+}
38176+
38177+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
38178+{
38179+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
38180+
38181+ prfile_trace(f, pr, func, line, __func__);
38182+ fput(f);
38183+ if (f && pr)
38184+ fput(pr);
38185+}
38186+
38187+#ifndef CONFIG_MMU
38188+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
38189+ int line)
38190+{
38191+ struct file *f = region->vm_file, *pr = region->vm_prfile;
38192+
38193+ prfile_trace(f, pr, func, line, __func__);
38194+ return (f && pr) ? pr : f;
38195+}
38196+
38197+void vmr_do_fput(struct vm_region *region, const char func[], int line)
38198+{
38199+ struct file *f = region->vm_file, *pr = region->vm_prfile;
38200+
38201+ prfile_trace(f, pr, func, line, __func__);
38202+ fput(f);
38203+ if (f && pr)
38204+ fput(pr);
38205+}
38206+#endif /* !CONFIG_MMU */
38207diff -urNp -x '*.orig' linux-4.9/security/commoncap.c linux-4.9/security/commoncap.c
38208--- linux-4.9/security/commoncap.c 2021-02-24 16:14:57.371178363 +0100
38209+++ linux-4.9/security/commoncap.c 2021-02-24 16:15:09.518240088 +0100
38210@@ -1067,12 +1067,14 @@ int cap_mmap_addr(unsigned long addr)
38211 }
38212 return ret;
38213 }
38214+EXPORT_SYMBOL_GPL(cap_mmap_addr);
38215
38216 int cap_mmap_file(struct file *file, unsigned long reqprot,
38217 unsigned long prot, unsigned long flags)
5527c038 38218 {
e8791d4f
AM
38219 return 0;
38220 }
38221+EXPORT_SYMBOL_GPL(cap_mmap_file);
5527c038 38222
e8791d4f 38223 #ifdef CONFIG_SECURITY
5527c038 38224
e8791d4f
AM
38225diff -urNp -x '*.orig' linux-4.9/security/device_cgroup.c linux-4.9/security/device_cgroup.c
38226--- linux-4.9/security/device_cgroup.c 2021-02-24 16:14:56.891162747 +0100
38227+++ linux-4.9/security/device_cgroup.c 2021-02-24 16:15:09.518240088 +0100
38228@@ -7,6 +7,7 @@
38229 #include <linux/device_cgroup.h>
38230 #include <linux/cgroup.h>
38231 #include <linux/ctype.h>
38232+#include <linux/export.h>
38233 #include <linux/list.h>
38234 #include <linux/uaccess.h>
38235 #include <linux/seq_file.h>
38236@@ -849,6 +850,7 @@ int __devcgroup_inode_permission(struct
38237 return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
38238 access);
38239 }
38240+EXPORT_SYMBOL_GPL(__devcgroup_inode_permission);
38241
38242 int devcgroup_inode_mknod(int mode, dev_t dev)
5527c038 38243 {
e8791d4f
AM
38244diff -urNp -x '*.orig' linux-4.9/security/security.c linux-4.9/security/security.c
38245--- linux-4.9/security/security.c 2021-02-24 16:14:56.897829631 +0100
38246+++ linux-4.9/security/security.c 2021-02-24 16:15:09.518240088 +0100
38247@@ -443,6 +443,7 @@ int security_path_rmdir(const struct pat
38248 return 0;
38249 return call_int_hook(path_rmdir, 0, dir, dentry);
38250 }
38251+EXPORT_SYMBOL_GPL(security_path_rmdir);
5527c038 38252
e8791d4f
AM
38253 int security_path_unlink(const struct path *dir, struct dentry *dentry)
38254 {
38255@@ -459,6 +460,7 @@ int security_path_symlink(const struct p
38256 return 0;
38257 return call_int_hook(path_symlink, 0, dir, dentry, old_name);
38258 }
38259+EXPORT_SYMBOL_GPL(security_path_symlink);
5527c038 38260
e8791d4f
AM
38261 int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
38262 struct dentry *new_dentry)
38263@@ -467,6 +469,7 @@ int security_path_link(struct dentry *ol
38264 return 0;
38265 return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
5527c038 38266 }
e8791d4f 38267+EXPORT_SYMBOL_GPL(security_path_link);
5527c038 38268
e8791d4f
AM
38269 int security_path_rename(const struct path *old_dir, struct dentry *old_dentry,
38270 const struct path *new_dir, struct dentry *new_dentry,
38271@@ -494,6 +497,7 @@ int security_path_truncate(const struct
38272 return 0;
38273 return call_int_hook(path_truncate, 0, path);
38274 }
38275+EXPORT_SYMBOL_GPL(security_path_truncate);
5527c038 38276
e8791d4f
AM
38277 int security_path_chmod(const struct path *path, umode_t mode)
38278 {
38279@@ -501,6 +505,7 @@ int security_path_chmod(const struct pat
38280 return 0;
38281 return call_int_hook(path_chmod, 0, path, mode);
38282 }
38283+EXPORT_SYMBOL_GPL(security_path_chmod);
5527c038 38284
e8791d4f
AM
38285 int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
38286 {
38287@@ -508,6 +513,7 @@ int security_path_chown(const struct pat
38288 return 0;
38289 return call_int_hook(path_chown, 0, path, uid, gid);
5527c038 38290 }
e8791d4f 38291+EXPORT_SYMBOL_GPL(security_path_chown);
5527c038 38292
e8791d4f
AM
38293 int security_path_chroot(const struct path *path)
38294 {
38295@@ -593,6 +599,7 @@ int security_inode_readlink(struct dentr
38296 return 0;
38297 return call_int_hook(inode_readlink, 0, dentry);
38298 }
38299+EXPORT_SYMBOL_GPL(security_inode_readlink);
5527c038 38300
e8791d4f
AM
38301 int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
38302 bool rcu)
38303@@ -608,6 +615,7 @@ int security_inode_permission(struct ino
38304 return 0;
38305 return call_int_hook(inode_permission, 0, inode, mask);
38306 }
38307+EXPORT_SYMBOL_GPL(security_inode_permission);
5527c038 38308
e8791d4f
AM
38309 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
38310 {
38311@@ -779,6 +787,7 @@ int security_file_permission(struct file
5527c038 38312
e8791d4f
AM
38313 return fsnotify_perm(file, mask);
38314 }
38315+EXPORT_SYMBOL_GPL(security_file_permission);
5527c038 38316
e8791d4f
AM
38317 int security_file_alloc(struct file *file)
38318 {
38319@@ -838,6 +847,7 @@ int security_mmap_file(struct file *file
38320 return ret;
38321 return ima_file_mmap(file, prot);
38322 }
38323+EXPORT_SYMBOL_GPL(security_mmap_file);
38324
38325 int security_mmap_addr(unsigned long addr)
38326 {
This page took 6.872953 seconds and 4 git commands to generate.