git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs4.patch
- builds without imq and aufs
[packages/kernel.git] / kernel-aufs4.patch
CommitLineData
e2f27e51 1aufs4.x-rcN kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
e2f27e51 4index 2bc7ad7..3049386 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
e2f27e51 7@@ -245,6 +245,7 @@ source "fs/pstore/Kconfig"
5527c038 8 source "fs/sysv/Kconfig"
7e9cd9fe 9 source "fs/ufs/Kconfig"
7f207e10
AM
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
e2f27e51 16index ed2b632..aa6d14b 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
e2f27e51 19@@ -129,3 +129,4 @@ obj-y += exofs/ # Multiple modules
7f207e10 20 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 21 obj-$(CONFIG_PSTORE) += pstore/
c06a8ce3 22 obj-$(CONFIG_EFIVAR_FS) += efivarfs/
86dc4139 23+obj-$(CONFIG_AUFS_FS) += aufs/
c06a8ce3 24diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
e2f27e51 25index 185f8ea..5deb0d1 100644
c06a8ce3
AM
26--- a/include/uapi/linux/Kbuild
27+++ b/include/uapi/linux/Kbuild
5527c038 28@@ -59,6 +59,7 @@ header-y += atmsvc.h
03673fb0
JR
29 header-y += atm_tcp.h
30 header-y += atm_zatm.h
c06a8ce3
AM
31 header-y += audit.h
32+header-y += aufs_type.h
c06a8ce3 33 header-y += auto_fs4.h
03673fb0 34 header-y += auto_fs.h
c06a8ce3 35 header-y += auxvec.h
e2f27e51 36aufs4.x-rcN base patch
7f207e10 37
c1595e42 38diff --git a/MAINTAINERS b/MAINTAINERS
106341ce 39index f593300..8a17054 100644
c1595e42
JR
40--- a/MAINTAINERS
41+++ b/MAINTAINERS
e2f27e51 42@@ -2256,6 +2256,19 @@ F: include/linux/audit.h
c1595e42
JR
43 F: include/uapi/linux/audit.h
44 F: kernel/audit*
45
46+AUFS (advanced multi layered unification filesystem) FILESYSTEM
47+M: "J. R. Okajima" <hooanon05g@gmail.com>
48+L: linux-unionfs@vger.kernel.org
49+L: aufs-users@lists.sourceforge.net (members only)
50+W: http://aufs.sourceforge.net
5527c038 51+T: git://github.com/sfjro/aufs4-linux.git
c1595e42
JR
52+S: Supported
53+F: Documentation/filesystems/aufs/
54+F: Documentation/ABI/testing/debugfs-aufs
55+F: Documentation/ABI/testing/sysfs-aufs
56+F: fs/aufs/
57+F: include/uapi/linux/aufs_type.h
58+
59 AUXILIARY DISPLAY DRIVERS
60 M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
61 W: http://miguelojeda.es/auxdisplay.htm
392086de 62diff --git a/drivers/block/loop.c b/drivers/block/loop.c
e2f27e51 63index c9f2107..005e292 100644
392086de
AM
64--- a/drivers/block/loop.c
65+++ b/drivers/block/loop.c
e2f27e51 66@@ -701,6 +701,24 @@ static inline int is_loop_device(struct file *file)
392086de
AM
67 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
68 }
69
70+/*
71+ * for AUFS
72+ * no get/put for file.
73+ */
74+struct file *loop_backing_file(struct super_block *sb)
75+{
76+ struct file *ret;
77+ struct loop_device *l;
78+
79+ ret = NULL;
80+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
81+ l = sb->s_bdev->bd_disk->private_data;
82+ ret = l->lo_backing_file;
83+ }
84+ return ret;
85+}
febd17d6 86+EXPORT_SYMBOL_GPL(loop_backing_file);
392086de
AM
87+
88 /* loop sysfs attributes */
89
90 static ssize_t loop_attr_show(struct device *dev, char *page,
c1595e42 91diff --git a/fs/dcache.c b/fs/dcache.c
e2f27e51 92index 5c7cc95..df0268c 100644
c1595e42
JR
93--- a/fs/dcache.c
94+++ b/fs/dcache.c
e2f27e51 95@@ -1164,7 +1164,7 @@ enum d_walk_ret {
c1595e42
JR
96 *
97 * The @enter() and @finish() callbacks are called with d_lock held.
98 */
99-static void d_walk(struct dentry *parent, void *data,
100+void d_walk(struct dentry *parent, void *data,
101 enum d_walk_ret (*enter)(void *, struct dentry *),
102 void (*finish)(void *))
103 {
febd17d6
JR
104diff --git a/fs/fcntl.c b/fs/fcntl.c
105index 350a2c8..6f42279 100644
106--- a/fs/fcntl.c
107+++ b/fs/fcntl.c
108@@ -29,7 +29,7 @@
109
110 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
111
112-static int setfl(int fd, struct file * filp, unsigned long arg)
113+int setfl(int fd, struct file * filp, unsigned long arg)
114 {
115 struct inode * inode = file_inode(filp);
116 int error = 0;
117@@ -60,6 +60,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
118
119 if (filp->f_op->check_flags)
120 error = filp->f_op->check_flags(arg);
121+ if (!error && filp->f_op->setfl)
122+ error = filp->f_op->setfl(filp, arg);
123 if (error)
124 return error;
125
5afbbe0d 126diff --git a/fs/inode.c b/fs/inode.c
e2f27e51 127index 7e3ef3a..675fe84 100644
5afbbe0d
AM
128--- a/fs/inode.c
129+++ b/fs/inode.c
e2f27e51 130@@ -1593,7 +1593,7 @@ EXPORT_SYMBOL(generic_update_time);
5afbbe0d
AM
131 * This does the actual work of updating an inodes time or version. Must have
132 * had called mnt_want_write() before calling this.
133 */
134-static int update_time(struct inode *inode, struct timespec *time, int flags)
135+int update_time(struct inode *inode, struct timespec *time, int flags)
136 {
137 int (*update_time)(struct inode *, struct timespec *, int);
138
5527c038 139diff --git a/fs/read_write.c b/fs/read_write.c
e2f27e51 140index 66215a7..a1da117 100644
5527c038
JR
141--- a/fs/read_write.c
142+++ b/fs/read_write.c
5afbbe0d 143@@ -515,6 +515,28 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
5527c038
JR
144 }
145 EXPORT_SYMBOL(__vfs_write);
146
147+vfs_readf_t vfs_readf(struct file *file)
148+{
149+ const struct file_operations *fop = file->f_op;
150+
151+ if (fop->read)
152+ return fop->read;
153+ if (fop->read_iter)
154+ return new_sync_read;
155+ return ERR_PTR(-ENOSYS);
156+}
157+
158+vfs_writef_t vfs_writef(struct file *file)
159+{
160+ const struct file_operations *fop = file->f_op;
161+
162+ if (fop->write)
163+ return fop->write;
164+ if (fop->write_iter)
165+ return new_sync_write;
166+ return ERR_PTR(-ENOSYS);
167+}
168+
169 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
170 {
171 mm_segment_t old_fs;
7f207e10 172diff --git a/fs/splice.c b/fs/splice.c
5afbbe0d 173index dd9bf7e..9326c2a 100644
7f207e10
AM
174--- a/fs/splice.c
175+++ b/fs/splice.c
5afbbe0d 176@@ -1111,8 +1111,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
7f207e10
AM
177 /*
178 * Attempt to initiate a splice from pipe to file.
179 */
180-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
181- loff_t *ppos, size_t len, unsigned int flags)
182+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
183+ loff_t *ppos, size_t len, unsigned int flags)
184 {
185 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
186 loff_t *, size_t, unsigned int);
5afbbe0d 187@@ -1128,9 +1128,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
188 /*
189 * Attempt to initiate a splice from a file to a pipe.
190 */
191-static long do_splice_to(struct file *in, loff_t *ppos,
192- struct pipe_inode_info *pipe, size_t len,
193- unsigned int flags)
194+long do_splice_to(struct file *in, loff_t *ppos,
195+ struct pipe_inode_info *pipe, size_t len,
196+ unsigned int flags)
197 {
198 ssize_t (*splice_read)(struct file *, loff_t *,
199 struct pipe_inode_info *, size_t, unsigned int);
b912730e 200diff --git a/include/linux/file.h b/include/linux/file.h
5afbbe0d 201index 7444f5f..bdac0be 100644
b912730e
AM
202--- a/include/linux/file.h
203+++ b/include/linux/file.h
204@@ -19,6 +19,7 @@ struct dentry;
205 struct path;
206 extern struct file *alloc_file(struct path *, fmode_t mode,
207 const struct file_operations *fop);
208+extern struct file *get_empty_filp(void);
209
210 static inline void fput_light(struct file *file, int fput_needed)
211 {
5527c038 212diff --git a/include/linux/fs.h b/include/linux/fs.h
e2f27e51 213index 901e25d..a71aa9e 100644
5527c038
JR
214--- a/include/linux/fs.h
215+++ b/include/linux/fs.h
e2f27e51 216@@ -1275,6 +1275,7 @@ extern void fasync_free(struct fasync_struct *);
febd17d6
JR
217 /* can be called from interrupts */
218 extern void kill_fasync(struct fasync_struct **, int, int);
219
220+extern int setfl(int fd, struct file * filp, unsigned long arg);
221 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
222 extern void f_setown(struct file *filp, unsigned long arg, int force);
223 extern void f_delown(struct file *filp);
e2f27e51 224@@ -1699,6 +1700,7 @@ struct file_operations {
febd17d6
JR
225 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
226 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
227 int (*check_flags)(int);
228+ int (*setfl)(struct file *, unsigned long);
229 int (*flock) (struct file *, int, struct file_lock *);
230 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
231 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
e2f27e51 232@@ -1759,6 +1761,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
5527c038
JR
233 struct iovec *fast_pointer,
234 struct iovec **ret_pointer);
235
236+typedef ssize_t (*vfs_readf_t)(struct file *, char __user *, size_t, loff_t *);
237+typedef ssize_t (*vfs_writef_t)(struct file *, const char __user *, size_t,
238+ loff_t *);
239+vfs_readf_t vfs_readf(struct file *file);
240+vfs_writef_t vfs_writef(struct file *file);
241+
242 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
243 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
244 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
e2f27e51 245@@ -2123,6 +2131,7 @@ extern int current_umask(void);
5afbbe0d
AM
246 extern void ihold(struct inode * inode);
247 extern void iput(struct inode *);
248 extern int generic_update_time(struct inode *, struct timespec *, int);
249+extern int update_time(struct inode *, struct timespec *, int);
250
251 /* /sys/fs */
252 extern struct kobject *fs_kobj;
1e00d052 253diff --git a/include/linux/splice.h b/include/linux/splice.h
076b876e 254index da2751d..2e0fca6 100644
1e00d052
AM
255--- a/include/linux/splice.h
256+++ b/include/linux/splice.h
076b876e 257@@ -83,4 +83,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
4b3da204
AM
258
259 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
106341ce 260 extern const struct pipe_buf_operations default_pipe_buf_ops;
1e00d052
AM
261+
262+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
263+ loff_t *ppos, size_t len, unsigned int flags);
264+extern long do_splice_to(struct file *in, loff_t *ppos,
265+ struct pipe_inode_info *pipe, size_t len,
266+ unsigned int flags);
267 #endif
e2f27e51 268aufs4.x-rcN mmap patch
fb47a38f 269
c1595e42 270diff --git a/fs/proc/base.c b/fs/proc/base.c
e2f27e51 271index ac0df4d..42255e5 100644
c1595e42
JR
272--- a/fs/proc/base.c
273+++ b/fs/proc/base.c
e2f27e51 274@@ -1938,7 +1938,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
c1595e42
JR
275 down_read(&mm->mmap_sem);
276 vma = find_exact_vma(mm, vm_start, vm_end);
277 if (vma && vma->vm_file) {
278- *path = vma->vm_file->f_path;
279+ *path = vma_pr_or_file(vma)->f_path;
280 path_get(path);
281 rc = 0;
282 }
fb47a38f 283diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
c2c0f25c 284index f8595e8..cb8eda0 100644
fb47a38f
JR
285--- a/fs/proc/nommu.c
286+++ b/fs/proc/nommu.c
076b876e 287@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
fb47a38f
JR
288 file = region->vm_file;
289
290 if (file) {
291- struct inode *inode = file_inode(region->vm_file);
292+ struct inode *inode;
076b876e 293+
fb47a38f
JR
294+ file = vmr_pr_or_file(region);
295+ inode = file_inode(file);
296 dev = inode->i_sb->s_dev;
297 ino = inode->i_ino;
298 }
299diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
e2f27e51 300index f6fa99e..2750949 100644
fb47a38f
JR
301--- a/fs/proc/task_mmu.c
302+++ b/fs/proc/task_mmu.c
febd17d6 303@@ -298,7 +298,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
fb47a38f
JR
304 const char *name = NULL;
305
306 if (file) {
307- struct inode *inode = file_inode(vma->vm_file);
308+ struct inode *inode;
076b876e 309+
fb47a38f
JR
310+ file = vma_pr_or_file(vma);
311+ inode = file_inode(file);
312 dev = inode->i_sb->s_dev;
313 ino = inode->i_ino;
314 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
e2f27e51 315@@ -1634,7 +1637,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
076b876e
AM
316 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
317 struct vm_area_struct *vma = v;
318 struct numa_maps *md = &numa_priv->md;
319- struct file *file = vma->vm_file;
320+ struct file *file = vma_pr_or_file(vma);
076b876e 321 struct mm_struct *mm = vma->vm_mm;
7e9cd9fe
AM
322 struct mm_walk walk = {
323 .hugetlb_entry = gather_hugetlb_stats,
fb47a38f 324diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
febd17d6 325index faacb0c..17b43be 100644
fb47a38f
JR
326--- a/fs/proc/task_nommu.c
327+++ b/fs/proc/task_nommu.c
febd17d6 328@@ -163,7 +163,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
fb47a38f
JR
329 file = vma->vm_file;
330
331 if (file) {
332- struct inode *inode = file_inode(vma->vm_file);
333+ struct inode *inode;
076b876e 334+
b912730e 335+ file = vma_pr_or_file(vma);
fb47a38f
JR
336+ inode = file_inode(file);
337 dev = inode->i_sb->s_dev;
338 ino = inode->i_ino;
339 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
340diff --git a/include/linux/mm.h b/include/linux/mm.h
e2f27e51 341index ef815b9..a772481 100644
fb47a38f
JR
342--- a/include/linux/mm.h
343+++ b/include/linux/mm.h
106341ce 344@@ -1266,6 +1266,28 @@ static inline int fixup_user_fault(struc
fb47a38f
JR
345 }
346 #endif
347
076b876e
AM
348+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
349+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
106341ce 350+ int);
076b876e
AM
351+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
352+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
fb47a38f 353+
106341ce
AM
354+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \
355+ __LINE__)
356+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \
357+ __LINE__)
358+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__)
359+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__)
b912730e
AM
360+
361+#ifndef CONFIG_MMU
076b876e
AM
362+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
363+extern void vmr_do_fput(struct vm_region *, const char[], int);
364+
106341ce
AM
365+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \
366+ __LINE__)
367+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__)
b912730e 368+#endif /* !CONFIG_MMU */
fb47a38f 369+
106341ce
AM
370 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
371 unsigned int gup_flags);
fb47a38f 372 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
fb47a38f 373diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
e2f27e51 374index 903200f..55fc528 100644
fb47a38f
JR
375--- a/include/linux/mm_types.h
376+++ b/include/linux/mm_types.h
e2f27e51 377@@ -275,6 +275,7 @@ struct vm_region {
fb47a38f
JR
378 unsigned long vm_top; /* region allocated to here */
379 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
380 struct file *vm_file; /* the backing file or NULL */
381+ struct file *vm_prfile; /* the virtual backing file or NULL */
382
383 int vm_usage; /* region usage count (access under nommu_region_sem) */
384 bool vm_icache_flushed : 1; /* true if the icache has been flushed for
e2f27e51 385@@ -349,6 +350,7 @@ struct vm_area_struct {
fb47a38f 386 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
5afbbe0d 387 units */
fb47a38f
JR
388 struct file * vm_file; /* File we map to (can be NULL). */
389+ struct file *vm_prfile; /* shadow of vm_file */
390 void * vm_private_data; /* was vm_pte (shared mem) */
391
392 #ifndef CONFIG_MMU
393diff --git a/kernel/fork.c b/kernel/fork.c
e2f27e51 394index beb3172..ad4cfa8 100644
fb47a38f
JR
395--- a/kernel/fork.c
396+++ b/kernel/fork.c
e2f27e51 397@@ -477,7 +477,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
fb47a38f
JR
398 struct inode *inode = file_inode(file);
399 struct address_space *mapping = file->f_mapping;
400
401- get_file(file);
402+ vma_get_file(tmp);
403 if (tmp->vm_flags & VM_DENYWRITE)
404 atomic_dec(&inode->i_writecount);
2000de60 405 i_mmap_lock_write(mapping);
076b876e 406diff --git a/mm/Makefile b/mm/Makefile
e2f27e51 407index 2ca1faf..6b9da3f 100644
076b876e
AM
408--- a/mm/Makefile
409+++ b/mm/Makefile
e2f27e51 410@@ -40,7 +40,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
076b876e 411 mm_init.o mmu_context.o percpu.o slab_common.o \
c1595e42 412 compaction.o vmacache.o \
076b876e 413 interval_tree.o list_lru.o workingset.o \
7e9cd9fe
AM
414- debug.o $(mmu-y)
415+ prfile.o debug.o $(mmu-y)
076b876e
AM
416
417 obj-y += init-mm.o
418
fb47a38f 419diff --git a/mm/filemap.c b/mm/filemap.c
106341ce 420index 2d0986a..4a31bad 100644
fb47a38f
JR
421--- a/mm/filemap.c
422+++ b/mm/filemap.c
e2f27e51 423@@ -2284,7 +2284,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
fb47a38f
JR
424 int ret = VM_FAULT_LOCKED;
425
426 sb_start_pagefault(inode->i_sb);
427- file_update_time(vma->vm_file);
428+ vma_file_update_time(vma);
429 lock_page(page);
430 if (page->mapping != inode->i_mapping) {
431 unlock_page(page);
fb47a38f 432diff --git a/mm/memory.c b/mm/memory.c
e2f27e51 433index 793fe0f..45f39f3 100644
fb47a38f
JR
434--- a/mm/memory.c
435+++ b/mm/memory.c
e2f27e51 436@@ -2113,7 +2113,7 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
fb47a38f 437 }
7e9cd9fe 438
b912730e
AM
439 if (!page_mkwrite)
440- file_update_time(vma->vm_file);
441+ vma_file_update_time(vma);
442 }
443
444 return VM_FAULT_WRITE;
fb47a38f 445diff --git a/mm/mmap.c b/mm/mmap.c
e2f27e51 446index ca9d91b..f3ebc5a 100644
fb47a38f
JR
447--- a/mm/mmap.c
448+++ b/mm/mmap.c
e2f27e51 449@@ -163,7 +163,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
fb47a38f
JR
450 if (vma->vm_ops && vma->vm_ops->close)
451 vma->vm_ops->close(vma);
452 if (vma->vm_file)
453- fput(vma->vm_file);
454+ vma_fput(vma);
455 mpol_put(vma_policy(vma));
456 kmem_cache_free(vm_area_cachep, vma);
457 return next;
e2f27e51 458@@ -790,7 +790,7 @@ again:
fb47a38f
JR
459 if (remove_next) {
460 if (file) {
461 uprobe_munmap(next, next->vm_start, next->vm_end);
462- fput(file);
463+ vma_fput(vma);
464 }
465 if (next->anon_vma)
466 anon_vma_merge(vma, next);
e2f27e51 467@@ -1574,8 +1574,8 @@ out:
35939ee7
JR
468 return addr;
469
fb47a38f 470 unmap_and_free_vma:
fb47a38f
JR
471+ vma_fput(vma);
472 vma->vm_file = NULL;
473- fput(file);
474
475 /* Undo any partial mapping done by a device driver. */
476 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
e2f27e51 477@@ -2380,7 +2380,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
478 goto out_free_mpol;
479
480 if (new->vm_file)
481- get_file(new->vm_file);
482+ vma_get_file(new);
483
484 if (new->vm_ops && new->vm_ops->open)
485 new->vm_ops->open(new);
e2f27e51 486@@ -2399,7 +2399,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
487 if (new->vm_ops && new->vm_ops->close)
488 new->vm_ops->close(new);
489 if (new->vm_file)
490- fput(new->vm_file);
491+ vma_fput(new);
492 unlink_anon_vmas(new);
493 out_free_mpol:
494 mpol_put(vma_policy(new));
e2f27e51 495@@ -2550,7 +2550,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
7e9cd9fe
AM
496 struct vm_area_struct *vma;
497 unsigned long populate = 0;
498 unsigned long ret = -EINVAL;
499- struct file *file;
5afbbe0d 500+ struct file *file, *prfile;
7e9cd9fe 501
5afbbe0d
AM
502 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
503 current->comm, current->pid);
e2f27e51 504@@ -2625,10 +2625,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
febd17d6 505 }
7e9cd9fe
AM
506 }
507
508- file = get_file(vma->vm_file);
509+ vma_get_file(vma);
5afbbe0d
AM
510+ file = vma->vm_file;
511+ prfile = vma->vm_prfile;
7e9cd9fe
AM
512 ret = do_mmap_pgoff(vma->vm_file, start, size,
513 prot, flags, pgoff, &populate);
5afbbe0d
AM
514+ if (!IS_ERR_VALUE(ret) && file && prfile) {
515+ struct vm_area_struct *new_vma;
516+
517+ new_vma = find_vma(mm, ret);
518+ if (!new_vma->vm_prfile)
519+ new_vma->vm_prfile = prfile;
520+ if (new_vma != vma)
521+ get_file(prfile);
522+ }
523+ /*
524+ * two fput()s instead of vma_fput(vma),
525+ * coz vma may not be available anymore.
526+ */
527 fput(file);
528+ if (prfile)
529+ fput(prfile);
7e9cd9fe
AM
530 out:
531 up_write(&mm->mmap_sem);
532 if (populate)
e2f27e51 533@@ -2903,7 +2920,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
79b8bda9
AM
534 if (anon_vma_clone(new_vma, vma))
535 goto out_free_mempol;
536 if (new_vma->vm_file)
537- get_file(new_vma->vm_file);
538+ vma_get_file(new_vma);
539 if (new_vma->vm_ops && new_vma->vm_ops->open)
540 new_vma->vm_ops->open(new_vma);
541 vma_link(mm, new_vma, prev, rb_link, rb_parent);
fb47a38f 542diff --git a/mm/nommu.c b/mm/nommu.c
e2f27e51 543index 95daf81..5086a29 100644
fb47a38f
JR
544--- a/mm/nommu.c
545+++ b/mm/nommu.c
5afbbe0d 546@@ -644,7 +644,7 @@ static void __put_nommu_region(struct vm_region *region)
fb47a38f
JR
547 up_write(&nommu_region_sem);
548
549 if (region->vm_file)
550- fput(region->vm_file);
551+ vmr_fput(region);
552
553 /* IO memory and memory shared directly out of the pagecache
554 * from ramfs/tmpfs mustn't be released here */
5afbbe0d 555@@ -802,7 +802,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
fb47a38f
JR
556 if (vma->vm_ops && vma->vm_ops->close)
557 vma->vm_ops->close(vma);
558 if (vma->vm_file)
559- fput(vma->vm_file);
560+ vma_fput(vma);
561 put_nommu_region(vma->vm_region);
562 kmem_cache_free(vm_area_cachep, vma);
563 }
5afbbe0d 564@@ -1328,7 +1328,7 @@ unsigned long do_mmap(struct file *file,
fb47a38f
JR
565 goto error_just_free;
566 }
567 }
568- fput(region->vm_file);
569+ vmr_fput(region);
570 kmem_cache_free(vm_region_jar, region);
571 region = pregion;
572 result = start;
5afbbe0d 573@@ -1403,10 +1403,10 @@ error_just_free:
fb47a38f
JR
574 up_write(&nommu_region_sem);
575 error:
576 if (region->vm_file)
577- fput(region->vm_file);
578+ vmr_fput(region);
579 kmem_cache_free(vm_region_jar, region);
580 if (vma->vm_file)
581- fput(vma->vm_file);
582+ vma_fput(vma);
583 kmem_cache_free(vm_area_cachep, vma);
fb47a38f 584 return ret;
c2c0f25c 585
076b876e
AM
586diff --git a/mm/prfile.c b/mm/prfile.c
587new file mode 100644
c2c0f25c 588index 0000000..b323b8a
076b876e
AM
589--- /dev/null
590+++ b/mm/prfile.c
591@@ -0,0 +1,86 @@
592+/*
593+ * Mainly for aufs which mmap(2) diffrent file and wants to print different path
594+ * in /proc/PID/maps.
595+ * Call these functions via macros defined in linux/mm.h.
596+ *
597+ * See Documentation/filesystems/aufs/design/06mmap.txt
598+ *
599+ * Copyright (c) 2014 Junjro R. Okajima
600+ * Copyright (c) 2014 Ian Campbell
601+ */
602+
603+#include <linux/mm.h>
604+#include <linux/file.h>
605+#include <linux/fs.h>
606+
607+/* #define PRFILE_TRACE */
608+static inline void prfile_trace(struct file *f, struct file *pr,
609+ const char func[], int line, const char func2[])
610+{
611+#ifdef PRFILE_TRACE
612+ if (pr)
c2c0f25c 613+ pr_info("%s:%d: %s, %s\n", func, line, func2,
7e9cd9fe 614+ f ? (char *)f->f_path.dentry->d_name.name : "(null)");
076b876e
AM
615+#endif
616+}
617+
076b876e
AM
618+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
619+ int line)
620+{
621+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
622+
623+ prfile_trace(f, pr, func, line, __func__);
624+ file_update_time(f);
625+ if (f && pr)
626+ file_update_time(pr);
627+}
628+
629+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
630+ int line)
631+{
632+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
633+
634+ prfile_trace(f, pr, func, line, __func__);
635+ return (f && pr) ? pr : f;
636+}
637+
638+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
639+{
640+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
641+
642+ prfile_trace(f, pr, func, line, __func__);
643+ get_file(f);
644+ if (f && pr)
645+ get_file(pr);
646+}
647+
648+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
649+{
650+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
651+
652+ prfile_trace(f, pr, func, line, __func__);
653+ fput(f);
654+ if (f && pr)
655+ fput(pr);
656+}
b912730e
AM
657+
658+#ifndef CONFIG_MMU
076b876e
AM
659+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
660+ int line)
661+{
662+ struct file *f = region->vm_file, *pr = region->vm_prfile;
663+
664+ prfile_trace(f, pr, func, line, __func__);
665+ return (f && pr) ? pr : f;
666+}
667+
668+void vmr_do_fput(struct vm_region *region, const char func[], int line)
669+{
670+ struct file *f = region->vm_file, *pr = region->vm_prfile;
671+
672+ prfile_trace(f, pr, func, line, __func__);
673+ fput(f);
674+ if (f && pr)
675+ fput(pr);
676+}
b912730e 677+#endif /* !CONFIG_MMU */
e2f27e51 678aufs4.x-rcN standalone patch
7f207e10 679
c1595e42 680diff --git a/fs/dcache.c b/fs/dcache.c
e2f27e51 681index df0268c..76280ee 100644
c1595e42
JR
682--- a/fs/dcache.c
683+++ b/fs/dcache.c
e2f27e51 684@@ -1272,6 +1272,7 @@ rename_retry:
c1595e42
JR
685 seq = 1;
686 goto again;
687 }
febd17d6 688+EXPORT_SYMBOL_GPL(d_walk);
c1595e42
JR
689
690 /*
691 * Search for at least 1 mount point in the dentry's subdirs.
79b8bda9 692diff --git a/fs/exec.c b/fs/exec.c
e2f27e51 693index 6fcfb3f..ed9d646 100644
79b8bda9
AM
694--- a/fs/exec.c
695+++ b/fs/exec.c
5afbbe0d 696@@ -104,6 +104,7 @@ bool path_noexec(const struct path *path)
79b8bda9
AM
697 return (path->mnt->mnt_flags & MNT_NOEXEC) ||
698 (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
699 }
febd17d6 700+EXPORT_SYMBOL_GPL(path_noexec);
79b8bda9
AM
701
702 #ifdef CONFIG_USELIB
703 /*
febd17d6
JR
704diff --git a/fs/fcntl.c b/fs/fcntl.c
705index 6f42279..04fd33c 100644
706--- a/fs/fcntl.c
707+++ b/fs/fcntl.c
708@@ -82,6 +82,7 @@ int setfl(int fd, struct file * filp, unsigned long arg)
709 out:
710 return error;
711 }
712+EXPORT_SYMBOL_GPL(setfl);
713
714 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
715 int force)
b912730e 716diff --git a/fs/file_table.c b/fs/file_table.c
febd17d6 717index ad17e05..ae9f267 100644
b912730e
AM
718--- a/fs/file_table.c
719+++ b/fs/file_table.c
79b8bda9 720@@ -147,6 +147,7 @@ over:
b912730e
AM
721 }
722 return ERR_PTR(-ENFILE);
723 }
febd17d6 724+EXPORT_SYMBOL_GPL(get_empty_filp);
b912730e
AM
725
726 /**
727 * alloc_file - allocate and initialize a 'struct file'
8cdd5066
JR
728@@ -258,6 +259,7 @@ void flush_delayed_fput(void)
729 {
730 delayed_fput(NULL);
731 }
febd17d6 732+EXPORT_SYMBOL_GPL(flush_delayed_fput);
8cdd5066
JR
733
734 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
735
736@@ -300,6 +302,7 @@ void __fput_sync(struct file *file)
737 }
738
739 EXPORT_SYMBOL(fput);
febd17d6 740+EXPORT_SYMBOL_GPL(__fput_sync);
8cdd5066
JR
741
742 void put_filp(struct file *file)
743 {
744@@ -308,6 +311,7 @@ void put_filp(struct file *file)
b912730e
AM
745 file_free(file);
746 }
747 }
febd17d6 748+EXPORT_SYMBOL_GPL(put_filp);
b912730e 749
79b8bda9 750 void __init files_init(void)
b912730e 751 {
5afbbe0d 752diff --git a/fs/inode.c b/fs/inode.c
e2f27e51 753index 675fe84..da063d3 100644
5afbbe0d
AM
754--- a/fs/inode.c
755+++ b/fs/inode.c
e2f27e51 756@@ -1602,6 +1602,7 @@ int update_time(struct inode *inode, struct timespec *time, int flags)
5afbbe0d
AM
757
758 return update_time(inode, time, flags);
759 }
760+EXPORT_SYMBOL_GPL(update_time);
761
762 /**
763 * touch_atime - update the access time
7f207e10 764diff --git a/fs/namespace.c b/fs/namespace.c
e2f27e51 765index 7bb2cda..88ec098 100644
7f207e10
AM
766--- a/fs/namespace.c
767+++ b/fs/namespace.c
7e9cd9fe 768@@ -463,6 +463,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
c06a8ce3
AM
769 mnt_dec_writers(real_mount(mnt));
770 preempt_enable();
771 }
772+EXPORT_SYMBOL_GPL(__mnt_drop_write);
773
774 /**
775 * mnt_drop_write - give up write access to a mount
f0c0a007 776@@ -1812,6 +1813,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
777 }
778 return 0;
779 }
febd17d6 780+EXPORT_SYMBOL_GPL(iterate_mounts);
7f207e10 781
7eafdf33 782 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
7f207e10
AM
783 {
784diff --git a/fs/notify/group.c b/fs/notify/group.c
e2f27e51 785index b47f7cf..618bc9e 100644
7f207e10
AM
786--- a/fs/notify/group.c
787+++ b/fs/notify/group.c
788@@ -22,6 +22,7 @@
789 #include <linux/srcu.h>
790 #include <linux/rculist.h>
791 #include <linux/wait.h>
792+#include <linux/module.h>
793
794 #include <linux/fsnotify_backend.h>
795 #include "fsnotify.h"
e2f27e51 796@@ -100,6 +101,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
1716fcea
AM
797 {
798 atomic_inc(&group->refcnt);
799 }
febd17d6 800+EXPORT_SYMBOL_GPL(fsnotify_get_group);
1716fcea
AM
801
802 /*
803 * Drop a reference to a group. Free it if it's through.
e2f27e51 804@@ -109,6 +111,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
7f207e10 805 if (atomic_dec_and_test(&group->refcnt))
1716fcea 806 fsnotify_final_destroy_group(group);
7f207e10 807 }
febd17d6 808+EXPORT_SYMBOL_GPL(fsnotify_put_group);
7f207e10
AM
809
810 /*
811 * Create a new fsnotify_group and hold a reference for the group returned.
e2f27e51 812@@ -137,6 +140,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
7f207e10
AM
813
814 return group;
815 }
febd17d6 816+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
1716fcea
AM
817
818 int fsnotify_fasync(int fd, struct file *file, int on)
819 {
7f207e10 820diff --git a/fs/notify/mark.c b/fs/notify/mark.c
5afbbe0d 821index d3fea0b..5fc06ad 100644
7f207e10
AM
822--- a/fs/notify/mark.c
823+++ b/fs/notify/mark.c
febd17d6 824@@ -113,6 +113,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10 825 mark->free_mark(mark);
1716fcea 826 }
7f207e10 827 }
febd17d6 828+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
7f207e10 829
2000de60
JR
830 /* Calculate mask of events for a list of marks */
831 u32 fsnotify_recalc_mask(struct hlist_head *head)
5afbbe0d 832@@ -230,6 +231,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
1716fcea 833 mutex_unlock(&group->mark_mutex);
79b8bda9 834 fsnotify_free_mark(mark);
7f207e10 835 }
febd17d6 836+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
7f207e10 837
79b8bda9
AM
838 void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
839 {
5afbbe0d 840@@ -415,6 +417,7 @@ err:
7f207e10
AM
841
842 return ret;
843 }
febd17d6 844+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
7f207e10 845
1716fcea
AM
846 int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
847 struct inode *inode, struct vfsmount *mnt, int allow_dups)
5afbbe0d 848@@ -533,6 +536,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
849 atomic_set(&mark->refcnt, 1);
850 mark->free_mark = free_mark;
851 }
febd17d6 852+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
7f207e10 853
5afbbe0d
AM
854 /*
855 * Destroy all marks in destroy_list, waits for SRCU period to finish before
7f207e10 856diff --git a/fs/open.c b/fs/open.c
e2f27e51 857index 4fd6e25..ec6f532 100644
7f207e10
AM
858--- a/fs/open.c
859+++ b/fs/open.c
c2c0f25c 860@@ -64,6 +64,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
febd17d6 861 inode_unlock(dentry->d_inode);
7f207e10
AM
862 return ret;
863 }
febd17d6 864+EXPORT_SYMBOL_GPL(do_truncate);
7f207e10 865
5afbbe0d 866 long vfs_truncate(const struct path *path, loff_t length)
7f207e10 867 {
c2c0f25c 868@@ -678,6 +679,7 @@ int open_check_o_direct(struct file *f)
b912730e
AM
869 }
870 return 0;
871 }
febd17d6 872+EXPORT_SYMBOL_GPL(open_check_o_direct);
b912730e
AM
873
874 static int do_dentry_open(struct file *f,
c2c0f25c 875 struct inode *inode,
5527c038 876diff --git a/fs/read_write.c b/fs/read_write.c
e2f27e51 877index a1da117..c643215 100644
5527c038
JR
878--- a/fs/read_write.c
879+++ b/fs/read_write.c
5afbbe0d 880@@ -525,6 +525,7 @@ vfs_readf_t vfs_readf(struct file *file)
5527c038
JR
881 return new_sync_read;
882 return ERR_PTR(-ENOSYS);
883 }
febd17d6 884+EXPORT_SYMBOL_GPL(vfs_readf);
5527c038
JR
885
886 vfs_writef_t vfs_writef(struct file *file)
887 {
5afbbe0d 888@@ -536,6 +537,7 @@ vfs_writef_t vfs_writef(struct file *file)
5527c038
JR
889 return new_sync_write;
890 return ERR_PTR(-ENOSYS);
891 }
febd17d6 892+EXPORT_SYMBOL_GPL(vfs_writef);
5527c038
JR
893
894 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
895 {
7f207e10 896diff --git a/fs/splice.c b/fs/splice.c
5afbbe0d 897index 9326c2a..0606690 100644
7f207e10
AM
898--- a/fs/splice.c
899+++ b/fs/splice.c
5afbbe0d 900@@ -1124,6 +1124,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
392086de
AM
901
902 return splice_write(pipe, out, ppos, len, flags);
7f207e10 903 }
febd17d6 904+EXPORT_SYMBOL_GPL(do_splice_from);
7f207e10
AM
905
906 /*
907 * Attempt to initiate a splice from a file to a pipe.
5afbbe0d 908@@ -1153,6 +1154,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
909
910 return splice_read(in, ppos, pipe, len, flags);
911 }
febd17d6 912+EXPORT_SYMBOL_GPL(do_splice_to);
7f207e10
AM
913
914 /**
915 * splice_direct_to_actor - splices data directly between two non-pipes
c1595e42 916diff --git a/fs/xattr.c b/fs/xattr.c
e2f27e51 917index c243905..b60dc60 100644
c1595e42
JR
918--- a/fs/xattr.c
919+++ b/fs/xattr.c
e2f27e51 920@@ -214,6 +214,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
c1595e42
JR
921 *xattr_value = value;
922 return error;
923 }
febd17d6 924+EXPORT_SYMBOL_GPL(vfs_getxattr_alloc);
c1595e42 925
febd17d6
JR
926 ssize_t
927 vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
8cdd5066 928diff --git a/kernel/task_work.c b/kernel/task_work.c
e2f27e51 929index d513051..e056d54 100644
8cdd5066
JR
930--- a/kernel/task_work.c
931+++ b/kernel/task_work.c
e2f27e51 932@@ -119,3 +119,4 @@ void task_work_run(void)
8cdd5066
JR
933 } while (work);
934 }
935 }
febd17d6 936+EXPORT_SYMBOL_GPL(task_work_run);
7f207e10 937diff --git a/security/commoncap.c b/security/commoncap.c
e2f27e51 938index 14540bd..4e3b242 100644
7f207e10
AM
939--- a/security/commoncap.c
940+++ b/security/commoncap.c
e2f27e51 941@@ -1066,12 +1066,14 @@ int cap_mmap_addr(unsigned long addr)
94337f0d 942 }
7f207e10
AM
943 return ret;
944 }
febd17d6 945+EXPORT_SYMBOL_GPL(cap_mmap_addr);
0c3ec466
AM
946
947 int cap_mmap_file(struct file *file, unsigned long reqprot,
948 unsigned long prot, unsigned long flags)
949 {
950 return 0;
951 }
febd17d6 952+EXPORT_SYMBOL_GPL(cap_mmap_file);
c2c0f25c
AM
953
954 #ifdef CONFIG_SECURITY
955
7f207e10 956diff --git a/security/device_cgroup.c b/security/device_cgroup.c
febd17d6 957index 03c1652..f88c84b 100644
7f207e10
AM
958--- a/security/device_cgroup.c
959+++ b/security/device_cgroup.c
f6c5ef8b
AM
960@@ -7,6 +7,7 @@
961 #include <linux/device_cgroup.h>
962 #include <linux/cgroup.h>
963 #include <linux/ctype.h>
964+#include <linux/export.h>
965 #include <linux/list.h>
966 #include <linux/uaccess.h>
967 #include <linux/seq_file.h>
076b876e 968@@ -849,6 +850,7 @@ int __devcgroup_inode_permission(struct inode *inode, int mask)
537831f9
AM
969 return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
970 access);
7f207e10 971 }
febd17d6 972+EXPORT_SYMBOL_GPL(__devcgroup_inode_permission);
7f207e10
AM
973
974 int devcgroup_inode_mknod(int mode, dev_t dev)
975 {
976diff --git a/security/security.c b/security/security.c
e2f27e51 977index 4838e7f..36c741e 100644
7f207e10
AM
978--- a/security/security.c
979+++ b/security/security.c
5afbbe0d 980@@ -434,6 +434,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry)
7f207e10 981 return 0;
c2c0f25c 982 return call_int_hook(path_rmdir, 0, dir, dentry);
7f207e10 983 }
febd17d6 984+EXPORT_SYMBOL_GPL(security_path_rmdir);
7f207e10 985
5afbbe0d 986 int security_path_unlink(const struct path *dir, struct dentry *dentry)
7f207e10 987 {
5afbbe0d 988@@ -450,6 +451,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry,
7f207e10 989 return 0;
c2c0f25c 990 return call_int_hook(path_symlink, 0, dir, dentry, old_name);
7f207e10 991 }
febd17d6 992+EXPORT_SYMBOL_GPL(security_path_symlink);
7f207e10 993
5afbbe0d 994 int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
7f207e10 995 struct dentry *new_dentry)
5afbbe0d 996@@ -458,6 +460,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
7f207e10 997 return 0;
c2c0f25c 998 return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
7f207e10 999 }
febd17d6 1000+EXPORT_SYMBOL_GPL(security_path_link);
7f207e10 1001
5afbbe0d
AM
1002 int security_path_rename(const struct path *old_dir, struct dentry *old_dentry,
1003 const struct path *new_dir, struct dentry *new_dentry,
1004@@ -485,6 +488,7 @@ int security_path_truncate(const struct path *path)
7f207e10 1005 return 0;
c2c0f25c 1006 return call_int_hook(path_truncate, 0, path);
7f207e10 1007 }
febd17d6 1008+EXPORT_SYMBOL_GPL(security_path_truncate);
7f207e10 1009
5afbbe0d 1010 int security_path_chmod(const struct path *path, umode_t mode)
7eafdf33 1011 {
5afbbe0d 1012@@ -492,6 +496,7 @@ int security_path_chmod(const struct path *path, umode_t mode)
7f207e10 1013 return 0;
c2c0f25c 1014 return call_int_hook(path_chmod, 0, path, mode);
7f207e10 1015 }
febd17d6 1016+EXPORT_SYMBOL_GPL(security_path_chmod);
7f207e10 1017
5afbbe0d 1018 int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
7f207e10 1019 {
5afbbe0d 1020@@ -499,6 +504,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
7f207e10 1021 return 0;
c2c0f25c 1022 return call_int_hook(path_chown, 0, path, uid, gid);
7f207e10 1023 }
febd17d6 1024+EXPORT_SYMBOL_GPL(security_path_chown);
7f207e10 1025
5afbbe0d 1026 int security_path_chroot(const struct path *path)
7f207e10 1027 {
5afbbe0d 1028@@ -584,6 +590,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10 1029 return 0;
c2c0f25c 1030 return call_int_hook(inode_readlink, 0, dentry);
7f207e10 1031 }
febd17d6 1032+EXPORT_SYMBOL_GPL(security_inode_readlink);
7f207e10 1033
c2c0f25c
AM
1034 int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
1035 bool rcu)
5afbbe0d 1036@@ -599,6 +606,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 1037 return 0;
c2c0f25c 1038 return call_int_hook(inode_permission, 0, inode, mask);
7f207e10 1039 }
febd17d6 1040+EXPORT_SYMBOL_GPL(security_inode_permission);
7f207e10 1041
1e00d052 1042 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
7f207e10 1043 {
e2f27e51 1044@@ -758,6 +766,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
1045
1046 return fsnotify_perm(file, mask);
1047 }
febd17d6 1048+EXPORT_SYMBOL_GPL(security_file_permission);
7f207e10
AM
1049
1050 int security_file_alloc(struct file *file)
1051 {
e2f27e51 1052@@ -817,6 +826,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
7f207e10
AM
1053 return ret;
1054 return ima_file_mmap(file, prot);
1055 }
febd17d6 1056+EXPORT_SYMBOL_GPL(security_mmap_file);
7f207e10 1057
0c3ec466
AM
1058 int security_mmap_addr(unsigned long addr)
1059 {
7f207e10
AM
1060diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
1061--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1062+++ linux/Documentation/ABI/testing/debugfs-aufs 2016-10-09 16:55:36.476034536 +0200
86dc4139 1063@@ -0,0 +1,50 @@
7f207e10
AM
1064+What: /debug/aufs/si_<id>/
1065+Date: March 2009
f6b6e03d 1066+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1067+Description:
1068+ Under /debug/aufs, a directory named si_<id> is created
1069+ per aufs mount, where <id> is a unique id generated
1070+ internally.
1facf9fc 1071+
86dc4139
AM
1072+What: /debug/aufs/si_<id>/plink
1073+Date: Apr 2013
f6b6e03d 1074+Contact: J. R. Okajima <hooanon05g@gmail.com>
86dc4139
AM
1075+Description:
1076+ It has three lines and shows the information about the
1077+ pseudo-link. The first line is a single number
1078+ representing a number of buckets. The second line is a
1079+ number of pseudo-links per buckets (separated by a
1080+ blank). The last line is a single number representing a
1081+ total number of pseudo-links.
1082+ When the aufs mount option 'noplink' is specified, it
1083+ will show "1\n0\n0\n".
1084+
7f207e10
AM
1085+What: /debug/aufs/si_<id>/xib
1086+Date: March 2009
f6b6e03d 1087+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1088+Description:
1089+ It shows the consumed blocks by xib (External Inode Number
1090+ Bitmap), its block size and file size.
1091+ When the aufs mount option 'noxino' is specified, it
1092+ will be empty. About XINO files, see the aufs manual.
1093+
1094+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
1095+Date: March 2009
f6b6e03d 1096+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1097+Description:
1098+ It shows the consumed blocks by xino (External Inode Number
1099+ Translation Table), its link count, block size and file
1100+ size.
1101+ When the aufs mount option 'noxino' is specified, it
1102+ will be empty. About XINO files, see the aufs manual.
1103+
1104+What: /debug/aufs/si_<id>/xigen
1105+Date: March 2009
f6b6e03d 1106+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1107+Description:
1108+ It shows the consumed blocks by xigen (External Inode
1109+ Generation Table), its block size and file size.
1110+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
1111+ be created.
1112+ When the aufs mount option 'noxino' is specified, it
1113+ will be empty. About XINO files, see the aufs manual.
1114diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
1115--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1116+++ linux/Documentation/ABI/testing/sysfs-aufs 2016-10-09 16:55:36.476034536 +0200
392086de 1117@@ -0,0 +1,31 @@
7f207e10
AM
1118+What: /sys/fs/aufs/si_<id>/
1119+Date: March 2009
f6b6e03d 1120+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1121+Description:
1122+ Under /sys/fs/aufs, a directory named si_<id> is created
1123+ per aufs mount, where <id> is a unique id generated
1124+ internally.
1125+
1126+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
1127+Date: March 2009
f6b6e03d 1128+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1129+Description:
1130+ It shows the absolute path of a member directory (which
1131+ is called branch) in aufs, and its permission.
1132+
392086de
AM
1133+What: /sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
1134+Date: July 2013
f6b6e03d 1135+Contact: J. R. Okajima <hooanon05g@gmail.com>
392086de
AM
1136+Description:
1137+ It shows the id of a member directory (which is called
1138+ branch) in aufs.
1139+
7f207e10
AM
1140+What: /sys/fs/aufs/si_<id>/xi_path
1141+Date: March 2009
f6b6e03d 1142+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1143+Description:
1144+ It shows the absolute path of XINO (External Inode Number
1145+ Bitmap, Translation Table and Generation Table) file
1146+ even if it is the default path.
1147+ When the aufs mount option 'noxino' is specified, it
1148+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
1149diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
1150--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1151+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2016-10-09 16:55:36.479367956 +0200
7e9cd9fe 1152@@ -0,0 +1,170 @@
53392da6 1153+
8cdd5066 1154+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1155+#
1156+# This program is free software; you can redistribute it and/or modify
1157+# it under the terms of the GNU General Public License as published by
1158+# the Free Software Foundation; either version 2 of the License, or
1159+# (at your option) any later version.
1160+#
1161+# This program is distributed in the hope that it will be useful,
1162+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1163+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1164+# GNU General Public License for more details.
1165+#
1166+# You should have received a copy of the GNU General Public License
523b37e3 1167+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1168+
1169+Introduction
1170+----------------------------------------
1171+
1172+aufs [ei ju: ef es] | [a u f s]
1173+1. abbrev. for "advanced multi-layered unification filesystem".
1174+2. abbrev. for "another unionfs".
1175+3. abbrev. for "auf das" in German which means "on the" in English.
1176+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
1177+ But "Filesystem aufs Filesystem" is hard to understand.
1178+
1179+AUFS is a filesystem with features:
1180+- multi layered stackable unification filesystem, the member directory
1181+ is called as a branch.
1182+- branch permission and attribute, 'readonly', 'real-readonly',
7e9cd9fe 1183+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
53392da6
AM
1184+ combination.
1185+- internal "file copy-on-write".
1186+- logical deletion, whiteout.
1187+- dynamic branch manipulation, adding, deleting and changing permission.
1188+- allow bypassing aufs, user's direct branch access.
1189+- external inode number translation table and bitmap which maintains the
1190+ persistent aufs inode number.
1191+- seekable directory, including NFS readdir.
1192+- file mapping, mmap and sharing pages.
1193+- pseudo-link, hardlink over branches.
1194+- loopback mounted filesystem as a branch.
1195+- several policies to select one among multiple writable branches.
1196+- revert a single systemcall when an error occurs in aufs.
1197+- and more...
1198+
1199+
1200+Multi Layered Stackable Unification Filesystem
1201+----------------------------------------------------------------------
1202+Most people already know what it is.
1203+It is a filesystem which unifies several directories and provides a
1204+merged single directory. When users access a file, the access will be
1205+passed/re-directed/converted (sorry, I am not sure which English word is
1206+correct) to the real file on the member filesystem. The member
1207+filesystem is called 'lower filesystem' or 'branch' and has a mode
1208+'readonly' and 'readwrite.' And the deletion for a file on the lower
1209+readonly branch is handled by creating 'whiteout' on the upper writable
1210+branch.
1211+
1212+On LKML, there have been discussions about UnionMount (Jan Blunck,
1213+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
1214+different approaches to implement the merged-view.
1215+The former tries putting it into VFS, and the latter implements as a
1216+separate filesystem.
1217+(If I misunderstand about these implementations, please let me know and
1218+I shall correct it. Because it is a long time ago when I read their
1219+source files last time).
1220+
1221+UnionMount's approach will be able to be small, but it may be hard to share
1222+branches between several UnionMount since the whiteout in it is
1223+implemented in the inode on branch filesystem and always
1224+shared. According to Bharata's post, readdir does not seem to be
1225+finished yet.
1226+There are several missing features known in these implementations such as
1227+- for users, the inode number may change silently. eg. copy-up.
1228+- link(2) may break by copy-up.
1229+- read(2) may get an obsoleted filedata (fstat(2) too).
1230+- fcntl(F_SETLK) may be broken by copy-up.
1231+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
1232+ open(O_RDWR).
1233+
7e9cd9fe
AM
1234+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
1235+merged into mainline. This is another implementation of UnionMount as a
1236+separated filesystem. All the limitations and known problems which
1237+UnionMount has are equally inherited by the "overlay" filesystem.
1238+
1239+Unionfs has a longer history. When I started implementing a stackable
1240+filesystem (Aug 2005), it already existed. It has virtual super_block,
1241+inode, dentry and file objects and they have an array pointing lower
1242+same kind objects. After contributing many patches for Unionfs, I
1243+re-started my project AUFS (Jun 2006).
53392da6
AM
1244+
1245+In AUFS, the structure of the filesystem resembles Unionfs, but I
1246+implemented my own ideas, approaches and enhancements and it became
1247+a totally different one.
1248+
1249+Comparing DM snapshot and fs based implementation
1250+- the number of bytes to be copied between devices is much smaller.
1251+- the type of filesystem must be one and only.
1252+- the fs must be writable, no readonly fs, even for the lower original
1253+ device. so the compression fs will not be usable. but if we use
1254+ loopback mount, we may address this issue.
1255+ for instance,
1256+ mount /cdrom/squashfs.img /sq
1257+ losetup /sq/ext2.img
1258+ losetup /somewhere/cow
1259+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
1260+- it will be difficult (or needs more operations) to extract the
1261+ difference between the original device and COW.
1262+- DM snapshot-merge may help a lot when users try merging. in the
1263+ fs-layer union, users will use rsync(1).
1264+
7e9cd9fe
AM
1265+You may want to read my old paper "Filesystems in LiveCD"
1266+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
53392da6 1267+
7e9cd9fe
AM
1268+
1269+Several characters/aspects/persona of aufs
53392da6
AM
1270+----------------------------------------------------------------------
1271+
7e9cd9fe 1272+Aufs has several characters, aspects or persona.
53392da6
AM
1273+1. a filesystem, callee of VFS helper
1274+2. sub-VFS, caller of VFS helper for branches
1275+3. a virtual filesystem which maintains persistent inode number
1276+4. reader/writer of files on branches such like an application
1277+
1278+1. Callee of VFS Helper
1279+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
1280+unlink(2) from an application reaches sys_unlink() kernel function and
1281+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
1282+calls filesystem specific unlink operation. Actually aufs implements the
1283+unlink operation but it behaves like a redirector.
1284+
1285+2. Caller of VFS Helper for Branches
1286+aufs_unlink() passes the unlink request to the branch filesystem as if
1287+it were called from VFS. So the called unlink operation of the branch
1288+filesystem acts as usual. As a caller of VFS helper, aufs should handle
1289+every necessary pre/post operation for the branch filesystem.
1290+- acquire the lock for the parent dir on a branch
1291+- lookup in a branch
1292+- revalidate dentry on a branch
1293+- mnt_want_write() for a branch
1294+- vfs_unlink() for a branch
1295+- mnt_drop_write() for a branch
1296+- release the lock on a branch
1297+
1298+3. Persistent Inode Number
1299+One of the most important issues for a filesystem is to maintain inode
1300+numbers. This is particularly important to support exporting a
1301+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
1302+backend block device for its own. But some storage is necessary to
7e9cd9fe
AM
1303+keep and maintain the inode numbers. It may be a large space and may not
1304+suit to keep in memory. Aufs rents some space from its first writable
1305+branch filesystem (by default) and creates file(s) on it. These files
1306+are created by aufs internally and removed soon (currently) keeping
1307+opened.
53392da6
AM
1308+Note: Because these files are removed, they are totally gone after
1309+ unmounting aufs. It means the inode numbers are not persistent
1310+ across unmount or reboot. I have a plan to make them really
1311+ persistent which will be important for aufs on NFS server.
1312+
1313+4. Read/Write Files Internally (copy-on-write)
1314+Because a branch can be readonly, when you write a file on it, aufs will
1315+"copy-up" it to the upper writable branch internally. And then write the
1316+originally requested thing to the file. Generally kernel doesn't
1317+open/read/write file actively. In aufs, even a single write may cause an
1318+internal "file copy". This behaviour is very similar to the cp(1) command.
1319+
1320+Some people may think it is better to pass such work to user space
1321+helper, instead of doing in kernel space. Actually I am still thinking
1322+about it. But currently I have implemented it in kernel space.
1323diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
1324--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1325+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2016-10-09 16:55:36.479367956 +0200
7e9cd9fe 1326@@ -0,0 +1,258 @@
53392da6 1327+
8cdd5066 1328+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1329+#
1330+# This program is free software; you can redistribute it and/or modify
1331+# it under the terms of the GNU General Public License as published by
1332+# the Free Software Foundation; either version 2 of the License, or
1333+# (at your option) any later version.
1334+#
1335+# This program is distributed in the hope that it will be useful,
1336+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1337+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1338+# GNU General Public License for more details.
1339+#
1340+# You should have received a copy of the GNU General Public License
523b37e3 1341+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1342+
1343+Basic Aufs Internal Structure
1344+
1345+Superblock/Inode/Dentry/File Objects
1346+----------------------------------------------------------------------
1347+As like an ordinary filesystem, aufs has its own
1348+superblock/inode/dentry/file objects. All these objects have a
1349+dynamically allocated array and store the same kind of pointers to the
1350+lower filesystem, branch.
1351+For example, when you build a union with one readwrite branch and one
1352+readonly, mounted /au, /rw and /ro respectively.
1353+- /au = /rw + /ro
1354+- /ro/fileA exists but /rw/fileA does not
1355+
1356+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
1357+pointers are stored in a aufs dentry. The array in aufs dentry will be,
7e9cd9fe 1358+- [0] = NULL (because /rw/fileA doesn't exist)
53392da6
AM
1359+- [1] = /ro/fileA
1360+
1361+This style of an array is essentially same to the aufs
1362+superblock/inode/dentry/file objects.
1363+
1364+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1365+branches dynamically, each of these objects has its own generation. When
1366+branches are changed, the generation in aufs superblock is
1367+incremented. And the generation in another object is compared when it is
1368+accessed. When the generation in that object is obsolete, aufs
1369+refreshes the internal array.
53392da6
AM
1370+
1371+
1372+Superblock
1373+----------------------------------------------------------------------
1374+Additionally aufs superblock has some data for policies to select one
1375+among multiple writable branches, XIB files, pseudo-links and kobject.
1376+See below in detail.
7e9cd9fe
AM
1377+About the policies which supports copy-down a directory, see
1378+wbr_policy.txt too.
53392da6
AM
1379+
1380+
1381+Branch and XINO(External Inode Number Translation Table)
1382+----------------------------------------------------------------------
1383+Every branch has its own xino (external inode number translation table)
1384+file. The xino file is created and unlinked by aufs internally. When two
1385+members of a union exist on the same filesystem, they share the single
1386+xino file.
1387+The struct of a xino file is simple, just a sequence of aufs inode
1388+numbers which is indexed by the lower inode number.
1389+In the above sample, assume the inode number of /ro/fileA is i111 and
1390+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
1391+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
1392+
1393+When the inode numbers are not contiguous, the xino file will be sparse
1394+which has a hole in it and doesn't consume as much disk space as it
1395+might appear. If your branch filesystem consumes disk space for such
1396+holes, then you should specify 'xino=' option at mounting aufs.
1397+
7e9cd9fe
AM
1398+Aufs has a mount option to free the disk blocks for such holes in XINO
1399+files on tmpfs or ramdisk. But it is not so effective actually. If you
1400+meet a problem of disk shortage due to XINO files, then you should try
1401+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
1402+The patch localizes the assignment inumbers per tmpfs-mount and avoid
1403+the holes in XINO files.
1404+
53392da6 1405+Also a writable branch has three kinds of "whiteout bases". All these
7e9cd9fe 1406+are existed when the branch is joined to aufs, and their names are
53392da6
AM
1407+whiteout-ed doubly, so that users will never see their names in aufs
1408+hierarchy.
7e9cd9fe 1409+1. a regular file which will be hardlinked to all whiteouts.
53392da6 1410+2. a directory to store a pseudo-link.
7e9cd9fe 1411+3. a directory to store an "orphan"-ed file temporary.
53392da6
AM
1412+
1413+1. Whiteout Base
1414+ When you remove a file on a readonly branch, aufs handles it as a
1415+ logical deletion and creates a whiteout on the upper writable branch
1416+ as a hardlink of this file in order not to consume inode on the
1417+ writable branch.
1418+2. Pseudo-link Dir
1419+ See below, Pseudo-link.
1420+3. Step-Parent Dir
1421+ When "fileC" exists on the lower readonly branch only and it is
1422+ opened and removed with its parent dir, and then user writes
1423+ something into it, then aufs copies-up fileC to this
1424+ directory. Because there is no other dir to store fileC. After
1425+ creating a file under this dir, the file is unlinked.
1426+
1427+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1428+dynamically, a branch has its own id. When the branch order changes,
1429+aufs finds the new index by searching the branch id.
53392da6
AM
1430+
1431+
1432+Pseudo-link
1433+----------------------------------------------------------------------
1434+Assume "fileA" exists on the lower readonly branch only and it is
1435+hardlinked to "fileB" on the branch. When you write something to fileA,
1436+aufs copies-up it to the upper writable branch. Additionally aufs
1437+creates a hardlink under the Pseudo-link Directory of the writable
1438+branch. The inode of a pseudo-link is kept in aufs super_block as a
1439+simple list. If fileB is read after unlinking fileA, aufs returns
1440+filedata from the pseudo-link instead of the lower readonly
1441+branch. Because the pseudo-link is based upon the inode, to keep the
7e9cd9fe 1442+inode number by xino (see above) is essentially necessary.
53392da6
AM
1443+
1444+All the hardlinks under the Pseudo-link Directory of the writable branch
1445+should be restored in a proper location later. Aufs provides a utility
1446+to do this. The userspace helpers are executed at remounting and unmounting
1447+aufs by default.
1448+While this utility is running, it puts aufs into the pseudo-link
1449+maintenance mode. In this mode, only the process which began the
1450+maintenance mode (and its child processes) is allowed to operate in
1451+aufs. Some other processes which are not related to the pseudo-link will
1452+be allowed to run too, but the rest have to return an error or wait
1453+until the maintenance mode ends. If a process already acquires an inode
1454+mutex (in VFS), it has to return an error.
1455+
1456+
1457+XIB(external inode number bitmap)
1458+----------------------------------------------------------------------
1459+In addition to the xino file per branch, aufs has an external inode number
7e9cd9fe
AM
1460+bitmap in a superblock object. It is also an internal file such like a
1461+xino file.
53392da6
AM
1462+It is a simple bitmap to mark whether the aufs inode number is in-use or
1463+not.
1464+To reduce the file I/O, aufs prepares a single memory page to cache xib.
1465+
7e9cd9fe 1466+As well as XINO files, aufs has a feature to truncate/refresh XIB to
53392da6
AM
1467+reduce the number of consumed disk blocks for these files.
1468+
1469+
1470+Virtual or Vertical Dir, and Readdir in Userspace
1471+----------------------------------------------------------------------
1472+In order to support multiple layers (branches), aufs readdir operation
1473+constructs a virtual dir block on memory. For readdir, aufs calls
1474+vfs_readdir() internally for each dir on branches, merges their entries
1475+with eliminating the whiteout-ed ones, and sets it to file (dir)
1476+object. So the file object has its entry list until it is closed. The
1477+entry list will be updated when the file position is zero and becomes
7e9cd9fe 1478+obsoleted. This decision is made in aufs automatically.
53392da6
AM
1479+
1480+The dynamically allocated memory block for the name of entries has a
1481+unit of 512 bytes (by default) and stores the names contiguously (no
1482+padding). Another block for each entry is handled by kmem_cache too.
1483+While building dir blocks, aufs creates a hash list and judges whether
1484+the entry is whiteouted by its upper branch or already listed.
1485+The merged result is cached in the corresponding inode object and
1486+maintained by a customizable life-time option.
1487+
1488+Some people may say it can be a security hole or invite a DoS attack
1489+since the opened and once readdir-ed dir (file object) holds its entry
1490+list and becomes a pressure for system memory. But I'd say it is similar
1491+to files under /proc or /sys. The virtual files in them also hold a
1492+memory page (generally) while they are opened. When an idea to reduce
1493+memory for them is introduced, it will be applied to aufs too.
1494+For those who really hate this situation, I've developed readdir(3)
1495+library which operates this merging in userspace. You just need to set
1496+LD_PRELOAD environment variable, and aufs will not consume any memory in
1497+kernel space for readdir(3).
1498+
1499+
1500+Workqueue
1501+----------------------------------------------------------------------
1502+Aufs sometimes requires privileged access to a branch. For instance,
1503+in copy-up/down operation. When a user process is going to make changes
1504+to a file which exists in the lower readonly branch only, and the mode
1505+of one of ancestor directories may not be writable by a user
1506+process. Here aufs copies-up the file with its ancestors and they may
1507+require privilege to set its owner/group/mode/etc.
1508+This is a typical case of an application character of aufs (see
1509+Introduction).
1510+
1511+Aufs uses workqueue synchronously for this case. It creates its own
1512+workqueue. The workqueue is a kernel thread and has privilege. Aufs
1513+passes the request to call mkdir or write (for example), and wait for
1514+its completion. This approach solves a problem of a signal handler
1515+simply.
1516+If aufs didn't adopt the workqueue and changed the privilege of the
7e9cd9fe
AM
1517+process, then the process may receive the unexpected SIGXFSZ or other
1518+signals.
53392da6
AM
1519+
1520+Also aufs uses the system global workqueue ("events" kernel thread) too
1521+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
1522+whiteout base and etc. This is unrelated to a privilege.
1523+Most of aufs operation tries acquiring a rw_semaphore for aufs
1524+superblock at the beginning, at the same time waits for the completion
1525+of all queued asynchronous tasks.
1526+
1527+
1528+Whiteout
1529+----------------------------------------------------------------------
1530+The whiteout in aufs is very similar to Unionfs's. That is represented
1531+by its filename. UnionMount takes an approach of a file mode, but I am
1532+afraid several utilities (find(1) or something) will have to support it.
1533+
1534+Basically the whiteout represents "logical deletion" which stops aufs to
1535+lookup further, but also it represents "dir is opaque" which also stops
7e9cd9fe 1536+further lookup.
53392da6
AM
1537+
1538+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
1539+In order to make several functions in a single systemcall to be
1540+revertible, aufs adopts an approach to rename a directory to a temporary
1541+unique whiteouted name.
1542+For example, in rename(2) dir where the target dir already existed, aufs
1543+renames the target dir to a temporary unique whiteouted name before the
7e9cd9fe 1544+actual rename on a branch, and then handles other actions (make it opaque,
53392da6
AM
1545+update the attributes, etc). If an error happens in these actions, aufs
1546+simply renames the whiteouted name back and returns an error. If all are
1547+succeeded, aufs registers a function to remove the whiteouted unique
1548+temporary name completely and asynchronously to the system global
1549+workqueue.
1550+
1551+
1552+Copy-up
1553+----------------------------------------------------------------------
1554+It is a well-known feature or concept.
1555+When user modifies a file on a readonly branch, aufs operates "copy-up"
1556+internally and makes change to the new file on the upper writable branch.
1557+When the trigger systemcall does not update the timestamps of the parent
1558+dir, aufs reverts it after copy-up.
c2b27bf2
AM
1559+
1560+
1561+Move-down (aufs3.9 and later)
1562+----------------------------------------------------------------------
1563+"Copy-up" is one of the essential feature in aufs. It copies a file from
1564+the lower readonly branch to the upper writable branch when a user
1565+changes something about the file.
1566+"Move-down" is an opposite action of copy-up. Basically this action is
1567+run manually instead of automatically and internally.
076b876e
AM
1568+For design and implementation, aufs has to consider these issues.
1569+- whiteout for the file may exist on the lower branch.
1570+- ancestor directories may not exist on the lower branch.
1571+- diropq for the ancestor directories may exist on the upper branch.
1572+- free space on the lower branch will reduce.
1573+- another access to the file may happen during moving-down, including
7e9cd9fe 1574+ UDBA (see "Revalidate Dentry and UDBA").
076b876e
AM
1575+- the file should not be hard-linked nor pseudo-linked. they should be
1576+ handled by auplink utility later.
c2b27bf2
AM
1577+
1578+Sometimes users want to move-down a file from the upper writable branch
1579+to the lower readonly or writable branch. For instance,
1580+- the free space of the upper writable branch is going to run out.
1581+- create a new intermediate branch between the upper and lower branch.
1582+- etc.
1583+
1584+For this purpose, use "aumvdown" command in aufs-util.git.
b912730e
AM
1585diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt linux/Documentation/filesystems/aufs/design/03atomic_open.txt
1586--- /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1587+++ linux/Documentation/filesystems/aufs/design/03atomic_open.txt 2016-10-09 16:55:36.479367956 +0200
b912730e
AM
1588@@ -0,0 +1,85 @@
1589+
8cdd5066 1590+# Copyright (C) 2015-2016 Junjiro R. Okajima
b912730e
AM
1591+#
1592+# This program is free software; you can redistribute it and/or modify
1593+# it under the terms of the GNU General Public License as published by
1594+# the Free Software Foundation; either version 2 of the License, or
1595+# (at your option) any later version.
1596+#
1597+# This program is distributed in the hope that it will be useful,
1598+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1599+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1600+# GNU General Public License for more details.
1601+#
1602+# You should have received a copy of the GNU General Public License
1603+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1604+
1605+Support for a branch who has its ->atomic_open()
1606+----------------------------------------------------------------------
1607+The filesystems who implement its ->atomic_open() are not majority. For
1608+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
1609+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
1610+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
1611+sure whether all filesystems who have ->atomic_open() behave like this,
1612+but NFSv4 surely returns the error.
1613+
1614+In order to support ->atomic_open() for aufs, there are a few
1615+approaches.
1616+
1617+A. Introduce aufs_atomic_open()
1618+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
1619+ branch fs.
1620+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
1621+ an aufs user Pip Cet's approach
1622+ - calls aufs_create(), VFS finish_open() and notify_change().
1623+ - pass fake-mode to finish_open(), and then correct the mode by
1624+ notify_change().
1625+C. Extend aufs_open() to call branch fs's ->atomic_open()
1626+ - no aufs_atomic_open().
1627+ - aufs_lookup() registers the TID to an aufs internal object.
1628+ - aufs_create() does nothing when the matching TID is registered, but
1629+ registers the mode.
1630+ - aufs_open() calls branch fs's ->atomic_open() when the matching
1631+ TID is registered.
1632+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
1633+ credential
1634+ - no aufs_atomic_open().
1635+ - aufs_create() registers the TID to an internal object. this info
1636+ represents "this process created this file just now."
1637+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
1638+ registered TID and re-try open() with superuser's credential.
1639+
1640+Pros and cons for each approach.
1641+
1642+A.
1643+ - straightforward but highly depends upon VFS internal.
1644+ - the atomic behaviour is kept.
1645+ - some of parameters such as nameidata are hard to reproduce for
1646+ branch fs.
1647+ - large overhead.
1648+B.
1649+ - easy to implement.
1650+ - the atomic behaviour is lost.
1651+C.
1652+ - the atomic behaviour is kept.
1653+ - dirty and tricky.
1654+ - VFS checks whether the file is created correctly after calling
1655+ ->create(), which means this approach doesn't work.
1656+D.
1657+ - easy to implement.
1658+ - the atomic behaviour is lost.
1659+ - to open a file with superuser's credential and give it to a user
1660+ process is a bad idea, since the file object keeps the credential
1661+ in it. It may affect LSM or something. This approach doesn't work
1662+ either.
1663+
1664+The approach A is ideal, but it is hard to implement. So here is a
1665+variation of A, which is to be implemented.
1666+
1667+A-1. Introduce aufs_atomic_open()
1668+ - calls branch fs ->atomic_open() if exists. otherwise calls
1669+ vfs_create() and finish_open().
1670+ - the demerit is that the several checks after branch fs
1671+ ->atomic_open() are lost. in the ordinary case, the checks are
1672+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
1673+ be implemented in aufs, but not all I am afraid.
53392da6
AM
1674diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
1675--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1676+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2016-10-09 16:55:36.479367956 +0200
7e9cd9fe 1677@@ -0,0 +1,113 @@
53392da6 1678+
8cdd5066 1679+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1680+#
1681+# This program is free software; you can redistribute it and/or modify
1682+# it under the terms of the GNU General Public License as published by
1683+# the Free Software Foundation; either version 2 of the License, or
1684+# (at your option) any later version.
1685+#
1686+# This program is distributed in the hope that it will be useful,
1687+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1688+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1689+# GNU General Public License for more details.
1690+#
1691+# You should have received a copy of the GNU General Public License
523b37e3 1692+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1693+
1694+Lookup in a Branch
1695+----------------------------------------------------------------------
1696+Since aufs has a character of sub-VFS (see Introduction), it operates
7e9cd9fe
AM
1697+lookup for branches as VFS does. It may be a heavy work. But almost all
1698+lookup operation in aufs is the simplest case, ie. lookup only an entry
1699+directly connected to its parent. Digging down the directory hierarchy
1700+is unnecessary. VFS has a function lookup_one_len() for that use, and
1701+aufs calls it.
1702+
1703+When a branch is a remote filesystem, aufs basically relies upon its
53392da6
AM
1704+->d_revalidate(), also aufs forces the hardest revalidate tests for
1705+them.
1706+For d_revalidate, aufs implements three levels of revalidate tests. See
1707+"Revalidate Dentry and UDBA" in detail.
1708+
1709+
076b876e
AM
1710+Test Only the Highest One for the Directory Permission (dirperm1 option)
1711+----------------------------------------------------------------------
1712+Let's try case study.
1713+- aufs has two branches, upper readwrite and lower readonly.
1714+ /au = /rw + /ro
1715+- "dirA" exists under /ro, but not under /rw. and its mode is 0700.
1716+- user invoked "chmod a+rx /au/dirA"
1717+- the internal copy-up is activated and "/rw/dirA" is created and its
7e9cd9fe 1718+ permission bits are set to world readable.
076b876e
AM
1719+- then "/au/dirA" becomes world readable?
1720+
1721+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1722+or it may be a natively readonly filesystem. If aufs respects the lower
1723+branch, it should not respond readdir request from other users. But user
1724+allowed it by chmod. Should aufs really reject showing the entries
1725+under /ro/dirA?
1726+
7e9cd9fe
AM
1727+To be honest, I don't have a good solution for this case. So aufs
1728+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
1729+users.
076b876e
AM
1730+When dirperm1 is specified, aufs checks only the highest one for the
1731+directory permission, and shows the entries. Otherwise, as usual, checks
1732+every dir existing on all branches and rejects the request.
1733+
1734+As a side effect, dirperm1 option improves the performance of aufs
1735+because the number of permission check is reduced when the number of
1736+branch is many.
1737+
1738+
53392da6
AM
1739+Revalidate Dentry and UDBA (User's Direct Branch Access)
1740+----------------------------------------------------------------------
1741+Generally VFS helpers re-validate a dentry as a part of lookup.
1742+0. digging down the directory hierarchy.
1743+1. lock the parent dir by its i_mutex.
1744+2. lookup the final (child) entry.
1745+3. revalidate it.
1746+4. call the actual operation (create, unlink, etc.)
1747+5. unlock the parent dir
1748+
1749+If the filesystem implements its ->d_revalidate() (step 3), then it is
1750+called. Actually aufs implements it and checks the dentry on a branch is
1751+still valid.
1752+But it is not enough. Because aufs has to release the lock for the
1753+parent dir on a branch at the end of ->lookup() (step 2) and
1754+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1755+held by VFS.
1756+If the file on a branch is changed directly, eg. bypassing aufs, after
1757+aufs released the lock, then the subsequent operation may cause
1758+something unpleasant result.
1759+
1760+This situation is a result of VFS architecture, ->lookup() and
1761+->d_revalidate() is separated. But I never say it is wrong. It is a good
1762+design from VFS's point of view. It is just not suitable for sub-VFS
1763+character in aufs.
1764+
1765+Aufs supports such case by three level of revalidation which is
1766+selectable by user.
1767+1. Simple Revalidate
1768+ Addition to the native flow in VFS's, confirm the child-parent
1769+ relationship on the branch just after locking the parent dir on the
1770+ branch in the "actual operation" (step 4). When this validation
1771+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1772+ checks the validation of the dentry on branches.
1773+2. Monitor Changes Internally by Inotify/Fsnotify
1774+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
1775+ the dentry on the branch, and returns EBUSY if it finds different
1776+ dentry.
1777+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
1778+ during it is in cache. When the event is notified, aufs registers a
1779+ function to kernel 'events' thread by schedule_work(). And the
1780+ function sets some special status to the cached aufs dentry and inode
1781+ private data. If they are not cached, then aufs has nothing to
1782+ do. When the same file is accessed through aufs (step 0-3) later,
1783+ aufs will detect the status and refresh all necessary data.
1784+ In this mode, aufs has to ignore the event which is fired by aufs
1785+ itself.
1786+3. No Extra Validation
1787+ This is the simplest test and doesn't add any additional revalidation
7e9cd9fe 1788+ test, and skip the revalidation in step 4. It is useful and improves
53392da6
AM
1789+ aufs performance when system surely hide the aufs branches from user,
1790+ by over-mounting something (or another method).
1791diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
1792--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1793+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2016-10-09 16:55:36.482701377 +0200
7e9cd9fe 1794@@ -0,0 +1,74 @@
53392da6 1795+
8cdd5066 1796+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1797+#
1798+# This program is free software; you can redistribute it and/or modify
1799+# it under the terms of the GNU General Public License as published by
1800+# the Free Software Foundation; either version 2 of the License, or
1801+# (at your option) any later version.
1802+#
1803+# This program is distributed in the hope that it will be useful,
1804+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1805+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1806+# GNU General Public License for more details.
1807+#
1808+# You should have received a copy of the GNU General Public License
523b37e3 1809+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1810+
1811+Branch Manipulation
1812+
1813+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1814+and changing its permission/attribute, there are a lot of works to do.
1815+
1816+
1817+Add a Branch
1818+----------------------------------------------------------------------
1819+o Confirm the adding dir exists outside of aufs, including loopback
7e9cd9fe 1820+ mount, and its various attributes.
53392da6
AM
1821+o Initialize the xino file and whiteout bases if necessary.
1822+ See struct.txt.
1823+
1824+o Check the owner/group/mode of the directory
1825+ When the owner/group/mode of the adding directory differs from the
1826+ existing branch, aufs issues a warning because it may impose a
1827+ security risk.
1828+ For example, when an upper writable branch has a world writable empty
1829+ top directory, a malicious user can create any files on the writable
1830+ branch directly, like copy-up and modify manually. If something like
1831+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper
1832+ writable branch, and the writable branch is world-writable, then a
1833+ malicious guy may create /etc/passwd on the writable branch directly
1834+ and the infected file will be valid in aufs.
7e9cd9fe 1835+ I am afraid it can be a security issue, but aufs can do nothing except
53392da6
AM
1836+ producing a warning.
1837+
1838+
1839+Delete a Branch
1840+----------------------------------------------------------------------
1841+o Confirm the deleting branch is not busy
1842+ To be general, there is one merit to adopt "remount" interface to
1843+ manipulate branches. It is to discard caches. At deleting a branch,
1844+ aufs checks the still cached (and connected) dentries and inodes. If
1845+ there are any, then they are all in-use. An inode without its
1846+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
1847+
1848+ For the cached one, aufs checks whether the same named entry exists on
1849+ other branches.
1850+ If the cached one is a directory, because aufs provides a merged view
1851+ to users, as long as one dir is left on any branch aufs can show the
1852+ dir to users. In this case, the branch can be removed from aufs.
1853+ Otherwise aufs rejects deleting the branch.
1854+
1855+ If any file on the deleting branch is opened by aufs, then aufs
1856+ rejects deleting.
1857+
1858+
1859+Modify the Permission of a Branch
1860+----------------------------------------------------------------------
1861+o Re-initialize or remove the xino file and whiteout bases if necessary.
1862+ See struct.txt.
1863+
1864+o rw --> ro: Confirm the modifying branch is not busy
1865+ Aufs rejects the request if any of these conditions are true.
1866+ - a file on the branch is mmap-ed.
1867+ - a regular file on the branch is opened for write and there is no
1868+ same named entry on the upper branch.
1869diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1870--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1871+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2016-10-09 16:55:36.482701377 +0200
523b37e3 1872@@ -0,0 +1,64 @@
53392da6 1873+
8cdd5066 1874+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1875+#
1876+# This program is free software; you can redistribute it and/or modify
1877+# it under the terms of the GNU General Public License as published by
1878+# the Free Software Foundation; either version 2 of the License, or
1879+# (at your option) any later version.
1880+#
1881+# This program is distributed in the hope that it will be useful,
1882+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1883+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1884+# GNU General Public License for more details.
1885+#
1886+# You should have received a copy of the GNU General Public License
523b37e3 1887+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1888+
1889+Policies to Select One among Multiple Writable Branches
1890+----------------------------------------------------------------------
1891+When the number of writable branch is more than one, aufs has to decide
1892+the target branch for file creation or copy-up. By default, the highest
1893+writable branch which has the parent (or ancestor) dir of the target
1894+file is chosen (top-down-parent policy).
1895+By user's request, aufs implements some other policies to select the
7e9cd9fe
AM
1896+writable branch, for file creation several policies, round-robin,
1897+most-free-space, and other policies. For copy-up, top-down-parent,
1898+bottom-up-parent, bottom-up and others.
53392da6
AM
1899+
1900+As expected, the round-robin policy selects the branch in circular. When
1901+you have two writable branches and creates 10 new files, 5 files will be
1902+created for each branch. mkdir(2) systemcall is an exception. When you
1903+create 10 new directories, all will be created on the same branch.
1904+And the most-free-space policy selects the one which has most free
1905+space among the writable branches. The amount of free space will be
1906+checked by aufs internally, and users can specify its time interval.
1907+
1908+The policies for copy-up is more simple,
1909+top-down-parent is equivalent to the same named one in create policy,
1910+bottom-up-parent selects the writable branch where the parent dir
1911+exists and the nearest upper one from the copyup-source,
1912+bottom-up selects the nearest upper writable branch from the
1913+copyup-source, regardless the existence of the parent dir.
1914+
1915+There are some rules or exceptions to apply these policies.
1916+- If there is a readonly branch above the policy-selected branch and
1917+ the parent dir is marked as opaque (a variation of whiteout), or the
1918+ target (creating) file is whiteout-ed on the upper readonly branch,
1919+ then the result of the policy is ignored and the target file will be
1920+ created on the nearest upper writable branch than the readonly branch.
1921+- If there is a writable branch above the policy-selected branch and
1922+ the parent dir is marked as opaque or the target file is whiteouted
1923+ on the branch, then the result of the policy is ignored and the target
1924+ file will be created on the highest one among the upper writable
1925+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1926+ it as usual.
1927+- link(2) and rename(2) systemcalls are exceptions in every policy.
1928+ They try selecting the branch where the source exists as possible
1929+ since copyup a large file will take long time. If it can't be,
1930+ ie. the branch where the source exists is readonly, then they will
1931+ follow the copyup policy.
1932+- There is an exception for rename(2) when the target exists.
1933+ If the rename target exists, aufs compares the index of the branches
1934+ where the source and the target exists and selects the higher
1935+ one. If the selected branch is readonly, then aufs follows the
1936+ copyup policy.
076b876e
AM
1937diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt linux/Documentation/filesystems/aufs/design/06fhsm.txt
1938--- /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1939+++ linux/Documentation/filesystems/aufs/design/06fhsm.txt 2016-10-09 16:55:36.482701377 +0200
076b876e
AM
1940@@ -0,0 +1,120 @@
1941+
8cdd5066 1942+# Copyright (C) 2011-2016 Junjiro R. Okajima
076b876e
AM
1943+#
1944+# This program is free software; you can redistribute it and/or modify
1945+# it under the terms of the GNU General Public License as published by
1946+# the Free Software Foundation; either version 2 of the License, or
1947+# (at your option) any later version.
1948+#
1949+# This program is distributed in the hope that it will be useful,
1950+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1951+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1952+# GNU General Public License for more details.
1953+#
1954+# You should have received a copy of the GNU General Public License
1955+# along with this program; if not, write to the Free Software
1956+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1957+
1958+
1959+File-based Hierarchical Storage Management (FHSM)
1960+----------------------------------------------------------------------
1961+Hierarchical Storage Management (or HSM) is a well-known feature in the
1962+storage world. Aufs provides this feature as file-based with multiple
7e9cd9fe 1963+writable branches, based upon the principle of "Colder, the Lower".
076b876e 1964+Here the word "colder" means that the less used files, and "lower" means
7e9cd9fe 1965+that the position in the order of the stacked branches vertically.
076b876e
AM
1966+These multiple writable branches are prioritized, ie. the topmost one
1967+should be the fastest drive and be used heavily.
1968+
1969+o Characters in aufs FHSM story
1970+- aufs itself and a new branch attribute.
1971+- a new ioctl interface to move-down and to establish a connection with
1972+ the daemon ("move-down" is a converse of "copy-up").
1973+- userspace tool and daemon.
1974+
1975+The userspace daemon establishes a connection with aufs and waits for
1976+the notification. The notified information is very similar to struct
1977+statfs containing the number of consumed blocks and inodes.
1978+When the consumed blocks/inodes of a branch exceeds the user-specified
1979+upper watermark, the daemon activates its move-down process until the
1980+consumed blocks/inodes reaches the user-specified lower watermark.
1981+
1982+The actual move-down is done by aufs based upon the request from
1983+user-space since we need to maintain the inode number and the internal
1984+pointer arrays in aufs.
1985+
1986+Currently aufs FHSM handles the regular files only. Additionally they
1987+must not be hard-linked nor pseudo-linked.
1988+
1989+
1990+o Cowork of aufs and the user-space daemon
1991+ During the userspace daemon established the connection, aufs sends a
1992+ small notification to it whenever aufs writes something into the
1993+ writable branch. But it may cost high since aufs issues statfs(2)
1994+ internally. So user can specify a new option to cache the
1995+ info. Actually the notification is controlled by these factors.
1996+ + the specified cache time.
1997+ + classified as "force" by aufs internally.
1998+ Until the specified time expires, aufs doesn't send the info
1999+ except the forced cases. When aufs decides forcing, the info is always
2000+ notified to userspace.
2001+ For example, the number of free inodes is generally large enough and
2002+ the shortage of it happens rarely. So aufs doesn't force the
2003+ notification when creating a new file, directory and others. This is
2004+ the typical case which aufs doesn't force.
2005+ When aufs writes the actual filedata and the files consume any of new
2006+ blocks, the aufs forces notifying.
2007+
2008+
2009+o Interfaces in aufs
2010+- New branch attribute.
2011+ + fhsm
2012+ Specifies that the branch is managed by FHSM feature. In other words,
2013+ participant in the FHSM.
2014+ When nofhsm is set to the branch, it will not be the source/target
2015+ branch of the move-down operation. This attribute is set
2016+ independently from coo and moo attributes, and if you want full
2017+ FHSM, you should specify them as well.
2018+- New mount option.
2019+ + fhsm_sec
2020+ Specifies a second to suppress many less important info to be
2021+ notified.
2022+- New ioctl.
2023+ + AUFS_CTL_FHSM_FD
2024+ create a new file descriptor which userspace can read the notification
2025+ (a subset of struct statfs) from aufs.
2026+- Module parameter 'brs'
2027+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
2028+ be set.
2029+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
2030+ When there are two or more branches with fhsm attributes,
2031+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
2032+ terminates it. As a result of remounting and branch-manipulation, the
2033+ number of branches with fhsm attribute can be one. In this case,
2034+ /sbin/mount.aufs will terminate the user-space daemon.
2035+
2036+
2037+Finally the operation is done as these steps in kernel-space.
2038+- make sure that,
2039+ + no one else is using the file.
2040+ + the file is not hard-linked.
2041+ + the file is not pseudo-linked.
2042+ + the file is a regular file.
2043+ + the parent dir is not opaqued.
2044+- find the target writable branch.
2045+- make sure the file is not whiteout-ed by the upper (than the target)
2046+ branch.
2047+- make the parent dir on the target branch.
2048+- mutex lock the inode on the branch.
2049+- unlink the whiteout on the target branch (if exists).
2050+- lookup and create the whiteout-ed temporary name on the target branch.
2051+- copy the file as the whiteout-ed temporary name on the target branch.
2052+- rename the whiteout-ed temporary name to the original name.
2053+- unlink the file on the source branch.
2054+- maintain the internal pointer array and the external inode number
2055+ table (XINO).
2056+- maintain the timestamps and other attributes of the parent dir and the
2057+ file.
2058+
2059+And of course, in every step, an error may happen. So the operation
2060+should restore the original file state after an error happens.
53392da6
AM
2061diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
2062--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2063+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2016-10-09 16:55:36.482701377 +0200
b912730e 2064@@ -0,0 +1,72 @@
53392da6 2065+
8cdd5066 2066+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2067+#
2068+# This program is free software; you can redistribute it and/or modify
2069+# it under the terms of the GNU General Public License as published by
2070+# the Free Software Foundation; either version 2 of the License, or
2071+# (at your option) any later version.
2072+#
2073+# This program is distributed in the hope that it will be useful,
2074+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2075+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2076+# GNU General Public License for more details.
2077+#
2078+# You should have received a copy of the GNU General Public License
523b37e3 2079+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2080+
2081+mmap(2) -- File Memory Mapping
2082+----------------------------------------------------------------------
2083+In aufs, the file-mapped pages are handled by a branch fs directly, no
2084+interaction with aufs. It means aufs_mmap() calls the branch fs's
2085+->mmap().
2086+This approach is simple and good, but there is one problem.
7e9cd9fe 2087+Under /proc, several entries show the mmapped files by its path (with
53392da6
AM
2088+device and inode number), and the printed path will be the path on the
2089+branch fs's instead of virtual aufs's.
2090+This is not a problem in most cases, but some utilities lsof(1) (and its
2091+user) may expect the path on aufs.
2092+
2093+To address this issue, aufs adds a new member called vm_prfile in struct
2094+vm_area_struct (and struct vm_region). The original vm_file points to
2095+the file on the branch fs in order to handle everything correctly as
2096+usual. The new vm_prfile points to a virtual file in aufs, and the
2097+show-functions in procfs refers to vm_prfile if it is set.
2098+Also we need to maintain several other places where touching vm_file
2099+such like
2100+- fork()/clone() copies vma and the reference count of vm_file is
2101+ incremented.
2102+- merging vma maintains the ref count too.
2103+
7e9cd9fe 2104+This is not a good approach. It just fakes the printed path. But it
53392da6
AM
2105+leaves all behaviour around f_mapping unchanged. This is surely an
2106+advantage.
2107+Actually aufs had adopted another complicated approach which calls
2108+generic_file_mmap() and handles struct vm_operations_struct. In this
2109+approach, aufs met a hard problem and I could not solve it without
2110+switching the approach.
b912730e
AM
2111+
2112+There may be one more approach which is
2113+- bind-mount the branch-root onto the aufs-root internally
2114+- grab the new vfsmount (ie. struct mount)
2115+- lazy-umount the branch-root internally
2116+- in open(2) the aufs-file, open the branch-file with the hidden
2117+ vfsmount (instead of the original branch's vfsmount)
2118+- ideally this "bind-mount and lazy-umount" should be done atomically,
2119+ but it may be possible from userspace by the mount helper.
2120+
2121+Adding the internal hidden vfsmount and using it in opening a file, the
2122+file path under /proc will be printed correctly. This approach looks
2123+smarter, but is not possible I am afraid.
2124+- aufs-root may be bind-mount later. when it happens, another hidden
2125+ vfsmount will be required.
2126+- it is hard to get the chance to bind-mount and lazy-umount
2127+ + in kernel-space, FS can have vfsmount in open(2) via
2128+ file->f_path, and aufs can know its vfsmount. But several locks are
2129+ already acquired, and if aufs tries to bind-mount and lazy-umount
2130+ here, then it may cause a deadlock.
2131+ + in user-space, bind-mount doesn't invoke the mount helper.
2132+- since /proc shows dev and ino, aufs has to give vma these info. it
2133+ means a new member vm_prinode will be necessary. this is essentially
2134+ equivalent to vm_prfile described above.
2135+
2136+I have to give up this "looks-smarter" approach.
c1595e42
JR
2137diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt linux/Documentation/filesystems/aufs/design/06xattr.txt
2138--- /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2139+++ linux/Documentation/filesystems/aufs/design/06xattr.txt 2016-10-09 16:55:36.482701377 +0200
c1595e42
JR
2140@@ -0,0 +1,96 @@
2141+
8cdd5066 2142+# Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
2143+#
2144+# This program is free software; you can redistribute it and/or modify
2145+# it under the terms of the GNU General Public License as published by
2146+# the Free Software Foundation; either version 2 of the License, or
2147+# (at your option) any later version.
2148+#
2149+# This program is distributed in the hope that it will be useful,
2150+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2151+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2152+# GNU General Public License for more details.
2153+#
2154+# You should have received a copy of the GNU General Public License
2155+# along with this program; if not, write to the Free Software
2156+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2157+
2158+
2159+Listing XATTR/EA and getting the value
2160+----------------------------------------------------------------------
2161+For the inode standard attributes (owner, group, timestamps, etc.), aufs
2162+shows the values from the topmost existing file. This behaviour is good
7e9cd9fe 2163+for the non-dir entries since the behaviour exactly matches the shown
c1595e42
JR
2164+information. But for the directories, aufs considers all the same named
2165+entries on the lower branches. Which means, if one of the lower entries
2166+rejects readdir call, then aufs returns an error even if the topmost
2167+entry allows it. This behaviour is necessary to respect the branch fs's
2168+security, but can make users confused since the user-visible standard
2169+attributes don't match the behaviour.
2170+To address this issue, aufs has a mount option called dirperm1 which
2171+checks the permission for the topmost entry only, and ignores the lower
2172+entry's permission.
2173+
2174+A similar issue can happen around XATTR.
2175+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
7e9cd9fe
AM
2176+always set. Otherwise these very unpleasant situations would happen.
2177+- listxattr(2) may return the duplicated entries.
c1595e42
JR
2178+- users may not be able to remove or reset the XATTR forever.
2179+
2180+
2181+XATTR/EA support in the internal (copy,move)-(up,down)
2182+----------------------------------------------------------------------
7e9cd9fe 2183+Generally the extended attributes of inode are categorized as these.
c1595e42
JR
2184+- "security" for LSM and capability.
2185+- "system" for posix ACL, 'acl' mount option is required for the branch
2186+ fs generally.
2187+- "trusted" for userspace, CAP_SYS_ADMIN is required.
2188+- "user" for userspace, 'user_xattr' mount option is required for the
2189+ branch fs generally.
2190+
2191+Moreover there are some other categories. Aufs handles these rather
2192+unpopular categories as the ordinary ones, ie. there is no special
2193+condition nor exception.
2194+
2195+In copy-up, the support for XATTR on the dst branch may differ from the
2196+src branch. In this case, the copy-up operation will get an error and
7e9cd9fe
AM
2197+the original user operation which triggered the copy-up will fail. It
2198+can happen that even all copy-up will fail.
c1595e42
JR
2199+When both of src and dst branches support XATTR and if an error occurs
2200+during copying XATTR, then the copy-up should fail obviously. That is a
2201+good reason and aufs should return an error to userspace. But when only
7e9cd9fe 2202+the src branch supports that XATTR, aufs should not return an error.
c1595e42
JR
2203+For example, the src branch supports ACL but the dst branch doesn't
2204+because the dst branch may natively un-support it or temporarily
2205+un-support it due to "noacl" mount option. Of course, the dst branch fs
2206+may NOT return an error even if the XATTR is not supported. It is
2207+totally up to the branch fs.
2208+
2209+Anyway when the aufs internal copy-up gets an error from the dst branch
2210+fs, then aufs tries removing the just copied entry and returns the error
2211+to the userspace. The worst case of this situation will be all copy-up
2212+will fail.
2213+
2214+For the copy-up operation, there are two basic approaches.
2215+- copy the specified XATTR only (by category above), and return the
7e9cd9fe 2216+ error unconditionally if it happens.
c1595e42
JR
2217+- copy all XATTR, and ignore the error on the specified category only.
2218+
2219+In order to support XATTR and to implement the correct behaviour, aufs
7e9cd9fe
AM
2220+chooses the latter approach and introduces some new branch attributes,
2221+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
c1595e42 2222+They correspond to the XATTR namespaces (see above). Additionally, to be
7e9cd9fe
AM
2223+convenient, "icex" is also provided which means all "icex*" attributes
2224+are set (here the word "icex" stands for "ignore copy-error on XATTR").
c1595e42
JR
2225+
2226+The meaning of these attributes is to ignore the error from setting
2227+XATTR on that branch.
2228+Note that aufs tries copying all XATTR unconditionally, and ignores the
2229+error from the dst branch according to the specified attributes.
2230+
2231+Some XATTR may have its default value. The default value may come from
2232+the parent dir or the environment. If the default value is set at the
2233+file creating-time, it will be overwritten by copy-up.
2234+Some contradiction may happen I am afraid.
2235+Do we need another attribute to stop copying XATTR? I am unsure. For
2236+now, aufs implements the branch attributes to ignore the error.
53392da6
AM
2237diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
2238--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2239+++ linux/Documentation/filesystems/aufs/design/07export.txt 2016-10-09 16:55:36.482701377 +0200
523b37e3 2240@@ -0,0 +1,58 @@
53392da6 2241+
8cdd5066 2242+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2243+#
2244+# This program is free software; you can redistribute it and/or modify
2245+# it under the terms of the GNU General Public License as published by
2246+# the Free Software Foundation; either version 2 of the License, or
2247+# (at your option) any later version.
2248+#
2249+# This program is distributed in the hope that it will be useful,
2250+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2251+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2252+# GNU General Public License for more details.
2253+#
2254+# You should have received a copy of the GNU General Public License
523b37e3 2255+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2256+
2257+Export Aufs via NFS
2258+----------------------------------------------------------------------
2259+Here is an approach.
2260+- like xino/xib, add a new file 'xigen' which stores aufs inode
2261+ generation.
2262+- iget_locked(): initialize aufs inode generation for a new inode, and
2263+ store it in xigen file.
2264+- destroy_inode(): increment aufs inode generation and store it in xigen
2265+ file. it is necessary even if it is not unlinked, because any data of
2266+ inode may be changed by UDBA.
2267+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
2268+ build file handle by
2269+ + branch id (4 bytes)
2270+ + superblock generation (4 bytes)
2271+ + inode number (4 or 8 bytes)
2272+ + parent dir inode number (4 or 8 bytes)
2273+ + inode generation (4 bytes)
2274+ + return value of exportfs_encode_fh() for the parent on a branch (4
2275+ bytes)
2276+ + file handle for a branch (by exportfs_encode_fh())
2277+- fh_to_dentry():
2278+ + find the index of a branch from its id in handle, and check it
2279+ still exists in aufs.
2280+ + 1st level: get the inode number from handle and search it in cache.
7e9cd9fe
AM
2281+ + 2nd level: if not found in cache, get the parent inode number from
2282+ the handle and search it in cache. and then open the found parent
2283+ dir, find the matching inode number by vfs_readdir() and get its
2284+ name, and call lookup_one_len() for the target dentry.
53392da6
AM
2285+ + 3rd level: if the parent dir is not cached, call
2286+ exportfs_decode_fh() for a branch and get the parent on a branch,
2287+ build a pathname of it, convert it a pathname in aufs, call
2288+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
2289+ the 2nd level.
2290+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
2291+ for every branch, but not itself. to get this, (currently) aufs
2292+ searches in current->nsproxy->mnt_ns list. it may not be a good
2293+ idea, but I didn't get other approach.
2294+ + test the generation of the gotten inode.
2295+- every inode operation: they may get EBUSY due to UDBA. in this case,
2296+ convert it into ESTALE for NFSD.
2297+- readdir(): call lockdep_on/off() because filldir in NFSD calls
2298+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
2299diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
2300--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2301+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2016-10-09 16:55:36.482701377 +0200
523b37e3 2302@@ -0,0 +1,52 @@
53392da6 2303+
8cdd5066 2304+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2305+#
2306+# This program is free software; you can redistribute it and/or modify
2307+# it under the terms of the GNU General Public License as published by
2308+# the Free Software Foundation; either version 2 of the License, or
2309+# (at your option) any later version.
2310+#
2311+# This program is distributed in the hope that it will be useful,
2312+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2313+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2314+# GNU General Public License for more details.
2315+#
2316+# You should have received a copy of the GNU General Public License
523b37e3 2317+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2318+
2319+Show Whiteout Mode (shwh)
2320+----------------------------------------------------------------------
2321+Generally aufs hides the name of whiteouts. But in some cases, to show
2322+them is very useful for users. For instance, creating a new middle layer
2323+(branch) by merging existing layers.
2324+
2325+(borrowing aufs1 HOW-TO from a user, Michael Towers)
2326+When you have three branches,
2327+- Bottom: 'system', squashfs (underlying base system), read-only
2328+- Middle: 'mods', squashfs, read-only
2329+- Top: 'overlay', ram (tmpfs), read-write
2330+
2331+The top layer is loaded at boot time and saved at shutdown, to preserve
2332+the changes made to the system during the session.
2333+When larger changes have been made, or smaller changes have accumulated,
2334+the size of the saved top layer data grows. At this point, it would be
2335+nice to be able to merge the two overlay branches ('mods' and 'overlay')
2336+and rewrite the 'mods' squashfs, clearing the top layer and thus
2337+restoring save and load speed.
2338+
2339+This merging is simplified by the use of another aufs mount, of just the
2340+two overlay branches using the 'shwh' option.
2341+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
2342+ aufs /livesys/merge_union
2343+
2344+A merged view of these two branches is then available at
2345+/livesys/merge_union, and the new feature is that the whiteouts are
2346+visible!
2347+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
2348+writing to all branches. Also the default mode for all branches is 'ro'.
2349+It is now possible to save the combined contents of the two overlay
2350+branches to a new squashfs, e.g.:
2351+# mksquashfs /livesys/merge_union /path/to/newmods.squash
2352+
2353+This new squashfs archive can be stored on the boot device and the
2354+initramfs will use it to replace the old one at the next boot.
2355diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
2356--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2357+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2016-10-09 16:55:36.482701377 +0200
7e9cd9fe 2358@@ -0,0 +1,47 @@
53392da6 2359+
8cdd5066 2360+# Copyright (C) 2010-2016 Junjiro R. Okajima
53392da6
AM
2361+#
2362+# This program is free software; you can redistribute it and/or modify
2363+# it under the terms of the GNU General Public License as published by
2364+# the Free Software Foundation; either version 2 of the License, or
2365+# (at your option) any later version.
2366+#
2367+# This program is distributed in the hope that it will be useful,
2368+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2369+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2370+# GNU General Public License for more details.
2371+#
2372+# You should have received a copy of the GNU General Public License
523b37e3 2373+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2374+
2375+Dynamically customizable FS operations
2376+----------------------------------------------------------------------
2377+Generally FS operations (struct inode_operations, struct
2378+address_space_operations, struct file_operations, etc.) are defined as
2379+"static const", but it never means that FS have only one set of
2380+operation. Some FS have multiple sets of them. For instance, ext2 has
2381+three sets, one for XIP, for NOBH, and for normal.
2382+Since aufs overrides and redirects these operations, sometimes aufs has
7e9cd9fe 2383+to change its behaviour according to the branch FS type. More importantly
53392da6
AM
2384+VFS acts differently if a function (member in the struct) is set or
2385+not. It means aufs should have several sets of operations and select one
2386+among them according to the branch FS definition.
2387+
7e9cd9fe 2388+In order to solve this problem and not to affect the behaviour of VFS,
53392da6 2389+aufs defines these operations dynamically. For instance, aufs defines
7e9cd9fe
AM
2390+dummy direct_IO function for struct address_space_operations, but it may
2391+not be set to the address_space_operations actually. When the branch FS
2392+doesn't have it, aufs doesn't set it to its address_space_operations
2393+while the function definition itself is still alive. So the behaviour
2394+itself will not change, and it will return an error when direct_IO is
2395+not set.
53392da6
AM
2396+
2397+The lifetime of these dynamically generated operation object is
2398+maintained by aufs branch object. When the branch is removed from aufs,
2399+the reference counter of the object is decremented. When it reaches
2400+zero, the dynamically generated operation object will be freed.
2401+
7e9cd9fe
AM
2402+This approach is designed to support AIO (io_submit), Direct I/O and
2403+XIP (DAX) mainly.
2404+Currently this approach is applied to address_space_operations for
2405+regular files only.
53392da6
AM
2406diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
2407--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2408+++ linux/Documentation/filesystems/aufs/README 2016-10-09 16:55:36.479367956 +0200
5afbbe0d 2409@@ -0,0 +1,392 @@
53392da6 2410+
5527c038 2411+Aufs4 -- advanced multi layered unification filesystem version 4.x
53392da6
AM
2412+http://aufs.sf.net
2413+Junjiro R. Okajima
2414+
2415+
2416+0. Introduction
2417+----------------------------------------
2418+In the early days, aufs was entirely re-designed and re-implemented
7e9cd9fe 2419+Unionfs Version 1.x series. Adding many original ideas, approaches,
53392da6
AM
2420+improvements and implementations, it becomes totally different from
2421+Unionfs while keeping the basic features.
2422+Recently, Unionfs Version 2.x series begin taking some of the same
2423+approaches to aufs1's.
2424+Unionfs is being developed by Professor Erez Zadok at Stony Brook
2425+University and his team.
2426+
5527c038 2427+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
53392da6
AM
2428+If you want older kernel version support, try aufs2-2.6.git or
2429+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
2430+
2431+Note: it becomes clear that "Aufs was rejected. Let's give it up."
38d290e6
JR
2432+ According to Christoph Hellwig, linux rejects all union-type
2433+ filesystems but UnionMount.
53392da6
AM
2434+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
2435+
38d290e6
JR
2436+PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
2437+ UnionMount, and he pointed out an issue around a directory mutex
2438+ lock and aufs addressed it. But it is still unsure whether aufs will
2439+ be merged (or any other union solution).
076b876e 2440+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
38d290e6 2441+
53392da6
AM
2442+
2443+1. Features
2444+----------------------------------------
2445+- unite several directories into a single virtual filesystem. The member
2446+ directory is called as a branch.
2447+- you can specify the permission flags to the branch, which are 'readonly',
2448+ 'readwrite' and 'whiteout-able.'
2449+- by upper writable branch, internal copyup and whiteout, files/dirs on
2450+ readonly branch are modifiable logically.
2451+- dynamic branch manipulation, add, del.
2452+- etc...
2453+
7e9cd9fe
AM
2454+Also there are many enhancements in aufs, such as:
2455+- test only the highest one for the directory permission (dirperm1)
2456+- copyup on open (coo=)
2457+- 'move' policy for copy-up between two writable branches, after
2458+ checking free space.
2459+- xattr, acl
53392da6
AM
2460+- readdir(3) in userspace.
2461+- keep inode number by external inode number table
2462+- keep the timestamps of file/dir in internal copyup operation
2463+- seekable directory, supporting NFS readdir.
2464+- whiteout is hardlinked in order to reduce the consumption of inodes
2465+ on branch
2466+- do not copyup, nor create a whiteout when it is unnecessary
2467+- revert a single systemcall when an error occurs in aufs
2468+- remount interface instead of ioctl
2469+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
2470+- loopback mounted filesystem as a branch
2471+- kernel thread for removing the dir which has plenty of whiteouts
2472+- support copyup sparse file (a file which has a 'hole' in it)
2473+- default permission flags for branches
2474+- selectable permission flags for ro branch, whether whiteout can
2475+ exist or not
2476+- export via NFS.
2477+- support <sysfs>/fs/aufs and <debugfs>/aufs.
2478+- support multiple writable branches, some policies to select one
2479+ among multiple writable branches.
2480+- a new semantics for link(2) and rename(2) to support multiple
2481+ writable branches.
2482+- no glibc changes are required.
2483+- pseudo hardlink (hardlink over branches)
2484+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
2485+ including NFS or remote filesystem branch.
2486+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
2487+- and more...
2488+
5527c038 2489+Currently these features are dropped temporarily from aufs4.
53392da6 2490+See design/08plan.txt in detail.
53392da6
AM
2491+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
2492+ (robr)
2493+- statistics of aufs thread (/sys/fs/aufs/stat)
53392da6
AM
2494+
2495+Features or just an idea in the future (see also design/*.txt),
2496+- reorder the branch index without del/re-add.
2497+- permanent xino files for NFSD
2498+- an option for refreshing the opened files after add/del branches
53392da6
AM
2499+- light version, without branch manipulation. (unnecessary?)
2500+- copyup in userspace
2501+- inotify in userspace
2502+- readv/writev
53392da6
AM
2503+
2504+
2505+2. Download
2506+----------------------------------------
5527c038
JR
2507+There are three GIT trees for aufs4, aufs4-linux.git,
2508+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
1e00d052 2509+"aufs-util.git."
5527c038
JR
2510+While the aufs-util is always necessary, you need either of aufs4-linux
2511+or aufs4-standalone.
1e00d052 2512+
5527c038 2513+The aufs4-linux tree includes the whole linux mainline GIT tree,
1e00d052
AM
2514+git://git.kernel.org/.../torvalds/linux.git.
2515+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
5527c038 2516+build aufs4 as an external kernel module.
2000de60 2517+Several extra patches are not included in this tree. Only
be52b249 2518+aufs4-standalone tree contains them. They are described in the later
2000de60 2519+section "Configuration and Compilation."
1e00d052 2520+
5527c038 2521+On the other hand, the aufs4-standalone tree has only aufs source files
53392da6 2522+and necessary patches, and you can select CONFIG_AUFS_FS=m.
2000de60 2523+But you need to apply all aufs patches manually.
53392da6 2524+
5527c038
JR
2525+You will find GIT branches whose name is in form of "aufs4.x" where "x"
2526+represents the linux kernel version, "linux-4.x". For instance,
2527+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
2528+"aufs4.x-rcN" branch.
1e00d052 2529+
5527c038 2530+o aufs4-linux tree
1e00d052 2531+$ git clone --reference /your/linux/git/tree \
5527c038 2532+ git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
1e00d052 2533+- if you don't have linux GIT tree, then remove "--reference ..."
5527c038
JR
2534+$ cd aufs4-linux.git
2535+$ git checkout origin/aufs4.0
53392da6 2536+
2000de60
JR
2537+Or you may want to directly git-pull aufs into your linux GIT tree, and
2538+leave the patch-work to GIT.
2539+$ cd /your/linux/git/tree
5527c038
JR
2540+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
2541+$ git fetch aufs4
2542+$ git checkout -b my4.0 v4.0
2543+$ (add your local change...)
2544+$ git pull aufs4 aufs4.0
2545+- now you have v4.0 + your_changes + aufs4.0 in your my4.0 branch.
2000de60 2546+- you may need to solve some conflicts between your_changes and
5527c038
JR
2547+ aufs4.0. in this case, git-rerere is recommended so that you can
2548+ solve the similar conflicts automatically when you upgrade to 4.1 or
2000de60
JR
2549+ later in the future.
2550+
5527c038
JR
2551+o aufs4-standalone tree
2552+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
2553+$ cd aufs4-standalone.git
2554+$ git checkout origin/aufs4.0
53392da6
AM
2555+
2556+o aufs-util tree
5527c038
JR
2557+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
2558+- note that the public aufs-util.git is on SourceForge instead of
2559+ GitHUB.
53392da6 2560+$ cd aufs-util.git
5527c038 2561+$ git checkout origin/aufs4.0
53392da6 2562+
5527c038
JR
2563+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
2564+The minor version number, 'x' in '4.x', of aufs may not always
9dbd164d
AM
2565+follow the minor version number of the kernel.
2566+Because changes in the kernel that cause the use of a new
2567+minor version number do not always require changes to aufs-util.
2568+
2569+Since aufs-util has its own minor version number, you may not be
2570+able to find a GIT branch in aufs-util for your kernel's
2571+exact minor version number.
2572+In this case, you should git-checkout the branch for the
53392da6 2573+nearest lower number.
9dbd164d
AM
2574+
2575+For (an unreleased) example:
5527c038
JR
2576+If you are using "linux-4.10" and the "aufs4.10" branch
2577+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
9dbd164d
AM
2578+or something numerically smaller is the branch for your kernel.
2579+
53392da6
AM
2580+Also you can view all branches by
2581+ $ git branch -a
2582+
2583+
2584+3. Configuration and Compilation
2585+----------------------------------------
2586+Make sure you have git-checkout'ed the correct branch.
2587+
5527c038 2588+For aufs4-linux tree,
c06a8ce3 2589+- enable CONFIG_AUFS_FS.
1e00d052
AM
2590+- set other aufs configurations if necessary.
2591+
5527c038 2592+For aufs4-standalone tree,
53392da6
AM
2593+There are several ways to build.
2594+
2595+1.
5527c038
JR
2596+- apply ./aufs4-kbuild.patch to your kernel source files.
2597+- apply ./aufs4-base.patch too.
2598+- apply ./aufs4-mmap.patch too.
2599+- apply ./aufs4-standalone.patch too, if you have a plan to set
2600+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
537831f9
AM
2601+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
2602+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
c06a8ce3 2603+- enable CONFIG_AUFS_FS, you can select either
53392da6
AM
2604+ =m or =y.
2605+- and build your kernel as usual.
2606+- install the built kernel.
c06a8ce3
AM
2607+ Note: Since linux-3.9, every filesystem module requires an alias
2608+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2609+ modules.aliases file if you set CONFIG_AUFS_FS=m.
7eafdf33
AM
2610+- install the header files too by "make headers_install" to the
2611+ directory where you specify. By default, it is $PWD/usr.
b4510431 2612+ "make help" shows a brief note for headers_install.
53392da6
AM
2613+- and reboot your system.
2614+
2615+2.
2616+- module only (CONFIG_AUFS_FS=m).
5527c038
JR
2617+- apply ./aufs4-base.patch to your kernel source files.
2618+- apply ./aufs4-mmap.patch too.
2619+- apply ./aufs4-standalone.patch too.
53392da6
AM
2620+- build your kernel, don't forget "make headers_install", and reboot.
2621+- edit ./config.mk and set other aufs configurations if necessary.
b4510431 2622+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
53392da6
AM
2623+ every aufs configuration.
2624+- build the module by simple "make".
c06a8ce3
AM
2625+ Note: Since linux-3.9, every filesystem module requires an alias
2626+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2627+ modules.aliases file.
53392da6
AM
2628+- you can specify ${KDIR} make variable which points to your kernel
2629+ source tree.
2630+- install the files
2631+ + run "make install" to install the aufs module, or copy the built
b4510431
AM
2632+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
2633+ + run "make install_headers" (instead of headers_install) to install
2634+ the modified aufs header file (you can specify DESTDIR which is
2635+ available in aufs standalone version's Makefile only), or copy
2636+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
2637+ you like manually. By default, the target directory is $PWD/usr.
5527c038 2638+- no need to apply aufs4-kbuild.patch, nor copying source files to your
53392da6
AM
2639+ kernel source tree.
2640+
b4510431 2641+Note: The header file aufs_type.h is necessary to build aufs-util
53392da6
AM
2642+ as well as "make headers_install" in the kernel source tree.
2643+ headers_install is subject to be forgotten, but it is essentially
2644+ necessary, not only for building aufs-util.
2645+ You may not meet problems without headers_install in some older
2646+ version though.
2647+
2648+And then,
2649+- read README in aufs-util, build and install it
9dbd164d
AM
2650+- note that your distribution may contain an obsoleted version of
2651+ aufs_type.h in /usr/include/linux or something. When you build aufs
2652+ utilities, make sure that your compiler refers to the correct aufs header
2653+ file which is built by "make headers_install."
53392da6
AM
2654+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
2655+ then run "make install_ulib" too. And refer to the aufs manual in
2656+ detail.
2657+
5527c038 2658+There are several other patches in aufs4-standalone.git. They are all
38d290e6 2659+optional. When you meet some problems, they will help you.
5527c038 2660+- aufs4-loopback.patch
38d290e6
JR
2661+ Supports a nested loopback mount in a branch-fs. This patch is
2662+ unnecessary until aufs produces a message like "you may want to try
2663+ another patch for loopback file".
2664+- vfs-ino.patch
2665+ Modifies a system global kernel internal function get_next_ino() in
2666+ order to stop assigning 0 for an inode-number. Not directly related to
2667+ aufs, but recommended generally.
2668+- tmpfs-idr.patch
2669+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
2670+ the size of aufs XINO files for tmpfs branch. Also it prevents the
2671+ duplication of inode number, which is important for backup tools and
2672+ other utilities. When you find aufs XINO files for tmpfs branch
2673+ growing too much, try this patch.
be52b249
AM
2674+- lockdep-debug.patch
2675+ Because aufs is not only an ordinary filesystem (callee of VFS), but
2676+ also a caller of VFS functions for branch filesystems, subclassing of
2677+ the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging
2678+ feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will
2679+ need to apply this debug patch to expand several constant values.
2680+ If you don't know what LOCKDEP is, then you don't have to apply this patch.
38d290e6 2681+
53392da6
AM
2682+
2683+4. Usage
2684+----------------------------------------
2685+At first, make sure aufs-util are installed, and please read the aufs
2686+manual, aufs.5 in aufs-util.git tree.
2687+$ man -l aufs.5
2688+
2689+And then,
2690+$ mkdir /tmp/rw /tmp/aufs
2691+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
2692+
2693+Here is another example. The result is equivalent.
2694+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
2695+ Or
2696+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
2697+# mount -o remount,append:${HOME} /tmp/aufs
2698+
2699+Then, you can see the whole tree of your home dir through /tmp/aufs. If
2700+you modify a file under /tmp/aufs, the one on your home directory is
2701+not affected, instead the same named file will be newly created under
2702+/tmp/rw. And all of your modification to a file will be applied to
2703+the one under /tmp/rw. This is called the file based Copy on Write
2704+(COW) method.
2705+Aufs mount options are described in aufs.5.
2706+If you run chroot or something and make your aufs as a root directory,
2707+then you need to customize the shutdown script. See the aufs manual in
2708+detail.
2709+
2710+Additionally, there are some sample usages of aufs which are a
2711+diskless system with network booting, and LiveCD over NFS.
2712+See sample dir in CVS tree on SourceForge.
2713+
2714+
2715+5. Contact
2716+----------------------------------------
2717+When you have any problems or strange behaviour in aufs, please let me
2718+know with:
2719+- /proc/mounts (instead of the output of mount(8))
2720+- /sys/module/aufs/*
2721+- /sys/fs/aufs/* (if you have them)
2722+- /debug/aufs/* (if you have them)
2723+- linux kernel version
2724+ if your kernel is not plain, for example modified by distributor,
2725+ the url where i can download its source is necessary too.
2726+- aufs version which was printed at loading the module or booting the
2727+ system, instead of the date you downloaded.
2728+- configuration (define/undefine CONFIG_AUFS_xxx)
2729+- kernel configuration or /proc/config.gz (if you have it)
2730+- behaviour which you think to be incorrect
2731+- actual operation, reproducible one is better
2732+- mailto: aufs-users at lists.sourceforge.net
2733+
2734+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
2735+and Feature Requests) on SourceForge. Please join and write to
2736+aufs-users ML.
2737+
2738+
2739+6. Acknowledgements
2740+----------------------------------------
2741+Thanks to everyone who has tried and is using aufs, and to whoever
2742+has reported a bug or given any feedback.
2743+
2744+Especially donators:
2745+Tomas Matejicek(slax.org) made a donation (much more than once).
2746+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
2747+ scripts) is making "doubling" donations.
2748+ Unfortunately I cannot list all of the donators, but I really
b4510431 2749+ appreciate it.
53392da6
AM
2750+ It ends Aug 2010, but the ordinary donation URL is still available.
2751+ <http://sourceforge.net/donate/index.php?group_id=167503>
2752+Dai Itasaka made a donation (2007/8).
2753+Chuck Smith made a donation (2008/4, 10 and 12).
2754+Henk Schoneveld made a donation (2008/9).
2755+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
2756+Francois Dupoux made a donation (2008/11).
2757+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
2758+ aufs2 GIT tree (2009/2).
2759+William Grant made a donation (2009/3).
2760+Patrick Lane made a donation (2009/4).
2761+The Mail Archive (mail-archive.com) made donations (2009/5).
2762+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
2763+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
2764+Pavel Pronskiy made a donation (2011/2).
2765+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
2766+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
537831f9
AM
2767+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
2768+11).
1e00d052 2769+Sam Liddicott made a donation (2011/9).
86dc4139
AM
2770+Era Scarecrow made a donation (2013/4).
2771+Bor Ratajc made a donation (2013/4).
2772+Alessandro Gorreta made a donation (2013/4).
2773+POIRETTE Marc made a donation (2013/4).
2774+Alessandro Gorreta made a donation (2013/4).
2775+lauri kasvandik made a donation (2013/5).
392086de 2776+"pemasu from Finland" made a donation (2013/7).
523b37e3
AM
2777+The Parted Magic Project made a donation (2013/9 and 11).
2778+Pavel Barta made a donation (2013/10).
38d290e6 2779+Nikolay Pertsev made a donation (2014/5).
c2c0f25c 2780+James B made a donation (2014/7 and 2015/7).
076b876e 2781+Stefano Di Biase made a donation (2014/8).
2000de60 2782+Daniel Epellei made a donation (2015/1).
8cdd5066 2783+OmegaPhil made a donation (2016/1).
5afbbe0d 2784+Tomasz Szewczyk made a donation (2016/4).
53392da6
AM
2785+
2786+Thank you very much.
2787+Donations are always, including future donations, very important and
2788+helpful for me to keep on developing aufs.
2789+
2790+
2791+7.
2792+----------------------------------------
2793+If you are an experienced user, no explanation is needed. Aufs is
2794+just a linux filesystem.
2795+
2796+
2797+Enjoy!
2798+
2799+# Local variables: ;
2800+# mode: text;
2801+# End: ;
7f207e10
AM
2802diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
2803--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2804+++ linux/fs/aufs/aufs.h 2016-10-09 16:55:36.486034798 +0200
523b37e3 2805@@ -0,0 +1,59 @@
7f207e10 2806+/*
8cdd5066 2807+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
2808+ *
2809+ * This program, aufs is free software; you can redistribute it and/or modify
2810+ * it under the terms of the GNU General Public License as published by
2811+ * the Free Software Foundation; either version 2 of the License, or
2812+ * (at your option) any later version.
2813+ *
2814+ * This program is distributed in the hope that it will be useful,
2815+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2816+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2817+ * GNU General Public License for more details.
2818+ *
2819+ * You should have received a copy of the GNU General Public License
523b37e3 2820+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2821+ */
2822+
2823+/*
2824+ * all header files
2825+ */
2826+
2827+#ifndef __AUFS_H__
2828+#define __AUFS_H__
2829+
2830+#ifdef __KERNEL__
2831+
/*
 * AuStub() expands to a static inline function definition named @fn which
 * returns @ret_type, takes the variadic part as its parameter list, and
 * whose whole body is the single statement @stmt.
 */
#define AuStub(ret_type, fn, stmt, ...) \
	static inline ret_type fn(__VA_ARGS__) { stmt; }

/* a stub with an empty body which returns nothing */
#define AuStubVoid(fn, ...) \
	AuStub(void, fn, , __VA_ARGS__)
/* a stub which does nothing but return 0 */
#define AuStubInt0(fn, ...) \
	AuStub(int, fn, return 0, __VA_ARGS__)
2839+
2840+#include "debug.h"
2841+
2842+#include "branch.h"
2843+#include "cpup.h"
2844+#include "dcsub.h"
2845+#include "dbgaufs.h"
2846+#include "dentry.h"
2847+#include "dir.h"
2848+#include "dynop.h"
2849+#include "file.h"
2850+#include "fstype.h"
2851+#include "inode.h"
2852+#include "loop.h"
2853+#include "module.h"
7f207e10
AM
2854+#include "opts.h"
2855+#include "rwsem.h"
2856+#include "spl.h"
2857+#include "super.h"
2858+#include "sysaufs.h"
2859+#include "vfsub.h"
2860+#include "whout.h"
2861+#include "wkq.h"
2862+
2863+#endif /* __KERNEL__ */
2864+#endif /* __AUFS_H__ */
2865diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
2866--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
2867+++ linux/fs/aufs/branch.c 2016-10-09 16:55:38.886097714 +0200
2868@@ -0,0 +1,1412 @@
7f207e10 2869+/*
8cdd5066 2870+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
2871+ *
2872+ * This program, aufs is free software; you can redistribute it and/or modify
2873+ * it under the terms of the GNU General Public License as published by
2874+ * the Free Software Foundation; either version 2 of the License, or
2875+ * (at your option) any later version.
2876+ *
2877+ * This program is distributed in the hope that it will be useful,
2878+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2879+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2880+ * GNU General Public License for more details.
2881+ *
2882+ * You should have received a copy of the GNU General Public License
523b37e3 2883+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2884+ */
2885+
2886+/*
2887+ * branch management
2888+ */
2889+
027c5e7a 2890+#include <linux/compat.h>
7f207e10
AM
2891+#include <linux/statfs.h>
2892+#include "aufs.h"
2893+
2894+/*
2895+ * free a single branch
1facf9fc 2896+ */
2897+static void au_br_do_free(struct au_branch *br)
2898+{
2899+ int i;
2900+ struct au_wbr *wbr;
4a4d8108 2901+ struct au_dykey **key;
1facf9fc 2902+
027c5e7a
AM
2903+ au_hnotify_fin_br(br);
2904+
1facf9fc 2905+ if (br->br_xino.xi_file)
2906+ fput(br->br_xino.xi_file);
2907+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
2908+
5afbbe0d
AM
2909+ AuDebugOn(au_br_count(br));
2910+ au_br_count_fin(br);
1facf9fc 2911+
2912+ wbr = br->br_wbr;
2913+ if (wbr) {
2914+ for (i = 0; i < AuBrWh_Last; i++)
2915+ dput(wbr->wbr_wh[i]);
2916+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 2917+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 2918+ }
2919+
076b876e
AM
2920+ if (br->br_fhsm) {
2921+ au_br_fhsm_fin(br->br_fhsm);
f0c0a007 2922+ au_delayed_kfree(br->br_fhsm);
076b876e
AM
2923+ }
2924+
4a4d8108
AM
2925+ key = br->br_dykey;
2926+ for (i = 0; i < AuBrDynOp; i++, key++)
2927+ if (*key)
2928+ au_dy_put(*key);
2929+ else
2930+ break;
2931+
537831f9
AM
2932+ /* recursive lock, s_umount of branch's */
2933+ lockdep_off();
86dc4139 2934+ path_put(&br->br_path);
537831f9 2935+ lockdep_on();
f0c0a007
AM
2936+ if (wbr)
2937+ au_delayed_kfree(wbr);
2938+ au_delayed_kfree(br);
1facf9fc 2939+}
2940+
2941+/*
2942+ * frees all branches
2943+ */
2944+void au_br_free(struct au_sbinfo *sbinfo)
2945+{
2946+ aufs_bindex_t bmax;
2947+ struct au_branch **br;
2948+
dece6358
AM
2949+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2950+
5afbbe0d 2951+ bmax = sbinfo->si_bbot + 1;
1facf9fc 2952+ br = sbinfo->si_branch;
2953+ while (bmax--)
2954+ au_br_do_free(*br++);
2955+}
2956+
2957+/*
2958+ * find the index of a branch which is specified by @br_id.
2959+ */
2960+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
2961+{
5afbbe0d 2962+ aufs_bindex_t bindex, bbot;
1facf9fc 2963+
5afbbe0d
AM
2964+ bbot = au_sbbot(sb);
2965+ for (bindex = 0; bindex <= bbot; bindex++)
1facf9fc 2966+ if (au_sbr_id(sb, bindex) == br_id)
2967+ return bindex;
2968+ return -1;
2969+}
2970+
2971+/* ---------------------------------------------------------------------- */
2972+
2973+/*
2974+ * add a branch
2975+ */
2976+
b752ccd1
AM
2977+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
2978+ struct dentry *h_root)
1facf9fc 2979+{
b752ccd1
AM
2980+ if (unlikely(h_adding == h_root
2981+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 2982+ return 1;
b752ccd1
AM
2983+ if (h_adding->d_sb != h_root->d_sb)
2984+ return 0;
2985+ return au_test_subdir(h_adding, h_root)
2986+ || au_test_subdir(h_root, h_adding);
1facf9fc 2987+}
2988+
2989+/*
2990+ * returns a newly allocated branch. @new_nbranch is a number of branches
2991+ * after adding a branch.
2992+ */
2993+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
2994+ int perm)
2995+{
2996+ struct au_branch *add_branch;
2997+ struct dentry *root;
5527c038 2998+ struct inode *inode;
4a4d8108 2999+ int err;
1facf9fc 3000+
4a4d8108 3001+ err = -ENOMEM;
1facf9fc 3002+ root = sb->s_root;
be52b249 3003+ add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
1facf9fc 3004+ if (unlikely(!add_branch))
3005+ goto out;
3006+
027c5e7a
AM
3007+ err = au_hnotify_init_br(add_branch, perm);
3008+ if (unlikely(err))
3009+ goto out_br;
3010+
1facf9fc 3011+ if (au_br_writable(perm)) {
3012+ /* may be freed separately at changing the branch permission */
be52b249 3013+ add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
1facf9fc 3014+ GFP_NOFS);
3015+ if (unlikely(!add_branch->br_wbr))
027c5e7a 3016+ goto out_hnotify;
1facf9fc 3017+ }
3018+
076b876e
AM
3019+ if (au_br_fhsm(perm)) {
3020+ err = au_fhsm_br_alloc(add_branch);
3021+ if (unlikely(err))
3022+ goto out_wbr;
3023+ }
3024+
e2f27e51 3025+ err = au_sbr_realloc(au_sbi(sb), new_nbranch, /*may_shrink*/0);
4a4d8108 3026+ if (!err)
e2f27e51 3027+ err = au_di_realloc(au_di(root), new_nbranch, /*may_shrink*/0);
5527c038
JR
3028+ if (!err) {
3029+ inode = d_inode(root);
e2f27e51 3030+ err = au_hinode_realloc(au_ii(inode), new_nbranch, /*may_shrink*/0);
5527c038 3031+ }
4a4d8108
AM
3032+ if (!err)
3033+ return add_branch; /* success */
1facf9fc 3034+
076b876e 3035+out_wbr:
f0c0a007
AM
3036+ if (add_branch->br_wbr)
3037+ au_delayed_kfree(add_branch->br_wbr);
027c5e7a
AM
3038+out_hnotify:
3039+ au_hnotify_fin_br(add_branch);
4f0767ce 3040+out_br:
f0c0a007 3041+ au_delayed_kfree(add_branch);
4f0767ce 3042+out:
4a4d8108 3043+ return ERR_PTR(err);
1facf9fc 3044+}
3045+
3046+/*
3047+ * test if the branch permission is legal or not.
3048+ */
3049+static int test_br(struct inode *inode, int brperm, char *path)
3050+{
3051+ int err;
3052+
4a4d8108
AM
3053+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
3054+ if (!err)
3055+ goto out;
1facf9fc 3056+
4a4d8108
AM
3057+ err = -EINVAL;
3058+ pr_err("write permission for readonly mount or inode, %s\n", path);
3059+
4f0767ce 3060+out:
1facf9fc 3061+ return err;
3062+}
3063+
3064+/*
3065+ * returns:
3066+ * 0: success, the caller will add it
3067+ * plus: success, it is already unified, the caller should ignore it
3068+ * minus: error
3069+ */
3070+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
3071+{
3072+ int err;
5afbbe0d 3073+ aufs_bindex_t bbot, bindex;
5527c038 3074+ struct dentry *root, *h_dentry;
1facf9fc 3075+ struct inode *inode, *h_inode;
3076+
3077+ root = sb->s_root;
5afbbe0d
AM
3078+ bbot = au_sbbot(sb);
3079+ if (unlikely(bbot >= 0
1facf9fc 3080+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
3081+ err = 1;
3082+ if (!remount) {
3083+ err = -EINVAL;
4a4d8108 3084+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 3085+ }
3086+ goto out;
3087+ }
3088+
3089+ err = -ENOSPC; /* -E2BIG; */
3090+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
5afbbe0d 3091+ || AUFS_BRANCH_MAX - 1 <= bbot)) {
4a4d8108 3092+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 3093+ goto out;
3094+ }
3095+
3096+ err = -EDOM;
5afbbe0d 3097+ if (unlikely(add->bindex < 0 || bbot + 1 < add->bindex)) {
4a4d8108 3098+ pr_err("bad index %d\n", add->bindex);
1facf9fc 3099+ goto out;
3100+ }
3101+
5527c038 3102+ inode = d_inode(add->path.dentry);
1facf9fc 3103+ err = -ENOENT;
3104+ if (unlikely(!inode->i_nlink)) {
4a4d8108 3105+ pr_err("no existence %s\n", add->pathname);
1facf9fc 3106+ goto out;
3107+ }
3108+
3109+ err = -EINVAL;
3110+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 3111+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 3112+ goto out;
3113+ }
3114+
3115+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
3116+ pr_err("unsupported filesystem, %s (%s)\n",
3117+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 3118+ goto out;
3119+ }
3120+
c1595e42
JR
3121+ if (unlikely(inode->i_sb->s_stack_depth)) {
3122+ pr_err("already stacked, %s (%s)\n",
3123+ add->pathname, au_sbtype(inode->i_sb));
3124+ goto out;
3125+ }
3126+
5527c038 3127+ err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
1facf9fc 3128+ if (unlikely(err))
3129+ goto out;
3130+
5afbbe0d 3131+ if (bbot < 0)
1facf9fc 3132+ return 0; /* success */
3133+
3134+ err = -EINVAL;
5afbbe0d 3135+ for (bindex = 0; bindex <= bbot; bindex++)
1facf9fc 3136+ if (unlikely(test_overlap(sb, add->path.dentry,
3137+ au_h_dptr(root, bindex)))) {
4a4d8108 3138+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 3139+ goto out;
3140+ }
3141+
3142+ err = 0;
3143+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
5527c038
JR
3144+ h_dentry = au_h_dptr(root, 0);
3145+ h_inode = d_inode(h_dentry);
1facf9fc 3146+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
0c3ec466
AM
3147+ || !uid_eq(h_inode->i_uid, inode->i_uid)
3148+ || !gid_eq(h_inode->i_gid, inode->i_gid))
3149+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
3150+ add->pathname,
3151+ i_uid_read(inode), i_gid_read(inode),
3152+ (inode->i_mode & S_IALLUGO),
3153+ i_uid_read(h_inode), i_gid_read(h_inode),
3154+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 3155+ }
3156+
4f0767ce 3157+out:
1facf9fc 3158+ return err;
3159+}
3160+
3161+/*
3162+ * initialize or clean the whiteouts for an adding branch
3163+ */
3164+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
86dc4139 3165+ int new_perm)
1facf9fc 3166+{
3167+ int err, old_perm;
3168+ aufs_bindex_t bindex;
febd17d6 3169+ struct inode *h_inode;
1facf9fc 3170+ struct au_wbr *wbr;
3171+ struct au_hinode *hdir;
5527c038 3172+ struct dentry *h_dentry;
1facf9fc 3173+
86dc4139
AM
3174+ err = vfsub_mnt_want_write(au_br_mnt(br));
3175+ if (unlikely(err))
3176+ goto out;
3177+
1facf9fc 3178+ wbr = br->br_wbr;
3179+ old_perm = br->br_perm;
3180+ br->br_perm = new_perm;
3181+ hdir = NULL;
febd17d6 3182+ h_inode = NULL;
1facf9fc 3183+ bindex = au_br_index(sb, br->br_id);
3184+ if (0 <= bindex) {
5527c038 3185+ hdir = au_hi(d_inode(sb->s_root), bindex);
5afbbe0d 3186+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 3187+ } else {
5527c038 3188+ h_dentry = au_br_dentry(br);
febd17d6
JR
3189+ h_inode = d_inode(h_dentry);
3190+ inode_lock_nested(h_inode, AuLsc_I_PARENT);
1facf9fc 3191+ }
3192+ if (!wbr)
86dc4139 3193+ err = au_wh_init(br, sb);
1facf9fc 3194+ else {
3195+ wbr_wh_write_lock(wbr);
86dc4139 3196+ err = au_wh_init(br, sb);
1facf9fc 3197+ wbr_wh_write_unlock(wbr);
3198+ }
3199+ if (hdir)
5afbbe0d 3200+ au_hn_inode_unlock(hdir);
1facf9fc 3201+ else
febd17d6 3202+ inode_unlock(h_inode);
86dc4139 3203+ vfsub_mnt_drop_write(au_br_mnt(br));
1facf9fc 3204+ br->br_perm = old_perm;
3205+
3206+ if (!err && wbr && !au_br_writable(new_perm)) {
f0c0a007 3207+ au_delayed_kfree(wbr);
1facf9fc 3208+ br->br_wbr = NULL;
3209+ }
3210+
86dc4139 3211+out:
1facf9fc 3212+ return err;
3213+}
3214+
3215+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
86dc4139 3216+ int perm)
1facf9fc 3217+{
3218+ int err;
4a4d8108 3219+ struct kstatfs kst;
1facf9fc 3220+ struct au_wbr *wbr;
3221+
3222+ wbr = br->br_wbr;
dece6358 3223+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 3224+ atomic_set(&wbr->wbr_wh_running, 0);
1facf9fc 3225+
4a4d8108
AM
3226+ /*
3227+ * a limit for rmdir/rename a dir
523b37e3 3228+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
4a4d8108 3229+ */
86dc4139 3230+ err = vfs_statfs(&br->br_path, &kst);
4a4d8108
AM
3231+ if (unlikely(err))
3232+ goto out;
3233+ err = -EINVAL;
3234+ if (kst.f_namelen >= NAME_MAX)
86dc4139 3235+ err = au_br_init_wh(sb, br, perm);
4a4d8108 3236+ else
523b37e3
AM
3237+ pr_err("%pd(%s), unsupported namelen %ld\n",
3238+ au_br_dentry(br),
86dc4139 3239+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
1facf9fc 3240+
4f0767ce 3241+out:
1facf9fc 3242+ return err;
3243+}
3244+
c1595e42 3245+/* initialize a new branch */
1facf9fc 3246+static int au_br_init(struct au_branch *br, struct super_block *sb,
3247+ struct au_opt_add *add)
3248+{
3249+ int err;
5527c038 3250+ struct inode *h_inode;
1facf9fc 3251+
3252+ err = 0;
1facf9fc 3253+ mutex_init(&br->br_xino.xi_nondir_mtx);
3254+ br->br_perm = add->perm;
86dc4139 3255+ br->br_path = add->path; /* set first, path_get() later */
4a4d8108 3256+ spin_lock_init(&br->br_dykey_lock);
5afbbe0d 3257+ au_br_count_init(br);
1facf9fc 3258+ atomic_set(&br->br_xino_running, 0);
3259+ br->br_id = au_new_br_id(sb);
7f207e10 3260+ AuDebugOn(br->br_id < 0);
1facf9fc 3261+
3262+ if (au_br_writable(add->perm)) {
86dc4139 3263+ err = au_wbr_init(br, sb, add->perm);
1facf9fc 3264+ if (unlikely(err))
b752ccd1 3265+ goto out_err;
1facf9fc 3266+ }
3267+
3268+ if (au_opt_test(au_mntflags(sb), XINO)) {
5527c038
JR
3269+ h_inode = d_inode(add->path.dentry);
3270+ err = au_xino_br(sb, br, h_inode->i_ino,
1facf9fc 3271+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
3272+ if (unlikely(err)) {
3273+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 3274+ goto out_err;
1facf9fc 3275+ }
3276+ }
3277+
3278+ sysaufs_br_init(br);
86dc4139 3279+ path_get(&br->br_path);
b752ccd1 3280+ goto out; /* success */
1facf9fc 3281+
4f0767ce 3282+out_err:
86dc4139 3283+ memset(&br->br_path, 0, sizeof(br->br_path));
4f0767ce 3284+out:
1facf9fc 3285+ return err;
3286+}
3287+
3288+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
5afbbe0d 3289+ struct au_branch *br, aufs_bindex_t bbot,
1facf9fc 3290+ aufs_bindex_t amount)
3291+{
3292+ struct au_branch **brp;
3293+
dece6358
AM
3294+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3295+
1facf9fc 3296+ brp = sbinfo->si_branch + bindex;
3297+ memmove(brp + 1, brp, sizeof(*brp) * amount);
3298+ *brp = br;
5afbbe0d
AM
3299+ sbinfo->si_bbot++;
3300+ if (unlikely(bbot < 0))
3301+ sbinfo->si_bbot = 0;
1facf9fc 3302+}
3303+
3304+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
5afbbe0d 3305+ aufs_bindex_t bbot, aufs_bindex_t amount)
1facf9fc 3306+{
3307+ struct au_hdentry *hdp;
3308+
1308ab2a 3309+ AuRwMustWriteLock(&dinfo->di_rwsem);
3310+
5afbbe0d 3311+ hdp = au_hdentry(dinfo, bindex);
1facf9fc 3312+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
3313+ au_h_dentry_init(hdp);
5afbbe0d
AM
3314+ dinfo->di_bbot++;
3315+ if (unlikely(bbot < 0))
3316+ dinfo->di_btop = 0;
1facf9fc 3317+}
3318+
3319+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
5afbbe0d 3320+ aufs_bindex_t bbot, aufs_bindex_t amount)
1facf9fc 3321+{
3322+ struct au_hinode *hip;
3323+
1308ab2a 3324+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3325+
5afbbe0d 3326+ hip = au_hinode(iinfo, bindex);
1facf9fc 3327+ memmove(hip + 1, hip, sizeof(*hip) * amount);
5afbbe0d
AM
3328+ au_hinode_init(hip);
3329+ iinfo->ii_bbot++;
3330+ if (unlikely(bbot < 0))
3331+ iinfo->ii_btop = 0;
1facf9fc 3332+}
3333+
86dc4139
AM
3334+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
3335+ aufs_bindex_t bindex)
1facf9fc 3336+{
86dc4139 3337+ struct dentry *root, *h_dentry;
5527c038 3338+ struct inode *root_inode, *h_inode;
5afbbe0d 3339+ aufs_bindex_t bbot, amount;
1facf9fc 3340+
3341+ root = sb->s_root;
5527c038 3342+ root_inode = d_inode(root);
5afbbe0d
AM
3343+ bbot = au_sbbot(sb);
3344+ amount = bbot + 1 - bindex;
86dc4139 3345+ h_dentry = au_br_dentry(br);
53392da6 3346+ au_sbilist_lock();
5afbbe0d
AM
3347+ au_br_do_add_brp(au_sbi(sb), bindex, br, bbot, amount);
3348+ au_br_do_add_hdp(au_di(root), bindex, bbot, amount);
3349+ au_br_do_add_hip(au_ii(root_inode), bindex, bbot, amount);
1facf9fc 3350+ au_set_h_dptr(root, bindex, dget(h_dentry));
5527c038
JR
3351+ h_inode = d_inode(h_dentry);
3352+ au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
53392da6 3353+ au_sbilist_unlock();
1facf9fc 3354+}
3355+
3356+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
3357+{
3358+ int err;
5afbbe0d 3359+ aufs_bindex_t bbot, add_bindex;
1facf9fc 3360+ struct dentry *root, *h_dentry;
3361+ struct inode *root_inode;
3362+ struct au_branch *add_branch;
3363+
3364+ root = sb->s_root;
5527c038 3365+ root_inode = d_inode(root);
1facf9fc 3366+ IMustLock(root_inode);
5afbbe0d 3367+ IiMustWriteLock(root_inode);
1facf9fc 3368+ err = test_add(sb, add, remount);
3369+ if (unlikely(err < 0))
3370+ goto out;
3371+ if (err) {
3372+ err = 0;
3373+ goto out; /* success */
3374+ }
3375+
5afbbe0d
AM
3376+ bbot = au_sbbot(sb);
3377+ add_branch = au_br_alloc(sb, bbot + 2, add->perm);
1facf9fc 3378+ err = PTR_ERR(add_branch);
3379+ if (IS_ERR(add_branch))
3380+ goto out;
3381+
3382+ err = au_br_init(add_branch, sb, add);
3383+ if (unlikely(err)) {
3384+ au_br_do_free(add_branch);
3385+ goto out;
3386+ }
3387+
3388+ add_bindex = add->bindex;
1facf9fc 3389+ if (!remount)
86dc4139 3390+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3391+ else {
3392+ sysaufs_brs_del(sb, add_bindex);
86dc4139 3393+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3394+ sysaufs_brs_add(sb, add_bindex);
3395+ }
3396+
86dc4139 3397+ h_dentry = add->path.dentry;
1308ab2a 3398+ if (!add_bindex) {
1facf9fc 3399+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 3400+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
3401+ } else
5527c038 3402+ au_add_nlink(root_inode, d_inode(h_dentry));
1facf9fc 3403+
3404+ /*
4a4d8108 3405+ * this test/set prevents aufs from handling unnecesary notify events
027c5e7a 3406+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 3407+ * once detached from aufs.
3408+ */
3409+ if (au_xino_brid(sb) < 0
3410+ && au_br_writable(add_branch->br_perm)
3411+ && !au_test_fs_bad_xino(h_dentry->d_sb)
3412+ && add_branch->br_xino.xi_file
2000de60 3413+ && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
1facf9fc 3414+ au_xino_brid_set(sb, add_branch->br_id);
3415+
4f0767ce 3416+out:
1facf9fc 3417+ return err;
3418+}
3419+
3420+/* ---------------------------------------------------------------------- */
3421+
79b8bda9 3422+static unsigned long long au_farray_cb(struct super_block *sb, void *a,
076b876e
AM
3423+ unsigned long long max __maybe_unused,
3424+ void *arg)
3425+{
3426+ unsigned long long n;
3427+ struct file **p, *f;
3428+ struct au_sphlhead *files;
3429+ struct au_finfo *finfo;
076b876e
AM
3430+
3431+ n = 0;
3432+ p = a;
3433+ files = &au_sbi(sb)->si_files;
3434+ spin_lock(&files->spin);
3435+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
3436+ f = finfo->fi_file;
3437+ if (file_count(f)
3438+ && !special_file(file_inode(f)->i_mode)) {
3439+ get_file(f);
3440+ *p++ = f;
3441+ n++;
3442+ AuDebugOn(n > max);
3443+ }
3444+ }
3445+ spin_unlock(&files->spin);
3446+
3447+ return n;
3448+}
3449+
3450+static struct file **au_farray_alloc(struct super_block *sb,
3451+ unsigned long long *max)
3452+{
5afbbe0d 3453+ *max = au_nfiles(sb);
79b8bda9 3454+ return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
076b876e
AM
3455+}
3456+
/*
 * Drop the reference to every non-NULL file among the first @max entries of
 * @a, then free the array itself.
 */
static void au_farray_free(struct file **a, unsigned long long max)
{
	struct file **cur, **end;

	end = a + max;
	for (cur = a; cur < end; cur++) {
		if (*cur)
			fput(*cur);
	}
	kvfree(a);
}
3466+
3467+/* ---------------------------------------------------------------------- */
3468+
1facf9fc 3469+/*
3470+ * delete a branch
3471+ */
3472+
/*
 * Print an informational message only when @enabled is true.
 * to show the line number, do not make it inlined function
 */
#define AuVerbose(enabled, fmt, ...) \
	do { \
		if (enabled) \
			pr_info(fmt, ##__VA_ARGS__); \
	} while (0)
3478+
5afbbe0d
AM
3479+static int au_test_ibusy(struct inode *inode, aufs_bindex_t btop,
3480+ aufs_bindex_t bbot)
027c5e7a 3481+{
5afbbe0d 3482+ return (inode && !S_ISDIR(inode->i_mode)) || btop == bbot;
027c5e7a
AM
3483+}
3484+
5afbbe0d
AM
3485+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t btop,
3486+ aufs_bindex_t bbot)
027c5e7a 3487+{
5afbbe0d 3488+ return au_test_ibusy(d_inode(dentry), btop, bbot);
027c5e7a
AM
3489+}
3490+
1facf9fc 3491+/*
3492+ * test if the branch is deletable or not.
3493+ */
3494+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 3495+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3496+{
3497+ int err, i, j, ndentry;
5afbbe0d 3498+ aufs_bindex_t btop, bbot;
1facf9fc 3499+ struct au_dcsub_pages dpages;
3500+ struct au_dpage *dpage;
3501+ struct dentry *d;
1facf9fc 3502+
3503+ err = au_dpages_init(&dpages, GFP_NOFS);
3504+ if (unlikely(err))
3505+ goto out;
3506+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
3507+ if (unlikely(err))
3508+ goto out_dpages;
3509+
1facf9fc 3510+ for (i = 0; !err && i < dpages.ndpage; i++) {
3511+ dpage = dpages.dpages + i;
3512+ ndentry = dpage->ndentry;
3513+ for (j = 0; !err && j < ndentry; j++) {
3514+ d = dpage->dentries[j];
c1595e42 3515+ AuDebugOn(au_dcount(d) <= 0);
027c5e7a 3516+ if (!au_digen_test(d, sigen)) {
1facf9fc 3517+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
3518+ if (unlikely(au_dbrange_test(d))) {
3519+ di_read_unlock(d, AuLock_IR);
3520+ continue;
3521+ }
3522+ } else {
1facf9fc 3523+ di_write_lock_child(d);
027c5e7a
AM
3524+ if (unlikely(au_dbrange_test(d))) {
3525+ di_write_unlock(d);
3526+ continue;
3527+ }
1facf9fc 3528+ err = au_reval_dpath(d, sigen);
3529+ if (!err)
3530+ di_downgrade_lock(d, AuLock_IR);
3531+ else {
3532+ di_write_unlock(d);
3533+ break;
3534+ }
3535+ }
3536+
027c5e7a 3537+ /* AuDbgDentry(d); */
5afbbe0d
AM
3538+ btop = au_dbtop(d);
3539+ bbot = au_dbbot(d);
3540+ if (btop <= bindex
3541+ && bindex <= bbot
1facf9fc 3542+ && au_h_dptr(d, bindex)
5afbbe0d 3543+ && au_test_dbusy(d, btop, bbot)) {
1facf9fc 3544+ err = -EBUSY;
523b37e3 3545+ AuVerbose(verbose, "busy %pd\n", d);
027c5e7a 3546+ AuDbgDentry(d);
1facf9fc 3547+ }
3548+ di_read_unlock(d, AuLock_IR);
3549+ }
3550+ }
3551+
4f0767ce 3552+out_dpages:
1facf9fc 3553+ au_dpages_free(&dpages);
4f0767ce 3554+out:
1facf9fc 3555+ return err;
3556+}
3557+
3558+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 3559+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3560+{
3561+ int err;
7f207e10
AM
3562+ unsigned long long max, ull;
3563+ struct inode *i, **array;
5afbbe0d 3564+ aufs_bindex_t btop, bbot;
1facf9fc 3565+
7f207e10
AM
3566+ array = au_iarray_alloc(sb, &max);
3567+ err = PTR_ERR(array);
3568+ if (IS_ERR(array))
3569+ goto out;
3570+
1facf9fc 3571+ err = 0;
7f207e10
AM
3572+ AuDbg("b%d\n", bindex);
3573+ for (ull = 0; !err && ull < max; ull++) {
3574+ i = array[ull];
076b876e
AM
3575+ if (unlikely(!i))
3576+ break;
7f207e10 3577+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 3578+ continue;
3579+
7f207e10 3580+ /* AuDbgInode(i); */
537831f9 3581+ if (au_iigen(i, NULL) == sigen)
1facf9fc 3582+ ii_read_lock_child(i);
3583+ else {
3584+ ii_write_lock_child(i);
027c5e7a
AM
3585+ err = au_refresh_hinode_self(i);
3586+ au_iigen_dec(i);
1facf9fc 3587+ if (!err)
3588+ ii_downgrade_lock(i);
3589+ else {
3590+ ii_write_unlock(i);
3591+ break;
3592+ }
3593+ }
3594+
5afbbe0d
AM
3595+ btop = au_ibtop(i);
3596+ bbot = au_ibbot(i);
3597+ if (btop <= bindex
3598+ && bindex <= bbot
1facf9fc 3599+ && au_h_iptr(i, bindex)
5afbbe0d 3600+ && au_test_ibusy(i, btop, bbot)) {
1facf9fc 3601+ err = -EBUSY;
3602+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 3603+ AuDbgInode(i);
1facf9fc 3604+ }
3605+ ii_read_unlock(i);
3606+ }
7f207e10 3607+ au_iarray_free(array, max);
1facf9fc 3608+
7f207e10 3609+out:
1facf9fc 3610+ return err;
3611+}
3612+
b752ccd1
AM
3613+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
3614+ const unsigned int verbose)
1facf9fc 3615+{
3616+ int err;
3617+ unsigned int sigen;
3618+
3619+ sigen = au_sigen(root->d_sb);
3620+ DiMustNoWaiters(root);
5527c038 3621+ IiMustNoWaiters(d_inode(root));
1facf9fc 3622+ di_write_unlock(root);
b752ccd1 3623+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 3624+ if (!err)
b752ccd1 3625+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 3626+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
3627+
3628+ return err;
3629+}
3630+
076b876e
AM
3631+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
3632+ struct file **to_free, int *idx)
3633+{
3634+ int err;
c1595e42 3635+ unsigned char matched, root;
5afbbe0d 3636+ aufs_bindex_t bindex, bbot;
076b876e
AM
3637+ struct au_fidir *fidir;
3638+ struct au_hfile *hfile;
3639+
3640+ err = 0;
2000de60 3641+ root = IS_ROOT(file->f_path.dentry);
c1595e42
JR
3642+ if (root) {
3643+ get_file(file);
3644+ to_free[*idx] = file;
3645+ (*idx)++;
3646+ goto out;
3647+ }
3648+
076b876e 3649+ matched = 0;
076b876e
AM
3650+ fidir = au_fi(file)->fi_hdir;
3651+ AuDebugOn(!fidir);
5afbbe0d
AM
3652+ bbot = au_fbbot_dir(file);
3653+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++) {
076b876e
AM
3654+ hfile = fidir->fd_hfile + bindex;
3655+ if (!hfile->hf_file)
3656+ continue;
3657+
c1595e42 3658+ if (hfile->hf_br->br_id == br_id) {
076b876e 3659+ matched = 1;
076b876e 3660+ break;
c1595e42 3661+ }
076b876e 3662+ }
c1595e42 3663+ if (matched)
076b876e
AM
3664+ err = -EBUSY;
3665+
3666+out:
3667+ return err;
3668+}
3669+
3670+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
3671+ struct file **to_free, int opened)
3672+{
3673+ int err, idx;
3674+ unsigned long long ull, max;
5afbbe0d 3675+ aufs_bindex_t btop;
076b876e 3676+ struct file *file, **array;
076b876e
AM
3677+ struct dentry *root;
3678+ struct au_hfile *hfile;
3679+
3680+ array = au_farray_alloc(sb, &max);
3681+ err = PTR_ERR(array);
3682+ if (IS_ERR(array))
3683+ goto out;
3684+
3685+ err = 0;
3686+ idx = 0;
3687+ root = sb->s_root;
3688+ di_write_unlock(root);
3689+ for (ull = 0; ull < max; ull++) {
3690+ file = array[ull];
3691+ if (unlikely(!file))
3692+ break;
3693+
3694+ /* AuDbg("%pD\n", file); */
3695+ fi_read_lock(file);
5afbbe0d 3696+ btop = au_fbtop(file);
2000de60 3697+ if (!d_is_dir(file->f_path.dentry)) {
076b876e
AM
3698+ hfile = &au_fi(file)->fi_htop;
3699+ if (hfile->hf_br->br_id == br_id)
3700+ err = -EBUSY;
3701+ } else
3702+ err = test_dir_busy(file, br_id, to_free, &idx);
3703+ fi_read_unlock(file);
3704+ if (unlikely(err))
3705+ break;
3706+ }
3707+ di_write_lock_child(root);
3708+ au_farray_free(array, max);
3709+ AuDebugOn(idx > opened);
3710+
3711+out:
3712+ return err;
3713+}
3714+
3715+static void br_del_file(struct file **to_free, unsigned long long opened,
3716+ aufs_bindex_t br_id)
3717+{
3718+ unsigned long long ull;
5afbbe0d 3719+ aufs_bindex_t bindex, btop, bbot, bfound;
076b876e
AM
3720+ struct file *file;
3721+ struct au_fidir *fidir;
3722+ struct au_hfile *hfile;
3723+
3724+ for (ull = 0; ull < opened; ull++) {
3725+ file = to_free[ull];
3726+ if (unlikely(!file))
3727+ break;
3728+
3729+ /* AuDbg("%pD\n", file); */
2000de60 3730+ AuDebugOn(!d_is_dir(file->f_path.dentry));
076b876e
AM
3731+ bfound = -1;
3732+ fidir = au_fi(file)->fi_hdir;
3733+ AuDebugOn(!fidir);
3734+ fi_write_lock(file);
5afbbe0d
AM
3735+ btop = au_fbtop(file);
3736+ bbot = au_fbbot_dir(file);
3737+ for (bindex = btop; bindex <= bbot; bindex++) {
076b876e
AM
3738+ hfile = fidir->fd_hfile + bindex;
3739+ if (!hfile->hf_file)
3740+ continue;
3741+
3742+ if (hfile->hf_br->br_id == br_id) {
3743+ bfound = bindex;
3744+ break;
3745+ }
3746+ }
3747+ AuDebugOn(bfound < 0);
3748+ au_set_h_fptr(file, bfound, NULL);
5afbbe0d
AM
3749+ if (bfound == btop) {
3750+ for (btop++; btop <= bbot; btop++)
3751+ if (au_hf_dir(file, btop)) {
3752+ au_set_fbtop(file, btop);
076b876e
AM
3753+ break;
3754+ }
3755+ }
3756+ fi_write_unlock(file);
3757+ }
3758+}
3759+
1facf9fc 3760+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
3761+ const aufs_bindex_t bindex,
5afbbe0d 3762+ const aufs_bindex_t bbot)
1facf9fc 3763+{
3764+ struct au_branch **brp, **p;
3765+
dece6358
AM
3766+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3767+
1facf9fc 3768+ brp = sbinfo->si_branch + bindex;
5afbbe0d
AM
3769+ if (bindex < bbot)
3770+ memmove(brp, brp + 1, sizeof(*brp) * (bbot - bindex));
3771+ sbinfo->si_branch[0 + bbot] = NULL;
3772+ sbinfo->si_bbot--;
1facf9fc 3773+
e2f27e51
AM
3774+ p = au_krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST,
3775+ /*may_shrink*/1);
1facf9fc 3776+ if (p)
3777+ sbinfo->si_branch = p;
4a4d8108 3778+ /* harmless error */
1facf9fc 3779+}
3780+
3781+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
5afbbe0d 3782+ const aufs_bindex_t bbot)
1facf9fc 3783+{
3784+ struct au_hdentry *hdp, *p;
3785+
1308ab2a 3786+ AuRwMustWriteLock(&dinfo->di_rwsem);
3787+
5afbbe0d
AM
3788+ hdp = au_hdentry(dinfo, bindex);
3789+ if (bindex < bbot)
3790+ memmove(hdp, hdp + 1, sizeof(*hdp) * (bbot - bindex));
3791+ /* au_h_dentry_init(au_hdentry(dinfo, bbot); */
3792+ dinfo->di_bbot--;
1facf9fc 3793+
e2f27e51
AM
3794+ p = au_krealloc(dinfo->di_hdentry, sizeof(*p) * bbot, AuGFP_SBILIST,
3795+ /*may_shrink*/1);
1facf9fc 3796+ if (p)
3797+ dinfo->di_hdentry = p;
4a4d8108 3798+ /* harmless error */
1facf9fc 3799+}
3800+
3801+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
5afbbe0d 3802+ const aufs_bindex_t bbot)
1facf9fc 3803+{
3804+ struct au_hinode *hip, *p;
3805+
1308ab2a 3806+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3807+
5afbbe0d
AM
3808+ hip = au_hinode(iinfo, bindex);
3809+ if (bindex < bbot)
3810+ memmove(hip, hip + 1, sizeof(*hip) * (bbot - bindex));
3811+ /* au_hinode_init(au_hinode(iinfo, bbot)); */
3812+ iinfo->ii_bbot--;
1facf9fc 3813+
e2f27e51
AM
3814+ p = au_krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST,
3815+ /*may_shrink*/1);
1facf9fc 3816+ if (p)
3817+ iinfo->ii_hinode = p;
4a4d8108 3818+ /* harmless error */
1facf9fc 3819+}
3820+
3821+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
3822+ struct au_branch *br)
3823+{
5afbbe0d 3824+ aufs_bindex_t bbot;
1facf9fc 3825+ struct au_sbinfo *sbinfo;
53392da6
AM
3826+ struct dentry *root, *h_root;
3827+ struct inode *inode, *h_inode;
3828+ struct au_hinode *hinode;
1facf9fc 3829+
dece6358
AM
3830+ SiMustWriteLock(sb);
3831+
1facf9fc 3832+ root = sb->s_root;
5527c038 3833+ inode = d_inode(root);
1facf9fc 3834+ sbinfo = au_sbi(sb);
5afbbe0d 3835+ bbot = sbinfo->si_bbot;
1facf9fc 3836+
53392da6
AM
3837+ h_root = au_h_dptr(root, bindex);
3838+ hinode = au_hi(inode, bindex);
3839+ h_inode = au_igrab(hinode->hi_inode);
3840+ au_hiput(hinode);
1facf9fc 3841+
53392da6 3842+ au_sbilist_lock();
5afbbe0d
AM
3843+ au_br_do_del_brp(sbinfo, bindex, bbot);
3844+ au_br_do_del_hdp(au_di(root), bindex, bbot);
3845+ au_br_do_del_hip(au_ii(inode), bindex, bbot);
53392da6
AM
3846+ au_sbilist_unlock();
3847+
3848+ dput(h_root);
3849+ iput(h_inode);
3850+ au_br_do_free(br);
1facf9fc 3851+}
3852+
79b8bda9
AM
3853+static unsigned long long empty_cb(struct super_block *sb, void *array,
3854+ unsigned long long max, void *arg)
076b876e
AM
3855+{
3856+ return max;
3857+}
3858+
1facf9fc 3859+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
3860+{
3861+ int err, rerr, i;
076b876e 3862+ unsigned long long opened;
1facf9fc 3863+ unsigned int mnt_flags;
5afbbe0d 3864+ aufs_bindex_t bindex, bbot, br_id;
1facf9fc 3865+ unsigned char do_wh, verbose;
3866+ struct au_branch *br;
3867+ struct au_wbr *wbr;
076b876e
AM
3868+ struct dentry *root;
3869+ struct file **to_free;
1facf9fc 3870+
3871+ err = 0;
076b876e
AM
3872+ opened = 0;
3873+ to_free = NULL;
3874+ root = sb->s_root;
3875+ bindex = au_find_dbindex(root, del->h_path.dentry);
1facf9fc 3876+ if (bindex < 0) {
3877+ if (remount)
3878+ goto out; /* success */
3879+ err = -ENOENT;
4a4d8108 3880+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 3881+ goto out;
3882+ }
3883+ AuDbg("bindex b%d\n", bindex);
3884+
3885+ err = -EBUSY;
3886+ mnt_flags = au_mntflags(sb);
3887+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
5afbbe0d
AM
3888+ bbot = au_sbbot(sb);
3889+ if (unlikely(!bbot)) {
1facf9fc 3890+ AuVerbose(verbose, "no more branches left\n");
3891+ goto out;
3892+ }
3893+ br = au_sbr(sb, bindex);
86dc4139 3894+ AuDebugOn(!path_equal(&br->br_path, &del->h_path));
076b876e
AM
3895+
3896+ br_id = br->br_id;
5afbbe0d 3897+ opened = au_br_count(br);
076b876e 3898+ if (unlikely(opened)) {
79b8bda9 3899+ to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
076b876e
AM
3900+ err = PTR_ERR(to_free);
3901+ if (IS_ERR(to_free))
3902+ goto out;
3903+
3904+ err = test_file_busy(sb, br_id, to_free, opened);
3905+ if (unlikely(err)) {
3906+ AuVerbose(verbose, "%llu file(s) opened\n", opened);
3907+ goto out;
3908+ }
1facf9fc 3909+ }
3910+
3911+ wbr = br->br_wbr;
3912+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
3913+ if (do_wh) {
1308ab2a 3914+ /* instead of WbrWhMustWriteLock(wbr) */
3915+ SiMustWriteLock(sb);
1facf9fc 3916+ for (i = 0; i < AuBrWh_Last; i++) {
3917+ dput(wbr->wbr_wh[i]);
3918+ wbr->wbr_wh[i] = NULL;
3919+ }
3920+ }
3921+
076b876e 3922+ err = test_children_busy(root, bindex, verbose);
1facf9fc 3923+ if (unlikely(err)) {
3924+ if (do_wh)
3925+ goto out_wh;
3926+ goto out;
3927+ }
3928+
3929+ err = 0;
076b876e
AM
3930+ if (to_free) {
3931+ /*
3932+ * now we confirmed the branch is deletable.
3933+ * let's free the remaining opened dirs on the branch.
3934+ */
3935+ di_write_unlock(root);
3936+ br_del_file(to_free, opened, br_id);
3937+ di_write_lock_child(root);
3938+ }
3939+
1facf9fc 3940+ if (!remount)
3941+ au_br_do_del(sb, bindex, br);
3942+ else {
3943+ sysaufs_brs_del(sb, bindex);
3944+ au_br_do_del(sb, bindex, br);
3945+ sysaufs_brs_add(sb, bindex);
3946+ }
3947+
1308ab2a 3948+ if (!bindex) {
5527c038 3949+ au_cpup_attr_all(d_inode(root), /*force*/1);
1308ab2a 3950+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
3951+ } else
5527c038 3952+ au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
1facf9fc 3953+ if (au_opt_test(mnt_flags, PLINK))
3954+ au_plink_half_refresh(sb, br_id);
3955+
b752ccd1 3956+ if (au_xino_brid(sb) == br_id)
1facf9fc 3957+ au_xino_brid_set(sb, -1);
3958+ goto out; /* success */
3959+
4f0767ce 3960+out_wh:
1facf9fc 3961+ /* revert */
86dc4139 3962+ rerr = au_br_init_wh(sb, br, br->br_perm);
1facf9fc 3963+ if (rerr)
0c3ec466
AM
3964+ pr_warn("failed re-creating base whiteout, %s. (%d)\n",
3965+ del->pathname, rerr);
4f0767ce 3966+out:
076b876e
AM
3967+ if (to_free)
3968+ au_farray_free(to_free, opened);
1facf9fc 3969+ return err;
3970+}
3971+
3972+/* ---------------------------------------------------------------------- */
3973+
027c5e7a
AM
3974+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
3975+{
3976+ int err;
5afbbe0d 3977+ aufs_bindex_t btop, bbot;
027c5e7a
AM
3978+ struct aufs_ibusy ibusy;
3979+ struct inode *inode, *h_inode;
3980+
3981+ err = -EPERM;
3982+ if (unlikely(!capable(CAP_SYS_ADMIN)))
3983+ goto out;
3984+
3985+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
3986+ if (!err)
3987+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
3988+ if (unlikely(err)) {
3989+ err = -EFAULT;
3990+ AuTraceErr(err);
3991+ goto out;
3992+ }
3993+
3994+ err = -EINVAL;
3995+ si_read_lock(sb, AuLock_FLUSH);
5afbbe0d 3996+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbbot(sb)))
027c5e7a
AM
3997+ goto out_unlock;
3998+
3999+ err = 0;
4000+ ibusy.h_ino = 0; /* invalid */
4001+ inode = ilookup(sb, ibusy.ino);
4002+ if (!inode
4003+ || inode->i_ino == AUFS_ROOT_INO
5afbbe0d 4004+ || au_is_bad_inode(inode))
027c5e7a
AM
4005+ goto out_unlock;
4006+
4007+ ii_read_lock_child(inode);
5afbbe0d
AM
4008+ btop = au_ibtop(inode);
4009+ bbot = au_ibbot(inode);
4010+ if (btop <= ibusy.bindex && ibusy.bindex <= bbot) {
027c5e7a 4011+ h_inode = au_h_iptr(inode, ibusy.bindex);
5afbbe0d 4012+ if (h_inode && au_test_ibusy(inode, btop, bbot))
027c5e7a
AM
4013+ ibusy.h_ino = h_inode->i_ino;
4014+ }
4015+ ii_read_unlock(inode);
4016+ iput(inode);
4017+
4018+out_unlock:
4019+ si_read_unlock(sb);
4020+ if (!err) {
4021+ err = __put_user(ibusy.h_ino, &arg->h_ino);
4022+ if (unlikely(err)) {
4023+ err = -EFAULT;
4024+ AuTraceErr(err);
4025+ }
4026+ }
4027+out:
4028+ return err;
4029+}
4030+
4031+long au_ibusy_ioctl(struct file *file, unsigned long arg)
4032+{
2000de60 4033+ return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
027c5e7a
AM
4034+}
4035+
4036+#ifdef CONFIG_COMPAT
4037+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
4038+{
2000de60 4039+ return au_ibusy(file->f_path.dentry->d_sb, compat_ptr(arg));
027c5e7a
AM
4040+}
4041+#endif
4042+
4043+/* ---------------------------------------------------------------------- */
4044+
1facf9fc 4045+/*
4046+ * change a branch permission
4047+ */
4048+
dece6358
AM
4049+static void au_warn_ima(void)
4050+{
4051+#ifdef CONFIG_IMA
1308ab2a 4052+ /* since it doesn't support mark_files_ro() */
027c5e7a 4053+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
dece6358
AM
4054+#endif
4055+}
4056+
1facf9fc 4057+static int do_need_sigen_inc(int a, int b)
4058+{
4059+ return au_br_whable(a) && !au_br_whable(b);
4060+}
4061+
4062+static int need_sigen_inc(int old, int new)
4063+{
4064+ return do_need_sigen_inc(old, new)
4065+ || do_need_sigen_inc(new, old);
4066+}
4067+
4068+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
4069+{
7f207e10 4070+ int err, do_warn;
027c5e7a 4071+ unsigned int mnt_flags;
7f207e10 4072+ unsigned long long ull, max;
e49829fe 4073+ aufs_bindex_t br_id;
38d290e6 4074+ unsigned char verbose, writer;
7f207e10 4075+ struct file *file, *hf, **array;
e49829fe 4076+ struct au_hfile *hfile;
1facf9fc 4077+
027c5e7a
AM
4078+ mnt_flags = au_mntflags(sb);
4079+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4080+
7f207e10
AM
4081+ array = au_farray_alloc(sb, &max);
4082+ err = PTR_ERR(array);
4083+ if (IS_ERR(array))
1facf9fc 4084+ goto out;
4085+
7f207e10 4086+ do_warn = 0;
e49829fe 4087+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
4088+ for (ull = 0; ull < max; ull++) {
4089+ file = array[ull];
076b876e
AM
4090+ if (unlikely(!file))
4091+ break;
1facf9fc 4092+
523b37e3 4093+ /* AuDbg("%pD\n", file); */
1facf9fc 4094+ fi_read_lock(file);
4095+ if (unlikely(au_test_mmapped(file))) {
4096+ err = -EBUSY;
523b37e3 4097+ AuVerbose(verbose, "mmapped %pD\n", file);
7f207e10 4098+ AuDbgFile(file);
1facf9fc 4099+ FiMustNoWaiters(file);
4100+ fi_read_unlock(file);
7f207e10 4101+ goto out_array;
1facf9fc 4102+ }
4103+
e49829fe
JR
4104+ hfile = &au_fi(file)->fi_htop;
4105+ hf = hfile->hf_file;
7e9cd9fe 4106+ if (!d_is_reg(file->f_path.dentry)
1facf9fc 4107+ || !(file->f_mode & FMODE_WRITE)
e49829fe 4108+ || hfile->hf_br->br_id != br_id
7f207e10
AM
4109+ || !(hf->f_mode & FMODE_WRITE))
4110+ array[ull] = NULL;
4111+ else {
4112+ do_warn = 1;
4113+ get_file(file);
1facf9fc 4114+ }
4115+
1facf9fc 4116+ FiMustNoWaiters(file);
4117+ fi_read_unlock(file);
7f207e10
AM
4118+ fput(file);
4119+ }
1facf9fc 4120+
4121+ err = 0;
7f207e10 4122+ if (do_warn)
dece6358 4123+ au_warn_ima();
7f207e10
AM
4124+
4125+ for (ull = 0; ull < max; ull++) {
4126+ file = array[ull];
4127+ if (!file)
4128+ continue;
4129+
1facf9fc 4130+ /* todo: already flushed? */
523b37e3
AM
4131+ /*
4132+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its
4133+ * approach which resets f_mode and calls mnt_drop_write() and
4134+ * file_release_write() for each file, because the branch
4135+ * attribute in aufs world is totally different from the native
4136+ * fs rw/ro mode.
4137+ */
7f207e10
AM
4138+ /* fi_read_lock(file); */
4139+ hfile = &au_fi(file)->fi_htop;
4140+ hf = hfile->hf_file;
4141+ /* fi_read_unlock(file); */
027c5e7a 4142+ spin_lock(&hf->f_lock);
38d290e6
JR
4143+ writer = !!(hf->f_mode & FMODE_WRITER);
4144+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
027c5e7a 4145+ spin_unlock(&hf->f_lock);
38d290e6
JR
4146+ if (writer) {
4147+ put_write_access(file_inode(hf));
c06a8ce3 4148+ __mnt_drop_write(hf->f_path.mnt);
1facf9fc 4149+ }
4150+ }
4151+
7f207e10
AM
4152+out_array:
4153+ au_farray_free(array, max);
4f0767ce 4154+out:
7f207e10 4155+ AuTraceErr(err);
1facf9fc 4156+ return err;
4157+}
4158+
4159+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4160+ int *do_refresh)
1facf9fc 4161+{
4162+ int err, rerr;
4163+ aufs_bindex_t bindex;
4164+ struct dentry *root;
4165+ struct au_branch *br;
076b876e 4166+ struct au_br_fhsm *bf;
1facf9fc 4167+
4168+ root = sb->s_root;
1facf9fc 4169+ bindex = au_find_dbindex(root, mod->h_root);
4170+ if (bindex < 0) {
4171+ if (remount)
4172+ return 0; /* success */
4173+ err = -ENOENT;
4a4d8108 4174+ pr_err("%s no such branch\n", mod->path);
1facf9fc 4175+ goto out;
4176+ }
4177+ AuDbg("bindex b%d\n", bindex);
4178+
5527c038 4179+ err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
1facf9fc 4180+ if (unlikely(err))
4181+ goto out;
4182+
4183+ br = au_sbr(sb, bindex);
86dc4139 4184+ AuDebugOn(mod->h_root != au_br_dentry(br));
1facf9fc 4185+ if (br->br_perm == mod->perm)
4186+ return 0; /* success */
4187+
076b876e
AM
4188+ /* pre-allocate for non-fhsm --> fhsm */
4189+ bf = NULL;
4190+ if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
4191+ err = au_fhsm_br_alloc(br);
4192+ if (unlikely(err))
4193+ goto out;
4194+ bf = br->br_fhsm;
4195+ br->br_fhsm = NULL;
4196+ }
4197+
1facf9fc 4198+ if (au_br_writable(br->br_perm)) {
4199+ /* remove whiteout base */
86dc4139 4200+ err = au_br_init_wh(sb, br, mod->perm);
1facf9fc 4201+ if (unlikely(err))
076b876e 4202+ goto out_bf;
1facf9fc 4203+
4204+ if (!au_br_writable(mod->perm)) {
4205+ /* rw --> ro, file might be mmapped */
4206+ DiMustNoWaiters(root);
5527c038 4207+ IiMustNoWaiters(d_inode(root));
1facf9fc 4208+ di_write_unlock(root);
4209+ err = au_br_mod_files_ro(sb, bindex);
4210+ /* aufs_write_lock() calls ..._child() */
4211+ di_write_lock_child(root);
4212+
4213+ if (unlikely(err)) {
4214+ rerr = -ENOMEM;
be52b249 4215+ br->br_wbr = kzalloc(sizeof(*br->br_wbr),
1facf9fc 4216+ GFP_NOFS);
86dc4139
AM
4217+ if (br->br_wbr)
4218+ rerr = au_wbr_init(br, sb, br->br_perm);
1facf9fc 4219+ if (unlikely(rerr)) {
4220+ AuIOErr("nested error %d (%d)\n",
4221+ rerr, err);
4222+ br->br_perm = mod->perm;
4223+ }
4224+ }
4225+ }
4226+ } else if (au_br_writable(mod->perm)) {
4227+ /* ro --> rw */
4228+ err = -ENOMEM;
be52b249 4229+ br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
1facf9fc 4230+ if (br->br_wbr) {
86dc4139 4231+ err = au_wbr_init(br, sb, mod->perm);
1facf9fc 4232+ if (unlikely(err)) {
f0c0a007 4233+ au_delayed_kfree(br->br_wbr);
1facf9fc 4234+ br->br_wbr = NULL;
4235+ }
4236+ }
4237+ }
076b876e
AM
4238+ if (unlikely(err))
4239+ goto out_bf;
4240+
4241+ if (au_br_fhsm(br->br_perm)) {
4242+ if (!au_br_fhsm(mod->perm)) {
4243+ /* fhsm --> non-fhsm */
4244+ au_br_fhsm_fin(br->br_fhsm);
f0c0a007 4245+ au_delayed_kfree(br->br_fhsm);
076b876e
AM
4246+ br->br_fhsm = NULL;
4247+ }
4248+ } else if (au_br_fhsm(mod->perm))
4249+ /* non-fhsm --> fhsm */
4250+ br->br_fhsm = bf;
4251+
076b876e
AM
4252+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
4253+ br->br_perm = mod->perm;
4254+ goto out; /* success */
1facf9fc 4255+
076b876e 4256+out_bf:
f0c0a007
AM
4257+ if (bf)
4258+ au_delayed_kfree(bf);
076b876e
AM
4259+out:
4260+ AuTraceErr(err);
4261+ return err;
4262+}
4263+
4264+/* ---------------------------------------------------------------------- */
4265+
4266+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
4267+{
4268+ int err;
4269+ struct kstatfs kstfs;
4270+
4271+ err = vfs_statfs(&br->br_path, &kstfs);
1facf9fc 4272+ if (!err) {
076b876e
AM
4273+ stfs->f_blocks = kstfs.f_blocks;
4274+ stfs->f_bavail = kstfs.f_bavail;
4275+ stfs->f_files = kstfs.f_files;
4276+ stfs->f_ffree = kstfs.f_ffree;
1facf9fc 4277+ }
4278+
1facf9fc 4279+ return err;
4280+}
7f207e10
AM
4281diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
4282--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 4283+++ linux/fs/aufs/branch.h 2016-10-09 16:55:36.486034798 +0200
5afbbe0d 4284@@ -0,0 +1,309 @@
1facf9fc 4285+/*
8cdd5066 4286+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 4287+ *
4288+ * This program, aufs is free software; you can redistribute it and/or modify
4289+ * it under the terms of the GNU General Public License as published by
4290+ * the Free Software Foundation; either version 2 of the License, or
4291+ * (at your option) any later version.
dece6358
AM
4292+ *
4293+ * This program is distributed in the hope that it will be useful,
4294+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4295+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4296+ * GNU General Public License for more details.
4297+ *
4298+ * You should have received a copy of the GNU General Public License
523b37e3 4299+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4300+ */
4301+
4302+/*
4303+ * branch filesystems and xino for them
4304+ */
4305+
4306+#ifndef __AUFS_BRANCH_H__
4307+#define __AUFS_BRANCH_H__
4308+
4309+#ifdef __KERNEL__
4310+
1facf9fc 4311+#include <linux/mount.h>
4a4d8108 4312+#include "dynop.h"
1facf9fc 4313+#include "rwsem.h"
4314+#include "super.h"
4315+
4316+/* ---------------------------------------------------------------------- */
4317+
4318+/* a xino file */
4319+struct au_xino_file {
4320+ struct file *xi_file;
4321+ struct mutex xi_nondir_mtx;
4322+
4323+ /* todo: make xino files an array to support huge inode number */
4324+
4325+#ifdef CONFIG_DEBUG_FS
4326+ struct dentry *xi_dbgaufs;
4327+#endif
4328+};
4329+
076b876e
AM
4330+/* File-based Hierarchical Storage Management */
4331+struct au_br_fhsm {
4332+#ifdef CONFIG_AUFS_FHSM
4333+ struct mutex bf_lock;
4334+ unsigned long bf_jiffy;
4335+ struct aufs_stfs bf_stfs;
4336+ int bf_readable;
4337+#endif
4338+};
4339+
1facf9fc 4340+/* members for writable branch only */
4341+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
4342+struct au_wbr {
dece6358 4343+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 4344+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 4345+ atomic_t wbr_wh_running;
1facf9fc 4346+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
4347+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
4348+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
4349+
4350+ /* mfs mode */
4351+ unsigned long long wbr_bytes;
4352+};
4353+
4a4d8108
AM
4354+/* ext2 has 3 types of operations at least, ext3 has 4 */
4355+#define AuBrDynOp (AuDyLast * 4)
4356+
1716fcea
AM
4357+#ifdef CONFIG_AUFS_HFSNOTIFY
4358+/* support for asynchronous destruction */
4359+struct au_br_hfsnotify {
4360+ struct fsnotify_group *hfsn_group;
4361+};
4362+#endif
4363+
392086de
AM
4364+/* sysfs entries */
4365+struct au_brsysfs {
4366+ char name[16];
4367+ struct attribute attr;
4368+};
4369+
4370+enum {
4371+ AuBrSysfs_BR,
4372+ AuBrSysfs_BRID,
4373+ AuBrSysfs_Last
4374+};
4375+
1facf9fc 4376+/* protected by superblock rwsem */
4377+struct au_branch {
4378+ struct au_xino_file br_xino;
4379+
4380+ aufs_bindex_t br_id;
4381+
4382+ int br_perm;
86dc4139 4383+ struct path br_path;
4a4d8108
AM
4384+ spinlock_t br_dykey_lock;
4385+ struct au_dykey *br_dykey[AuBrDynOp];
5afbbe0d 4386+ struct percpu_counter br_count;
1facf9fc 4387+
4388+ struct au_wbr *br_wbr;
076b876e 4389+ struct au_br_fhsm *br_fhsm;
1facf9fc 4390+
4391+ /* xino truncation */
1facf9fc 4392+ atomic_t br_xino_running;
4393+
027c5e7a 4394+#ifdef CONFIG_AUFS_HFSNOTIFY
1716fcea 4395+ struct au_br_hfsnotify *br_hfsn;
027c5e7a
AM
4396+#endif
4397+
1facf9fc 4398+#ifdef CONFIG_SYSFS
392086de
AM
4399+ /* entries under sysfs per mount-point */
4400+ struct au_brsysfs br_sysfs[AuBrSysfs_Last];
1facf9fc 4401+#endif
4402+};
4403+
4404+/* ---------------------------------------------------------------------- */
4405+
86dc4139
AM
4406+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
4407+{
4408+ return br->br_path.mnt;
4409+}
4410+
4411+static inline struct dentry *au_br_dentry(struct au_branch *br)
4412+{
4413+ return br->br_path.dentry;
4414+}
4415+
4416+static inline struct super_block *au_br_sb(struct au_branch *br)
4417+{
4418+ return au_br_mnt(br)->mnt_sb;
4419+}
4420+
5afbbe0d
AM
4421+static inline void au_br_get(struct au_branch *br)
4422+{
4423+ percpu_counter_inc(&br->br_count);
4424+}
4425+
4426+static inline void au_br_put(struct au_branch *br)
4427+{
4428+ percpu_counter_dec(&br->br_count);
4429+}
4430+
4431+static inline s64 au_br_count(struct au_branch *br)
4432+{
4433+ return percpu_counter_sum(&br->br_count);
4434+}
4435+
4436+static inline void au_br_count_init(struct au_branch *br)
4437+{
4438+ percpu_counter_init(&br->br_count, 0, GFP_NOFS);
4439+}
4440+
4441+static inline void au_br_count_fin(struct au_branch *br)
4442+{
4443+ percpu_counter_destroy(&br->br_count);
4444+}
4445+
1facf9fc 4446+static inline int au_br_rdonly(struct au_branch *br)
4447+{
86dc4139 4448+ return ((au_br_sb(br)->s_flags & MS_RDONLY)
1facf9fc 4449+ || !au_br_writable(br->br_perm))
4450+ ? -EROFS : 0;
4451+}
4452+
4a4d8108 4453+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 4454+{
4a4d8108 4455+#ifdef CONFIG_AUFS_HNOTIFY
1e00d052 4456+ return !(brperm & AuBrPerm_RR);
1facf9fc 4457+#else
4458+ return 0;
4459+#endif
4460+}
4461+
b912730e
AM
4462+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
4463+{
4464+ int err, exec_flag;
4465+
4466+ err = 0;
4467+ exec_flag = oflag & __FMODE_EXEC;
79b8bda9 4468+ if (unlikely(exec_flag && path_noexec(&br->br_path)))
b912730e
AM
4469+ err = -EACCES;
4470+
4471+ return err;
4472+}
4473+
1facf9fc 4474+/* ---------------------------------------------------------------------- */
4475+
4476+/* branch.c */
4477+struct au_sbinfo;
4478+void au_br_free(struct au_sbinfo *sinfo);
4479+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
4480+struct au_opt_add;
4481+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
4482+struct au_opt_del;
4483+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
4484+long au_ibusy_ioctl(struct file *file, unsigned long arg);
4485+#ifdef CONFIG_COMPAT
4486+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
4487+#endif
1facf9fc 4488+struct au_opt_mod;
4489+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4490+ int *do_refresh);
076b876e
AM
4491+struct aufs_stfs;
4492+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
1facf9fc 4493+
4494+/* xino.c */
4495+static const loff_t au_loff_max = LLONG_MAX;
4496+
4497+int au_xib_trunc(struct super_block *sb);
5527c038 4498+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
1facf9fc 4499+ loff_t *pos);
5527c038
JR
4500+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
4501+ size_t size, loff_t *pos);
1facf9fc 4502+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
4503+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
4504+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 4505+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 4506+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4507+ ino_t ino);
4508+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4509+ ino_t *ino);
4510+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
4511+ struct file *base_file, int do_test);
4512+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
4513+
4514+struct au_opt_xino;
4515+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
4516+void au_xino_clr(struct super_block *sb);
4517+struct file *au_xino_def(struct super_block *sb);
4518+int au_xino_path(struct seq_file *seq, struct file *file);
4519+
4520+/* ---------------------------------------------------------------------- */
4521+
4522+/* Superblock to branch */
4523+static inline
4524+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
4525+{
4526+ return au_sbr(sb, bindex)->br_id;
4527+}
4528+
4529+static inline
4530+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
4531+{
86dc4139 4532+ return au_br_mnt(au_sbr(sb, bindex));
1facf9fc 4533+}
4534+
4535+static inline
4536+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
4537+{
86dc4139 4538+ return au_br_sb(au_sbr(sb, bindex));
1facf9fc 4539+}
4540+
5afbbe0d
AM
4541+static inline void au_sbr_get(struct super_block *sb, aufs_bindex_t bindex)
4542+{
4543+ au_br_get(au_sbr(sb, bindex));
4544+}
4545+
1facf9fc 4546+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
4547+{
5afbbe0d 4548+ au_br_put(au_sbr(sb, bindex));
1facf9fc 4549+}
4550+
4551+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
4552+{
4553+ return au_sbr(sb, bindex)->br_perm;
4554+}
4555+
4556+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
4557+{
4558+ return au_br_whable(au_sbr_perm(sb, bindex));
4559+}
4560+
4561+/* ---------------------------------------------------------------------- */
4562+
4563+/*
4564+ * wbr_wh_read_lock, wbr_wh_write_lock
4565+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
4566+ */
4567+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
4568+
dece6358
AM
4569+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
4570+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
4571+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
4572+
076b876e
AM
4573+/* ---------------------------------------------------------------------- */
4574+
4575+#ifdef CONFIG_AUFS_FHSM
4576+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
4577+{
4578+ mutex_init(&brfhsm->bf_lock);
4579+ brfhsm->bf_jiffy = 0;
4580+ brfhsm->bf_readable = 0;
4581+}
4582+
4583+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
4584+{
4585+ mutex_destroy(&brfhsm->bf_lock);
4586+}
4587+#else
4588+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
4589+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
4590+#endif
4591+
1facf9fc 4592+#endif /* __KERNEL__ */
4593+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
4594diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
4595--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
e2f27e51 4596+++ linux/fs/aufs/conf.mk 2016-10-09 16:55:36.486034798 +0200
c1595e42 4597@@ -0,0 +1,38 @@
4a4d8108
AM
4598+
4599+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
4600+
4601+define AuConf
4602+ifdef ${1}
4603+AuConfStr += ${1}=${${1}}
4604+endif
4605+endef
4606+
b752ccd1 4607+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 4608+ SBILIST \
7f207e10 4609+ HNOTIFY HFSNOTIFY \
4a4d8108 4610+ EXPORT INO_T_64 \
c1595e42 4611+ XATTR \
076b876e 4612+ FHSM \
4a4d8108 4613+ RDU \
4a4d8108
AM
4614+ SHWH \
4615+ BR_RAMFS \
4616+ BR_FUSE POLL \
4617+ BR_HFSPLUS \
4618+ BDEV_LOOP \
b752ccd1
AM
4619+ DEBUG MAGIC_SYSRQ
4620+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
4621+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
4622+
4623+AuConfName = ${obj}/conf.str
4624+${AuConfName}.tmp: FORCE
4625+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
4626+${AuConfName}: ${AuConfName}.tmp
4627+ @diff -q $< $@ > /dev/null 2>&1 || { \
4628+ echo ' GEN ' $@; \
4629+ cp -p $< $@; \
4630+ }
4631+FORCE:
4632+clean-files += ${AuConfName} ${AuConfName}.tmp
4633+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
4634+
4635+-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
4636diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
4637--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
4638+++ linux/fs/aufs/cpup.c 2016-10-09 16:55:38.886097714 +0200
4639@@ -0,0 +1,1391 @@
1facf9fc 4640+/*
8cdd5066 4641+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 4642+ *
4643+ * This program, aufs is free software; you can redistribute it and/or modify
4644+ * it under the terms of the GNU General Public License as published by
4645+ * the Free Software Foundation; either version 2 of the License, or
4646+ * (at your option) any later version.
dece6358
AM
4647+ *
4648+ * This program is distributed in the hope that it will be useful,
4649+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4650+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4651+ * GNU General Public License for more details.
4652+ *
4653+ * You should have received a copy of the GNU General Public License
523b37e3 4654+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4655+ */
4656+
4657+/*
4658+ * copy-up functions, see wbr_policy.c for copy-down
4659+ */
4660+
4661+#include <linux/fs_stack.h>
dece6358 4662+#include <linux/mm.h>
8cdd5066 4663+#include <linux/task_work.h>
1facf9fc 4664+#include "aufs.h"
4665+
86dc4139 4666+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
1facf9fc 4667+{
4668+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
367653fa 4669+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
1facf9fc 4670+
86dc4139
AM
4671+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
4672+
4673+ dst->i_flags |= iflags & ~mask;
1facf9fc 4674+ if (au_test_fs_notime(dst->i_sb))
4675+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
4676+}
4677+
4678+void au_cpup_attr_timesizes(struct inode *inode)
4679+{
4680+ struct inode *h_inode;
4681+
5afbbe0d 4682+ h_inode = au_h_iptr(inode, au_ibtop(inode));
1facf9fc 4683+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 4684+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 4685+}
4686+
4687+void au_cpup_attr_nlink(struct inode *inode, int force)
4688+{
4689+ struct inode *h_inode;
4690+ struct super_block *sb;
5afbbe0d 4691+ aufs_bindex_t bindex, bbot;
1facf9fc 4692+
4693+ sb = inode->i_sb;
5afbbe0d 4694+ bindex = au_ibtop(inode);
1facf9fc 4695+ h_inode = au_h_iptr(inode, bindex);
4696+ if (!force
4697+ && !S_ISDIR(h_inode->i_mode)
4698+ && au_opt_test(au_mntflags(sb), PLINK)
4699+ && au_plink_test(inode))
4700+ return;
4701+
7eafdf33
AM
4702+ /*
4703+ * 0 can happen in revalidating.
38d290e6
JR
4704+ * h_inode->i_mutex may not be held here, but it is harmless since once
4705+ * i_nlink reaches 0, it will never become positive except O_TMPFILE
4706+ * case.
4707+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
4708+ * the incorrect link count.
7eafdf33 4709+ */
92d182d2 4710+ set_nlink(inode, h_inode->i_nlink);
1facf9fc 4711+
4712+ /*
4713+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
4714+ * it may include the whplink directory.
4715+ */
4716+ if (S_ISDIR(h_inode->i_mode)) {
5afbbe0d
AM
4717+ bbot = au_ibbot(inode);
4718+ for (bindex++; bindex <= bbot; bindex++) {
1facf9fc 4719+ h_inode = au_h_iptr(inode, bindex);
4720+ if (h_inode)
4721+ au_add_nlink(inode, h_inode);
4722+ }
4723+ }
4724+}
4725+
4726+void au_cpup_attr_changeable(struct inode *inode)
4727+{
4728+ struct inode *h_inode;
4729+
5afbbe0d 4730+ h_inode = au_h_iptr(inode, au_ibtop(inode));
1facf9fc 4731+ inode->i_mode = h_inode->i_mode;
4732+ inode->i_uid = h_inode->i_uid;
4733+ inode->i_gid = h_inode->i_gid;
4734+ au_cpup_attr_timesizes(inode);
86dc4139 4735+ au_cpup_attr_flags(inode, h_inode->i_flags);
1facf9fc 4736+}
4737+
4738+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
4739+{
4740+ struct au_iinfo *iinfo = au_ii(inode);
4741+
1308ab2a 4742+ IiMustWriteLock(inode);
4743+
1facf9fc 4744+ iinfo->ii_higen = h_inode->i_generation;
4745+ iinfo->ii_hsb1 = h_inode->i_sb;
4746+}
4747+
4748+void au_cpup_attr_all(struct inode *inode, int force)
4749+{
4750+ struct inode *h_inode;
4751+
5afbbe0d 4752+ h_inode = au_h_iptr(inode, au_ibtop(inode));
1facf9fc 4753+ au_cpup_attr_changeable(inode);
4754+ if (inode->i_nlink > 0)
4755+ au_cpup_attr_nlink(inode, force);
4756+ inode->i_rdev = h_inode->i_rdev;
4757+ inode->i_blkbits = h_inode->i_blkbits;
4758+ au_cpup_igen(inode, h_inode);
4759+}
4760+
4761+/* ---------------------------------------------------------------------- */
4762+
4763+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
4764+
4765+/* keep the timestamps of the parent dir when cpup */
4766+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4767+ struct path *h_path)
4768+{
4769+ struct inode *h_inode;
4770+
4771+ dt->dt_dentry = dentry;
4772+ dt->dt_h_path = *h_path;
5527c038 4773+ h_inode = d_inode(h_path->dentry);
1facf9fc 4774+ dt->dt_atime = h_inode->i_atime;
4775+ dt->dt_mtime = h_inode->i_mtime;
4776+ /* smp_mb(); */
4777+}
4778+
4779+void au_dtime_revert(struct au_dtime *dt)
4780+{
4781+ struct iattr attr;
4782+ int err;
4783+
4784+ attr.ia_atime = dt->dt_atime;
4785+ attr.ia_mtime = dt->dt_mtime;
4786+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
4787+ | ATTR_ATIME | ATTR_ATIME_SET;
4788+
523b37e3
AM
4789+ /* no delegation since this is a directory */
4790+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
1facf9fc 4791+ if (unlikely(err))
0c3ec466 4792+ pr_warn("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 4793+}
4794+
4795+/* ---------------------------------------------------------------------- */
4796+
86dc4139
AM
4797+/* internal use only */
4798+struct au_cpup_reg_attr {
4799+ int valid;
4800+ struct kstat st;
4801+ unsigned int iflags; /* inode->i_flags */
4802+};
4803+
1facf9fc 4804+static noinline_for_stack
86dc4139
AM
4805+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
4806+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4807+{
c1595e42 4808+ int err, sbits, icex;
7e9cd9fe
AM
4809+ unsigned int mnt_flags;
4810+ unsigned char verbose;
1facf9fc 4811+ struct iattr ia;
4812+ struct path h_path;
1308ab2a 4813+ struct inode *h_isrc, *h_idst;
86dc4139 4814+ struct kstat *h_st;
c1595e42 4815+ struct au_branch *br;
1facf9fc 4816+
4817+ h_path.dentry = au_h_dptr(dst, bindex);
5527c038 4818+ h_idst = d_inode(h_path.dentry);
c1595e42
JR
4819+ br = au_sbr(dst->d_sb, bindex);
4820+ h_path.mnt = au_br_mnt(br);
5527c038 4821+ h_isrc = d_inode(h_src);
1308ab2a 4822+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 4823+ | ATTR_ATIME | ATTR_MTIME
4824+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
86dc4139
AM
4825+ if (h_src_attr && h_src_attr->valid) {
4826+ h_st = &h_src_attr->st;
4827+ ia.ia_uid = h_st->uid;
4828+ ia.ia_gid = h_st->gid;
4829+ ia.ia_atime = h_st->atime;
4830+ ia.ia_mtime = h_st->mtime;
4831+ if (h_idst->i_mode != h_st->mode
4832+ && !S_ISLNK(h_idst->i_mode)) {
4833+ ia.ia_valid |= ATTR_MODE;
4834+ ia.ia_mode = h_st->mode;
4835+ }
4836+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
4837+ au_cpup_attr_flags(h_idst, h_src_attr->iflags);
4838+ } else {
4839+ ia.ia_uid = h_isrc->i_uid;
4840+ ia.ia_gid = h_isrc->i_gid;
4841+ ia.ia_atime = h_isrc->i_atime;
4842+ ia.ia_mtime = h_isrc->i_mtime;
4843+ if (h_idst->i_mode != h_isrc->i_mode
4844+ && !S_ISLNK(h_idst->i_mode)) {
4845+ ia.ia_valid |= ATTR_MODE;
4846+ ia.ia_mode = h_isrc->i_mode;
4847+ }
4848+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
4849+ au_cpup_attr_flags(h_idst, h_isrc->i_flags);
1308ab2a 4850+ }
523b37e3
AM
4851+ /* no delegation since it is just created */
4852+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4853+
4854+ /* is this nfs only? */
4855+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
4856+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
4857+ ia.ia_mode = h_isrc->i_mode;
523b37e3 4858+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4859+ }
4860+
c1595e42 4861+ icex = br->br_perm & AuBrAttr_ICEX;
7e9cd9fe
AM
4862+ if (!err) {
4863+ mnt_flags = au_mntflags(dst->d_sb);
4864+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4865+ err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
4866+ }
c1595e42 4867+
1facf9fc 4868+ return err;
4869+}
4870+
4871+/* ---------------------------------------------------------------------- */
4872+
4873+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
4874+ char *buf, unsigned long blksize)
4875+{
4876+ int err;
4877+ size_t sz, rbytes, wbytes;
4878+ unsigned char all_zero;
4879+ char *p, *zp;
febd17d6 4880+ struct inode *h_inode;
1facf9fc 4881+ /* reduce stack usage */
4882+ struct iattr *ia;
4883+
4884+ zp = page_address(ZERO_PAGE(0));
4885+ if (unlikely(!zp))
4886+ return -ENOMEM; /* possible? */
4887+
4888+ err = 0;
4889+ all_zero = 0;
4890+ while (len) {
4891+ AuDbg("len %lld\n", len);
4892+ sz = blksize;
4893+ if (len < blksize)
4894+ sz = len;
4895+
4896+ rbytes = 0;
4897+ /* todo: signal_pending? */
4898+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
4899+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
4900+ err = rbytes;
4901+ }
4902+ if (unlikely(err < 0))
4903+ break;
4904+
4905+ all_zero = 0;
4906+ if (len >= rbytes && rbytes == blksize)
4907+ all_zero = !memcmp(buf, zp, rbytes);
4908+ if (!all_zero) {
4909+ wbytes = rbytes;
4910+ p = buf;
4911+ while (wbytes) {
4912+ size_t b;
4913+
4914+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
4915+ err = b;
4916+ /* todo: signal_pending? */
4917+ if (unlikely(err == -EAGAIN || err == -EINTR))
4918+ continue;
4919+ if (unlikely(err < 0))
4920+ break;
4921+ wbytes -= b;
4922+ p += b;
4923+ }
392086de
AM
4924+ if (unlikely(err < 0))
4925+ break;
1facf9fc 4926+ } else {
4927+ loff_t res;
4928+
4929+ AuLabel(hole);
4930+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
4931+ err = res;
4932+ if (unlikely(res < 0))
4933+ break;
4934+ }
4935+ len -= rbytes;
4936+ err = 0;
4937+ }
4938+
4939+ /* the last block may be a hole */
4940+ if (!err && all_zero) {
4941+ AuLabel(last hole);
4942+
4943+ err = 1;
2000de60 4944+ if (au_test_nfs(dst->f_path.dentry->d_sb)) {
1facf9fc 4945+ /* nfs requires this step to make last hole */
4946+ /* is this only nfs? */
4947+ do {
4948+ /* todo: signal_pending? */
4949+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
4950+ } while (err == -EAGAIN || err == -EINTR);
4951+ if (err == 1)
4952+ dst->f_pos--;
4953+ }
4954+
4955+ if (err == 1) {
4956+ ia = (void *)buf;
4957+ ia->ia_size = dst->f_pos;
4958+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
4959+ ia->ia_file = dst;
febd17d6
JR
4960+ h_inode = file_inode(dst);
4961+ inode_lock_nested(h_inode, AuLsc_I_CHILD2);
523b37e3
AM
4962+ /* no delegation since it is just created */
4963+ err = vfsub_notify_change(&dst->f_path, ia,
4964+ /*delegated*/NULL);
febd17d6 4965+ inode_unlock(h_inode);
1facf9fc 4966+ }
4967+ }
4968+
4969+ return err;
4970+}
4971+
4972+int au_copy_file(struct file *dst, struct file *src, loff_t len)
4973+{
4974+ int err;
4975+ unsigned long blksize;
4976+ unsigned char do_kfree;
4977+ char *buf;
4978+
4979+ err = -ENOMEM;
2000de60 4980+ blksize = dst->f_path.dentry->d_sb->s_blocksize;
1facf9fc 4981+ if (!blksize || PAGE_SIZE < blksize)
4982+ blksize = PAGE_SIZE;
4983+ AuDbg("blksize %lu\n", blksize);
4984+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
4985+ if (do_kfree)
4986+ buf = kmalloc(blksize, GFP_NOFS);
4987+ else
4988+ buf = (void *)__get_free_page(GFP_NOFS);
4989+ if (unlikely(!buf))
4990+ goto out;
4991+
4992+ if (len > (1 << 22))
4993+ AuDbg("copying a large file %lld\n", (long long)len);
4994+
4995+ src->f_pos = 0;
4996+ dst->f_pos = 0;
4997+ err = au_do_copy_file(dst, src, len, buf, blksize);
4998+ if (do_kfree)
f0c0a007 4999+ au_delayed_kfree(buf);
1facf9fc 5000+ else
f0c0a007 5001+ au_delayed_free_page((unsigned long)buf);
1facf9fc 5002+
4f0767ce 5003+out:
1facf9fc 5004+ return err;
5005+}
5006+
5007+/*
5008+ * to support a sparse file which is opened with O_APPEND,
5009+ * we need to close the file.
5010+ */
c2b27bf2 5011+static int au_cp_regular(struct au_cp_generic *cpg)
1facf9fc 5012+{
5013+ int err, i;
5014+ enum { SRC, DST };
5015+ struct {
5016+ aufs_bindex_t bindex;
5017+ unsigned int flags;
5018+ struct dentry *dentry;
392086de 5019+ int force_wr;
1facf9fc 5020+ struct file *file;
523b37e3 5021+ void *label;
1facf9fc 5022+ } *f, file[] = {
5023+ {
c2b27bf2 5024+ .bindex = cpg->bsrc,
1facf9fc 5025+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
523b37e3 5026+ .label = &&out
1facf9fc 5027+ },
5028+ {
c2b27bf2 5029+ .bindex = cpg->bdst,
1facf9fc 5030+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
392086de 5031+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
523b37e3 5032+ .label = &&out_src
1facf9fc 5033+ }
5034+ };
5035+ struct super_block *sb;
e2f27e51 5036+ struct inode *h_src_inode;
8cdd5066 5037+ struct task_struct *tsk = current;
1facf9fc 5038+
5039+ /* bsrc branch can be ro/rw. */
c2b27bf2 5040+ sb = cpg->dentry->d_sb;
1facf9fc 5041+ f = file;
5042+ for (i = 0; i < 2; i++, f++) {
c2b27bf2
AM
5043+ f->dentry = au_h_dptr(cpg->dentry, f->bindex);
5044+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
392086de 5045+ /*file*/NULL, f->force_wr);
1facf9fc 5046+ err = PTR_ERR(f->file);
5047+ if (IS_ERR(f->file))
5048+ goto *f->label;
1facf9fc 5049+ }
5050+
5051+ /* try stopping to update while we copyup */
e2f27e51
AM
5052+ h_src_inode = d_inode(file[SRC].dentry);
5053+ if (!au_test_nfs(h_src_inode->i_sb))
5054+ IMustLock(h_src_inode);
c2b27bf2 5055+ err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
1facf9fc 5056+
8cdd5066
JR
5057+ /* i wonder if we had O_NO_DELAY_FPUT flag */
5058+ if (tsk->flags & PF_KTHREAD)
5059+ __fput_sync(file[DST].file);
5060+ else {
5061+ WARN(1, "%pD\nPlease report this warning to aufs-users ML",
5062+ file[DST].file);
5063+ fput(file[DST].file);
5064+ /*
5065+ * too bad.
5066+ * we have to call both since we don't know which place the file
5067+ * was added to.
5068+ */
5069+ task_work_run();
5070+ flush_delayed_fput();
5071+ }
1facf9fc 5072+ au_sbr_put(sb, file[DST].bindex);
523b37e3 5073+
4f0767ce 5074+out_src:
1facf9fc 5075+ fput(file[SRC].file);
5076+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 5077+out:
1facf9fc 5078+ return err;
5079+}
5080+
c2b27bf2 5081+static int au_do_cpup_regular(struct au_cp_generic *cpg,
86dc4139 5082+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 5083+{
5084+ int err, rerr;
5085+ loff_t l;
86dc4139 5086+ struct path h_path;
38d290e6 5087+ struct inode *h_src_inode, *h_dst_inode;
1facf9fc 5088+
5089+ err = 0;
5527c038 5090+ h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
86dc4139 5091+ l = i_size_read(h_src_inode);
c2b27bf2
AM
5092+ if (cpg->len == -1 || l < cpg->len)
5093+ cpg->len = l;
5094+ if (cpg->len) {
86dc4139 5095+ /* try stopping to update while we are referencing */
febd17d6 5096+ inode_lock_nested(h_src_inode, AuLsc_I_CHILD);
c2b27bf2 5097+ au_pin_hdir_unlock(cpg->pin);
1facf9fc 5098+
c2b27bf2
AM
5099+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5100+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
86dc4139 5101+ h_src_attr->iflags = h_src_inode->i_flags;
5527c038
JR
5102+ if (!au_test_nfs(h_src_inode->i_sb))
5103+ err = vfs_getattr(&h_path, &h_src_attr->st);
5104+ else {
febd17d6 5105+ inode_unlock(h_src_inode);
5527c038 5106+ err = vfs_getattr(&h_path, &h_src_attr->st);
febd17d6 5107+ inode_lock_nested(h_src_inode, AuLsc_I_CHILD);
5527c038 5108+ }
86dc4139 5109+ if (unlikely(err)) {
febd17d6 5110+ inode_unlock(h_src_inode);
86dc4139
AM
5111+ goto out;
5112+ }
5113+ h_src_attr->valid = 1;
e2f27e51
AM
5114+ if (!au_test_nfs(h_src_inode->i_sb)) {
5115+ err = au_cp_regular(cpg);
5116+ inode_unlock(h_src_inode);
5117+ } else {
5118+ inode_unlock(h_src_inode);
5119+ err = au_cp_regular(cpg);
5120+ }
c2b27bf2 5121+ rerr = au_pin_hdir_relock(cpg->pin);
86dc4139
AM
5122+ if (!err && rerr)
5123+ err = rerr;
1facf9fc 5124+ }
38d290e6
JR
5125+ if (!err && (h_src_inode->i_state & I_LINKABLE)) {
5126+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
5527c038 5127+ h_dst_inode = d_inode(h_path.dentry);
38d290e6
JR
5128+ spin_lock(&h_dst_inode->i_lock);
5129+ h_dst_inode->i_state |= I_LINKABLE;
5130+ spin_unlock(&h_dst_inode->i_lock);
5131+ }
1facf9fc 5132+
4f0767ce 5133+out:
1facf9fc 5134+ return err;
5135+}
5136+
5137+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
5138+ struct inode *h_dir)
5139+{
5140+ int err, symlen;
5141+ mm_segment_t old_fs;
b752ccd1
AM
5142+ union {
5143+ char *k;
5144+ char __user *u;
5145+ } sym;
5527c038
JR
5146+ struct inode *h_inode = d_inode(h_src);
5147+ const struct inode_operations *h_iop = h_inode->i_op;
1facf9fc 5148+
5149+ err = -ENOSYS;
5527c038 5150+ if (unlikely(!h_iop->readlink))
1facf9fc 5151+ goto out;
5152+
5153+ err = -ENOMEM;
537831f9 5154+ sym.k = (void *)__get_free_page(GFP_NOFS);
b752ccd1 5155+ if (unlikely(!sym.k))
1facf9fc 5156+ goto out;
5157+
9dbd164d 5158+ /* unnecessary to support mmap_sem since symlink is not mmap-able */
1facf9fc 5159+ old_fs = get_fs();
5160+ set_fs(KERNEL_DS);
5527c038 5161+ symlen = h_iop->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 5162+ err = symlen;
5163+ set_fs(old_fs);
5164+
5165+ if (symlen > 0) {
b752ccd1
AM
5166+ sym.k[symlen] = 0;
5167+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 5168+ }
f0c0a007 5169+ au_delayed_free_page((unsigned long)sym.k);
1facf9fc 5170+
4f0767ce 5171+out:
1facf9fc 5172+ return err;
5173+}
5174+
8cdd5066
JR
5175+/*
5176+ * regardless of the 'acl' option, reset all ACLs.
5177+ * All ACL will be copied up later from the original entry on the lower branch.
5178+ */
5179+static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode)
5180+{
5181+ int err;
5182+ struct dentry *h_dentry;
5183+ struct inode *h_inode;
5184+
5185+ h_dentry = h_path->dentry;
5186+ h_inode = d_inode(h_dentry);
5187+ /* forget_all_cached_acls(h_inode)); */
5188+ err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
5189+ AuTraceErr(err);
5190+ if (err == -EOPNOTSUPP)
5191+ err = 0;
5192+ if (!err)
5193+ err = vfsub_acl_chmod(h_inode, mode);
5194+
5195+ AuTraceErr(err);
5196+ return err;
5197+}
5198+
5199+static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
5200+ struct inode *h_dir, struct path *h_path)
5201+{
5202+ int err;
5203+ struct inode *dir, *inode;
5204+
5205+ err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT);
5206+ AuTraceErr(err);
5207+ if (err == -EOPNOTSUPP)
5208+ err = 0;
5209+ if (unlikely(err))
5210+ goto out;
5211+
5212+ /*
5213+ * strange behaviour from the user's view,
5214+ * particularly in the setattr case
5215+ */
5216+ dir = d_inode(dst_parent);
5afbbe0d 5217+ if (au_ibtop(dir) == cpg->bdst)
8cdd5066
JR
5218+ au_cpup_attr_nlink(dir, /*force*/1);
5219+ inode = d_inode(cpg->dentry);
5220+ au_cpup_attr_nlink(inode, /*force*/1);
5221+
5222+out:
5223+ return err;
5224+}
5225+
1facf9fc 5226+static noinline_for_stack
c2b27bf2 5227+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
86dc4139 5228+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 5229+{
5230+ int err;
5231+ umode_t mode;
5232+ unsigned int mnt_flags;
076b876e 5233+ unsigned char isdir, isreg, force;
c2b27bf2 5234+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5235+ struct au_dtime dt;
5236+ struct path h_path;
5237+ struct dentry *h_src, *h_dst, *h_parent;
8cdd5066 5238+ struct inode *h_inode, *h_dir;
1facf9fc 5239+ struct super_block *sb;
5240+
5241+ /* bsrc branch can be ro/rw. */
c2b27bf2 5242+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038
JR
5243+ h_inode = d_inode(h_src);
5244+ AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
1facf9fc 5245+
5246+ /* try stopping to be referenced while we are creating */
c2b27bf2
AM
5247+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5248+ if (au_ftest_cpup(cpg->flags, RENAME))
86dc4139
AM
5249+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
5250+ AUFS_WH_PFX_LEN));
1facf9fc 5251+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5252+ h_dir = d_inode(h_parent);
1facf9fc 5253+ IMustLock(h_dir);
5254+ AuDebugOn(h_parent != h_dst->d_parent);
5255+
c2b27bf2
AM
5256+ sb = cpg->dentry->d_sb;
5257+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
1facf9fc 5258+ if (do_dt) {
5259+ h_path.dentry = h_parent;
5260+ au_dtime_store(&dt, dst_parent, &h_path);
5261+ }
5262+ h_path.dentry = h_dst;
5263+
076b876e 5264+ isreg = 0;
1facf9fc 5265+ isdir = 0;
5266+ mode = h_inode->i_mode;
5267+ switch (mode & S_IFMT) {
5268+ case S_IFREG:
076b876e 5269+ isreg = 1;
8cdd5066 5270+ err = vfsub_create(h_dir, &h_path, S_IRUSR | S_IWUSR,
b4510431 5271+ /*want_excl*/true);
1facf9fc 5272+ if (!err)
c2b27bf2 5273+ err = au_do_cpup_regular(cpg, h_src_attr);
1facf9fc 5274+ break;
5275+ case S_IFDIR:
5276+ isdir = 1;
5277+ err = vfsub_mkdir(h_dir, &h_path, mode);
8cdd5066
JR
5278+ if (!err)
5279+ err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
1facf9fc 5280+ break;
5281+ case S_IFLNK:
5282+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
5283+ break;
5284+ case S_IFCHR:
5285+ case S_IFBLK:
5286+ AuDebugOn(!capable(CAP_MKNOD));
5287+ /*FALLTHROUGH*/
5288+ case S_IFIFO:
5289+ case S_IFSOCK:
5290+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
5291+ break;
5292+ default:
5293+ AuIOErr("Unknown inode type 0%o\n", mode);
5294+ err = -EIO;
5295+ }
8cdd5066
JR
5296+ if (!err)
5297+ err = au_reset_acl(h_dir, &h_path, mode);
1facf9fc 5298+
5299+ mnt_flags = au_mntflags(sb);
5300+ if (!au_opt_test(mnt_flags, UDBA_NONE)
5301+ && !isdir
5302+ && au_opt_test(mnt_flags, XINO)
38d290e6
JR
5303+ && (h_inode->i_nlink == 1
5304+ || (h_inode->i_state & I_LINKABLE))
1facf9fc 5305+ /* todo: unnecessary? */
5527c038 5306+ /* && d_inode(cpg->dentry)->i_nlink == 1 */
c2b27bf2
AM
5307+ && cpg->bdst < cpg->bsrc
5308+ && !au_ftest_cpup(cpg->flags, KEEPLINO))
5309+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 5310+ /* ignore this error */
5311+
076b876e
AM
5312+ if (!err) {
5313+ force = 0;
5314+ if (isreg) {
5315+ force = !!cpg->len;
5316+ if (cpg->len == -1)
5317+ force = !!i_size_read(h_inode);
5318+ }
5319+ au_fhsm_wrote(sb, cpg->bdst, force);
5320+ }
5321+
1facf9fc 5322+ if (do_dt)
5323+ au_dtime_revert(&dt);
5324+ return err;
5325+}
5326+
392086de 5327+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
86dc4139
AM
5328+{
5329+ int err;
392086de 5330+ struct dentry *dentry, *h_dentry, *h_parent, *parent;
86dc4139 5331+ struct inode *h_dir;
392086de 5332+ aufs_bindex_t bdst;
86dc4139 5333+
392086de
AM
5334+ dentry = cpg->dentry;
5335+ bdst = cpg->bdst;
5336+ h_dentry = au_h_dptr(dentry, bdst);
5337+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
5338+ dget(h_dentry);
5339+ au_set_h_dptr(dentry, bdst, NULL);
5340+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
5341+ if (!err)
5342+ h_path->dentry = dget(au_h_dptr(dentry, bdst));
86dc4139 5343+ au_set_h_dptr(dentry, bdst, h_dentry);
392086de
AM
5344+ } else {
5345+ err = 0;
5346+ parent = dget_parent(dentry);
5347+ h_parent = au_h_dptr(parent, bdst);
5348+ dput(parent);
5349+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
5350+ if (IS_ERR(h_path->dentry))
5351+ err = PTR_ERR(h_path->dentry);
86dc4139 5352+ }
392086de
AM
5353+ if (unlikely(err))
5354+ goto out;
86dc4139 5355+
86dc4139 5356+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 5357+ h_dir = d_inode(h_parent);
86dc4139 5358+ IMustLock(h_dir);
523b37e3
AM
5359+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
5360+ /* no delegation since it is just created */
5361+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL);
86dc4139
AM
5362+ dput(h_path->dentry);
5363+
5364+out:
5365+ return err;
5366+}
5367+
1facf9fc 5368+/*
5369+ * copyup the @dentry from @bsrc to @bdst.
5370+ * the caller must set both of the lower dentries.
5371+ * @len is for truncating; when it is -1, copyup the entire file.
5372+ * in link/rename cases, @dst_parent may be different from the real one.
c2b27bf2 5373+ * cpg->bsrc can be larger than cpg->bdst.
1facf9fc 5374+ */
c2b27bf2 5375+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5376+{
5377+ int err, rerr;
5afbbe0d 5378+ aufs_bindex_t old_ibtop;
1facf9fc 5379+ unsigned char isdir, plink;
1facf9fc 5380+ struct dentry *h_src, *h_dst, *h_parent;
5527c038 5381+ struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
1facf9fc 5382+ struct super_block *sb;
86dc4139 5383+ struct au_branch *br;
c2b27bf2
AM
5384+ /* to reduce stack size */
5385+ struct {
5386+ struct au_dtime dt;
5387+ struct path h_path;
5388+ struct au_cpup_reg_attr h_src_attr;
5389+ } *a;
1facf9fc 5390+
c2b27bf2
AM
5391+ err = -ENOMEM;
5392+ a = kmalloc(sizeof(*a), GFP_NOFS);
5393+ if (unlikely(!a))
5394+ goto out;
5395+ a->h_src_attr.valid = 0;
1facf9fc 5396+
c2b27bf2
AM
5397+ sb = cpg->dentry->d_sb;
5398+ br = au_sbr(sb, cpg->bdst);
5399+ a->h_path.mnt = au_br_mnt(br);
5400+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
1facf9fc 5401+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5402+ h_dir = d_inode(h_parent);
1facf9fc 5403+ IMustLock(h_dir);
5404+
c2b27bf2 5405+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5406+ inode = d_inode(cpg->dentry);
1facf9fc 5407+
5408+ if (!dst_parent)
c2b27bf2 5409+ dst_parent = dget_parent(cpg->dentry);
1facf9fc 5410+ else
5411+ dget(dst_parent);
5412+
5413+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
c2b27bf2 5414+ dst_inode = au_h_iptr(inode, cpg->bdst);
1facf9fc 5415+ if (dst_inode) {
5416+ if (unlikely(!plink)) {
5417+ err = -EIO;
027c5e7a
AM
5418+ AuIOErr("hi%lu(i%lu) exists on b%d "
5419+ "but plink is disabled\n",
c2b27bf2
AM
5420+ dst_inode->i_ino, inode->i_ino, cpg->bdst);
5421+ goto out_parent;
1facf9fc 5422+ }
5423+
5424+ if (dst_inode->i_nlink) {
c2b27bf2 5425+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5426+
c2b27bf2 5427+ h_src = au_plink_lkup(inode, cpg->bdst);
1facf9fc 5428+ err = PTR_ERR(h_src);
5429+ if (IS_ERR(h_src))
c2b27bf2 5430+ goto out_parent;
5527c038 5431+ if (unlikely(d_is_negative(h_src))) {
1facf9fc 5432+ err = -EIO;
79b8bda9 5433+ AuIOErr("i%lu exists on b%d "
027c5e7a 5434+ "but not pseudo-linked\n",
79b8bda9 5435+ inode->i_ino, cpg->bdst);
1facf9fc 5436+ dput(h_src);
c2b27bf2 5437+ goto out_parent;
1facf9fc 5438+ }
5439+
5440+ if (do_dt) {
c2b27bf2
AM
5441+ a->h_path.dentry = h_parent;
5442+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
1facf9fc 5443+ }
86dc4139 5444+
c2b27bf2 5445+ a->h_path.dentry = h_dst;
523b37e3
AM
5446+ delegated = NULL;
5447+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
c2b27bf2 5448+ if (!err && au_ftest_cpup(cpg->flags, RENAME))
392086de 5449+ err = au_do_ren_after_cpup(cpg, &a->h_path);
1facf9fc 5450+ if (do_dt)
c2b27bf2 5451+ au_dtime_revert(&a->dt);
523b37e3
AM
5452+ if (unlikely(err == -EWOULDBLOCK)) {
5453+ pr_warn("cannot retry for NFSv4 delegation"
5454+ " for an internal link\n");
5455+ iput(delegated);
5456+ }
1facf9fc 5457+ dput(h_src);
c2b27bf2 5458+ goto out_parent;
1facf9fc 5459+ } else
5460+ /* todo: cpup_wh_file? */
5461+ /* udba work */
4a4d8108 5462+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 5463+ }
5464+
86dc4139 5465+ isdir = S_ISDIR(inode->i_mode);
5afbbe0d 5466+ old_ibtop = au_ibtop(inode);
c2b27bf2 5467+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
1facf9fc 5468+ if (unlikely(err))
86dc4139 5469+ goto out_rev;
5527c038 5470+ dst_inode = d_inode(h_dst);
febd17d6 5471+ inode_lock_nested(dst_inode, AuLsc_I_CHILD2);
86dc4139 5472+ /* todo: necessary? */
c2b27bf2 5473+ /* au_pin_hdir_unlock(cpg->pin); */
1facf9fc 5474+
c2b27bf2 5475+ err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
86dc4139
AM
5476+ if (unlikely(err)) {
5477+ /* todo: necessary? */
c2b27bf2 5478+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
febd17d6 5479+ inode_unlock(dst_inode);
86dc4139
AM
5480+ goto out_rev;
5481+ }
5482+
5afbbe0d 5483+ if (cpg->bdst < old_ibtop) {
86dc4139 5484+ if (S_ISREG(inode->i_mode)) {
c2b27bf2 5485+ err = au_dy_iaop(inode, cpg->bdst, dst_inode);
86dc4139 5486+ if (unlikely(err)) {
c2b27bf2
AM
5487+ /* ignore an error */
5488+ /* au_pin_hdir_relock(cpg->pin); */
febd17d6 5489+ inode_unlock(dst_inode);
86dc4139 5490+ goto out_rev;
4a4d8108 5491+ }
4a4d8108 5492+ }
5afbbe0d 5493+ au_set_ibtop(inode, cpg->bdst);
c2b27bf2 5494+ } else
5afbbe0d 5495+ au_set_ibbot(inode, cpg->bdst);
c2b27bf2 5496+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
86dc4139
AM
5497+ au_hi_flags(inode, isdir));
5498+
5499+ /* todo: necessary? */
c2b27bf2 5500+ /* err = au_pin_hdir_relock(cpg->pin); */
febd17d6 5501+ inode_unlock(dst_inode);
86dc4139
AM
5502+ if (unlikely(err))
5503+ goto out_rev;
5504+
5527c038 5505+ src_inode = d_inode(h_src);
86dc4139 5506+ if (!isdir
5527c038
JR
5507+ && (src_inode->i_nlink > 1
5508+ || src_inode->i_state & I_LINKABLE)
86dc4139 5509+ && plink)
c2b27bf2 5510+ au_plink_append(inode, cpg->bdst, h_dst);
86dc4139 5511+
c2b27bf2
AM
5512+ if (au_ftest_cpup(cpg->flags, RENAME)) {
5513+ a->h_path.dentry = h_dst;
392086de 5514+ err = au_do_ren_after_cpup(cpg, &a->h_path);
86dc4139
AM
5515+ }
5516+ if (!err)
c2b27bf2 5517+ goto out_parent; /* success */
1facf9fc 5518+
5519+ /* revert */
4a4d8108 5520+out_rev:
c2b27bf2
AM
5521+ a->h_path.dentry = h_parent;
5522+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5523+ a->h_path.dentry = h_dst;
86dc4139 5524+ rerr = 0;
5527c038 5525+ if (d_is_positive(h_dst)) {
523b37e3
AM
5526+ if (!isdir) {
5527+ /* no delegation since it is just created */
5528+ rerr = vfsub_unlink(h_dir, &a->h_path,
5529+ /*delegated*/NULL, /*force*/0);
5530+ } else
c2b27bf2 5531+ rerr = vfsub_rmdir(h_dir, &a->h_path);
86dc4139 5532+ }
c2b27bf2 5533+ au_dtime_revert(&a->dt);
1facf9fc 5534+ if (rerr) {
5535+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
5536+ err = -EIO;
5537+ }
c2b27bf2 5538+out_parent:
1facf9fc 5539+ dput(dst_parent);
f0c0a007 5540+ au_delayed_kfree(a);
c2b27bf2 5541+out:
1facf9fc 5542+ return err;
5543+}
5544+
7e9cd9fe 5545+#if 0 /* reserved */
1facf9fc 5546+struct au_cpup_single_args {
5547+ int *errp;
c2b27bf2 5548+ struct au_cp_generic *cpg;
1facf9fc 5549+ struct dentry *dst_parent;
5550+};
5551+
5552+static void au_call_cpup_single(void *args)
5553+{
5554+ struct au_cpup_single_args *a = args;
86dc4139 5555+
c2b27bf2
AM
5556+ au_pin_hdir_acquire_nest(a->cpg->pin);
5557+ *a->errp = au_cpup_single(a->cpg, a->dst_parent);
5558+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5559+}
c2b27bf2 5560+#endif
1facf9fc 5561+
53392da6
AM
5562+/*
5563+ * prevent SIGXFSZ in copy-up.
5564+ * testing CAP_MKNOD is for generic fs,
5565+ * but CAP_FSETID is for xfs only, currently.
5566+ */
86dc4139 5567+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
53392da6
AM
5568+{
5569+ int do_sio;
86dc4139
AM
5570+ struct super_block *sb;
5571+ struct inode *h_dir;
53392da6
AM
5572+
5573+ do_sio = 0;
86dc4139 5574+ sb = au_pinned_parent(pin)->d_sb;
53392da6
AM
5575+ if (!au_wkq_test()
5576+ && (!au_sbi(sb)->si_plink_maint_pid
5577+ || au_plink_maint(sb, AuLock_NOPLM))) {
5578+ switch (mode & S_IFMT) {
5579+ case S_IFREG:
5580+ /* no condition about RLIMIT_FSIZE and the file size */
5581+ do_sio = 1;
5582+ break;
5583+ case S_IFCHR:
5584+ case S_IFBLK:
5585+ do_sio = !capable(CAP_MKNOD);
5586+ break;
5587+ }
5588+ if (!do_sio)
5589+ do_sio = ((mode & (S_ISUID | S_ISGID))
5590+ && !capable(CAP_FSETID));
86dc4139
AM
5591+ /* this workaround may be removed in the future */
5592+ if (!do_sio) {
5593+ h_dir = au_pinned_h_dir(pin);
5594+ do_sio = h_dir->i_mode & S_ISVTX;
5595+ }
53392da6
AM
5596+ }
5597+
5598+ return do_sio;
5599+}
5600+
7e9cd9fe 5601+#if 0 /* reserved */
c2b27bf2 5602+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5603+{
5604+ int err, wkq_err;
1facf9fc 5605+ struct dentry *h_dentry;
5606+
c2b27bf2 5607+ h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5608+ if (!au_cpup_sio_test(pin, d_inode(h_dentry)->i_mode))
c2b27bf2 5609+ err = au_cpup_single(cpg, dst_parent);
1facf9fc 5610+ else {
5611+ struct au_cpup_single_args args = {
5612+ .errp = &err,
c2b27bf2
AM
5613+ .cpg = cpg,
5614+ .dst_parent = dst_parent
1facf9fc 5615+ };
5616+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
5617+ if (unlikely(wkq_err))
5618+ err = wkq_err;
5619+ }
5620+
5621+ return err;
5622+}
c2b27bf2 5623+#endif
1facf9fc 5624+
5625+/*
5626+ * copyup the @dentry from the first active lower branch to @bdst,
5627+ * using au_cpup_single().
5628+ */
c2b27bf2 5629+static int au_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5630+{
5631+ int err;
c2b27bf2
AM
5632+ unsigned int flags_orig;
5633+ struct dentry *dentry;
5634+
5635+ AuDebugOn(cpg->bsrc < 0);
1facf9fc 5636+
c2b27bf2 5637+ dentry = cpg->dentry;
86dc4139 5638+ DiMustWriteLock(dentry);
1facf9fc 5639+
c2b27bf2 5640+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
1facf9fc 5641+ if (!err) {
c2b27bf2
AM
5642+ flags_orig = cpg->flags;
5643+ au_fset_cpup(cpg->flags, RENAME);
5644+ err = au_cpup_single(cpg, NULL);
5645+ cpg->flags = flags_orig;
1facf9fc 5646+ if (!err)
5647+ return 0; /* success */
5648+
5649+ /* revert */
c2b27bf2 5650+ au_set_h_dptr(dentry, cpg->bdst, NULL);
5afbbe0d 5651+ au_set_dbtop(dentry, cpg->bsrc);
1facf9fc 5652+ }
5653+
5654+ return err;
5655+}
5656+
5657+struct au_cpup_simple_args {
5658+ int *errp;
c2b27bf2 5659+ struct au_cp_generic *cpg;
1facf9fc 5660+};
5661+
5662+static void au_call_cpup_simple(void *args)
5663+{
5664+ struct au_cpup_simple_args *a = args;
86dc4139 5665+
c2b27bf2
AM
5666+ au_pin_hdir_acquire_nest(a->cpg->pin);
5667+ *a->errp = au_cpup_simple(a->cpg);
5668+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5669+}
5670+
c2b27bf2 5671+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5672+{
5673+ int err, wkq_err;
c2b27bf2
AM
5674+ struct dentry *dentry, *parent;
5675+ struct file *h_file;
1facf9fc 5676+ struct inode *h_dir;
5677+
c2b27bf2
AM
5678+ dentry = cpg->dentry;
5679+ h_file = NULL;
5680+ if (au_ftest_cpup(cpg->flags, HOPEN)) {
5681+ AuDebugOn(cpg->bsrc < 0);
392086de 5682+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
c2b27bf2
AM
5683+ err = PTR_ERR(h_file);
5684+ if (IS_ERR(h_file))
5685+ goto out;
5686+ }
5687+
1facf9fc 5688+ parent = dget_parent(dentry);
5527c038 5689+ h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
53392da6 5690+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
5527c038 5691+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5692+ err = au_cpup_simple(cpg);
1facf9fc 5693+ else {
5694+ struct au_cpup_simple_args args = {
5695+ .errp = &err,
c2b27bf2 5696+ .cpg = cpg
1facf9fc 5697+ };
5698+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
5699+ if (unlikely(wkq_err))
5700+ err = wkq_err;
5701+ }
5702+
5703+ dput(parent);
c2b27bf2
AM
5704+ if (h_file)
5705+ au_h_open_post(dentry, cpg->bsrc, h_file);
5706+
5707+out:
1facf9fc 5708+ return err;
5709+}
5710+
c2b27bf2 5711+int au_sio_cpup_simple(struct au_cp_generic *cpg)
367653fa 5712+{
5afbbe0d 5713+ aufs_bindex_t bsrc, bbot;
c2b27bf2 5714+ struct dentry *dentry, *h_dentry;
367653fa 5715+
c2b27bf2
AM
5716+ if (cpg->bsrc < 0) {
5717+ dentry = cpg->dentry;
5afbbe0d
AM
5718+ bbot = au_dbbot(dentry);
5719+ for (bsrc = cpg->bdst + 1; bsrc <= bbot; bsrc++) {
c2b27bf2
AM
5720+ h_dentry = au_h_dptr(dentry, bsrc);
5721+ if (h_dentry) {
5527c038 5722+ AuDebugOn(d_is_negative(h_dentry));
c2b27bf2
AM
5723+ break;
5724+ }
5725+ }
5afbbe0d 5726+ AuDebugOn(bsrc > bbot);
c2b27bf2 5727+ cpg->bsrc = bsrc;
367653fa 5728+ }
c2b27bf2
AM
5729+ AuDebugOn(cpg->bsrc <= cpg->bdst);
5730+ return au_do_sio_cpup_simple(cpg);
5731+}
367653fa 5732+
c2b27bf2
AM
5733+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
5734+{
5735+ AuDebugOn(cpg->bdst <= cpg->bsrc);
5736+ return au_do_sio_cpup_simple(cpg);
367653fa
AM
5737+}
5738+
1facf9fc 5739+/* ---------------------------------------------------------------------- */
5740+
5741+/*
5742+ * copyup the deleted file for writing.
5743+ */
c2b27bf2
AM
5744+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
5745+ struct file *file)
1facf9fc 5746+{
5747+ int err;
c2b27bf2
AM
5748+ unsigned int flags_orig;
5749+ aufs_bindex_t bsrc_orig;
c2b27bf2 5750+ struct au_dinfo *dinfo;
5afbbe0d
AM
5751+ struct {
5752+ struct au_hdentry *hd;
5753+ struct dentry *h_dentry;
5754+ } hdst, hsrc;
1facf9fc 5755+
c2b27bf2 5756+ dinfo = au_di(cpg->dentry);
1308ab2a 5757+ AuRwMustWriteLock(&dinfo->di_rwsem);
5758+
c2b27bf2 5759+ bsrc_orig = cpg->bsrc;
5afbbe0d
AM
5760+ cpg->bsrc = dinfo->di_btop;
5761+ hdst.hd = au_hdentry(dinfo, cpg->bdst);
5762+ hdst.h_dentry = hdst.hd->hd_dentry;
5763+ hdst.hd->hd_dentry = wh_dentry;
5764+ dinfo->di_btop = cpg->bdst;
5765+
5766+ hsrc.h_dentry = NULL;
027c5e7a 5767+ if (file) {
5afbbe0d
AM
5768+ hsrc.hd = au_hdentry(dinfo, cpg->bsrc);
5769+ hsrc.h_dentry = hsrc.hd->hd_dentry;
5770+ hsrc.hd->hd_dentry = au_hf_top(file)->f_path.dentry;
027c5e7a 5771+ }
c2b27bf2
AM
5772+ flags_orig = cpg->flags;
5773+ cpg->flags = !AuCpup_DTIME;
5774+ err = au_cpup_single(cpg, /*h_parent*/NULL);
5775+ cpg->flags = flags_orig;
027c5e7a
AM
5776+ if (file) {
5777+ if (!err)
5778+ err = au_reopen_nondir(file);
5afbbe0d 5779+ hsrc.hd->hd_dentry = hsrc.h_dentry;
1facf9fc 5780+ }
5afbbe0d
AM
5781+ hdst.hd->hd_dentry = hdst.h_dentry;
5782+ dinfo->di_btop = cpg->bsrc;
c2b27bf2 5783+ cpg->bsrc = bsrc_orig;
1facf9fc 5784+
5785+ return err;
5786+}
5787+
c2b27bf2 5788+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5789+{
5790+ int err;
c2b27bf2 5791+ aufs_bindex_t bdst;
1facf9fc 5792+ struct au_dtime dt;
c2b27bf2 5793+ struct dentry *dentry, *parent, *h_parent, *wh_dentry;
1facf9fc 5794+ struct au_branch *br;
5795+ struct path h_path;
5796+
c2b27bf2
AM
5797+ dentry = cpg->dentry;
5798+ bdst = cpg->bdst;
1facf9fc 5799+ br = au_sbr(dentry->d_sb, bdst);
5800+ parent = dget_parent(dentry);
5801+ h_parent = au_h_dptr(parent, bdst);
5802+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
5803+ err = PTR_ERR(wh_dentry);
5804+ if (IS_ERR(wh_dentry))
5805+ goto out;
5806+
5807+ h_path.dentry = h_parent;
86dc4139 5808+ h_path.mnt = au_br_mnt(br);
1facf9fc 5809+ au_dtime_store(&dt, parent, &h_path);
c2b27bf2 5810+ err = au_do_cpup_wh(cpg, wh_dentry, file);
1facf9fc 5811+ if (unlikely(err))
5812+ goto out_wh;
5813+
5814+ dget(wh_dentry);
5815+ h_path.dentry = wh_dentry;
2000de60 5816+ if (!d_is_dir(wh_dentry)) {
523b37e3 5817+ /* no delegation since it is just created */
5527c038 5818+ err = vfsub_unlink(d_inode(h_parent), &h_path,
523b37e3
AM
5819+ /*delegated*/NULL, /*force*/0);
5820+ } else
5527c038 5821+ err = vfsub_rmdir(d_inode(h_parent), &h_path);
1facf9fc 5822+ if (unlikely(err)) {
523b37e3
AM
5823+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
5824+ wh_dentry, err);
1facf9fc 5825+ err = -EIO;
5826+ }
5827+ au_dtime_revert(&dt);
5527c038 5828+ au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
1facf9fc 5829+
4f0767ce 5830+out_wh:
1facf9fc 5831+ dput(wh_dentry);
4f0767ce 5832+out:
1facf9fc 5833+ dput(parent);
5834+ return err;
5835+}
5836+
5837+struct au_cpup_wh_args {
5838+ int *errp;
c2b27bf2 5839+ struct au_cp_generic *cpg;
1facf9fc 5840+ struct file *file;
5841+};
5842+
5843+static void au_call_cpup_wh(void *args)
5844+{
5845+ struct au_cpup_wh_args *a = args;
86dc4139 5846+
c2b27bf2
AM
5847+ au_pin_hdir_acquire_nest(a->cpg->pin);
5848+ *a->errp = au_cpup_wh(a->cpg, a->file);
5849+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5850+}
5851+
c2b27bf2 5852+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5853+{
5854+ int err, wkq_err;
c2b27bf2 5855+ aufs_bindex_t bdst;
c1595e42 5856+ struct dentry *dentry, *parent, *h_orph, *h_parent;
86dc4139 5857+ struct inode *dir, *h_dir, *h_tmpdir;
1facf9fc 5858+ struct au_wbr *wbr;
c2b27bf2 5859+ struct au_pin wh_pin, *pin_orig;
1facf9fc 5860+
c2b27bf2
AM
5861+ dentry = cpg->dentry;
5862+ bdst = cpg->bdst;
1facf9fc 5863+ parent = dget_parent(dentry);
5527c038 5864+ dir = d_inode(parent);
1facf9fc 5865+ h_orph = NULL;
5866+ h_parent = NULL;
5867+ h_dir = au_igrab(au_h_iptr(dir, bdst));
5868+ h_tmpdir = h_dir;
c2b27bf2 5869+ pin_orig = NULL;
1facf9fc 5870+ if (!h_dir->i_nlink) {
5871+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
5872+ h_orph = wbr->wbr_orph;
5873+
5874+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 5875+ au_set_h_dptr(parent, bdst, dget(h_orph));
5527c038 5876+ h_tmpdir = d_inode(h_orph);
1facf9fc 5877+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
5878+
febd17d6 5879+ inode_lock_nested(h_tmpdir, AuLsc_I_PARENT3);
4a4d8108 5880+ /* todo: au_h_open_pre()? */
86dc4139 5881+
c2b27bf2 5882+ pin_orig = cpg->pin;
86dc4139 5883+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
c2b27bf2
AM
5884+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
5885+ cpg->pin = &wh_pin;
1facf9fc 5886+ }
5887+
53392da6 5888+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
5527c038 5889+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5890+ err = au_cpup_wh(cpg, file);
1facf9fc 5891+ else {
5892+ struct au_cpup_wh_args args = {
5893+ .errp = &err,
c2b27bf2
AM
5894+ .cpg = cpg,
5895+ .file = file
1facf9fc 5896+ };
5897+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
5898+ if (unlikely(wkq_err))
5899+ err = wkq_err;
5900+ }
5901+
5902+ if (h_orph) {
febd17d6 5903+ inode_unlock(h_tmpdir);
4a4d8108 5904+ /* todo: au_h_open_post()? */
1facf9fc 5905+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 5906+ au_set_h_dptr(parent, bdst, h_parent);
c2b27bf2
AM
5907+ AuDebugOn(!pin_orig);
5908+ cpg->pin = pin_orig;
1facf9fc 5909+ }
5910+ iput(h_dir);
5911+ dput(parent);
5912+
5913+ return err;
5914+}
5915+
5916+/* ---------------------------------------------------------------------- */
5917+
5918+/*
5919+ * generic routine for both of copy-up and copy-down.
5920+ */
5921+/* cf. revalidate function in file.c */
5922+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5923+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5924+ struct au_pin *pin,
1facf9fc 5925+ struct dentry *h_parent, void *arg),
5926+ void *arg)
5927+{
5928+ int err;
5929+ struct au_pin pin;
5527c038 5930+ struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
1facf9fc 5931+
5932+ err = 0;
5933+ parent = dget_parent(dentry);
5934+ if (IS_ROOT(parent))
5935+ goto out;
5936+
5937+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
5938+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
5939+
5940+ /* do not use au_dpage */
5941+ real_parent = parent;
5942+ while (1) {
5943+ dput(parent);
5944+ parent = dget_parent(dentry);
5945+ h_parent = au_h_dptr(parent, bdst);
5946+ if (h_parent)
5947+ goto out; /* success */
5948+
5949+ /* find top dir which is necessary to cpup */
5950+ do {
5951+ d = parent;
5952+ dput(parent);
5953+ parent = dget_parent(d);
5954+ di_read_lock_parent3(parent, !AuLock_IR);
5955+ h_parent = au_h_dptr(parent, bdst);
5956+ di_read_unlock(parent, !AuLock_IR);
5957+ } while (!h_parent);
5958+
5959+ if (d != real_parent)
5960+ di_write_lock_child3(d);
5961+
5962+ /* somebody else might create while we were sleeping */
5527c038
JR
5963+ h_dentry = au_h_dptr(d, bdst);
5964+ if (!h_dentry || d_is_negative(h_dentry)) {
5965+ if (h_dentry)
5afbbe0d 5966+ au_update_dbtop(d);
1facf9fc 5967+
5968+ au_pin_set_dentry(&pin, d);
5969+ err = au_do_pin(&pin);
5970+ if (!err) {
86dc4139 5971+ err = cp(d, bdst, &pin, h_parent, arg);
1facf9fc 5972+ au_unpin(&pin);
5973+ }
5974+ }
5975+
5976+ if (d != real_parent)
5977+ di_write_unlock(d);
5978+ if (unlikely(err))
5979+ break;
5980+ }
5981+
4f0767ce 5982+out:
1facf9fc 5983+ dput(parent);
5984+ return err;
5985+}
5986+
5987+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5988+ struct au_pin *pin,
2000de60 5989+ struct dentry *h_parent __maybe_unused,
1facf9fc 5990+ void *arg __maybe_unused)
5991+{
c2b27bf2
AM
5992+ struct au_cp_generic cpg = {
5993+ .dentry = dentry,
5994+ .bdst = bdst,
5995+ .bsrc = -1,
5996+ .len = 0,
5997+ .pin = pin,
5998+ .flags = AuCpup_DTIME
5999+ };
6000+ return au_sio_cpup_simple(&cpg);
1facf9fc 6001+}
6002+
6003+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
6004+{
6005+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
6006+}
6007+
6008+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
6009+{
6010+ int err;
6011+ struct dentry *parent;
6012+ struct inode *dir;
6013+
6014+ parent = dget_parent(dentry);
5527c038 6015+ dir = d_inode(parent);
1facf9fc 6016+ err = 0;
6017+ if (au_h_iptr(dir, bdst))
6018+ goto out;
6019+
6020+ di_read_unlock(parent, AuLock_IR);
6021+ di_write_lock_parent(parent);
6022+ /* someone else might change our inode while we were sleeping */
6023+ if (!au_h_iptr(dir, bdst))
6024+ err = au_cpup_dirs(dentry, bdst);
6025+ di_downgrade_lock(parent, AuLock_IR);
6026+
4f0767ce 6027+out:
1facf9fc 6028+ dput(parent);
6029+ return err;
6030+}
7f207e10
AM
6031diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
6032--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 6033+++ linux/fs/aufs/cpup.h 2016-10-09 16:55:36.486034798 +0200
523b37e3 6034@@ -0,0 +1,94 @@
1facf9fc 6035+/*
8cdd5066 6036+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6037+ *
6038+ * This program, aufs is free software; you can redistribute it and/or modify
6039+ * it under the terms of the GNU General Public License as published by
6040+ * the Free Software Foundation; either version 2 of the License, or
6041+ * (at your option) any later version.
dece6358
AM
6042+ *
6043+ * This program is distributed in the hope that it will be useful,
6044+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6045+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6046+ * GNU General Public License for more details.
6047+ *
6048+ * You should have received a copy of the GNU General Public License
523b37e3 6049+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6050+ */
6051+
6052+/*
6053+ * copy-up/down functions
6054+ */
6055+
6056+#ifndef __AUFS_CPUP_H__
6057+#define __AUFS_CPUP_H__
6058+
6059+#ifdef __KERNEL__
6060+
dece6358 6061+#include <linux/path.h>
1facf9fc 6062+
dece6358
AM
6063+struct inode;
6064+struct file;
86dc4139 6065+struct au_pin;
dece6358 6066+
86dc4139 6067+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
1facf9fc 6068+void au_cpup_attr_timesizes(struct inode *inode);
6069+void au_cpup_attr_nlink(struct inode *inode, int force);
6070+void au_cpup_attr_changeable(struct inode *inode);
6071+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
6072+void au_cpup_attr_all(struct inode *inode, int force);
6073+
6074+/* ---------------------------------------------------------------------- */
6075+
c2b27bf2
AM
6076+struct au_cp_generic {
6077+ struct dentry *dentry;
6078+ aufs_bindex_t bdst, bsrc;
6079+ loff_t len;
6080+ struct au_pin *pin;
6081+ unsigned int flags;
6082+};
6083+
/* cpup flags: one bit each, kept in an unsigned int */
#define AuCpup_DTIME		(1 << 0)	/* do dtime_store/revert */
#define AuCpup_KEEPLINO		(1 << 1)	/* do not clear the lower xino,
						   for link(2) */
#define AuCpup_RENAME		(1 << 2)	/* rename after cpup */
#define AuCpup_HOPEN		(1 << 3)	/* call h_open_pre/post() in
						   cpup */
#define AuCpup_OVERWRITE	(1 << 4)	/* allow overwriting the
						   existing entry */
#define AuCpup_RWDST		(1 << 5)	/* force write target even if
						   the branch is marked as RO */

/* test, set and clear a flag given by its name without the AuCpup_ prefix */
#define au_ftest_cpup(flags, name)	((flags) & AuCpup_##name)
#define au_fset_cpup(flags, name) \
	do { \
		(flags) |= AuCpup_##name; \
	} while (0)
#define au_fclr_cpup(flags, name) \
	do { \
		(flags) &= ~AuCpup_##name; \
	} while (0)
1facf9fc 6101+
6102+int au_copy_file(struct file *dst, struct file *src, loff_t len);
c2b27bf2
AM
6103+int au_sio_cpup_simple(struct au_cp_generic *cpg);
6104+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
6105+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
1facf9fc 6106+
6107+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
6108+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 6109+ struct au_pin *pin,
1facf9fc 6110+ struct dentry *h_parent, void *arg),
6111+ void *arg);
6112+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6113+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6114+
6115+/* ---------------------------------------------------------------------- */
6116+
6117+/* keep timestamps when copyup */
6118+struct au_dtime {
6119+ struct dentry *dt_dentry;
6120+ struct path dt_h_path;
6121+ struct timespec dt_atime, dt_mtime;
6122+};
6123+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
6124+ struct path *h_path);
6125+void au_dtime_revert(struct au_dtime *dt);
6126+
6127+#endif /* __KERNEL__ */
6128+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
6129diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
6130--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
6131+++ linux/fs/aufs/dbgaufs.c 2016-10-09 16:55:38.886097714 +0200
6132@@ -0,0 +1,438 @@
1facf9fc 6133+/*
8cdd5066 6134+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6135+ *
6136+ * This program, aufs is free software; you can redistribute it and/or modify
6137+ * it under the terms of the GNU General Public License as published by
6138+ * the Free Software Foundation; either version 2 of the License, or
6139+ * (at your option) any later version.
dece6358
AM
6140+ *
6141+ * This program is distributed in the hope that it will be useful,
6142+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6143+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6144+ * GNU General Public License for more details.
6145+ *
6146+ * You should have received a copy of the GNU General Public License
523b37e3 6147+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6148+ */
6149+
6150+/*
6151+ * debugfs interface
6152+ */
6153+
6154+#include <linux/debugfs.h>
6155+#include "aufs.h"
6156+
6157+#ifndef CONFIG_SYSFS
6158+#error DEBUG_FS depends upon SYSFS
6159+#endif
6160+
6161+static struct dentry *dbgaufs;
6162+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
6163+
6164+/* 20 is max digits length of ulong 64 */
6165+struct dbgaufs_arg {
6166+ int n;
6167+ char a[20 * 4];
6168+};
6169+
6170+/*
6171+ * common function for all XINO files
6172+ */
6173+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
6174+ struct file *file)
6175+{
f0c0a007 6176+ au_delayed_kfree(file->private_data);
1facf9fc 6177+ return 0;
6178+}
6179+
6180+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
6181+{
6182+ int err;
6183+ struct kstat st;
6184+ struct dbgaufs_arg *p;
6185+
6186+ err = -ENOMEM;
6187+ p = kmalloc(sizeof(*p), GFP_NOFS);
6188+ if (unlikely(!p))
6189+ goto out;
6190+
6191+ err = 0;
6192+ p->n = 0;
6193+ file->private_data = p;
6194+ if (!xf)
6195+ goto out;
6196+
c06a8ce3 6197+ err = vfs_getattr(&xf->f_path, &st);
1facf9fc 6198+ if (!err) {
6199+ if (do_fcnt)
6200+ p->n = snprintf
6201+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
6202+ (long)file_count(xf), st.blocks, st.blksize,
6203+ (long long)st.size);
6204+ else
6205+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
6206+ st.blocks, st.blksize,
6207+ (long long)st.size);
6208+ AuDebugOn(p->n >= sizeof(p->a));
6209+ } else {
6210+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
6211+ err = 0;
6212+ }
6213+
4f0767ce 6214+out:
1facf9fc 6215+ return err;
6216+
6217+}
6218+
6219+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
6220+ size_t count, loff_t *ppos)
6221+{
6222+ struct dbgaufs_arg *p;
6223+
6224+ p = file->private_data;
6225+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6226+}
6227+
6228+/* ---------------------------------------------------------------------- */
6229+
86dc4139
AM
6230+struct dbgaufs_plink_arg {
6231+ int n;
6232+ char a[];
6233+};
6234+
6235+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
6236+ struct file *file)
6237+{
f0c0a007 6238+ au_delayed_free_page((unsigned long)file->private_data);
86dc4139
AM
6239+ return 0;
6240+}
6241+
6242+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
6243+{
6244+ int err, i, limit;
6245+ unsigned long n, sum;
6246+ struct dbgaufs_plink_arg *p;
6247+ struct au_sbinfo *sbinfo;
6248+ struct super_block *sb;
6249+ struct au_sphlhead *sphl;
6250+
6251+ err = -ENOMEM;
6252+ p = (void *)get_zeroed_page(GFP_NOFS);
6253+ if (unlikely(!p))
6254+ goto out;
6255+
6256+ err = -EFBIG;
6257+ sbinfo = inode->i_private;
6258+ sb = sbinfo->si_sb;
6259+ si_noflush_read_lock(sb);
6260+ if (au_opt_test(au_mntflags(sb), PLINK)) {
6261+ limit = PAGE_SIZE - sizeof(p->n);
6262+
6263+ /* the number of buckets */
6264+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
6265+ p->n += n;
6266+ limit -= n;
6267+
6268+ sum = 0;
6269+ for (i = 0, sphl = sbinfo->si_plink;
6270+ i < AuPlink_NHASH;
6271+ i++, sphl++) {
6272+ n = au_sphl_count(sphl);
6273+ sum += n;
6274+
6275+ n = snprintf(p->a + p->n, limit, "%lu ", n);
6276+ p->n += n;
6277+ limit -= n;
6278+ if (unlikely(limit <= 0))
6279+ goto out_free;
6280+ }
6281+ p->a[p->n - 1] = '\n';
6282+
6283+ /* the sum of plinks */
6284+ n = snprintf(p->a + p->n, limit, "%lu\n", sum);
6285+ p->n += n;
6286+ limit -= n;
6287+ if (unlikely(limit <= 0))
6288+ goto out_free;
6289+ } else {
6290+#define str "1\n0\n0\n"
6291+ p->n = sizeof(str) - 1;
6292+ strcpy(p->a, str);
6293+#undef str
6294+ }
6295+ si_read_unlock(sb);
6296+
6297+ err = 0;
6298+ file->private_data = p;
6299+ goto out; /* success */
6300+
6301+out_free:
f0c0a007 6302+ au_delayed_free_page((unsigned long)p);
86dc4139
AM
6303+out:
6304+ return err;
6305+}
6306+
6307+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
6308+ size_t count, loff_t *ppos)
6309+{
6310+ struct dbgaufs_plink_arg *p;
6311+
6312+ p = file->private_data;
6313+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6314+}
6315+
6316+static const struct file_operations dbgaufs_plink_fop = {
6317+ .owner = THIS_MODULE,
6318+ .open = dbgaufs_plink_open,
6319+ .release = dbgaufs_plink_release,
6320+ .read = dbgaufs_plink_read
6321+};
6322+
6323+/* ---------------------------------------------------------------------- */
6324+
1facf9fc 6325+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
6326+{
6327+ int err;
6328+ struct au_sbinfo *sbinfo;
6329+ struct super_block *sb;
6330+
6331+ sbinfo = inode->i_private;
6332+ sb = sbinfo->si_sb;
6333+ si_noflush_read_lock(sb);
6334+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
6335+ si_read_unlock(sb);
6336+ return err;
6337+}
6338+
6339+static const struct file_operations dbgaufs_xib_fop = {
4a4d8108 6340+ .owner = THIS_MODULE,
1facf9fc 6341+ .open = dbgaufs_xib_open,
6342+ .release = dbgaufs_xi_release,
6343+ .read = dbgaufs_xi_read
6344+};
6345+
6346+/* ---------------------------------------------------------------------- */
6347+
6348+#define DbgaufsXi_PREFIX "xi"
6349+
6350+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
6351+{
6352+ int err;
6353+ long l;
6354+ struct au_sbinfo *sbinfo;
6355+ struct super_block *sb;
6356+ struct file *xf;
6357+ struct qstr *name;
6358+
6359+ err = -ENOENT;
6360+ xf = NULL;
2000de60 6361+ name = &file->f_path.dentry->d_name;
1facf9fc 6362+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
6363+ || memcmp(name->name, DbgaufsXi_PREFIX,
6364+ sizeof(DbgaufsXi_PREFIX) - 1)))
6365+ goto out;
9dbd164d 6366+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
1facf9fc 6367+ if (unlikely(err))
6368+ goto out;
6369+
6370+ sbinfo = inode->i_private;
6371+ sb = sbinfo->si_sb;
6372+ si_noflush_read_lock(sb);
5afbbe0d 6373+ if (l <= au_sbbot(sb)) {
1facf9fc 6374+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
6375+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
6376+ } else
6377+ err = -ENOENT;
6378+ si_read_unlock(sb);
6379+
4f0767ce 6380+out:
1facf9fc 6381+ return err;
6382+}
6383+
6384+static const struct file_operations dbgaufs_xino_fop = {
4a4d8108 6385+ .owner = THIS_MODULE,
1facf9fc 6386+ .open = dbgaufs_xino_open,
6387+ .release = dbgaufs_xi_release,
6388+ .read = dbgaufs_xi_read
6389+};
6390+
6391+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
6392+{
5afbbe0d 6393+ aufs_bindex_t bbot;
1facf9fc 6394+ struct au_branch *br;
6395+ struct au_xino_file *xi;
6396+
6397+ if (!au_sbi(sb)->si_dbgaufs)
6398+ return;
6399+
5afbbe0d
AM
6400+ bbot = au_sbbot(sb);
6401+ for (; bindex <= bbot; bindex++) {
1facf9fc 6402+ br = au_sbr(sb, bindex);
6403+ xi = &br->br_xino;
e2f27e51
AM
6404+ /* debugfs acquires the parent i_mutex */
6405+ lockdep_off();
c06a8ce3 6406+ debugfs_remove(xi->xi_dbgaufs);
e2f27e51 6407+ lockdep_on();
c06a8ce3 6408+ xi->xi_dbgaufs = NULL;
1facf9fc 6409+ }
6410+}
6411+
6412+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
6413+{
6414+ struct au_sbinfo *sbinfo;
6415+ struct dentry *parent;
6416+ struct au_branch *br;
6417+ struct au_xino_file *xi;
5afbbe0d 6418+ aufs_bindex_t bbot;
1facf9fc 6419+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
6420+
6421+ sbinfo = au_sbi(sb);
6422+ parent = sbinfo->si_dbgaufs;
6423+ if (!parent)
6424+ return;
6425+
5afbbe0d
AM
6426+ bbot = au_sbbot(sb);
6427+ for (; bindex <= bbot; bindex++) {
1facf9fc 6428+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
6429+ br = au_sbr(sb, bindex);
6430+ xi = &br->br_xino;
6431+ AuDebugOn(xi->xi_dbgaufs);
f0c0a007
AM
6432+ /* debugfs acquires the parent i_mutex */
6433+ lockdep_off();
1facf9fc 6434+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
6435+ sbinfo, &dbgaufs_xino_fop);
f0c0a007 6436+ lockdep_on();
1facf9fc 6437+ /* ignore an error */
6438+ if (unlikely(!xi->xi_dbgaufs))
6439+ AuWarn1("failed %s under debugfs\n", name);
6440+ }
6441+}
6442+
6443+/* ---------------------------------------------------------------------- */
6444+
#ifdef CONFIG_AUFS_EXPORT
/* open handler of the xigen file: report sbinfo->si_xigen under the si read lock */
static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
{
	int err;
	struct au_sbinfo *sbinfo = inode->i_private;
	struct super_block *sb = sbinfo->si_sb;

	si_noflush_read_lock(sb);
	err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
	si_read_unlock(sb);

	return err;
}

/* file operations of the xigen file */
static const struct file_operations dbgaufs_xigen_fop = {
	.owner		= THIS_MODULE,
	.open		= dbgaufs_xigen_open,
	.read		= dbgaufs_xi_read,
	.release	= dbgaufs_xi_release
};

/* create the "xigen" file in the superblock's dbgaufs directory; 0 or -EIO */
static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
{
	/*
	 * This function is a dynamic '__init' function actually,
	 * so the tiny check for si_rwsem is unnecessary.
	 */
	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */

	sbinfo->si_dbgaufs_xigen = debugfs_create_file
		("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
		 &dbgaufs_xigen_fop);

	return sbinfo->si_dbgaufs_xigen ? 0 : -EIO;
}
#else
/* without CONFIG_AUFS_EXPORT there is no xigen file to create */
static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
{
	return 0;
}
#endif /* CONFIG_AUFS_EXPORT */
6492+
6493+/* ---------------------------------------------------------------------- */
6494+
6495+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
6496+{
dece6358 6497+ /*
7e9cd9fe 6498+ * This function is a dynamic '__fin' function actually,
dece6358
AM
6499+ * so the tiny check for si_rwsem is unnecessary.
6500+ */
6501+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6502+
1facf9fc 6503+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
6504+ sbinfo->si_dbgaufs = NULL;
6505+ kobject_put(&sbinfo->si_kobj);
6506+}
6507+
6508+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
6509+{
6510+ int err;
6511+ char name[SysaufsSiNameLen];
6512+
dece6358 6513+ /*
c1595e42 6514+ * This function is a dynamic '__init' function actually,
dece6358
AM
6515+ * so the tiny check for si_rwsem is unnecessary.
6516+ */
6517+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6518+
1facf9fc 6519+ err = -ENOENT;
6520+ if (!dbgaufs) {
6521+ AuErr1("/debug/aufs is uninitialized\n");
6522+ goto out;
6523+ }
6524+
6525+ err = -EIO;
6526+ sysaufs_name(sbinfo, name);
6527+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
6528+ if (unlikely(!sbinfo->si_dbgaufs))
6529+ goto out;
6530+ kobject_get(&sbinfo->si_kobj);
6531+
6532+ sbinfo->si_dbgaufs_xib = debugfs_create_file
6533+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6534+ &dbgaufs_xib_fop);
6535+ if (unlikely(!sbinfo->si_dbgaufs_xib))
6536+ goto out_dir;
6537+
86dc4139
AM
6538+ sbinfo->si_dbgaufs_plink = debugfs_create_file
6539+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6540+ &dbgaufs_plink_fop);
6541+ if (unlikely(!sbinfo->si_dbgaufs_plink))
6542+ goto out_dir;
6543+
1facf9fc 6544+ err = dbgaufs_xigen_init(sbinfo);
6545+ if (!err)
6546+ goto out; /* success */
6547+
4f0767ce 6548+out_dir:
1facf9fc 6549+ dbgaufs_si_fin(sbinfo);
4f0767ce 6550+out:
1facf9fc 6551+ return err;
6552+}
6553+
6554+/* ---------------------------------------------------------------------- */
6555+
6556+void dbgaufs_fin(void)
6557+{
6558+ debugfs_remove(dbgaufs);
6559+}
6560+
6561+int __init dbgaufs_init(void)
6562+{
6563+ int err;
6564+
6565+ err = -EIO;
6566+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
6567+ if (dbgaufs)
6568+ err = 0;
6569+ return err;
6570+}
7f207e10
AM
6571diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
6572--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 6573+++ linux/fs/aufs/dbgaufs.h 2016-10-09 16:55:36.486034798 +0200
523b37e3 6574@@ -0,0 +1,48 @@
1facf9fc 6575+/*
8cdd5066 6576+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6577+ *
6578+ * This program, aufs is free software; you can redistribute it and/or modify
6579+ * it under the terms of the GNU General Public License as published by
6580+ * the Free Software Foundation; either version 2 of the License, or
6581+ * (at your option) any later version.
dece6358
AM
6582+ *
6583+ * This program is distributed in the hope that it will be useful,
6584+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6585+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6586+ * GNU General Public License for more details.
6587+ *
6588+ * You should have received a copy of the GNU General Public License
523b37e3 6589+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6590+ */
6591+
6592+/*
6593+ * debugfs interface
6594+ */
6595+
6596+#ifndef __DBGAUFS_H__
6597+#define __DBGAUFS_H__
6598+
6599+#ifdef __KERNEL__
6600+
dece6358 6601+struct super_block;
1facf9fc 6602+struct au_sbinfo;
dece6358 6603+
1facf9fc 6604+#ifdef CONFIG_DEBUG_FS
6605+/* dbgaufs.c */
6606+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
6607+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
6608+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
6609+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
6610+void dbgaufs_fin(void);
6611+int __init dbgaufs_init(void);
1facf9fc 6612+#else
4a4d8108
AM
6613+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
6614+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
6615+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
6616+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
6617+AuStubVoid(dbgaufs_fin, void)
6618+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 6619+#endif /* CONFIG_DEBUG_FS */
6620+
6621+#endif /* __KERNEL__ */
6622+#endif /* __DBGAUFS_H__ */
7f207e10
AM
6623diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
6624--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
6625+++ linux/fs/aufs/dcsub.c 2016-10-09 16:55:38.886097714 +0200
6626@@ -0,0 +1,225 @@
1facf9fc 6627+/*
8cdd5066 6628+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6629+ *
6630+ * This program, aufs is free software; you can redistribute it and/or modify
6631+ * it under the terms of the GNU General Public License as published by
6632+ * the Free Software Foundation; either version 2 of the License, or
6633+ * (at your option) any later version.
dece6358
AM
6634+ *
6635+ * This program is distributed in the hope that it will be useful,
6636+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6637+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6638+ * GNU General Public License for more details.
6639+ *
6640+ * You should have received a copy of the GNU General Public License
523b37e3 6641+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6642+ */
6643+
6644+/*
6645+ * sub-routines for dentry cache
6646+ */
6647+
6648+#include "aufs.h"
6649+
6650+static void au_dpage_free(struct au_dpage *dpage)
6651+{
6652+ int i;
6653+ struct dentry **p;
6654+
6655+ p = dpage->dentries;
6656+ for (i = 0; i < dpage->ndentry; i++)
6657+ dput(*p++);
f0c0a007 6658+ au_delayed_free_page((unsigned long)dpage->dentries);
1facf9fc 6659+}
6660+
6661+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
6662+{
6663+ int err;
6664+ void *p;
6665+
6666+ err = -ENOMEM;
6667+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
6668+ if (unlikely(!dpages->dpages))
6669+ goto out;
6670+
6671+ p = (void *)__get_free_page(gfp);
6672+ if (unlikely(!p))
6673+ goto out_dpages;
6674+
6675+ dpages->dpages[0].ndentry = 0;
6676+ dpages->dpages[0].dentries = p;
6677+ dpages->ndpage = 1;
6678+ return 0; /* success */
6679+
4f0767ce 6680+out_dpages:
f0c0a007 6681+ au_delayed_kfree(dpages->dpages);
4f0767ce 6682+out:
1facf9fc 6683+ return err;
6684+}
6685+
6686+void au_dpages_free(struct au_dcsub_pages *dpages)
6687+{
6688+ int i;
6689+ struct au_dpage *p;
6690+
6691+ p = dpages->dpages;
6692+ for (i = 0; i < dpages->ndpage; i++)
6693+ au_dpage_free(p++);
f0c0a007 6694+ au_delayed_kfree(dpages->dpages);
1facf9fc 6695+}
6696+
6697+static int au_dpages_append(struct au_dcsub_pages *dpages,
6698+ struct dentry *dentry, gfp_t gfp)
6699+{
6700+ int err, sz;
6701+ struct au_dpage *dpage;
6702+ void *p;
6703+
6704+ dpage = dpages->dpages + dpages->ndpage - 1;
6705+ sz = PAGE_SIZE / sizeof(dentry);
6706+ if (unlikely(dpage->ndentry >= sz)) {
6707+ AuLabel(new dpage);
6708+ err = -ENOMEM;
6709+ sz = dpages->ndpage * sizeof(*dpages->dpages);
6710+ p = au_kzrealloc(dpages->dpages, sz,
e2f27e51
AM
6711+ sz + sizeof(*dpages->dpages), gfp,
6712+ /*may_shrink*/0);
1facf9fc 6713+ if (unlikely(!p))
6714+ goto out;
6715+
6716+ dpages->dpages = p;
6717+ dpage = dpages->dpages + dpages->ndpage;
6718+ p = (void *)__get_free_page(gfp);
6719+ if (unlikely(!p))
6720+ goto out;
6721+
6722+ dpage->ndentry = 0;
6723+ dpage->dentries = p;
6724+ dpages->ndpage++;
6725+ }
6726+
c1595e42 6727+ AuDebugOn(au_dcount(dentry) <= 0);
027c5e7a 6728+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
1facf9fc 6729+ return 0; /* success */
6730+
4f0767ce 6731+out:
1facf9fc 6732+ return err;
6733+}
6734+
c1595e42
JR
6735+/* todo: BAD approach */
6736+/* copied from linux/fs/dcache.c */
6737+enum d_walk_ret {
6738+ D_WALK_CONTINUE,
6739+ D_WALK_QUIT,
6740+ D_WALK_NORETRY,
6741+ D_WALK_SKIP,
6742+};
6743+
6744+extern void d_walk(struct dentry *parent, void *data,
6745+ enum d_walk_ret (*enter)(void *, struct dentry *),
6746+ void (*finish)(void *));
6747+
6748+struct ac_dpages_arg {
1facf9fc 6749+ int err;
c1595e42
JR
6750+ struct au_dcsub_pages *dpages;
6751+ struct super_block *sb;
6752+ au_dpages_test test;
6753+ void *arg;
6754+};
1facf9fc 6755+
c1595e42
JR
6756+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
6757+{
6758+ enum d_walk_ret ret;
6759+ struct ac_dpages_arg *arg = _arg;
1facf9fc 6760+
c1595e42
JR
6761+ ret = D_WALK_CONTINUE;
6762+ if (dentry->d_sb == arg->sb
6763+ && !IS_ROOT(dentry)
6764+ && au_dcount(dentry) > 0
6765+ && au_di(dentry)
6766+ && (!arg->test || arg->test(dentry, arg->arg))) {
6767+ arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
6768+ if (unlikely(arg->err))
6769+ ret = D_WALK_QUIT;
1facf9fc 6770+ }
6771+
c1595e42
JR
6772+ return ret;
6773+}
027c5e7a 6774+
c1595e42
JR
6775+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6776+ au_dpages_test test, void *arg)
6777+{
6778+ struct ac_dpages_arg args = {
6779+ .err = 0,
6780+ .dpages = dpages,
6781+ .sb = root->d_sb,
6782+ .test = test,
6783+ .arg = arg
6784+ };
027c5e7a 6785+
c1595e42
JR
6786+ d_walk(root, &args, au_call_dpages_append, NULL);
6787+
6788+ return args.err;
1facf9fc 6789+}
6790+
6791+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6792+ int do_include, au_dpages_test test, void *arg)
6793+{
6794+ int err;
6795+
6796+ err = 0;
027c5e7a
AM
6797+ write_seqlock(&rename_lock);
6798+ spin_lock(&dentry->d_lock);
6799+ if (do_include
c1595e42 6800+ && au_dcount(dentry) > 0
027c5e7a 6801+ && (!test || test(dentry, arg)))
1facf9fc 6802+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6803+ spin_unlock(&dentry->d_lock);
6804+ if (unlikely(err))
6805+ goto out;
6806+
6807+ /*
523b37e3 6808+ * RCU for vfsmount is unnecessary since this is a traverse in a single
027c5e7a
AM
6809+ * mount
6810+ */
1facf9fc 6811+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
6812+ dentry = dentry->d_parent; /* rename_lock is locked */
6813+ spin_lock(&dentry->d_lock);
c1595e42 6814+ if (au_dcount(dentry) > 0
027c5e7a 6815+ && (!test || test(dentry, arg)))
1facf9fc 6816+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6817+ spin_unlock(&dentry->d_lock);
6818+ if (unlikely(err))
6819+ break;
1facf9fc 6820+ }
6821+
4f0767ce 6822+out:
027c5e7a 6823+ write_sequnlock(&rename_lock);
1facf9fc 6824+ return err;
6825+}
6826+
027c5e7a
AM
6827+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
6828+{
6829+ return au_di(dentry) && dentry->d_sb == arg;
6830+}
6831+
6832+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6833+ struct dentry *dentry, int do_include)
6834+{
6835+ return au_dcsub_pages_rev(dpages, dentry, do_include,
6836+ au_dcsub_dpages_aufs, dentry->d_sb);
6837+}
6838+
4a4d8108 6839+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 6840+{
4a4d8108
AM
6841+ struct path path[2] = {
6842+ {
6843+ .dentry = d1
6844+ },
6845+ {
6846+ .dentry = d2
6847+ }
6848+ };
1facf9fc 6849+
4a4d8108 6850+ return path_is_under(path + 0, path + 1);
1facf9fc 6851+}
7f207e10
AM
6852diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
6853--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 6854+++ linux/fs/aufs/dcsub.h 2016-10-09 16:55:36.486034798 +0200
5527c038 6855@@ -0,0 +1,136 @@
1facf9fc 6856+/*
8cdd5066 6857+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6858+ *
6859+ * This program, aufs is free software; you can redistribute it and/or modify
6860+ * it under the terms of the GNU General Public License as published by
6861+ * the Free Software Foundation; either version 2 of the License, or
6862+ * (at your option) any later version.
dece6358
AM
6863+ *
6864+ * This program is distributed in the hope that it will be useful,
6865+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6866+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6867+ * GNU General Public License for more details.
6868+ *
6869+ * You should have received a copy of the GNU General Public License
523b37e3 6870+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6871+ */
6872+
6873+/*
6874+ * sub-routines for dentry cache
6875+ */
6876+
6877+#ifndef __AUFS_DCSUB_H__
6878+#define __AUFS_DCSUB_H__
6879+
6880+#ifdef __KERNEL__
6881+
7f207e10 6882+#include <linux/dcache.h>
027c5e7a 6883+#include <linux/fs.h>
dece6358 6884+
1facf9fc 6885+struct au_dpage {
6886+ int ndentry;
6887+ struct dentry **dentries;
6888+};
6889+
6890+struct au_dcsub_pages {
6891+ int ndpage;
6892+ struct au_dpage *dpages;
6893+};
6894+
6895+/* ---------------------------------------------------------------------- */
6896+
7f207e10 6897+/* dcsub.c */
1facf9fc 6898+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
6899+void au_dpages_free(struct au_dcsub_pages *dpages);
6900+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
6901+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6902+ au_dpages_test test, void *arg);
6903+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6904+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
6905+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6906+ struct dentry *dentry, int do_include);
4a4d8108 6907+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 6908+
7f207e10
AM
6909+/* ---------------------------------------------------------------------- */
6910+
523b37e3
AM
6911+/*
6912+ * todo: in linux-3.13, several similar (but faster) helpers are added to
6913+ * include/linux/dcache.h. Try them (in the future).
6914+ */
6915+
027c5e7a
AM
6916+static inline int au_d_hashed_positive(struct dentry *d)
6917+{
6918+ int err;
5527c038 6919+ struct inode *inode = d_inode(d);
076b876e 6920+
027c5e7a 6921+ err = 0;
5527c038
JR
6922+ if (unlikely(d_unhashed(d)
6923+ || d_is_negative(d)
6924+ || !inode->i_nlink))
027c5e7a
AM
6925+ err = -ENOENT;
6926+ return err;
6927+}
6928+
38d290e6
JR
6929+static inline int au_d_linkable(struct dentry *d)
6930+{
6931+ int err;
5527c038 6932+ struct inode *inode = d_inode(d);
076b876e 6933+
38d290e6
JR
6934+ err = au_d_hashed_positive(d);
6935+ if (err
5527c038 6936+ && d_is_positive(d)
38d290e6
JR
6937+ && (inode->i_state & I_LINKABLE))
6938+ err = 0;
6939+ return err;
6940+}
6941+
027c5e7a
AM
6942+static inline int au_d_alive(struct dentry *d)
6943+{
6944+ int err;
6945+ struct inode *inode;
076b876e 6946+
027c5e7a
AM
6947+ err = 0;
6948+ if (!IS_ROOT(d))
6949+ err = au_d_hashed_positive(d);
6950+ else {
5527c038
JR
6951+ inode = d_inode(d);
6952+ if (unlikely(d_unlinked(d)
6953+ || d_is_negative(d)
6954+ || !inode->i_nlink))
027c5e7a
AM
6955+ err = -ENOENT;
6956+ }
6957+ return err;
6958+}
6959+
6960+static inline int au_alive_dir(struct dentry *d)
7f207e10 6961+{
027c5e7a 6962+ int err;
076b876e 6963+
027c5e7a 6964+ err = au_d_alive(d);
5527c038 6965+ if (unlikely(err || IS_DEADDIR(d_inode(d))))
027c5e7a
AM
6966+ err = -ENOENT;
6967+ return err;
7f207e10
AM
6968+}
6969+
38d290e6
JR
6970+static inline int au_qstreq(struct qstr *a, struct qstr *b)
6971+{
6972+ return a->len == b->len
6973+ && !memcmp(a->name, b->name, a->len);
6974+}
6975+
7e9cd9fe
AM
6976+/*
6977+ * by the commit
6978+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
6979+ * taking d_lock
6980+ * the type of d_lockref.count became int, but the inlined function d_count()
6981+ * still returns unsigned int.
6982+ * I don't know why. Maybe it is for every d_count() users?
6983+ * Anyway au_dcount() lives on.
6984+ */
c1595e42
JR
6985+static inline int au_dcount(struct dentry *d)
6986+{
6987+ return (int)d_count(d);
6988+}
6989+
1facf9fc 6990+#endif /* __KERNEL__ */
6991+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
6992diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
6993--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 6994+++ linux/fs/aufs/debug.c 2016-10-09 16:55:36.486034798 +0200
f0c0a007 6995@@ -0,0 +1,440 @@
1facf9fc 6996+/*
8cdd5066 6997+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6998+ *
6999+ * This program, aufs is free software; you can redistribute it and/or modify
7000+ * it under the terms of the GNU General Public License as published by
7001+ * the Free Software Foundation; either version 2 of the License, or
7002+ * (at your option) any later version.
dece6358
AM
7003+ *
7004+ * This program is distributed in the hope that it will be useful,
7005+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7006+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7007+ * GNU General Public License for more details.
7008+ *
7009+ * You should have received a copy of the GNU General Public License
523b37e3 7010+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7011+ */
7012+
7013+/*
7014+ * debug print functions
7015+ */
7016+
7017+#include "aufs.h"
7018+
392086de
AM
7019+/* Returns 0, or -errno. arg is in kp->arg. */
7020+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
7021+{
7022+ int err, n;
7023+
7024+ err = kstrtoint(val, 0, &n);
7025+ if (!err) {
7026+ if (n > 0)
7027+ au_debug_on();
7028+ else
7029+ au_debug_off();
7030+ }
7031+ return err;
7032+}
7033+
7034+/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
7035+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
7036+{
7037+ atomic_t *a;
7038+
7039+ a = kp->arg;
7040+ return sprintf(buffer, "%d", atomic_read(a));
7041+}
7042+
7043+static struct kernel_param_ops param_ops_atomic_t = {
7044+ .set = param_atomic_t_set,
7045+ .get = param_atomic_t_get
7046+ /* void (*free)(void *arg) */
7047+};
7048+
7049+atomic_t aufs_debug = ATOMIC_INIT(0);
1facf9fc 7050+MODULE_PARM_DESC(debug, "debug print");
392086de 7051+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
1facf9fc 7052+
c1595e42 7053+DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
1facf9fc 7054+char *au_plevel = KERN_DEBUG;
e49829fe
JR
7055+#define dpri(fmt, ...) do { \
7056+ if ((au_plevel \
7057+ && strcmp(au_plevel, KERN_DEBUG)) \
7058+ || au_debug_test()) \
7059+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 7060+} while (0)
7061+
7062+/* ---------------------------------------------------------------------- */
7063+
7064+void au_dpri_whlist(struct au_nhash *whlist)
7065+{
7066+ unsigned long ul, n;
7067+ struct hlist_head *head;
c06a8ce3 7068+ struct au_vdir_wh *pos;
1facf9fc 7069+
7070+ n = whlist->nh_num;
7071+ head = whlist->nh_head;
7072+ for (ul = 0; ul < n; ul++) {
c06a8ce3 7073+ hlist_for_each_entry(pos, head, wh_hash)
1facf9fc 7074+ dpri("b%d, %.*s, %d\n",
c06a8ce3
AM
7075+ pos->wh_bindex,
7076+ pos->wh_str.len, pos->wh_str.name,
7077+ pos->wh_str.len);
1facf9fc 7078+ head++;
7079+ }
7080+}
7081+
7082+void au_dpri_vdir(struct au_vdir *vdir)
7083+{
7084+ unsigned long ul;
7085+ union au_vdir_deblk_p p;
7086+ unsigned char *o;
7087+
7088+ if (!vdir || IS_ERR(vdir)) {
7089+ dpri("err %ld\n", PTR_ERR(vdir));
7090+ return;
7091+ }
7092+
7093+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
7094+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
7095+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
7096+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
7097+ p.deblk = vdir->vd_deblk[ul];
7098+ o = p.deblk;
7099+ dpri("[%lu]: %p\n", ul, o);
7100+ }
7101+}
7102+
53392da6 7103+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 7104+ struct dentry *wh)
7105+{
7106+ char *n = NULL;
7107+ int l = 0;
7108+
7109+ if (!inode || IS_ERR(inode)) {
7110+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
7111+ return -1;
7112+ }
7113+
c2b27bf2 7114+ /* the type of i_blocks depends upon CONFIG_LBDAF */
1facf9fc 7115+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
7116+ && sizeof(inode->i_blocks) != sizeof(u64));
7117+ if (wh) {
7118+ n = (void *)wh->d_name.name;
7119+ l = wh->d_name.len;
7120+ }
7121+
53392da6
AM
7122+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
7123+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
7124+ bindex, inode,
1facf9fc 7125+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
7126+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
7127+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 7128+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
1facf9fc 7129+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
7130+ inode->i_state, inode->i_flags, inode->i_version,
7131+ inode->i_generation,
1facf9fc 7132+ l ? ", wh " : "", l, n);
7133+ return 0;
7134+}
7135+
7136+void au_dpri_inode(struct inode *inode)
7137+{
7138+ struct au_iinfo *iinfo;
5afbbe0d 7139+ struct au_hinode *hi;
1facf9fc 7140+ aufs_bindex_t bindex;
53392da6 7141+ int err, hn;
1facf9fc 7142+
53392da6 7143+ err = do_pri_inode(-1, inode, -1, NULL);
5afbbe0d 7144+ if (err || !au_test_aufs(inode->i_sb) || au_is_bad_inode(inode))
1facf9fc 7145+ return;
7146+
7147+ iinfo = au_ii(inode);
5afbbe0d
AM
7148+ dpri("i-1: btop %d, bbot %d, gen %d\n",
7149+ iinfo->ii_btop, iinfo->ii_bbot, au_iigen(inode, NULL));
7150+ if (iinfo->ii_btop < 0)
1facf9fc 7151+ return;
53392da6 7152+ hn = 0;
5afbbe0d
AM
7153+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; bindex++) {
7154+ hi = au_hinode(iinfo, bindex);
7155+ hn = !!au_hn(hi);
7156+ do_pri_inode(bindex, hi->hi_inode, hn, hi->hi_whdentry);
53392da6 7157+ }
1facf9fc 7158+}
7159+
2cbb1c4b
JR
7160+void au_dpri_dalias(struct inode *inode)
7161+{
7162+ struct dentry *d;
7163+
7164+ spin_lock(&inode->i_lock);
c1595e42 7165+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
2cbb1c4b
JR
7166+ au_dpri_dentry(d);
7167+ spin_unlock(&inode->i_lock);
7168+}
7169+
1facf9fc 7170+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
7171+{
7172+ struct dentry *wh = NULL;
53392da6 7173+ int hn;
5afbbe0d 7174+ struct inode *inode;
076b876e 7175+ struct au_iinfo *iinfo;
5afbbe0d 7176+ struct au_hinode *hi;
1facf9fc 7177+
7178+ if (!dentry || IS_ERR(dentry)) {
7179+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
7180+ return -1;
7181+ }
7182+ /* do not call dget_parent() here */
027c5e7a 7183+ /* note: access d_xxx without d_lock */
523b37e3
AM
7184+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
7185+ bindex, dentry, dentry,
1facf9fc 7186+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
c1595e42 7187+ au_dcount(dentry), dentry->d_flags,
523b37e3 7188+ d_unhashed(dentry) ? "un" : "");
53392da6 7189+ hn = -1;
5afbbe0d
AM
7190+ inode = NULL;
7191+ if (d_is_positive(dentry))
7192+ inode = d_inode(dentry);
7193+ if (inode
7194+ && au_test_aufs(dentry->d_sb)
7195+ && bindex >= 0
7196+ && !au_is_bad_inode(inode)) {
7197+ iinfo = au_ii(inode);
7198+ hi = au_hinode(iinfo, bindex);
7199+ hn = !!au_hn(hi);
7200+ wh = hi->hi_whdentry;
7201+ }
7202+ do_pri_inode(bindex, inode, hn, wh);
1facf9fc 7203+ return 0;
7204+}
7205+
7206+void au_dpri_dentry(struct dentry *dentry)
7207+{
7208+ struct au_dinfo *dinfo;
7209+ aufs_bindex_t bindex;
7210+ int err;
7211+
7212+ err = do_pri_dentry(-1, dentry);
7213+ if (err || !au_test_aufs(dentry->d_sb))
7214+ return;
7215+
7216+ dinfo = au_di(dentry);
7217+ if (!dinfo)
7218+ return;
5afbbe0d
AM
7219+ dpri("d-1: btop %d, bbot %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
7220+ dinfo->di_btop, dinfo->di_bbot,
38d290e6
JR
7221+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
7222+ dinfo->di_tmpfile);
5afbbe0d 7223+ if (dinfo->di_btop < 0)
1facf9fc 7224+ return;
5afbbe0d
AM
7225+ for (bindex = dinfo->di_btop; bindex <= dinfo->di_bbot; bindex++)
7226+ do_pri_dentry(bindex, au_hdentry(dinfo, bindex)->hd_dentry);
1facf9fc 7227+}
7228+
7229+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
7230+{
7231+ char a[32];
7232+
7233+ if (!file || IS_ERR(file)) {
7234+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
7235+ return -1;
7236+ }
7237+ a[0] = 0;
7238+ if (bindex < 0
b912730e 7239+ && !IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7240+ && au_test_aufs(file->f_path.dentry->d_sb)
1facf9fc 7241+ && au_fi(file))
e49829fe 7242+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 7243+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 7244+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 7245+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 7246+ file->f_version, file->f_pos, a);
b912730e 7247+ if (!IS_ERR_OR_NULL(file->f_path.dentry))
2000de60 7248+ do_pri_dentry(bindex, file->f_path.dentry);
1facf9fc 7249+ return 0;
7250+}
7251+
7252+void au_dpri_file(struct file *file)
7253+{
7254+ struct au_finfo *finfo;
4a4d8108
AM
7255+ struct au_fidir *fidir;
7256+ struct au_hfile *hfile;
1facf9fc 7257+ aufs_bindex_t bindex;
7258+ int err;
7259+
7260+ err = do_pri_file(-1, file);
2000de60 7261+ if (err
b912730e 7262+ || IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7263+ || !au_test_aufs(file->f_path.dentry->d_sb))
1facf9fc 7264+ return;
7265+
7266+ finfo = au_fi(file);
7267+ if (!finfo)
7268+ return;
4a4d8108 7269+ if (finfo->fi_btop < 0)
1facf9fc 7270+ return;
4a4d8108
AM
7271+ fidir = finfo->fi_hdir;
7272+ if (!fidir)
7273+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
7274+ else
e49829fe
JR
7275+ for (bindex = finfo->fi_btop;
7276+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
7277+ bindex++) {
7278+ hfile = fidir->fd_hfile + bindex;
7279+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
7280+ }
1facf9fc 7281+}
7282+
7283+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
7284+{
7285+ struct vfsmount *mnt;
7286+ struct super_block *sb;
7287+
7288+ if (!br || IS_ERR(br))
7289+ goto out;
86dc4139 7290+ mnt = au_br_mnt(br);
1facf9fc 7291+ if (!mnt || IS_ERR(mnt))
7292+ goto out;
7293+ sb = mnt->mnt_sb;
7294+ if (!sb || IS_ERR(sb))
7295+ goto out;
7296+
5afbbe0d 7297+ dpri("s%d: {perm 0x%x, id %d, cnt %lld, wbr %p}, "
b752ccd1 7298+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 7299+ "xino %d\n",
5afbbe0d 7300+ bindex, br->br_perm, br->br_id, au_br_count(br),
1e00d052 7301+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 7302+ sb->s_flags, sb->s_count,
1facf9fc 7303+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
7304+ return 0;
7305+
4f0767ce 7306+out:
1facf9fc 7307+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
7308+ return -1;
7309+}
7310+
7311+void au_dpri_sb(struct super_block *sb)
7312+{
7313+ struct au_sbinfo *sbinfo;
7314+ aufs_bindex_t bindex;
7315+ int err;
7316+ /* to reuduce stack size */
7317+ struct {
7318+ struct vfsmount mnt;
7319+ struct au_branch fake;
7320+ } *a;
7321+
7322+ /* this function can be called from magic sysrq */
7323+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
7324+ if (unlikely(!a)) {
7325+ dpri("no memory\n");
7326+ return;
7327+ }
7328+
7329+ a->mnt.mnt_sb = sb;
86dc4139 7330+ a->fake.br_path.mnt = &a->mnt;
5afbbe0d 7331+ au_br_count_init(&a->fake);
1facf9fc 7332+ err = do_pri_br(-1, &a->fake);
5afbbe0d 7333+ au_br_count_fin(&a->fake);
f0c0a007 7334+ au_delayed_kfree(a);
1facf9fc 7335+ dpri("dev 0x%x\n", sb->s_dev);
7336+ if (err || !au_test_aufs(sb))
7337+ return;
7338+
7339+ sbinfo = au_sbi(sb);
7340+ if (!sbinfo)
7341+ return;
f0c0a007
AM
7342+ dpri("nw %d, gen %u, kobj %d\n",
7343+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
1facf9fc 7344+ atomic_read(&sbinfo->si_kobj.kref.refcount));
5afbbe0d 7345+ for (bindex = 0; bindex <= sbinfo->si_bbot; bindex++)
1facf9fc 7346+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
7347+}
7348+
7349+/* ---------------------------------------------------------------------- */
7350+
027c5e7a
AM
7351+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
7352+{
5527c038 7353+ struct inode *h_inode, *inode = d_inode(dentry);
027c5e7a 7354+ struct dentry *h_dentry;
5afbbe0d 7355+ aufs_bindex_t bindex, bbot, bi;
027c5e7a
AM
7356+
7357+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
7358+ return;
7359+
5afbbe0d
AM
7360+ bbot = au_dbbot(dentry);
7361+ bi = au_ibbot(inode);
7362+ if (bi < bbot)
7363+ bbot = bi;
7364+ bindex = au_dbtop(dentry);
7365+ bi = au_ibtop(inode);
027c5e7a
AM
7366+ if (bi > bindex)
7367+ bindex = bi;
7368+
5afbbe0d 7369+ for (; bindex <= bbot; bindex++) {
027c5e7a
AM
7370+ h_dentry = au_h_dptr(dentry, bindex);
7371+ if (!h_dentry)
7372+ continue;
7373+ h_inode = au_h_iptr(inode, bindex);
5527c038 7374+ if (unlikely(h_inode != d_inode(h_dentry))) {
392086de 7375+ au_debug_on();
027c5e7a
AM
7376+ AuDbg("b%d, %s:%d\n", bindex, func, line);
7377+ AuDbgDentry(dentry);
7378+ AuDbgInode(inode);
392086de 7379+ au_debug_off();
027c5e7a
AM
7380+ BUG();
7381+ }
7382+ }
7383+}
7384+
1facf9fc 7385+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
7386+{
7387+ int err, i, j;
7388+ struct au_dcsub_pages dpages;
7389+ struct au_dpage *dpage;
7390+ struct dentry **dentries;
7391+
7392+ err = au_dpages_init(&dpages, GFP_NOFS);
7393+ AuDebugOn(err);
027c5e7a 7394+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 7395+ AuDebugOn(err);
7396+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
7397+ dpage = dpages.dpages + i;
7398+ dentries = dpage->dentries;
7399+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 7400+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 7401+ }
7402+ au_dpages_free(&dpages);
7403+}
7404+
1facf9fc 7405+void au_dbg_verify_kthread(void)
7406+{
53392da6 7407+ if (au_wkq_test()) {
1facf9fc 7408+ au_dbg_blocked();
1e00d052
AM
7409+ /*
7410+ * It may be recursive, but udba=notify between two aufs mounts,
7411+ * where a single ro branch is shared, is not a problem.
7412+ */
7413+ /* WARN_ON(1); */
1facf9fc 7414+ }
7415+}
7416+
7417+/* ---------------------------------------------------------------------- */
7418+
1facf9fc 7419+int __init au_debug_init(void)
7420+{
7421+ aufs_bindex_t bindex;
7422+ struct au_vdir_destr destr;
7423+
7424+ bindex = -1;
7425+ AuDebugOn(bindex >= 0);
7426+
7427+ destr.len = -1;
7428+ AuDebugOn(destr.len < NAME_MAX);
7429+
7430+#ifdef CONFIG_4KSTACKS
0c3ec466 7431+ pr_warn("CONFIG_4KSTACKS is defined.\n");
1facf9fc 7432+#endif
7433+
1facf9fc 7434+ return 0;
7435+}
7f207e10
AM
7436diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
7437--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 7438+++ linux/fs/aufs/debug.h 2016-10-09 16:55:36.486034798 +0200
5527c038 7439@@ -0,0 +1,225 @@
1facf9fc 7440+/*
8cdd5066 7441+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 7442+ *
7443+ * This program, aufs is free software; you can redistribute it and/or modify
7444+ * it under the terms of the GNU General Public License as published by
7445+ * the Free Software Foundation; either version 2 of the License, or
7446+ * (at your option) any later version.
dece6358
AM
7447+ *
7448+ * This program is distributed in the hope that it will be useful,
7449+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7450+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7451+ * GNU General Public License for more details.
7452+ *
7453+ * You should have received a copy of the GNU General Public License
523b37e3 7454+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7455+ */
7456+
7457+/*
7458+ * debug print functions
7459+ */
7460+
7461+#ifndef __AUFS_DEBUG_H__
7462+#define __AUFS_DEBUG_H__
7463+
7464+#ifdef __KERNEL__
7465+
392086de 7466+#include <linux/atomic.h>
4a4d8108
AM
7467+#include <linux/module.h>
7468+#include <linux/kallsyms.h>
1facf9fc 7469+#include <linux/sysrq.h>
4a4d8108 7470+
1facf9fc 7471+#ifdef CONFIG_AUFS_DEBUG
7472+#define AuDebugOn(a) BUG_ON(a)
7473+
7474+/* module parameter */
392086de
AM
7475+extern atomic_t aufs_debug;
7476+static inline void au_debug_on(void)
1facf9fc 7477+{
392086de
AM
7478+ atomic_inc(&aufs_debug);
7479+}
7480+static inline void au_debug_off(void)
7481+{
7482+ atomic_dec_if_positive(&aufs_debug);
1facf9fc 7483+}
7484+
7485+static inline int au_debug_test(void)
7486+{
392086de 7487+ return atomic_read(&aufs_debug) > 0;
1facf9fc 7488+}
7489+#else
7490+#define AuDebugOn(a) do {} while (0)
392086de
AM
7491+AuStubVoid(au_debug_on, void)
7492+AuStubVoid(au_debug_off, void)
4a4d8108 7493+AuStubInt0(au_debug_test, void)
1facf9fc 7494+#endif /* CONFIG_AUFS_DEBUG */
7495+
392086de
AM
7496+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
7497+
1facf9fc 7498+/* ---------------------------------------------------------------------- */
7499+
7500+/* debug print */
7501+
4a4d8108 7502+#define AuDbg(fmt, ...) do { \
1facf9fc 7503+ if (au_debug_test()) \
4a4d8108 7504+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
1facf9fc 7505+} while (0)
4a4d8108
AM
7506+#define AuLabel(l) AuDbg(#l "\n")
7507+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
7508+#define AuWarn1(fmt, ...) do { \
1facf9fc 7509+ static unsigned char _c; \
7510+ if (!_c++) \
0c3ec466 7511+ pr_warn(fmt, ##__VA_ARGS__); \
1facf9fc 7512+} while (0)
7513+
4a4d8108 7514+#define AuErr1(fmt, ...) do { \
1facf9fc 7515+ static unsigned char _c; \
7516+ if (!_c++) \
4a4d8108 7517+ pr_err(fmt, ##__VA_ARGS__); \
1facf9fc 7518+} while (0)
7519+
4a4d8108 7520+#define AuIOErr1(fmt, ...) do { \
1facf9fc 7521+ static unsigned char _c; \
7522+ if (!_c++) \
4a4d8108 7523+ AuIOErr(fmt, ##__VA_ARGS__); \
1facf9fc 7524+} while (0)
7525+
7526+#define AuUnsupportMsg "This operation is not supported." \
7527+ " Please report this application to aufs-users ML."
4a4d8108
AM
7528+#define AuUnsupport(fmt, ...) do { \
7529+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
1facf9fc 7530+ dump_stack(); \
7531+} while (0)
7532+
7533+#define AuTraceErr(e) do { \
7534+ if (unlikely((e) < 0)) \
7535+ AuDbg("err %d\n", (int)(e)); \
7536+} while (0)
7537+
7538+#define AuTraceErrPtr(p) do { \
7539+ if (IS_ERR(p)) \
7540+ AuDbg("err %ld\n", PTR_ERR(p)); \
7541+} while (0)
7542+
7543+/* dirty macros for debug print, use with "%.*s" and caution */
7544+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
1facf9fc 7545+
7546+/* ---------------------------------------------------------------------- */
7547+
dece6358 7548+struct dentry;
1facf9fc 7549+#ifdef CONFIG_AUFS_DEBUG
c1595e42 7550+extern struct mutex au_dbg_mtx;
1facf9fc 7551+extern char *au_plevel;
7552+struct au_nhash;
7553+void au_dpri_whlist(struct au_nhash *whlist);
7554+struct au_vdir;
7555+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 7556+struct inode;
1facf9fc 7557+void au_dpri_inode(struct inode *inode);
2cbb1c4b 7558+void au_dpri_dalias(struct inode *inode);
1facf9fc 7559+void au_dpri_dentry(struct dentry *dentry);
dece6358 7560+struct file;
1facf9fc 7561+void au_dpri_file(struct file *filp);
dece6358 7562+struct super_block;
1facf9fc 7563+void au_dpri_sb(struct super_block *sb);
7564+
027c5e7a
AM
7565+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
7566+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 7567+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 7568+void au_dbg_verify_kthread(void);
7569+
7570+int __init au_debug_init(void);
7e9cd9fe 7571+
1facf9fc 7572+#define AuDbgWhlist(w) do { \
c1595e42 7573+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7574+ AuDbg(#w "\n"); \
7575+ au_dpri_whlist(w); \
c1595e42 7576+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7577+} while (0)
7578+
7579+#define AuDbgVdir(v) do { \
c1595e42 7580+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7581+ AuDbg(#v "\n"); \
7582+ au_dpri_vdir(v); \
c1595e42 7583+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7584+} while (0)
7585+
7586+#define AuDbgInode(i) do { \
c1595e42 7587+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7588+ AuDbg(#i "\n"); \
7589+ au_dpri_inode(i); \
c1595e42 7590+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7591+} while (0)
7592+
2cbb1c4b 7593+#define AuDbgDAlias(i) do { \
c1595e42 7594+ mutex_lock(&au_dbg_mtx); \
2cbb1c4b
JR
7595+ AuDbg(#i "\n"); \
7596+ au_dpri_dalias(i); \
c1595e42 7597+ mutex_unlock(&au_dbg_mtx); \
2cbb1c4b
JR
7598+} while (0)
7599+
1facf9fc 7600+#define AuDbgDentry(d) do { \
c1595e42 7601+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7602+ AuDbg(#d "\n"); \
7603+ au_dpri_dentry(d); \
c1595e42 7604+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7605+} while (0)
7606+
7607+#define AuDbgFile(f) do { \
c1595e42 7608+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7609+ AuDbg(#f "\n"); \
7610+ au_dpri_file(f); \
c1595e42 7611+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7612+} while (0)
7613+
7614+#define AuDbgSb(sb) do { \
c1595e42 7615+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7616+ AuDbg(#sb "\n"); \
7617+ au_dpri_sb(sb); \
c1595e42 7618+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7619+} while (0)
7620+
4a4d8108
AM
7621+#define AuDbgSym(addr) do { \
7622+ char sym[KSYM_SYMBOL_LEN]; \
7623+ sprint_symbol(sym, (unsigned long)addr); \
7624+ AuDbg("%s\n", sym); \
7625+} while (0)
1facf9fc 7626+#else
027c5e7a 7627+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
7628+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
7629+AuStubVoid(au_dbg_verify_kthread, void)
7630+AuStubInt0(__init au_debug_init, void)
1facf9fc 7631+
1facf9fc 7632+#define AuDbgWhlist(w) do {} while (0)
7633+#define AuDbgVdir(v) do {} while (0)
7634+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 7635+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 7636+#define AuDbgDentry(d) do {} while (0)
7637+#define AuDbgFile(f) do {} while (0)
7638+#define AuDbgSb(sb) do {} while (0)
4a4d8108 7639+#define AuDbgSym(addr) do {} while (0)
1facf9fc 7640+#endif /* CONFIG_AUFS_DEBUG */
7641+
7642+/* ---------------------------------------------------------------------- */
7643+
7644+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
7645+int __init au_sysrq_init(void);
7646+void au_sysrq_fin(void);
7647+
7648+#ifdef CONFIG_HW_CONSOLE
7649+#define au_dbg_blocked() do { \
7650+ WARN_ON(1); \
0c5527e5 7651+ handle_sysrq('w'); \
1facf9fc 7652+} while (0)
7653+#else
4a4d8108 7654+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7655+#endif
7656+
7657+#else
4a4d8108
AM
7658+AuStubInt0(__init au_sysrq_init, void)
7659+AuStubVoid(au_sysrq_fin, void)
7660+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7661+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
7662+
7663+#endif /* __KERNEL__ */
7664+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
7665diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
7666--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
7667+++ linux/fs/aufs/dentry.c 2016-10-09 16:55:38.889431135 +0200
7668@@ -0,0 +1,1130 @@
1facf9fc 7669+/*
8cdd5066 7670+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 7671+ *
7672+ * This program, aufs is free software; you can redistribute it and/or modify
7673+ * it under the terms of the GNU General Public License as published by
7674+ * the Free Software Foundation; either version 2 of the License, or
7675+ * (at your option) any later version.
dece6358
AM
7676+ *
7677+ * This program is distributed in the hope that it will be useful,
7678+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7679+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7680+ * GNU General Public License for more details.
7681+ *
7682+ * You should have received a copy of the GNU General Public License
523b37e3 7683+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7684+ */
7685+
7686+/*
7687+ * lookup and dentry operations
7688+ */
7689+
dece6358 7690+#include <linux/namei.h>
1facf9fc 7691+#include "aufs.h"
7692+
1facf9fc 7693+struct au_do_lookup_args {
7694+ unsigned int flags;
7695+ mode_t type;
1facf9fc 7696+};
7697+
7698+/*
7699+ * returns positive/negative dentry, NULL or an error.
7700+ * NULL means whiteout-ed or not-found.
7701+ */
7702+static struct dentry*
7703+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
7704+ aufs_bindex_t bindex, struct qstr *wh_name,
7705+ struct au_do_lookup_args *args)
7706+{
7707+ struct dentry *h_dentry;
2000de60 7708+ struct inode *h_inode;
1facf9fc 7709+ struct au_branch *br;
7710+ int wh_found, opq;
7711+ unsigned char wh_able;
7712+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
076b876e
AM
7713+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
7714+ IGNORE_PERM);
1facf9fc 7715+
1facf9fc 7716+ wh_found = 0;
7717+ br = au_sbr(dentry->d_sb, bindex);
7718+ wh_able = !!au_br_whable(br->br_perm);
7719+ if (wh_able)
e2f27e51 7720+ wh_found = au_wh_test(h_parent, wh_name, ignore_perm);
1facf9fc 7721+ h_dentry = ERR_PTR(wh_found);
7722+ if (!wh_found)
7723+ goto real_lookup;
7724+ if (unlikely(wh_found < 0))
7725+ goto out;
7726+
7727+ /* We found a whiteout */
5afbbe0d 7728+ /* au_set_dbbot(dentry, bindex); */
1facf9fc 7729+ au_set_dbwh(dentry, bindex);
7730+ if (!allow_neg)
7731+ return NULL; /* success */
7732+
4f0767ce 7733+real_lookup:
076b876e
AM
7734+ if (!ignore_perm)
7735+ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
7736+ else
7737+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
2000de60
JR
7738+ if (IS_ERR(h_dentry)) {
7739+ if (PTR_ERR(h_dentry) == -ENAMETOOLONG
7740+ && !allow_neg)
7741+ h_dentry = NULL;
1facf9fc 7742+ goto out;
2000de60 7743+ }
1facf9fc 7744+
5527c038
JR
7745+ h_inode = d_inode(h_dentry);
7746+ if (d_is_negative(h_dentry)) {
1facf9fc 7747+ if (!allow_neg)
7748+ goto out_neg;
7749+ } else if (wh_found
7750+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
7751+ goto out_neg;
7752+
5afbbe0d
AM
7753+ if (au_dbbot(dentry) <= bindex)
7754+ au_set_dbbot(dentry, bindex);
7755+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
7756+ au_set_dbtop(dentry, bindex);
1facf9fc 7757+ au_set_h_dptr(dentry, bindex, h_dentry);
7758+
2000de60
JR
7759+ if (!d_is_dir(h_dentry)
7760+ || !wh_able
5527c038 7761+ || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
1facf9fc 7762+ goto out; /* success */
7763+
febd17d6 7764+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
076b876e 7765+ opq = au_diropq_test(h_dentry);
febd17d6 7766+ inode_unlock(h_inode);
1facf9fc 7767+ if (opq > 0)
7768+ au_set_dbdiropq(dentry, bindex);
7769+ else if (unlikely(opq < 0)) {
7770+ au_set_h_dptr(dentry, bindex, NULL);
7771+ h_dentry = ERR_PTR(opq);
7772+ }
7773+ goto out;
7774+
4f0767ce 7775+out_neg:
1facf9fc 7776+ dput(h_dentry);
7777+ h_dentry = NULL;
4f0767ce 7778+out:
1facf9fc 7779+ return h_dentry;
7780+}
7781+
dece6358
AM
7782+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
7783+{
7784+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
7785+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
7786+ return -EPERM;
7787+ return 0;
7788+}
7789+
1facf9fc 7790+/*
7791+ * returns the number of lower positive dentries,
7792+ * otherwise an error.
7793+ * can be called at unlinking with @type is zero.
7794+ */
5afbbe0d
AM
7795+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
7796+ unsigned int flags)
1facf9fc 7797+{
7798+ int npositive, err;
7799+ aufs_bindex_t bindex, btail, bdiropq;
076b876e 7800+ unsigned char isdir, dirperm1;
1facf9fc 7801+ struct qstr whname;
7802+ struct au_do_lookup_args args = {
5afbbe0d 7803+ .flags = flags
1facf9fc 7804+ };
7805+ const struct qstr *name = &dentry->d_name;
7806+ struct dentry *parent;
076b876e 7807+ struct super_block *sb;
1facf9fc 7808+
076b876e
AM
7809+ sb = dentry->d_sb;
7810+ err = au_test_shwh(sb, name);
dece6358 7811+ if (unlikely(err))
1facf9fc 7812+ goto out;
7813+
7814+ err = au_wh_name_alloc(&whname, name);
7815+ if (unlikely(err))
7816+ goto out;
7817+
2000de60 7818+ isdir = !!d_is_dir(dentry);
076b876e 7819+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
1facf9fc 7820+
7821+ npositive = 0;
4a4d8108 7822+ parent = dget_parent(dentry);
1facf9fc 7823+ btail = au_dbtaildir(parent);
5afbbe0d 7824+ for (bindex = btop; bindex <= btail; bindex++) {
1facf9fc 7825+ struct dentry *h_parent, *h_dentry;
7826+ struct inode *h_inode, *h_dir;
7827+
7828+ h_dentry = au_h_dptr(dentry, bindex);
7829+ if (h_dentry) {
5527c038 7830+ if (d_is_positive(h_dentry))
1facf9fc 7831+ npositive++;
5afbbe0d 7832+ break;
1facf9fc 7833+ }
7834+ h_parent = au_h_dptr(parent, bindex);
2000de60 7835+ if (!h_parent || !d_is_dir(h_parent))
1facf9fc 7836+ continue;
7837+
5527c038 7838+ h_dir = d_inode(h_parent);
febd17d6 7839+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
1facf9fc 7840+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
7841+ &args);
febd17d6 7842+ inode_unlock(h_dir);
1facf9fc 7843+ err = PTR_ERR(h_dentry);
7844+ if (IS_ERR(h_dentry))
4a4d8108 7845+ goto out_parent;
2000de60
JR
7846+ if (h_dentry)
7847+ au_fclr_lkup(args.flags, ALLOW_NEG);
076b876e
AM
7848+ if (dirperm1)
7849+ au_fset_lkup(args.flags, IGNORE_PERM);
1facf9fc 7850+
79b8bda9 7851+ if (au_dbwh(dentry) == bindex)
1facf9fc 7852+ break;
7853+ if (!h_dentry)
7854+ continue;
5527c038 7855+ if (d_is_negative(h_dentry))
1facf9fc 7856+ continue;
5527c038 7857+ h_inode = d_inode(h_dentry);
1facf9fc 7858+ npositive++;
7859+ if (!args.type)
7860+ args.type = h_inode->i_mode & S_IFMT;
7861+ if (args.type != S_IFDIR)
7862+ break;
7863+ else if (isdir) {
7864+ /* the type of lower may be different */
7865+ bdiropq = au_dbdiropq(dentry);
7866+ if (bdiropq >= 0 && bdiropq <= bindex)
7867+ break;
7868+ }
7869+ }
7870+
7871+ if (npositive) {
7872+ AuLabel(positive);
5afbbe0d 7873+ au_update_dbtop(dentry);
1facf9fc 7874+ }
7875+ err = npositive;
076b876e 7876+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
5afbbe0d 7877+ && au_dbtop(dentry) < 0)) {
1facf9fc 7878+ err = -EIO;
523b37e3
AM
7879+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
7880+ dentry, err);
027c5e7a 7881+ }
1facf9fc 7882+
4f0767ce 7883+out_parent:
4a4d8108 7884+ dput(parent);
f0c0a007 7885+ au_delayed_kfree(whname.name);
4f0767ce 7886+out:
1facf9fc 7887+ return err;
7888+}
7889+
076b876e 7890+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
1facf9fc 7891+{
7892+ struct dentry *dentry;
7893+ int wkq_err;
7894+
5527c038 7895+ if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
b4510431 7896+ dentry = vfsub_lkup_one(name, parent);
1facf9fc 7897+ else {
b4510431
AM
7898+ struct vfsub_lkup_one_args args = {
7899+ .errp = &dentry,
7900+ .name = name,
7901+ .parent = parent
1facf9fc 7902+ };
7903+
b4510431 7904+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
1facf9fc 7905+ if (unlikely(wkq_err))
7906+ dentry = ERR_PTR(wkq_err);
7907+ }
7908+
7909+ return dentry;
7910+}
7911+
7912+/*
7913+ * lookup @dentry on @bindex which should be negative.
7914+ */
86dc4139 7915+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
1facf9fc 7916+{
7917+ int err;
7918+ struct dentry *parent, *h_parent, *h_dentry;
86dc4139 7919+ struct au_branch *br;
1facf9fc 7920+
1facf9fc 7921+ parent = dget_parent(dentry);
7922+ h_parent = au_h_dptr(parent, bindex);
86dc4139
AM
7923+ br = au_sbr(dentry->d_sb, bindex);
7924+ if (wh)
7925+ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
7926+ else
076b876e 7927+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
1facf9fc 7928+ err = PTR_ERR(h_dentry);
7929+ if (IS_ERR(h_dentry))
7930+ goto out;
5527c038 7931+ if (unlikely(d_is_positive(h_dentry))) {
1facf9fc 7932+ err = -EIO;
523b37e3 7933+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
1facf9fc 7934+ dput(h_dentry);
7935+ goto out;
7936+ }
7937+
4a4d8108 7938+ err = 0;
5afbbe0d
AM
7939+ if (bindex < au_dbtop(dentry))
7940+ au_set_dbtop(dentry, bindex);
7941+ if (au_dbbot(dentry) < bindex)
7942+ au_set_dbbot(dentry, bindex);
1facf9fc 7943+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 7944+
4f0767ce 7945+out:
1facf9fc 7946+ dput(parent);
7947+ return err;
7948+}
7949+
7950+/* ---------------------------------------------------------------------- */
7951+
7952+/* subset of struct inode */
7953+struct au_iattr {
7954+ unsigned long i_ino;
7955+ /* unsigned int i_nlink; */
0c3ec466
AM
7956+ kuid_t i_uid;
7957+ kgid_t i_gid;
1facf9fc 7958+ u64 i_version;
7959+/*
7960+ loff_t i_size;
7961+ blkcnt_t i_blocks;
7962+*/
7963+ umode_t i_mode;
7964+};
7965+
7966+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
7967+{
7968+ ia->i_ino = h_inode->i_ino;
7969+ /* ia->i_nlink = h_inode->i_nlink; */
7970+ ia->i_uid = h_inode->i_uid;
7971+ ia->i_gid = h_inode->i_gid;
7972+ ia->i_version = h_inode->i_version;
7973+/*
7974+ ia->i_size = h_inode->i_size;
7975+ ia->i_blocks = h_inode->i_blocks;
7976+*/
7977+ ia->i_mode = (h_inode->i_mode & S_IFMT);
7978+}
7979+
7980+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
7981+{
7982+ return ia->i_ino != h_inode->i_ino
7983+ /* || ia->i_nlink != h_inode->i_nlink */
0c3ec466 7984+ || !uid_eq(ia->i_uid, h_inode->i_uid)
2dfbb274 7985+ || !gid_eq(ia->i_gid, h_inode->i_gid)
1facf9fc 7986+ || ia->i_version != h_inode->i_version
7987+/*
7988+ || ia->i_size != h_inode->i_size
7989+ || ia->i_blocks != h_inode->i_blocks
7990+*/
7991+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
7992+}
7993+
7994+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
7995+ struct au_branch *br)
7996+{
7997+ int err;
7998+ struct au_iattr ia;
7999+ struct inode *h_inode;
8000+ struct dentry *h_d;
8001+ struct super_block *h_sb;
8002+
8003+ err = 0;
8004+ memset(&ia, -1, sizeof(ia));
8005+ h_sb = h_dentry->d_sb;
5527c038
JR
8006+ h_inode = NULL;
8007+ if (d_is_positive(h_dentry)) {
8008+ h_inode = d_inode(h_dentry);
1facf9fc 8009+ au_iattr_save(&ia, h_inode);
5527c038 8010+ } else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
1facf9fc 8011+ /* nfs d_revalidate may return 0 for negative dentry */
8012+ /* fuse d_revalidate always return 0 for negative dentry */
8013+ goto out;
8014+
8015+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
b4510431 8016+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
1facf9fc 8017+ err = PTR_ERR(h_d);
8018+ if (IS_ERR(h_d))
8019+ goto out;
8020+
8021+ err = 0;
8022+ if (unlikely(h_d != h_dentry
5527c038 8023+ || d_inode(h_d) != h_inode
1facf9fc 8024+ || (h_inode && au_iattr_test(&ia, h_inode))))
8025+ err = au_busy_or_stale();
8026+ dput(h_d);
8027+
4f0767ce 8028+out:
1facf9fc 8029+ AuTraceErr(err);
8030+ return err;
8031+}
8032+
8033+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8034+ struct dentry *h_parent, struct au_branch *br)
8035+{
8036+ int err;
8037+
8038+ err = 0;
027c5e7a
AM
8039+ if (udba == AuOpt_UDBA_REVAL
8040+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 8041+ IMustLock(h_dir);
5527c038 8042+ err = (d_inode(h_dentry->d_parent) != h_dir);
027c5e7a 8043+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 8044+ err = au_h_verify_dentry(h_dentry, h_parent, br);
8045+
8046+ return err;
8047+}
8048+
8049+/* ---------------------------------------------------------------------- */
8050+
027c5e7a 8051+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 8052+{
027c5e7a 8053+ int err;
5afbbe0d 8054+ aufs_bindex_t new_bindex, bindex, bbot, bwh, bdiropq;
027c5e7a
AM
8055+ struct au_hdentry tmp, *p, *q;
8056+ struct au_dinfo *dinfo;
8057+ struct super_block *sb;
1facf9fc 8058+
027c5e7a 8059+ DiMustWriteLock(dentry);
1308ab2a 8060+
027c5e7a
AM
8061+ sb = dentry->d_sb;
8062+ dinfo = au_di(dentry);
5afbbe0d 8063+ bbot = dinfo->di_bbot;
1facf9fc 8064+ bwh = dinfo->di_bwh;
8065+ bdiropq = dinfo->di_bdiropq;
5afbbe0d
AM
8066+ bindex = dinfo->di_btop;
8067+ p = au_hdentry(dinfo, bindex);
8068+ for (; bindex <= bbot; bindex++, p++) {
027c5e7a 8069+ if (!p->hd_dentry)
1facf9fc 8070+ continue;
8071+
027c5e7a
AM
8072+ new_bindex = au_br_index(sb, p->hd_id);
8073+ if (new_bindex == bindex)
1facf9fc 8074+ continue;
1facf9fc 8075+
1facf9fc 8076+ if (dinfo->di_bwh == bindex)
8077+ bwh = new_bindex;
8078+ if (dinfo->di_bdiropq == bindex)
8079+ bdiropq = new_bindex;
8080+ if (new_bindex < 0) {
8081+ au_hdput(p);
8082+ p->hd_dentry = NULL;
8083+ continue;
8084+ }
8085+
8086+ /* swap two lower dentries, and loop again */
5afbbe0d 8087+ q = au_hdentry(dinfo, new_bindex);
1facf9fc 8088+ tmp = *q;
8089+ *q = *p;
8090+ *p = tmp;
8091+ if (tmp.hd_dentry) {
8092+ bindex--;
8093+ p--;
8094+ }
8095+ }
8096+
1facf9fc 8097+ dinfo->di_bwh = -1;
5afbbe0d 8098+ if (bwh >= 0 && bwh <= au_sbbot(sb) && au_sbr_whable(sb, bwh))
1facf9fc 8099+ dinfo->di_bwh = bwh;
8100+
8101+ dinfo->di_bdiropq = -1;
8102+ if (bdiropq >= 0
5afbbe0d 8103+ && bdiropq <= au_sbbot(sb)
1facf9fc 8104+ && au_sbr_whable(sb, bdiropq))
8105+ dinfo->di_bdiropq = bdiropq;
8106+
027c5e7a 8107+ err = -EIO;
5afbbe0d
AM
8108+ dinfo->di_btop = -1;
8109+ dinfo->di_bbot = -1;
8110+ bbot = au_dbbot(parent);
8111+ bindex = 0;
8112+ p = au_hdentry(dinfo, bindex);
8113+ for (; bindex <= bbot; bindex++, p++)
1facf9fc 8114+ if (p->hd_dentry) {
5afbbe0d 8115+ dinfo->di_btop = bindex;
1facf9fc 8116+ break;
8117+ }
8118+
5afbbe0d
AM
8119+ if (dinfo->di_btop >= 0) {
8120+ bindex = bbot;
8121+ p = au_hdentry(dinfo, bindex);
8122+ for (; bindex >= 0; bindex--, p--)
027c5e7a 8123+ if (p->hd_dentry) {
5afbbe0d 8124+ dinfo->di_bbot = bindex;
027c5e7a
AM
8125+ err = 0;
8126+ break;
8127+ }
8128+ }
8129+
8130+ return err;
1facf9fc 8131+}
8132+
027c5e7a 8133+static void au_do_hide(struct dentry *dentry)
1facf9fc 8134+{
027c5e7a 8135+ struct inode *inode;
1facf9fc 8136+
5527c038
JR
8137+ if (d_really_is_positive(dentry)) {
8138+ inode = d_inode(dentry);
8139+ if (!d_is_dir(dentry)) {
027c5e7a
AM
8140+ if (inode->i_nlink && !d_unhashed(dentry))
8141+ drop_nlink(inode);
8142+ } else {
8143+ clear_nlink(inode);
8144+ /* stop next lookup */
8145+ inode->i_flags |= S_DEAD;
8146+ }
8147+ smp_mb(); /* necessary? */
8148+ }
8149+ d_drop(dentry);
8150+}
1308ab2a 8151+
027c5e7a
AM
8152+static int au_hide_children(struct dentry *parent)
8153+{
8154+ int err, i, j, ndentry;
8155+ struct au_dcsub_pages dpages;
8156+ struct au_dpage *dpage;
8157+ struct dentry *dentry;
1facf9fc 8158+
027c5e7a 8159+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 8160+ if (unlikely(err))
8161+ goto out;
027c5e7a
AM
8162+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
8163+ if (unlikely(err))
8164+ goto out_dpages;
1facf9fc 8165+
027c5e7a
AM
8166+ /* in reverse order */
8167+ for (i = dpages.ndpage - 1; i >= 0; i--) {
8168+ dpage = dpages.dpages + i;
8169+ ndentry = dpage->ndentry;
8170+ for (j = ndentry - 1; j >= 0; j--) {
8171+ dentry = dpage->dentries[j];
8172+ if (dentry != parent)
8173+ au_do_hide(dentry);
8174+ }
8175+ }
1facf9fc 8176+
027c5e7a
AM
8177+out_dpages:
8178+ au_dpages_free(&dpages);
4f0767ce 8179+out:
027c5e7a 8180+ return err;
1facf9fc 8181+}
8182+
027c5e7a 8183+static void au_hide(struct dentry *dentry)
1facf9fc 8184+{
027c5e7a 8185+ int err;
1facf9fc 8186+
027c5e7a 8187+ AuDbgDentry(dentry);
2000de60 8188+ if (d_is_dir(dentry)) {
027c5e7a
AM
8189+ /* shrink_dcache_parent(dentry); */
8190+ err = au_hide_children(dentry);
8191+ if (unlikely(err))
523b37e3
AM
8192+ AuIOErr("%pd, failed hiding children, ignored %d\n",
8193+ dentry, err);
027c5e7a
AM
8194+ }
8195+ au_do_hide(dentry);
8196+}
1facf9fc 8197+
027c5e7a
AM
8198+/*
8199+ * By adding a dirty branch, a cached dentry may be affected in various ways.
8200+ *
8201+ * a dirty branch is added
8202+ * - on the top of layers
8203+ * - in the middle of layers
8204+ * - to the bottom of layers
8205+ *
8206+ * on the added branch there exists
8207+ * - a whiteout
8208+ * - a diropq
8209+ * - a same named entry
8210+ * + exist
8211+ * * negative --> positive
8212+ * * positive --> positive
8213+ * - type is unchanged
8214+ * - type is changed
8215+ * + doesn't exist
8216+ * * negative --> negative
8217+ * * positive --> negative (rejected by au_br_del() for non-dir case)
8218+ * - none
8219+ */
8220+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
8221+ struct au_dinfo *tmp)
8222+{
8223+ int err;
5afbbe0d 8224+ aufs_bindex_t bindex, bbot;
027c5e7a
AM
8225+ struct {
8226+ struct dentry *dentry;
8227+ struct inode *inode;
8228+ mode_t mode;
be52b249
AM
8229+ } orig_h, tmp_h = {
8230+ .dentry = NULL
8231+ };
027c5e7a
AM
8232+ struct au_hdentry *hd;
8233+ struct inode *inode, *h_inode;
8234+ struct dentry *h_dentry;
8235+
8236+ err = 0;
5afbbe0d 8237+ AuDebugOn(dinfo->di_btop < 0);
027c5e7a 8238+ orig_h.mode = 0;
5afbbe0d 8239+ orig_h.dentry = au_hdentry(dinfo, dinfo->di_btop)->hd_dentry;
5527c038
JR
8240+ orig_h.inode = NULL;
8241+ if (d_is_positive(orig_h.dentry)) {
8242+ orig_h.inode = d_inode(orig_h.dentry);
027c5e7a 8243+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
5527c038 8244+ }
5afbbe0d
AM
8245+ if (tmp->di_btop >= 0) {
8246+ tmp_h.dentry = au_hdentry(tmp, tmp->di_btop)->hd_dentry;
5527c038
JR
8247+ if (d_is_positive(tmp_h.dentry)) {
8248+ tmp_h.inode = d_inode(tmp_h.dentry);
027c5e7a 8249+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
5527c038 8250+ }
027c5e7a
AM
8251+ }
8252+
5527c038
JR
8253+ inode = NULL;
8254+ if (d_really_is_positive(dentry))
8255+ inode = d_inode(dentry);
027c5e7a
AM
8256+ if (!orig_h.inode) {
8257+ AuDbg("nagative originally\n");
8258+ if (inode) {
8259+ au_hide(dentry);
8260+ goto out;
8261+ }
8262+ AuDebugOn(inode);
5afbbe0d 8263+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
027c5e7a
AM
8264+ AuDebugOn(dinfo->di_bdiropq != -1);
8265+
8266+ if (!tmp_h.inode) {
8267+ AuDbg("negative --> negative\n");
8268+ /* should have only one negative lower */
5afbbe0d
AM
8269+ if (tmp->di_btop >= 0
8270+ && tmp->di_btop < dinfo->di_btop) {
8271+ AuDebugOn(tmp->di_btop != tmp->di_bbot);
8272+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
8273+ au_set_h_dptr(dentry, dinfo->di_btop, NULL);
027c5e7a 8274+ au_di_cp(dinfo, tmp);
5afbbe0d
AM
8275+ hd = au_hdentry(tmp, tmp->di_btop);
8276+ au_set_h_dptr(dentry, tmp->di_btop,
027c5e7a
AM
8277+ dget(hd->hd_dentry));
8278+ }
8279+ au_dbg_verify_dinode(dentry);
8280+ } else {
8281+ AuDbg("negative --> positive\n");
8282+ /*
8283+ * similar to the behaviour of creating with bypassing
8284+ * aufs.
8285+ * unhash it in order to force an error in the
8286+ * succeeding create operation.
8287+ * we should not set S_DEAD here.
8288+ */
8289+ d_drop(dentry);
8290+ /* au_di_swap(tmp, dinfo); */
8291+ au_dbg_verify_dinode(dentry);
8292+ }
8293+ } else {
8294+ AuDbg("positive originally\n");
8295+ /* inode may be NULL */
8296+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
8297+ if (!tmp_h.inode) {
8298+ AuDbg("positive --> negative\n");
8299+ /* or bypassing aufs */
8300+ au_hide(dentry);
5afbbe0d 8301+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_btop)
027c5e7a
AM
8302+ dinfo->di_bwh = tmp->di_bwh;
8303+ if (inode)
8304+ err = au_refresh_hinode_self(inode);
8305+ au_dbg_verify_dinode(dentry);
8306+ } else if (orig_h.mode == tmp_h.mode) {
8307+ AuDbg("positive --> positive, same type\n");
8308+ if (!S_ISDIR(orig_h.mode)
5afbbe0d 8309+ && dinfo->di_btop > tmp->di_btop) {
027c5e7a
AM
8310+ /*
8311+ * similar to the behaviour of removing and
8312+ * creating.
8313+ */
8314+ au_hide(dentry);
8315+ if (inode)
8316+ err = au_refresh_hinode_self(inode);
8317+ au_dbg_verify_dinode(dentry);
8318+ } else {
8319+ /* fill empty slots */
5afbbe0d
AM
8320+ if (dinfo->di_btop > tmp->di_btop)
8321+ dinfo->di_btop = tmp->di_btop;
8322+ if (dinfo->di_bbot < tmp->di_bbot)
8323+ dinfo->di_bbot = tmp->di_bbot;
027c5e7a
AM
8324+ dinfo->di_bwh = tmp->di_bwh;
8325+ dinfo->di_bdiropq = tmp->di_bdiropq;
5afbbe0d
AM
8326+ bbot = dinfo->di_bbot;
8327+ bindex = tmp->di_btop;
8328+ hd = au_hdentry(tmp, bindex);
8329+ for (; bindex <= bbot; bindex++, hd++) {
027c5e7a
AM
8330+ if (au_h_dptr(dentry, bindex))
8331+ continue;
5afbbe0d 8332+ h_dentry = hd->hd_dentry;
027c5e7a
AM
8333+ if (!h_dentry)
8334+ continue;
5527c038
JR
8335+ AuDebugOn(d_is_negative(h_dentry));
8336+ h_inode = d_inode(h_dentry);
027c5e7a
AM
8337+ AuDebugOn(orig_h.mode
8338+ != (h_inode->i_mode
8339+ & S_IFMT));
8340+ au_set_h_dptr(dentry, bindex,
8341+ dget(h_dentry));
8342+ }
5afbbe0d
AM
8343+ if (inode)
8344+ err = au_refresh_hinode(inode, dentry);
027c5e7a
AM
8345+ au_dbg_verify_dinode(dentry);
8346+ }
8347+ } else {
8348+ AuDbg("positive --> positive, different type\n");
8349+ /* similar to the behaviour of removing and creating */
8350+ au_hide(dentry);
8351+ if (inode)
8352+ err = au_refresh_hinode_self(inode);
8353+ au_dbg_verify_dinode(dentry);
8354+ }
8355+ }
8356+
8357+out:
8358+ return err;
8359+}
8360+
79b8bda9
AM
8361+void au_refresh_dop(struct dentry *dentry, int force_reval)
8362+{
8363+ const struct dentry_operations *dop
8364+ = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
8365+ static const unsigned int mask
8366+ = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
8367+
8368+ BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
8369+
8370+ if (dentry->d_op == dop)
8371+ return;
8372+
8373+ AuDbg("%pd\n", dentry);
8374+ spin_lock(&dentry->d_lock);
8375+ if (dop == &aufs_dop)
8376+ dentry->d_flags |= mask;
8377+ else
8378+ dentry->d_flags &= ~mask;
8379+ dentry->d_op = dop;
8380+ spin_unlock(&dentry->d_lock);
8381+}
8382+
027c5e7a
AM
8383+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
8384+{
e2f27e51 8385+ int err, ebrange, nbr;
027c5e7a
AM
8386+ unsigned int sigen;
8387+ struct au_dinfo *dinfo, *tmp;
8388+ struct super_block *sb;
8389+ struct inode *inode;
8390+
8391+ DiMustWriteLock(dentry);
8392+ AuDebugOn(IS_ROOT(dentry));
5527c038 8393+ AuDebugOn(d_really_is_negative(parent));
027c5e7a
AM
8394+
8395+ sb = dentry->d_sb;
027c5e7a
AM
8396+ sigen = au_sigen(sb);
8397+ err = au_digen_test(parent, sigen);
8398+ if (unlikely(err))
8399+ goto out;
8400+
e2f27e51 8401+ nbr = au_sbbot(sb) + 1;
027c5e7a 8402+ dinfo = au_di(dentry);
e2f27e51 8403+ err = au_di_realloc(dinfo, nbr, /*may_shrink*/0);
027c5e7a
AM
8404+ if (unlikely(err))
8405+ goto out;
8406+ ebrange = au_dbrange_test(dentry);
8407+ if (!ebrange)
8408+ ebrange = au_do_refresh_hdentry(dentry, parent);
8409+
38d290e6 8410+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
5afbbe0d 8411+ AuDebugOn(au_dbtop(dentry) < 0 && au_dbbot(dentry) >= 0);
5527c038
JR
8412+ if (d_really_is_positive(dentry)) {
8413+ inode = d_inode(dentry);
027c5e7a 8414+ err = au_refresh_hinode_self(inode);
5527c038 8415+ }
027c5e7a
AM
8416+ au_dbg_verify_dinode(dentry);
8417+ if (!err)
8418+ goto out_dgen; /* success */
8419+ goto out;
8420+ }
8421+
8422+ /* temporary dinfo */
8423+ AuDbgDentry(dentry);
8424+ err = -ENOMEM;
8425+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
8426+ if (unlikely(!tmp))
8427+ goto out;
8428+ au_di_swap(tmp, dinfo);
8429+ /* returns the number of positive dentries */
8430+ /*
8431+ * if current working dir is removed, it returns an error.
8432+ * but the dentry is legal.
8433+ */
5afbbe0d 8434+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
027c5e7a
AM
8435+ AuDbgDentry(dentry);
8436+ au_di_swap(tmp, dinfo);
8437+ if (err == -ENOENT)
8438+ err = 0;
8439+ if (err >= 0) {
8440+ /* compare/refresh by dinfo */
8441+ AuDbgDentry(dentry);
8442+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
8443+ au_dbg_verify_dinode(dentry);
8444+ AuTraceErr(err);
8445+ }
e2f27e51 8446+ au_di_realloc(dinfo, nbr, /*may_shrink*/1); /* harmless if err */
027c5e7a
AM
8447+ au_rw_write_unlock(&tmp->di_rwsem);
8448+ au_di_free(tmp);
8449+ if (unlikely(err))
8450+ goto out;
8451+
8452+out_dgen:
8453+ au_update_digen(dentry);
8454+out:
8455+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
523b37e3 8456+ AuIOErr("failed refreshing %pd, %d\n", dentry, err);
027c5e7a
AM
8457+ AuDbgDentry(dentry);
8458+ }
8459+ AuTraceErr(err);
8460+ return err;
8461+}
8462+
b4510431
AM
8463+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
8464+ struct dentry *dentry, aufs_bindex_t bindex)
027c5e7a
AM
8465+{
8466+ int err, valid;
027c5e7a
AM
8467+
8468+ err = 0;
8469+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
8470+ goto out;
027c5e7a
AM
8471+
8472+ AuDbg("b%d\n", bindex);
b4510431
AM
8473+ /*
8474+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
8475+ * due to whiteout and branch permission.
8476+ */
8477+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
8478+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
8479+ /* it may return tri-state */
8480+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
1facf9fc 8481+
8482+ if (unlikely(valid < 0))
8483+ err = valid;
8484+ else if (!valid)
8485+ err = -EINVAL;
8486+
4f0767ce 8487+out:
1facf9fc 8488+ AuTraceErr(err);
8489+ return err;
8490+}
8491+
8492+/* todo: remove this */
8493+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
b4510431 8494+ unsigned int flags, int do_udba)
1facf9fc 8495+{
8496+ int err;
8497+ umode_t mode, h_mode;
5afbbe0d 8498+ aufs_bindex_t bindex, btail, btop, ibs, ibe;
38d290e6 8499+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
4a4d8108 8500+ struct inode *h_inode, *h_cached_inode;
1facf9fc 8501+ struct dentry *h_dentry;
8502+ struct qstr *name, *h_name;
8503+
8504+ err = 0;
8505+ plus = 0;
8506+ mode = 0;
1facf9fc 8507+ ibs = -1;
8508+ ibe = -1;
8509+ unhashed = !!d_unhashed(dentry);
8510+ is_root = !!IS_ROOT(dentry);
8511+ name = &dentry->d_name;
38d290e6 8512+ tmpfile = au_di(dentry)->di_tmpfile;
1facf9fc 8513+
8514+ /*
7f207e10
AM
8515+ * Theoretically, REVAL test should be unnecessary in case of
8516+ * {FS,I}NOTIFY.
8517+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 8518+ * IN_ATTRIB for atime/nlink/pageio
1facf9fc 8519+ * Let's do REVAL test too.
8520+ */
8521+ if (do_udba && inode) {
8522+ mode = (inode->i_mode & S_IFMT);
8523+ plus = (inode->i_nlink > 0);
5afbbe0d
AM
8524+ ibs = au_ibtop(inode);
8525+ ibe = au_ibbot(inode);
1facf9fc 8526+ }
8527+
5afbbe0d
AM
8528+ btop = au_dbtop(dentry);
8529+ btail = btop;
1facf9fc 8530+ if (inode && S_ISDIR(inode->i_mode))
8531+ btail = au_dbtaildir(dentry);
5afbbe0d 8532+ for (bindex = btop; bindex <= btail; bindex++) {
1facf9fc 8533+ h_dentry = au_h_dptr(dentry, bindex);
8534+ if (!h_dentry)
8535+ continue;
8536+
523b37e3
AM
8537+ AuDbg("b%d, %pd\n", bindex, h_dentry);
8538+ h_nfs = !!au_test_nfs(h_dentry->d_sb);
027c5e7a 8539+ spin_lock(&h_dentry->d_lock);
1facf9fc 8540+ h_name = &h_dentry->d_name;
8541+ if (unlikely(do_udba
8542+ && !is_root
523b37e3
AM
8543+ && ((!h_nfs
8544+ && (unhashed != !!d_unhashed(h_dentry)
38d290e6
JR
8545+ || (!tmpfile
8546+ && !au_qstreq(name, h_name))
8547+ ))
523b37e3
AM
8548+ || (h_nfs
8549+ && !(flags & LOOKUP_OPEN)
8550+ && (h_dentry->d_flags
8551+ & DCACHE_NFSFS_RENAMED)))
1facf9fc 8552+ )) {
38d290e6
JR
8553+ int h_unhashed;
8554+
8555+ h_unhashed = d_unhashed(h_dentry);
027c5e7a 8556+ spin_unlock(&h_dentry->d_lock);
38d290e6
JR
8557+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
8558+ unhashed, h_unhashed, dentry, h_dentry);
1facf9fc 8559+ goto err;
8560+ }
027c5e7a 8561+ spin_unlock(&h_dentry->d_lock);
1facf9fc 8562+
b4510431 8563+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
1facf9fc 8564+ if (unlikely(err))
8565+ /* do not goto err, to keep the errno */
8566+ break;
8567+
8568+ /* todo: plink too? */
8569+ if (!do_udba)
8570+ continue;
8571+
8572+ /* UDBA tests */
5527c038 8573+ if (unlikely(!!inode != d_is_positive(h_dentry)))
1facf9fc 8574+ goto err;
8575+
5527c038
JR
8576+ h_inode = NULL;
8577+ if (d_is_positive(h_dentry))
8578+ h_inode = d_inode(h_dentry);
1facf9fc 8579+ h_plus = plus;
8580+ h_mode = mode;
8581+ h_cached_inode = h_inode;
8582+ if (h_inode) {
8583+ h_mode = (h_inode->i_mode & S_IFMT);
8584+ h_plus = (h_inode->i_nlink > 0);
8585+ }
8586+ if (inode && ibs <= bindex && bindex <= ibe)
8587+ h_cached_inode = au_h_iptr(inode, bindex);
8588+
523b37e3 8589+ if (!h_nfs) {
38d290e6 8590+ if (unlikely(plus != h_plus && !tmpfile))
523b37e3
AM
8591+ goto err;
8592+ } else {
8593+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
8594+ && !is_root
8595+ && !IS_ROOT(h_dentry)
8596+ && unhashed != d_unhashed(h_dentry)))
8597+ goto err;
8598+ }
8599+ if (unlikely(mode != h_mode
1facf9fc 8600+ || h_cached_inode != h_inode))
8601+ goto err;
8602+ continue;
8603+
f6b6e03d 8604+err:
1facf9fc 8605+ err = -EINVAL;
8606+ break;
8607+ }
8608+
523b37e3 8609+ AuTraceErr(err);
1facf9fc 8610+ return err;
8611+}
8612+
027c5e7a 8613+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 8614+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
8615+{
8616+ int err;
8617+ struct dentry *parent;
1facf9fc 8618+
027c5e7a 8619+ if (!au_digen_test(dentry, sigen))
1facf9fc 8620+ return 0;
8621+
8622+ parent = dget_parent(dentry);
8623+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8624+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 8625+ au_dbg_verify_gen(parent, sigen);
027c5e7a 8626+ err = au_refresh_dentry(dentry, parent);
1facf9fc 8627+ di_read_unlock(parent, AuLock_IR);
8628+ dput(parent);
027c5e7a 8629+ AuTraceErr(err);
1facf9fc 8630+ return err;
8631+}
8632+
8633+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
8634+{
8635+ int err;
8636+ struct dentry *d, *parent;
1facf9fc 8637+
027c5e7a 8638+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 8639+ return simple_reval_dpath(dentry, sigen);
8640+
8641+ /* slow loop, keep it simple and stupid */
8642+ /* cf: au_cpup_dirs() */
8643+ err = 0;
8644+ parent = NULL;
027c5e7a 8645+ while (au_digen_test(dentry, sigen)) {
1facf9fc 8646+ d = dentry;
8647+ while (1) {
8648+ dput(parent);
8649+ parent = dget_parent(d);
027c5e7a 8650+ if (!au_digen_test(parent, sigen))
1facf9fc 8651+ break;
8652+ d = parent;
8653+ }
8654+
1facf9fc 8655+ if (d != dentry)
027c5e7a 8656+ di_write_lock_child2(d);
1facf9fc 8657+
8658+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
8659+ if (au_digen_test(d, sigen)) {
8660+ /*
8661+ * todo: consolidate with simple_reval_dpath(),
8662+ * do_refresh() and au_reval_for_attr().
8663+ */
1facf9fc 8664+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8665+ err = au_refresh_dentry(d, parent);
1facf9fc 8666+ di_read_unlock(parent, AuLock_IR);
8667+ }
8668+
8669+ if (d != dentry)
8670+ di_write_unlock(d);
8671+ dput(parent);
8672+ if (unlikely(err))
8673+ break;
8674+ }
8675+
8676+ return err;
8677+}
8678+
8679+/*
8680+ * if valid returns 1, otherwise 0.
8681+ */
b4510431 8682+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
1facf9fc 8683+{
8684+ int valid, err;
8685+ unsigned int sigen;
8686+ unsigned char do_udba;
8687+ struct super_block *sb;
8688+ struct inode *inode;
8689+
027c5e7a 8690+ /* todo: support rcu-walk? */
b4510431 8691+ if (flags & LOOKUP_RCU)
027c5e7a
AM
8692+ return -ECHILD;
8693+
8694+ valid = 0;
8695+ if (unlikely(!au_di(dentry)))
8696+ goto out;
8697+
e49829fe 8698+ valid = 1;
1facf9fc 8699+ sb = dentry->d_sb;
e49829fe
JR
8700+ /*
8701+ * todo: very ugly
8702+ * i_mutex of parent dir may be held,
8703+ * but we should not return 'invalid' due to busy.
8704+ */
8705+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
8706+ if (unlikely(err)) {
8707+ valid = err;
027c5e7a 8708+ AuTraceErr(err);
e49829fe
JR
8709+ goto out;
8710+ }
5527c038
JR
8711+ inode = NULL;
8712+ if (d_really_is_positive(dentry))
8713+ inode = d_inode(dentry);
5afbbe0d 8714+ if (unlikely(inode && au_is_bad_inode(inode))) {
c1595e42
JR
8715+ err = -EINVAL;
8716+ AuTraceErr(err);
8717+ goto out_dgrade;
8718+ }
027c5e7a
AM
8719+ if (unlikely(au_dbrange_test(dentry))) {
8720+ err = -EINVAL;
8721+ AuTraceErr(err);
8722+ goto out_dgrade;
1facf9fc 8723+ }
027c5e7a
AM
8724+
8725+ sigen = au_sigen(sb);
8726+ if (au_digen_test(dentry, sigen)) {
1facf9fc 8727+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
8728+ err = au_reval_dpath(dentry, sigen);
8729+ if (unlikely(err)) {
8730+ AuTraceErr(err);
1facf9fc 8731+ goto out_dgrade;
027c5e7a 8732+ }
1facf9fc 8733+ }
8734+ di_downgrade_lock(dentry, AuLock_IR);
8735+
1facf9fc 8736+ err = -EINVAL;
c1595e42 8737+ if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
523b37e3 8738+ && inode
38d290e6 8739+ && !(inode->i_state && I_LINKABLE)
79b8bda9
AM
8740+ && (IS_DEADDIR(inode) || !inode->i_nlink)) {
8741+ AuTraceErr(err);
027c5e7a 8742+ goto out_inval;
79b8bda9 8743+ }
027c5e7a 8744+
1facf9fc 8745+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
8746+ if (do_udba && inode) {
5afbbe0d 8747+ aufs_bindex_t btop = au_ibtop(inode);
027c5e7a 8748+ struct inode *h_inode;
1facf9fc 8749+
5afbbe0d
AM
8750+ if (btop >= 0) {
8751+ h_inode = au_h_iptr(inode, btop);
79b8bda9
AM
8752+ if (h_inode && au_test_higen(inode, h_inode)) {
8753+ AuTraceErr(err);
027c5e7a 8754+ goto out_inval;
79b8bda9 8755+ }
027c5e7a 8756+ }
1facf9fc 8757+ }
8758+
b4510431 8759+ err = h_d_revalidate(dentry, inode, flags, do_udba);
5afbbe0d 8760+ if (unlikely(!err && do_udba && au_dbtop(dentry) < 0)) {
1facf9fc 8761+ err = -EIO;
523b37e3
AM
8762+ AuDbg("both of real entry and whiteout found, %p, err %d\n",
8763+ dentry, err);
027c5e7a 8764+ }
e49829fe 8765+ goto out_inval;
1facf9fc 8766+
4f0767ce 8767+out_dgrade:
1facf9fc 8768+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 8769+out_inval:
1facf9fc 8770+ aufs_read_unlock(dentry, AuLock_IR);
8771+ AuTraceErr(err);
8772+ valid = !err;
e49829fe 8773+out:
027c5e7a 8774+ if (!valid) {
523b37e3 8775+ AuDbg("%pd invalid, %d\n", dentry, valid);
027c5e7a
AM
8776+ d_drop(dentry);
8777+ }
1facf9fc 8778+ return valid;
8779+}
8780+
/*
 * ->d_release(): tear down the aufs private data of @dentry, if any.
 */
static void aufs_d_release(struct dentry *dentry)
{
	/* nothing attached, nothing to free */
	if (!au_di(dentry))
		return;

	au_di_fin(dentry);
	au_hn_di_reinit(dentry);
}
8788+
4a4d8108 8789+const struct dentry_operations aufs_dop = {
c06a8ce3
AM
8790+ .d_revalidate = aufs_d_revalidate,
8791+ .d_weak_revalidate = aufs_d_revalidate,
8792+ .d_release = aufs_d_release
1facf9fc 8793+};
79b8bda9
AM
8794+
8795+/* aufs_dop without d_revalidate */
8796+const struct dentry_operations aufs_dop_noreval = {
8797+ .d_release = aufs_d_release
8798+};
7f207e10
AM
8799diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
8800--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 8801+++ linux/fs/aufs/dentry.h 2016-10-09 16:55:38.889431135 +0200
f0c0a007 8802@@ -0,0 +1,255 @@
1facf9fc 8803+/*
8cdd5066 8804+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 8805+ *
8806+ * This program, aufs is free software; you can redistribute it and/or modify
8807+ * it under the terms of the GNU General Public License as published by
8808+ * the Free Software Foundation; either version 2 of the License, or
8809+ * (at your option) any later version.
dece6358
AM
8810+ *
8811+ * This program is distributed in the hope that it will be useful,
8812+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8813+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8814+ * GNU General Public License for more details.
8815+ *
8816+ * You should have received a copy of the GNU General Public License
523b37e3 8817+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8818+ */
8819+
8820+/*
8821+ * lookup and dentry operations
8822+ */
8823+
8824+#ifndef __AUFS_DENTRY_H__
8825+#define __AUFS_DENTRY_H__
8826+
8827+#ifdef __KERNEL__
8828+
dece6358 8829+#include <linux/dcache.h>
1facf9fc 8830+#include "rwsem.h"
8831+
1facf9fc 8832+struct au_hdentry {
8833+ struct dentry *hd_dentry;
027c5e7a 8834+ aufs_bindex_t hd_id;
1facf9fc 8835+};
8836+
8837+struct au_dinfo {
8838+ atomic_t di_generation;
8839+
dece6358 8840+ struct au_rwsem di_rwsem;
5afbbe0d 8841+ aufs_bindex_t di_btop, di_bbot, di_bwh, di_bdiropq;
38d290e6 8842+ unsigned char di_tmpfile; /* to allow the different name */
f0c0a007
AM
8843+ union {
8844+ struct au_hdentry *di_hdentry;
8845+ struct llist_node di_lnode; /* delayed free */
8846+ };
4a4d8108 8847+} ____cacheline_aligned_in_smp;
1facf9fc 8848+
8849+/* ---------------------------------------------------------------------- */
8850+
5afbbe0d
AM
/* flag bits for au_lkup_dentry(), and helpers to test, set and clear them */
#define AuLkup_ALLOW_NEG	(1 << 0)
#define AuLkup_IGNORE_PERM	(1 << 1)
#define au_ftest_lkup(flags, name)	((flags) & AuLkup_##name)
#define au_fset_lkup(flags, name) \
	do { \
		(flags) |= AuLkup_##name; \
	} while (0)
#define au_fclr_lkup(flags, name) \
	do { \
		(flags) &= ~AuLkup_##name; \
	} while (0)
8859+
8860+/* ---------------------------------------------------------------------- */
8861+
1facf9fc 8862+/* dentry.c */
79b8bda9 8863+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
1facf9fc 8864+struct au_branch;
076b876e 8865+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
1facf9fc 8866+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8867+ struct dentry *h_parent, struct au_branch *br);
8868+
5afbbe0d
AM
8869+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
8870+ unsigned int flags);
86dc4139 8871+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
027c5e7a 8872+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 8873+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
79b8bda9 8874+void au_refresh_dop(struct dentry *dentry, int force_reval);
1facf9fc 8875+
8876+/* dinfo.c */
4a4d8108 8877+void au_di_init_once(void *_di);
027c5e7a
AM
8878+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
8879+void au_di_free(struct au_dinfo *dinfo);
8880+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
8881+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
8882+int au_di_init(struct dentry *dentry);
8883+void au_di_fin(struct dentry *dentry);
e2f27e51 8884+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink);
1facf9fc 8885+
8886+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
8887+void di_read_unlock(struct dentry *d, int flags);
8888+void di_downgrade_lock(struct dentry *d, int flags);
8889+void di_write_lock(struct dentry *d, unsigned int lsc);
8890+void di_write_unlock(struct dentry *d);
8891+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
8892+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
8893+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
8894+
8895+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 8896+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 8897+aufs_bindex_t au_dbtail(struct dentry *dentry);
8898+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
8899+
8900+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
8901+ struct dentry *h_dentry);
027c5e7a
AM
8902+int au_digen_test(struct dentry *dentry, unsigned int sigen);
8903+int au_dbrange_test(struct dentry *dentry);
1facf9fc 8904+void au_update_digen(struct dentry *dentry);
8905+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
5afbbe0d
AM
8906+void au_update_dbtop(struct dentry *dentry);
8907+void au_update_dbbot(struct dentry *dentry);
1facf9fc 8908+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
8909+
8910+/* ---------------------------------------------------------------------- */
8911+
8912+static inline struct au_dinfo *au_di(struct dentry *dentry)
8913+{
8914+ return dentry->d_fsdata;
8915+}
8916+
8917+/* ---------------------------------------------------------------------- */
8918+
/*
 * lock subclasses for dinfo, passed as 'lsc' to di_read_lock() and
 * di_write_lock()
 */
enum {
	/* child first */
	AuLsc_DI_CHILD,
	/* rename(2), link(2), and cpup at hnotify */
	AuLsc_DI_CHILD2,
	/* copyup dirs */
	AuLsc_DI_CHILD3,
	AuLsc_DI_PARENT,
	AuLsc_DI_PARENT2,
	AuLsc_DI_PARENT3,
	/* temp for replacing dinfo */
	AuLsc_DI_TMP
};
8929+
8930+/*
8931+ * di_read_lock_child, di_write_lock_child,
8932+ * di_read_lock_child2, di_write_lock_child2,
8933+ * di_read_lock_child3, di_write_lock_child3,
8934+ * di_read_lock_parent, di_write_lock_parent,
8935+ * di_read_lock_parent2, di_write_lock_parent2,
8936+ * di_read_lock_parent3, di_write_lock_parent3,
8937+ */
8938+#define AuReadLockFunc(name, lsc) \
8939+static inline void di_read_lock_##name(struct dentry *d, int flags) \
8940+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
8941+
8942+#define AuWriteLockFunc(name, lsc) \
8943+static inline void di_write_lock_##name(struct dentry *d) \
8944+{ di_write_lock(d, AuLsc_DI_##lsc); }
8945+
8946+#define AuRWLockFuncs(name, lsc) \
8947+ AuReadLockFunc(name, lsc) \
8948+ AuWriteLockFunc(name, lsc)
8949+
8950+AuRWLockFuncs(child, CHILD);
8951+AuRWLockFuncs(child2, CHILD2);
8952+AuRWLockFuncs(child3, CHILD3);
8953+AuRWLockFuncs(parent, PARENT);
8954+AuRWLockFuncs(parent2, PARENT2);
8955+AuRWLockFuncs(parent3, PARENT3);
8956+
8957+#undef AuReadLockFunc
8958+#undef AuWriteLockFunc
8959+#undef AuRWLockFuncs
8960+
/* apply the AuRwMust*() checks to the di_rwsem of dentry 'd' */
#define DiMustNoWaiters(d) \
	AuRwMustNoWaiters(&au_di(d)->di_rwsem)
#define DiMustAnyLock(d) \
	AuRwMustAnyLock(&au_di(d)->di_rwsem)
#define DiMustWriteLock(d) \
	AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 8964+
8965+/* ---------------------------------------------------------------------- */
8966+
8967+/* todo: memory barrier? */
8968+static inline unsigned int au_digen(struct dentry *d)
8969+{
8970+ return atomic_read(&au_di(d)->di_generation);
8971+}
8972+
8973+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
8974+{
8975+ hdentry->hd_dentry = NULL;
8976+}
8977+
5afbbe0d
AM
8978+static inline struct au_hdentry *au_hdentry(struct au_dinfo *di,
8979+ aufs_bindex_t bindex)
8980+{
8981+ return di->di_hdentry + bindex;
8982+}
8983+
1facf9fc 8984+static inline void au_hdput(struct au_hdentry *hd)
8985+{
4a4d8108
AM
8986+ if (hd)
8987+ dput(hd->hd_dentry);
1facf9fc 8988+}
8989+
5afbbe0d 8990+static inline aufs_bindex_t au_dbtop(struct dentry *dentry)
1facf9fc 8991+{
1308ab2a 8992+ DiMustAnyLock(dentry);
5afbbe0d 8993+ return au_di(dentry)->di_btop;
1facf9fc 8994+}
8995+
5afbbe0d 8996+static inline aufs_bindex_t au_dbbot(struct dentry *dentry)
1facf9fc 8997+{
1308ab2a 8998+ DiMustAnyLock(dentry);
5afbbe0d 8999+ return au_di(dentry)->di_bbot;
1facf9fc 9000+}
9001+
9002+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
9003+{
1308ab2a 9004+ DiMustAnyLock(dentry);
1facf9fc 9005+ return au_di(dentry)->di_bwh;
9006+}
9007+
9008+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
9009+{
1308ab2a 9010+ DiMustAnyLock(dentry);
1facf9fc 9011+ return au_di(dentry)->di_bdiropq;
9012+}
9013+
9014+/* todo: hard/soft set? */
5afbbe0d 9015+static inline void au_set_dbtop(struct dentry *dentry, aufs_bindex_t bindex)
1facf9fc 9016+{
1308ab2a 9017+ DiMustWriteLock(dentry);
5afbbe0d 9018+ au_di(dentry)->di_btop = bindex;
1facf9fc 9019+}
9020+
5afbbe0d 9021+static inline void au_set_dbbot(struct dentry *dentry, aufs_bindex_t bindex)
1facf9fc 9022+{
1308ab2a 9023+ DiMustWriteLock(dentry);
5afbbe0d 9024+ au_di(dentry)->di_bbot = bindex;
1facf9fc 9025+}
9026+
9027+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
9028+{
1308ab2a 9029+ DiMustWriteLock(dentry);
5afbbe0d 9030+ /* dbwh can be outside of btop - bbot range */
1facf9fc 9031+ au_di(dentry)->di_bwh = bindex;
9032+}
9033+
9034+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
9035+{
1308ab2a 9036+ DiMustWriteLock(dentry);
1facf9fc 9037+ au_di(dentry)->di_bdiropq = bindex;
9038+}
9039+
9040+/* ---------------------------------------------------------------------- */
9041+
4a4d8108 9042+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 9043+static inline void au_digen_dec(struct dentry *d)
9044+{
e49829fe 9045+ atomic_dec(&au_di(d)->di_generation);
1facf9fc 9046+}
9047+
4a4d8108 9048+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 9049+{
9050+ dentry->d_fsdata = NULL;
9051+}
9052+#else
4a4d8108
AM
9053+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
9054+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 9055+
9056+#endif /* __KERNEL__ */
9057+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
9058diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
9059--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
9060+++ linux/fs/aufs/dinfo.c 2016-10-09 16:55:38.889431135 +0200
9061@@ -0,0 +1,553 @@
1facf9fc 9062+/*
8cdd5066 9063+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 9064+ *
9065+ * This program, aufs is free software; you can redistribute it and/or modify
9066+ * it under the terms of the GNU General Public License as published by
9067+ * the Free Software Foundation; either version 2 of the License, or
9068+ * (at your option) any later version.
dece6358
AM
9069+ *
9070+ * This program is distributed in the hope that it will be useful,
9071+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9072+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9073+ * GNU General Public License for more details.
9074+ *
9075+ * You should have received a copy of the GNU General Public License
523b37e3 9076+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 9077+ */
9078+
9079+/*
9080+ * dentry private data
9081+ */
9082+
9083+#include "aufs.h"
9084+
e49829fe 9085+void au_di_init_once(void *_dinfo)
4a4d8108 9086+{
e49829fe 9087+ struct au_dinfo *dinfo = _dinfo;
4a4d8108 9088+
e49829fe 9089+ au_rw_init(&dinfo->di_rwsem);
4a4d8108
AM
9090+}
9091+
027c5e7a 9092+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 9093+{
9094+ struct au_dinfo *dinfo;
027c5e7a 9095+ int nbr, i;
1facf9fc 9096+
9097+ dinfo = au_cache_alloc_dinfo();
9098+ if (unlikely(!dinfo))
9099+ goto out;
9100+
5afbbe0d 9101+ nbr = au_sbbot(sb) + 1;
1facf9fc 9102+ if (nbr <= 0)
9103+ nbr = 1;
9104+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
9105+ if (dinfo->di_hdentry) {
9106+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
5afbbe0d
AM
9107+ dinfo->di_btop = -1;
9108+ dinfo->di_bbot = -1;
027c5e7a
AM
9109+ dinfo->di_bwh = -1;
9110+ dinfo->di_bdiropq = -1;
38d290e6 9111+ dinfo->di_tmpfile = 0;
027c5e7a
AM
9112+ for (i = 0; i < nbr; i++)
9113+ dinfo->di_hdentry[i].hd_id = -1;
9114+ goto out;
9115+ }
1facf9fc 9116+
f0c0a007 9117+ au_cache_dfree_dinfo(dinfo);
027c5e7a
AM
9118+ dinfo = NULL;
9119+
4f0767ce 9120+out:
027c5e7a 9121+ return dinfo;
1facf9fc 9122+}
9123+
027c5e7a 9124+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 9125+{
4a4d8108 9126+ struct au_hdentry *p;
5afbbe0d 9127+ aufs_bindex_t bbot, bindex;
4a4d8108
AM
9128+
9129+ /* dentry may not be revalidated */
5afbbe0d 9130+ bindex = dinfo->di_btop;
4a4d8108 9131+ if (bindex >= 0) {
5afbbe0d
AM
9132+ bbot = dinfo->di_bbot;
9133+ p = au_hdentry(dinfo, bindex);
9134+ while (bindex++ <= bbot)
4a4d8108
AM
9135+ au_hdput(p++);
9136+ }
f0c0a007
AM
9137+ au_delayed_kfree(dinfo->di_hdentry);
9138+ au_cache_dfree_dinfo(dinfo);
027c5e7a
AM
9139+}
9140+
9141+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
9142+{
9143+ struct au_hdentry *p;
9144+ aufs_bindex_t bi;
9145+
9146+ AuRwMustWriteLock(&a->di_rwsem);
9147+ AuRwMustWriteLock(&b->di_rwsem);
9148+
9149+#define DiSwap(v, name) \
9150+ do { \
9151+ v = a->di_##name; \
9152+ a->di_##name = b->di_##name; \
9153+ b->di_##name = v; \
9154+ } while (0)
9155+
9156+ DiSwap(p, hdentry);
5afbbe0d
AM
9157+ DiSwap(bi, btop);
9158+ DiSwap(bi, bbot);
027c5e7a
AM
9159+ DiSwap(bi, bwh);
9160+ DiSwap(bi, bdiropq);
9161+ /* smp_mb(); */
9162+
9163+#undef DiSwap
9164+}
9165+
9166+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
9167+{
9168+ AuRwMustWriteLock(&dst->di_rwsem);
9169+ AuRwMustWriteLock(&src->di_rwsem);
9170+
5afbbe0d
AM
9171+ dst->di_btop = src->di_btop;
9172+ dst->di_bbot = src->di_bbot;
027c5e7a
AM
9173+ dst->di_bwh = src->di_bwh;
9174+ dst->di_bdiropq = src->di_bdiropq;
9175+ /* smp_mb(); */
9176+}
9177+
9178+int au_di_init(struct dentry *dentry)
9179+{
9180+ int err;
9181+ struct super_block *sb;
9182+ struct au_dinfo *dinfo;
9183+
9184+ err = 0;
9185+ sb = dentry->d_sb;
9186+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
9187+ if (dinfo) {
9188+ atomic_set(&dinfo->di_generation, au_sigen(sb));
9189+ /* smp_mb(); */ /* atomic_set */
9190+ dentry->d_fsdata = dinfo;
9191+ } else
9192+ err = -ENOMEM;
9193+
9194+ return err;
9195+}
9196+
9197+void au_di_fin(struct dentry *dentry)
9198+{
9199+ struct au_dinfo *dinfo;
9200+
9201+ dinfo = au_di(dentry);
9202+ AuRwDestroy(&dinfo->di_rwsem);
9203+ au_di_free(dinfo);
4a4d8108
AM
9204+}
9205+
e2f27e51 9206+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink)
1facf9fc 9207+{
9208+ int err, sz;
9209+ struct au_hdentry *hdp;
9210+
1308ab2a 9211+ AuRwMustWriteLock(&dinfo->di_rwsem);
9212+
1facf9fc 9213+ err = -ENOMEM;
5afbbe0d 9214+ sz = sizeof(*hdp) * (dinfo->di_bbot + 1);
1facf9fc 9215+ if (!sz)
9216+ sz = sizeof(*hdp);
e2f27e51
AM
9217+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS,
9218+ may_shrink);
1facf9fc 9219+ if (hdp) {
9220+ dinfo->di_hdentry = hdp;
9221+ err = 0;
9222+ }
9223+
9224+ return err;
9225+}
9226+
9227+/* ---------------------------------------------------------------------- */
9228+
9229+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
9230+{
9231+ switch (lsc) {
9232+ case AuLsc_DI_CHILD:
9233+ ii_write_lock_child(inode);
9234+ break;
9235+ case AuLsc_DI_CHILD2:
9236+ ii_write_lock_child2(inode);
9237+ break;
9238+ case AuLsc_DI_CHILD3:
9239+ ii_write_lock_child3(inode);
9240+ break;
9241+ case AuLsc_DI_PARENT:
9242+ ii_write_lock_parent(inode);
9243+ break;
9244+ case AuLsc_DI_PARENT2:
9245+ ii_write_lock_parent2(inode);
9246+ break;
9247+ case AuLsc_DI_PARENT3:
9248+ ii_write_lock_parent3(inode);
9249+ break;
9250+ default:
9251+ BUG();
9252+ }
9253+}
9254+
9255+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
9256+{
9257+ switch (lsc) {
9258+ case AuLsc_DI_CHILD:
9259+ ii_read_lock_child(inode);
9260+ break;
9261+ case AuLsc_DI_CHILD2:
9262+ ii_read_lock_child2(inode);
9263+ break;
9264+ case AuLsc_DI_CHILD3:
9265+ ii_read_lock_child3(inode);
9266+ break;
9267+ case AuLsc_DI_PARENT:
9268+ ii_read_lock_parent(inode);
9269+ break;
9270+ case AuLsc_DI_PARENT2:
9271+ ii_read_lock_parent2(inode);
9272+ break;
9273+ case AuLsc_DI_PARENT3:
9274+ ii_read_lock_parent3(inode);
9275+ break;
9276+ default:
9277+ BUG();
9278+ }
9279+}
9280+
9281+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
9282+{
5527c038
JR
9283+ struct inode *inode;
9284+
dece6358 9285+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9286+ if (d_really_is_positive(d)) {
9287+ inode = d_inode(d);
1facf9fc 9288+ if (au_ftest_lock(flags, IW))
5527c038 9289+ do_ii_write_lock(inode, lsc);
1facf9fc 9290+ else if (au_ftest_lock(flags, IR))
5527c038 9291+ do_ii_read_lock(inode, lsc);
1facf9fc 9292+ }
9293+}
9294+
9295+void di_read_unlock(struct dentry *d, int flags)
9296+{
5527c038
JR
9297+ struct inode *inode;
9298+
9299+ if (d_really_is_positive(d)) {
9300+ inode = d_inode(d);
027c5e7a
AM
9301+ if (au_ftest_lock(flags, IW)) {
9302+ au_dbg_verify_dinode(d);
5527c038 9303+ ii_write_unlock(inode);
027c5e7a
AM
9304+ } else if (au_ftest_lock(flags, IR)) {
9305+ au_dbg_verify_dinode(d);
5527c038 9306+ ii_read_unlock(inode);
027c5e7a 9307+ }
1facf9fc 9308+ }
dece6358 9309+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 9310+}
9311+
9312+void di_downgrade_lock(struct dentry *d, int flags)
9313+{
5527c038
JR
9314+ if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
9315+ ii_downgrade_lock(d_inode(d));
dece6358 9316+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 9317+}
9318+
9319+void di_write_lock(struct dentry *d, unsigned int lsc)
9320+{
dece6358 9321+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9322+ if (d_really_is_positive(d))
9323+ do_ii_write_lock(d_inode(d), lsc);
1facf9fc 9324+}
9325+
9326+void di_write_unlock(struct dentry *d)
9327+{
027c5e7a 9328+ au_dbg_verify_dinode(d);
5527c038
JR
9329+ if (d_really_is_positive(d))
9330+ ii_write_unlock(d_inode(d));
dece6358 9331+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 9332+}
9333+
9334+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
9335+{
9336+ AuDebugOn(d1 == d2
5527c038 9337+ || d_inode(d1) == d_inode(d2)
1facf9fc 9338+ || d1->d_sb != d2->d_sb);
9339+
9340+ if (isdir && au_test_subdir(d1, d2)) {
9341+ di_write_lock_child(d1);
9342+ di_write_lock_child2(d2);
9343+ } else {
9344+ /* there should be no races */
9345+ di_write_lock_child(d2);
9346+ di_write_lock_child2(d1);
9347+ }
9348+}
9349+
9350+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
9351+{
9352+ AuDebugOn(d1 == d2
5527c038 9353+ || d_inode(d1) == d_inode(d2)
1facf9fc 9354+ || d1->d_sb != d2->d_sb);
9355+
9356+ if (isdir && au_test_subdir(d1, d2)) {
9357+ di_write_lock_parent(d1);
9358+ di_write_lock_parent2(d2);
9359+ } else {
9360+ /* there should be no races */
9361+ di_write_lock_parent(d2);
9362+ di_write_lock_parent2(d1);
9363+ }
9364+}
9365+
9366+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
9367+{
9368+ di_write_unlock(d1);
5527c038 9369+ if (d_inode(d1) == d_inode(d2))
dece6358 9370+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 9371+ else
9372+ di_write_unlock(d2);
9373+}
9374+
9375+/* ---------------------------------------------------------------------- */
9376+
9377+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
9378+{
9379+ struct dentry *d;
9380+
1308ab2a 9381+ DiMustAnyLock(dentry);
9382+
5afbbe0d 9383+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
1facf9fc 9384+ return NULL;
9385+ AuDebugOn(bindex < 0);
5afbbe0d 9386+ d = au_hdentry(au_di(dentry), bindex)->hd_dentry;
c1595e42 9387+ AuDebugOn(d && au_dcount(d) <= 0);
1facf9fc 9388+ return d;
9389+}
9390+
2cbb1c4b
JR
9391+/*
9392+ * extended version of au_h_dptr().
38d290e6
JR
9393+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
9394+ * error.
2cbb1c4b
JR
9395+ */
9396+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
9397+{
9398+ struct dentry *h_dentry;
9399+ struct inode *inode, *h_inode;
9400+
5527c038 9401+ AuDebugOn(d_really_is_negative(dentry));
2cbb1c4b
JR
9402+
9403+ h_dentry = NULL;
5afbbe0d
AM
9404+ if (au_dbtop(dentry) <= bindex
9405+ && bindex <= au_dbbot(dentry))
2cbb1c4b 9406+ h_dentry = au_h_dptr(dentry, bindex);
38d290e6 9407+ if (h_dentry && !au_d_linkable(h_dentry)) {
2cbb1c4b
JR
9408+ dget(h_dentry);
9409+ goto out; /* success */
9410+ }
9411+
5527c038 9412+ inode = d_inode(dentry);
5afbbe0d
AM
9413+ AuDebugOn(bindex < au_ibtop(inode));
9414+ AuDebugOn(au_ibbot(inode) < bindex);
2cbb1c4b
JR
9415+ h_inode = au_h_iptr(inode, bindex);
9416+ h_dentry = d_find_alias(h_inode);
9417+ if (h_dentry) {
9418+ if (!IS_ERR(h_dentry)) {
38d290e6 9419+ if (!au_d_linkable(h_dentry))
2cbb1c4b
JR
9420+ goto out; /* success */
9421+ dput(h_dentry);
9422+ } else
9423+ goto out;
9424+ }
9425+
9426+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
9427+ h_dentry = au_plink_lkup(inode, bindex);
9428+ AuDebugOn(!h_dentry);
9429+ if (!IS_ERR(h_dentry)) {
9430+ if (!au_d_hashed_positive(h_dentry))
9431+ goto out; /* success */
9432+ dput(h_dentry);
9433+ h_dentry = NULL;
9434+ }
9435+ }
9436+
9437+out:
9438+ AuDbgDentry(h_dentry);
9439+ return h_dentry;
9440+}
9441+
1facf9fc 9442+aufs_bindex_t au_dbtail(struct dentry *dentry)
9443+{
5afbbe0d 9444+ aufs_bindex_t bbot, bwh;
1facf9fc 9445+
5afbbe0d
AM
9446+ bbot = au_dbbot(dentry);
9447+ if (0 <= bbot) {
1facf9fc 9448+ bwh = au_dbwh(dentry);
9449+ if (!bwh)
9450+ return bwh;
5afbbe0d 9451+ if (0 < bwh && bwh < bbot)
1facf9fc 9452+ return bwh - 1;
9453+ }
5afbbe0d 9454+ return bbot;
1facf9fc 9455+}
9456+
9457+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
9458+{
5afbbe0d 9459+ aufs_bindex_t bbot, bopq;
1facf9fc 9460+
5afbbe0d
AM
9461+ bbot = au_dbtail(dentry);
9462+ if (0 <= bbot) {
1facf9fc 9463+ bopq = au_dbdiropq(dentry);
5afbbe0d
AM
9464+ if (0 <= bopq && bopq < bbot)
9465+ bbot = bopq;
1facf9fc 9466+ }
5afbbe0d 9467+ return bbot;
1facf9fc 9468+}
9469+
9470+/* ---------------------------------------------------------------------- */
9471+
9472+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9473+ struct dentry *h_dentry)
9474+{
5afbbe0d
AM
9475+ struct au_dinfo *dinfo;
9476+ struct au_hdentry *hd;
027c5e7a 9477+ struct au_branch *br;
1facf9fc 9478+
1308ab2a 9479+ DiMustWriteLock(dentry);
9480+
5afbbe0d
AM
9481+ dinfo = au_di(dentry);
9482+ hd = au_hdentry(dinfo, bindex);
4a4d8108 9483+ au_hdput(hd);
1facf9fc 9484+ hd->hd_dentry = h_dentry;
027c5e7a
AM
9485+ if (h_dentry) {
9486+ br = au_sbr(dentry->d_sb, bindex);
9487+ hd->hd_id = br->br_id;
9488+ }
9489+}
9490+
9491+int au_dbrange_test(struct dentry *dentry)
9492+{
9493+ int err;
5afbbe0d 9494+ aufs_bindex_t btop, bbot;
027c5e7a
AM
9495+
9496+ err = 0;
5afbbe0d
AM
9497+ btop = au_dbtop(dentry);
9498+ bbot = au_dbbot(dentry);
9499+ if (btop >= 0)
9500+ AuDebugOn(bbot < 0 && btop > bbot);
027c5e7a
AM
9501+ else {
9502+ err = -EIO;
5afbbe0d 9503+ AuDebugOn(bbot >= 0);
027c5e7a
AM
9504+ }
9505+
9506+ return err;
9507+}
9508+
9509+int au_digen_test(struct dentry *dentry, unsigned int sigen)
9510+{
9511+ int err;
9512+
9513+ err = 0;
9514+ if (unlikely(au_digen(dentry) != sigen
5527c038 9515+ || au_iigen_test(d_inode(dentry), sigen)))
027c5e7a
AM
9516+ err = -EIO;
9517+
9518+ return err;
1facf9fc 9519+}
9520+
9521+void au_update_digen(struct dentry *dentry)
9522+{
9523+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
9524+ /* smp_mb(); */ /* atomic_set */
9525+}
9526+
9527+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
9528+{
9529+ struct au_dinfo *dinfo;
9530+ struct dentry *h_d;
4a4d8108 9531+ struct au_hdentry *hdp;
5afbbe0d 9532+ aufs_bindex_t bindex, bbot;
1facf9fc 9533+
1308ab2a 9534+ DiMustWriteLock(dentry);
9535+
1facf9fc 9536+ dinfo = au_di(dentry);
5afbbe0d 9537+ if (!dinfo || dinfo->di_btop < 0)
1facf9fc 9538+ return;
9539+
9540+ if (do_put_zero) {
5afbbe0d
AM
9541+ bbot = dinfo->di_bbot;
9542+ bindex = dinfo->di_btop;
9543+ hdp = au_hdentry(dinfo, bindex);
9544+ for (; bindex <= bbot; bindex++, hdp++) {
9545+ h_d = hdp->hd_dentry;
5527c038 9546+ if (h_d && d_is_negative(h_d))
1facf9fc 9547+ au_set_h_dptr(dentry, bindex, NULL);
9548+ }
9549+ }
9550+
5afbbe0d
AM
9551+ dinfo->di_btop = 0;
9552+ hdp = au_hdentry(dinfo, dinfo->di_btop);
9553+ for (; dinfo->di_btop <= dinfo->di_bbot; dinfo->di_btop++, hdp++)
9554+ if (hdp->hd_dentry)
1facf9fc 9555+ break;
5afbbe0d
AM
9556+ if (dinfo->di_btop > dinfo->di_bbot) {
9557+ dinfo->di_btop = -1;
9558+ dinfo->di_bbot = -1;
1facf9fc 9559+ return;
9560+ }
9561+
5afbbe0d
AM
9562+ hdp = au_hdentry(dinfo, dinfo->di_bbot);
9563+ for (; dinfo->di_bbot >= 0; dinfo->di_bbot--, hdp--)
9564+ if (hdp->hd_dentry)
1facf9fc 9565+ break;
5afbbe0d 9566+ AuDebugOn(dinfo->di_btop > dinfo->di_bbot || dinfo->di_bbot < 0);
1facf9fc 9567+}
9568+
5afbbe0d 9569+void au_update_dbtop(struct dentry *dentry)
1facf9fc 9570+{
5afbbe0d 9571+ aufs_bindex_t bindex, bbot;
1facf9fc 9572+ struct dentry *h_dentry;
9573+
5afbbe0d
AM
9574+ bbot = au_dbbot(dentry);
9575+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
1facf9fc 9576+ h_dentry = au_h_dptr(dentry, bindex);
9577+ if (!h_dentry)
9578+ continue;
5527c038 9579+ if (d_is_positive(h_dentry)) {
5afbbe0d 9580+ au_set_dbtop(dentry, bindex);
1facf9fc 9581+ return;
9582+ }
9583+ au_set_h_dptr(dentry, bindex, NULL);
9584+ }
9585+}
9586+
5afbbe0d 9587+void au_update_dbbot(struct dentry *dentry)
1facf9fc 9588+{
5afbbe0d 9589+ aufs_bindex_t bindex, btop;
1facf9fc 9590+ struct dentry *h_dentry;
9591+
5afbbe0d
AM
9592+ btop = au_dbtop(dentry);
9593+ for (bindex = au_dbbot(dentry); bindex >= btop; bindex--) {
1facf9fc 9594+ h_dentry = au_h_dptr(dentry, bindex);
9595+ if (!h_dentry)
9596+ continue;
5527c038 9597+ if (d_is_positive(h_dentry)) {
5afbbe0d 9598+ au_set_dbbot(dentry, bindex);
1facf9fc 9599+ return;
9600+ }
9601+ au_set_h_dptr(dentry, bindex, NULL);
9602+ }
9603+}
9604+
9605+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
9606+{
5afbbe0d 9607+ aufs_bindex_t bindex, bbot;
1facf9fc 9608+
5afbbe0d
AM
9609+ bbot = au_dbbot(dentry);
9610+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++)
1facf9fc 9611+ if (au_h_dptr(dentry, bindex) == h_dentry)
9612+ return bindex;
9613+ return -1;
9614+}
7f207e10
AM
9615diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
9616--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 9617+++ linux/fs/aufs/dir.c 2016-10-09 16:55:36.489368218 +0200
f0c0a007 9618@@ -0,0 +1,762 @@
1facf9fc 9619+/*
8cdd5066 9620+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 9621+ *
9622+ * This program, aufs is free software; you can redistribute it and/or modify
9623+ * it under the terms of the GNU General Public License as published by
9624+ * the Free Software Foundation; either version 2 of the License, or
9625+ * (at your option) any later version.
dece6358
AM
9626+ *
9627+ * This program is distributed in the hope that it will be useful,
9628+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9629+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9630+ * GNU General Public License for more details.
9631+ *
9632+ * You should have received a copy of the GNU General Public License
523b37e3 9633+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 9634+ */
9635+
9636+/*
9637+ * directory operations
9638+ */
9639+
9640+#include <linux/fs_stack.h>
9641+#include "aufs.h"
9642+
9643+void au_add_nlink(struct inode *dir, struct inode *h_dir)
9644+{
9dbd164d
AM
9645+ unsigned int nlink;
9646+
1facf9fc 9647+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9648+
9dbd164d
AM
9649+ nlink = dir->i_nlink;
9650+ nlink += h_dir->i_nlink - 2;
1facf9fc 9651+ if (h_dir->i_nlink < 2)
9dbd164d 9652+ nlink += 2;
f6b6e03d 9653+ smp_mb(); /* for i_nlink */
7eafdf33 9654+ /* 0 can happen in revaliding */
92d182d2 9655+ set_nlink(dir, nlink);
1facf9fc 9656+}
9657+
9658+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
9659+{
9dbd164d
AM
9660+ unsigned int nlink;
9661+
1facf9fc 9662+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9663+
9dbd164d
AM
9664+ nlink = dir->i_nlink;
9665+ nlink -= h_dir->i_nlink - 2;
1facf9fc 9666+ if (h_dir->i_nlink < 2)
9dbd164d 9667+ nlink -= 2;
f6b6e03d 9668+ smp_mb(); /* for i_nlink */
92d182d2 9669+ /* nlink == 0 means the branch-fs is broken */
9dbd164d 9670+ set_nlink(dir, nlink);
1facf9fc 9671+}
9672+
1308ab2a 9673+loff_t au_dir_size(struct file *file, struct dentry *dentry)
9674+{
9675+ loff_t sz;
5afbbe0d 9676+ aufs_bindex_t bindex, bbot;
1308ab2a 9677+ struct file *h_file;
9678+ struct dentry *h_dentry;
9679+
9680+ sz = 0;
9681+ if (file) {
2000de60 9682+ AuDebugOn(!d_is_dir(file->f_path.dentry));
1308ab2a 9683+
5afbbe0d
AM
9684+ bbot = au_fbbot_dir(file);
9685+ for (bindex = au_fbtop(file);
9686+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
1308ab2a 9687+ bindex++) {
4a4d8108 9688+ h_file = au_hf_dir(file, bindex);
c06a8ce3
AM
9689+ if (h_file && file_inode(h_file))
9690+ sz += vfsub_f_size_read(h_file);
1308ab2a 9691+ }
9692+ } else {
9693+ AuDebugOn(!dentry);
2000de60 9694+ AuDebugOn(!d_is_dir(dentry));
1308ab2a 9695+
5afbbe0d
AM
9696+ bbot = au_dbtaildir(dentry);
9697+ for (bindex = au_dbtop(dentry);
9698+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
1308ab2a 9699+ bindex++) {
9700+ h_dentry = au_h_dptr(dentry, bindex);
5527c038
JR
9701+ if (h_dentry && d_is_positive(h_dentry))
9702+ sz += i_size_read(d_inode(h_dentry));
1308ab2a 9703+ }
9704+ }
9705+ if (sz < KMALLOC_MAX_SIZE)
9706+ sz = roundup_pow_of_two(sz);
9707+ if (sz > KMALLOC_MAX_SIZE)
9708+ sz = KMALLOC_MAX_SIZE;
9709+ else if (sz < NAME_MAX) {
9710+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
9711+ sz = AUFS_RDBLK_DEF;
9712+ }
9713+ return sz;
9714+}
9715+
b912730e
AM
9716+struct au_dir_ts_arg {
9717+ struct dentry *dentry;
9718+ aufs_bindex_t brid;
9719+};
9720+
9721+static void au_do_dir_ts(void *arg)
9722+{
9723+ struct au_dir_ts_arg *a = arg;
9724+ struct au_dtime dt;
9725+ struct path h_path;
9726+ struct inode *dir, *h_dir;
9727+ struct super_block *sb;
9728+ struct au_branch *br;
9729+ struct au_hinode *hdir;
9730+ int err;
5afbbe0d 9731+ aufs_bindex_t btop, bindex;
b912730e
AM
9732+
9733+ sb = a->dentry->d_sb;
5527c038 9734+ if (d_really_is_negative(a->dentry))
b912730e 9735+ goto out;
5527c038 9736+ /* no dir->i_mutex lock */
b95c5147
AM
9737+ aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
9738+
5527c038 9739+ dir = d_inode(a->dentry);
5afbbe0d 9740+ btop = au_ibtop(dir);
b912730e 9741+ bindex = au_br_index(sb, a->brid);
5afbbe0d 9742+ if (bindex < btop)
b912730e
AM
9743+ goto out_unlock;
9744+
9745+ br = au_sbr(sb, bindex);
9746+ h_path.dentry = au_h_dptr(a->dentry, bindex);
9747+ if (!h_path.dentry)
9748+ goto out_unlock;
9749+ h_path.mnt = au_br_mnt(br);
9750+ au_dtime_store(&dt, a->dentry, &h_path);
9751+
5afbbe0d 9752+ br = au_sbr(sb, btop);
b912730e
AM
9753+ if (!au_br_writable(br->br_perm))
9754+ goto out_unlock;
5afbbe0d 9755+ h_path.dentry = au_h_dptr(a->dentry, btop);
b912730e
AM
9756+ h_path.mnt = au_br_mnt(br);
9757+ err = vfsub_mnt_want_write(h_path.mnt);
9758+ if (err)
9759+ goto out_unlock;
5afbbe0d
AM
9760+ hdir = au_hi(dir, btop);
9761+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
9762+ h_dir = au_h_iptr(dir, btop);
b912730e
AM
9763+ if (h_dir->i_nlink
9764+ && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
9765+ dt.dt_h_path = h_path;
9766+ au_dtime_revert(&dt);
9767+ }
5afbbe0d 9768+ au_hn_inode_unlock(hdir);
b912730e
AM
9769+ vfsub_mnt_drop_write(h_path.mnt);
9770+ au_cpup_attr_timesizes(dir);
9771+
9772+out_unlock:
9773+ aufs_read_unlock(a->dentry, AuLock_DW);
9774+out:
9775+ dput(a->dentry);
9776+ au_nwt_done(&au_sbi(sb)->si_nowait);
f0c0a007 9777+ au_delayed_kfree(arg);
b912730e
AM
9778+}
9779+
9780+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
9781+{
9782+ int perm, wkq_err;
5afbbe0d 9783+ aufs_bindex_t btop;
b912730e
AM
9784+ struct au_dir_ts_arg *arg;
9785+ struct dentry *dentry;
9786+ struct super_block *sb;
9787+
9788+ IMustLock(dir);
9789+
9790+ dentry = d_find_any_alias(dir);
9791+ AuDebugOn(!dentry);
9792+ sb = dentry->d_sb;
5afbbe0d
AM
9793+ btop = au_ibtop(dir);
9794+ if (btop == bindex) {
b912730e
AM
9795+ au_cpup_attr_timesizes(dir);
9796+ goto out;
9797+ }
9798+
5afbbe0d 9799+ perm = au_sbr_perm(sb, btop);
b912730e
AM
9800+ if (!au_br_writable(perm))
9801+ goto out;
9802+
9803+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
9804+ if (!arg)
9805+ goto out;
9806+
9807+ arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
9808+ arg->brid = au_sbr_id(sb, bindex);
9809+ wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
9810+ if (unlikely(wkq_err)) {
9811+ pr_err("wkq %d\n", wkq_err);
9812+ dput(dentry);
f0c0a007 9813+ au_delayed_kfree(arg);
b912730e
AM
9814+ }
9815+
9816+out:
9817+ dput(dentry);
9818+}
9819+
1facf9fc 9820+/* ---------------------------------------------------------------------- */
9821+
9822+static int reopen_dir(struct file *file)
9823+{
9824+ int err;
9825+ unsigned int flags;
5afbbe0d 9826+ aufs_bindex_t bindex, btail, btop;
1facf9fc 9827+ struct dentry *dentry, *h_dentry;
9828+ struct file *h_file;
9829+
9830+ /* open all lower dirs */
2000de60 9831+ dentry = file->f_path.dentry;
5afbbe0d
AM
9832+ btop = au_dbtop(dentry);
9833+ for (bindex = au_fbtop(file); bindex < btop; bindex++)
1facf9fc 9834+ au_set_h_fptr(file, bindex, NULL);
5afbbe0d 9835+ au_set_fbtop(file, btop);
1facf9fc 9836+
9837+ btail = au_dbtaildir(dentry);
5afbbe0d 9838+ for (bindex = au_fbbot_dir(file); btail < bindex; bindex--)
1facf9fc 9839+ au_set_h_fptr(file, bindex, NULL);
5afbbe0d 9840+ au_set_fbbot_dir(file, btail);
1facf9fc 9841+
4a4d8108 9842+ flags = vfsub_file_flags(file);
5afbbe0d 9843+ for (bindex = btop; bindex <= btail; bindex++) {
1facf9fc 9844+ h_dentry = au_h_dptr(dentry, bindex);
9845+ if (!h_dentry)
9846+ continue;
4a4d8108 9847+ h_file = au_hf_dir(file, bindex);
1facf9fc 9848+ if (h_file)
9849+ continue;
9850+
392086de 9851+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9852+ err = PTR_ERR(h_file);
9853+ if (IS_ERR(h_file))
9854+ goto out; /* close all? */
9855+ au_set_h_fptr(file, bindex, h_file);
9856+ }
9857+ au_update_figen(file);
9858+ /* todo: necessary? */
9859+ /* file->f_ra = h_file->f_ra; */
9860+ err = 0;
9861+
4f0767ce 9862+out:
1facf9fc 9863+ return err;
9864+}
9865+
b912730e 9866+static int do_open_dir(struct file *file, int flags, struct file *h_file)
1facf9fc 9867+{
9868+ int err;
9869+ aufs_bindex_t bindex, btail;
9870+ struct dentry *dentry, *h_dentry;
8cdd5066 9871+ struct vfsmount *mnt;
1facf9fc 9872+
1308ab2a 9873+ FiMustWriteLock(file);
b912730e 9874+ AuDebugOn(h_file);
1308ab2a 9875+
523b37e3 9876+ err = 0;
8cdd5066 9877+ mnt = file->f_path.mnt;
2000de60 9878+ dentry = file->f_path.dentry;
5527c038 9879+ file->f_version = d_inode(dentry)->i_version;
5afbbe0d
AM
9880+ bindex = au_dbtop(dentry);
9881+ au_set_fbtop(file, bindex);
1facf9fc 9882+ btail = au_dbtaildir(dentry);
5afbbe0d 9883+ au_set_fbbot_dir(file, btail);
1facf9fc 9884+ for (; !err && bindex <= btail; bindex++) {
9885+ h_dentry = au_h_dptr(dentry, bindex);
9886+ if (!h_dentry)
9887+ continue;
9888+
8cdd5066
JR
9889+ err = vfsub_test_mntns(mnt, h_dentry->d_sb);
9890+ if (unlikely(err))
9891+ break;
392086de 9892+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9893+ if (IS_ERR(h_file)) {
9894+ err = PTR_ERR(h_file);
9895+ break;
9896+ }
9897+ au_set_h_fptr(file, bindex, h_file);
9898+ }
9899+ au_update_figen(file);
9900+ /* todo: necessary? */
9901+ /* file->f_ra = h_file->f_ra; */
9902+ if (!err)
9903+ return 0; /* success */
9904+
9905+ /* close all */
5afbbe0d 9906+ for (bindex = au_fbtop(file); bindex <= btail; bindex++)
1facf9fc 9907+ au_set_h_fptr(file, bindex, NULL);
5afbbe0d
AM
9908+ au_set_fbtop(file, -1);
9909+ au_set_fbbot_dir(file, -1);
4a4d8108 9910+
1facf9fc 9911+ return err;
9912+}
9913+
9914+static int aufs_open_dir(struct inode *inode __maybe_unused,
9915+ struct file *file)
9916+{
4a4d8108
AM
9917+ int err;
9918+ struct super_block *sb;
9919+ struct au_fidir *fidir;
9920+
9921+ err = -ENOMEM;
2000de60 9922+ sb = file->f_path.dentry->d_sb;
4a4d8108 9923+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 9924+ fidir = au_fidir_alloc(sb);
4a4d8108 9925+ if (fidir) {
b912730e
AM
9926+ struct au_do_open_args args = {
9927+ .open = do_open_dir,
9928+ .fidir = fidir
9929+ };
9930+ err = au_do_open(file, &args);
4a4d8108 9931+ if (unlikely(err))
f0c0a007 9932+ au_delayed_kfree(fidir);
4a4d8108
AM
9933+ }
9934+ si_read_unlock(sb);
9935+ return err;
1facf9fc 9936+}
9937+
9938+static int aufs_release_dir(struct inode *inode __maybe_unused,
9939+ struct file *file)
9940+{
9941+ struct au_vdir *vdir_cache;
4a4d8108
AM
9942+ struct au_finfo *finfo;
9943+ struct au_fidir *fidir;
f0c0a007 9944+ struct au_hfile *hf;
5afbbe0d 9945+ aufs_bindex_t bindex, bbot;
f0c0a007 9946+ int execed, delayed;
1facf9fc 9947+
f0c0a007 9948+ delayed = (current->flags & PF_KTHREAD) || in_interrupt();
4a4d8108
AM
9949+ finfo = au_fi(file);
9950+ fidir = finfo->fi_hdir;
9951+ if (fidir) {
076b876e 9952+ au_sphl_del(&finfo->fi_hlist,
2000de60 9953+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108
AM
9954+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
9955+ if (vdir_cache)
f0c0a007 9956+ au_vdir_free(vdir_cache, delayed);
4a4d8108
AM
9957+
9958+ bindex = finfo->fi_btop;
9959+ if (bindex >= 0) {
f0c0a007
AM
9960+ execed = vfsub_file_execed(file);
9961+ hf = fidir->fd_hfile + bindex;
4a4d8108
AM
9962+ /*
9963+ * calls fput() instead of filp_close(),
9964+ * since no dnotify or lock for the lower file.
9965+ */
5afbbe0d 9966+ bbot = fidir->fd_bbot;
f0c0a007
AM
9967+ for (; bindex <= bbot; bindex++, hf++)
9968+ if (hf->hf_file)
9969+ au_hfput(hf, execed);
4a4d8108 9970+ }
f0c0a007 9971+ au_delayed_kfree(fidir);
4a4d8108 9972+ finfo->fi_hdir = NULL;
1facf9fc 9973+ }
f0c0a007 9974+ au_finfo_fin(file, delayed);
1facf9fc 9975+ return 0;
9976+}
9977+
9978+/* ---------------------------------------------------------------------- */
9979+
4a4d8108
AM
9980+static int au_do_flush_dir(struct file *file, fl_owner_t id)
9981+{
9982+ int err;
5afbbe0d 9983+ aufs_bindex_t bindex, bbot;
4a4d8108
AM
9984+ struct file *h_file;
9985+
9986+ err = 0;
5afbbe0d
AM
9987+ bbot = au_fbbot_dir(file);
9988+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
4a4d8108
AM
9989+ h_file = au_hf_dir(file, bindex);
9990+ if (h_file)
9991+ err = vfsub_flush(h_file, id);
9992+ }
9993+ return err;
9994+}
9995+
9996+static int aufs_flush_dir(struct file *file, fl_owner_t id)
9997+{
9998+ return au_do_flush(file, id, au_do_flush_dir);
9999+}
10000+
10001+/* ---------------------------------------------------------------------- */
10002+
1facf9fc 10003+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
10004+{
10005+ int err;
5afbbe0d 10006+ aufs_bindex_t bbot, bindex;
1facf9fc 10007+ struct inode *inode;
10008+ struct super_block *sb;
10009+
10010+ err = 0;
10011+ sb = dentry->d_sb;
5527c038 10012+ inode = d_inode(dentry);
1facf9fc 10013+ IMustLock(inode);
5afbbe0d
AM
10014+ bbot = au_dbbot(dentry);
10015+ for (bindex = au_dbtop(dentry); !err && bindex <= bbot; bindex++) {
1facf9fc 10016+ struct path h_path;
1facf9fc 10017+
10018+ if (au_test_ro(sb, bindex, inode))
10019+ continue;
10020+ h_path.dentry = au_h_dptr(dentry, bindex);
10021+ if (!h_path.dentry)
10022+ continue;
1facf9fc 10023+
1facf9fc 10024+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 10025+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 10026+ }
10027+
10028+ return err;
10029+}
10030+
10031+static int au_do_fsync_dir(struct file *file, int datasync)
10032+{
10033+ int err;
5afbbe0d 10034+ aufs_bindex_t bbot, bindex;
1facf9fc 10035+ struct file *h_file;
10036+ struct super_block *sb;
10037+ struct inode *inode;
1facf9fc 10038+
10039+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
10040+ if (unlikely(err))
10041+ goto out;
10042+
c06a8ce3 10043+ inode = file_inode(file);
b912730e 10044+ sb = inode->i_sb;
5afbbe0d
AM
10045+ bbot = au_fbbot_dir(file);
10046+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
4a4d8108 10047+ h_file = au_hf_dir(file, bindex);
1facf9fc 10048+ if (!h_file || au_test_ro(sb, bindex, inode))
10049+ continue;
10050+
53392da6 10051+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 10052+ }
10053+
4f0767ce 10054+out:
1facf9fc 10055+ return err;
10056+}
10057+
10058+/*
10059+ * @file may be NULL
10060+ */
1e00d052
AM
10061+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
10062+ int datasync)
1facf9fc 10063+{
10064+ int err;
b752ccd1 10065+ struct dentry *dentry;
5527c038 10066+ struct inode *inode;
1facf9fc 10067+ struct super_block *sb;
1facf9fc 10068+
10069+ err = 0;
2000de60 10070+ dentry = file->f_path.dentry;
5527c038 10071+ inode = d_inode(dentry);
febd17d6 10072+ inode_lock(inode);
1facf9fc 10073+ sb = dentry->d_sb;
10074+ si_noflush_read_lock(sb);
10075+ if (file)
10076+ err = au_do_fsync_dir(file, datasync);
10077+ else {
10078+ di_write_lock_child(dentry);
10079+ err = au_do_fsync_dir_no_file(dentry, datasync);
10080+ }
5527c038 10081+ au_cpup_attr_timesizes(inode);
1facf9fc 10082+ di_write_unlock(dentry);
10083+ if (file)
10084+ fi_write_unlock(file);
10085+
10086+ si_read_unlock(sb);
febd17d6 10087+ inode_unlock(inode);
1facf9fc 10088+ return err;
10089+}
10090+
10091+/* ---------------------------------------------------------------------- */
10092+
5afbbe0d 10093+static int aufs_iterate_shared(struct file *file, struct dir_context *ctx)
1facf9fc 10094+{
10095+ int err;
10096+ struct dentry *dentry;
9dbd164d 10097+ struct inode *inode, *h_inode;
1facf9fc 10098+ struct super_block *sb;
10099+
523b37e3 10100+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 10101+
2000de60 10102+ dentry = file->f_path.dentry;
5527c038 10103+ inode = d_inode(dentry);
1facf9fc 10104+ IMustLock(inode);
10105+
10106+ sb = dentry->d_sb;
10107+ si_read_lock(sb, AuLock_FLUSH);
10108+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
10109+ if (unlikely(err))
10110+ goto out;
027c5e7a
AM
10111+ err = au_alive_dir(dentry);
10112+ if (!err)
10113+ err = au_vdir_init(file);
1facf9fc 10114+ di_downgrade_lock(dentry, AuLock_IR);
10115+ if (unlikely(err))
10116+ goto out_unlock;
10117+
5afbbe0d 10118+ h_inode = au_h_iptr(inode, au_ibtop(inode));
b752ccd1 10119+ if (!au_test_nfsd()) {
392086de 10120+ err = au_vdir_fill_de(file, ctx);
9dbd164d 10121+ fsstack_copy_attr_atime(inode, h_inode);
1facf9fc 10122+ } else {
10123+ /*
10124+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
10125+ * encode_fh() and others.
10126+ */
9dbd164d 10127+ atomic_inc(&h_inode->i_count);
1facf9fc 10128+ di_read_unlock(dentry, AuLock_IR);
10129+ si_read_unlock(sb);
392086de 10130+ err = au_vdir_fill_de(file, ctx);
1facf9fc 10131+ fsstack_copy_attr_atime(inode, h_inode);
10132+ fi_write_unlock(file);
9dbd164d 10133+ iput(h_inode);
1facf9fc 10134+
10135+ AuTraceErr(err);
10136+ return err;
10137+ }
10138+
4f0767ce 10139+out_unlock:
1facf9fc 10140+ di_read_unlock(dentry, AuLock_IR);
10141+ fi_write_unlock(file);
4f0767ce 10142+out:
1facf9fc 10143+ si_read_unlock(sb);
10144+ return err;
10145+}
10146+
10147+/* ---------------------------------------------------------------------- */
10148+
10149+#define AuTestEmpty_WHONLY 1
dece6358
AM
10150+#define AuTestEmpty_CALLED (1 << 1)
10151+#define AuTestEmpty_SHWH (1 << 2)
1facf9fc 10152+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
7f207e10
AM
10153+#define au_fset_testempty(flags, name) \
10154+ do { (flags) |= AuTestEmpty_##name; } while (0)
10155+#define au_fclr_testempty(flags, name) \
10156+ do { (flags) &= ~AuTestEmpty_##name; } while (0)
1facf9fc 10157+
dece6358
AM
10158+#ifndef CONFIG_AUFS_SHWH
10159+#undef AuTestEmpty_SHWH
10160+#define AuTestEmpty_SHWH 0
10161+#endif
10162+
1facf9fc 10163+struct test_empty_arg {
392086de 10164+ struct dir_context ctx;
1308ab2a 10165+ struct au_nhash *whlist;
1facf9fc 10166+ unsigned int flags;
10167+ int err;
10168+ aufs_bindex_t bindex;
10169+};
10170+
392086de
AM
10171+static int test_empty_cb(struct dir_context *ctx, const char *__name,
10172+ int namelen, loff_t offset __maybe_unused, u64 ino,
dece6358 10173+ unsigned int d_type)
1facf9fc 10174+{
392086de
AM
10175+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
10176+ ctx);
1facf9fc 10177+ char *name = (void *)__name;
10178+
10179+ arg->err = 0;
10180+ au_fset_testempty(arg->flags, CALLED);
10181+ /* smp_mb(); */
10182+ if (name[0] == '.'
10183+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
10184+ goto out; /* success */
10185+
10186+ if (namelen <= AUFS_WH_PFX_LEN
10187+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
10188+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 10189+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 10190+ arg->err = -ENOTEMPTY;
10191+ goto out;
10192+ }
10193+
10194+ name += AUFS_WH_PFX_LEN;
10195+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 10196+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 10197+ arg->err = au_nhash_append_wh
1308ab2a 10198+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 10199+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 10200+
4f0767ce 10201+out:
1facf9fc 10202+ /* smp_mb(); */
10203+ AuTraceErr(arg->err);
10204+ return arg->err;
10205+}
10206+
10207+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10208+{
10209+ int err;
10210+ struct file *h_file;
10211+
10212+ h_file = au_h_open(dentry, arg->bindex,
10213+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
392086de 10214+ /*file*/NULL, /*force_wr*/0);
1facf9fc 10215+ err = PTR_ERR(h_file);
10216+ if (IS_ERR(h_file))
10217+ goto out;
10218+
10219+ err = 0;
10220+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
c06a8ce3 10221+ && !file_inode(h_file)->i_nlink)
1facf9fc 10222+ goto out_put;
10223+
10224+ do {
10225+ arg->err = 0;
10226+ au_fclr_testempty(arg->flags, CALLED);
10227+ /* smp_mb(); */
392086de 10228+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1facf9fc 10229+ if (err >= 0)
10230+ err = arg->err;
10231+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
10232+
4f0767ce 10233+out_put:
1facf9fc 10234+ fput(h_file);
10235+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 10236+out:
1facf9fc 10237+ return err;
10238+}
10239+
10240+struct do_test_empty_args {
10241+ int *errp;
10242+ struct dentry *dentry;
10243+ struct test_empty_arg *arg;
10244+};
10245+
10246+static void call_do_test_empty(void *args)
10247+{
10248+ struct do_test_empty_args *a = args;
10249+ *a->errp = do_test_empty(a->dentry, a->arg);
10250+}
10251+
10252+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10253+{
10254+ int err, wkq_err;
10255+ struct dentry *h_dentry;
10256+ struct inode *h_inode;
10257+
10258+ h_dentry = au_h_dptr(dentry, arg->bindex);
5527c038 10259+ h_inode = d_inode(h_dentry);
53392da6 10260+ /* todo: i_mode changes anytime? */
febd17d6 10261+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
1facf9fc 10262+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
febd17d6 10263+ inode_unlock(h_inode);
1facf9fc 10264+ if (!err)
10265+ err = do_test_empty(dentry, arg);
10266+ else {
10267+ struct do_test_empty_args args = {
10268+ .errp = &err,
10269+ .dentry = dentry,
10270+ .arg = arg
10271+ };
10272+ unsigned int flags = arg->flags;
10273+
10274+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
10275+ if (unlikely(wkq_err))
10276+ err = wkq_err;
10277+ arg->flags = flags;
10278+ }
10279+
10280+ return err;
10281+}
10282+
10283+int au_test_empty_lower(struct dentry *dentry)
10284+{
10285+ int err;
1308ab2a 10286+ unsigned int rdhash;
5afbbe0d 10287+ aufs_bindex_t bindex, btop, btail;
1308ab2a 10288+ struct au_nhash whlist;
392086de
AM
10289+ struct test_empty_arg arg = {
10290+ .ctx = {
2000de60 10291+ .actor = test_empty_cb
392086de
AM
10292+ }
10293+ };
076b876e 10294+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
1facf9fc 10295+
dece6358
AM
10296+ SiMustAnyLock(dentry->d_sb);
10297+
1308ab2a 10298+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
10299+ if (!rdhash)
10300+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
10301+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 10302+ if (unlikely(err))
1facf9fc 10303+ goto out;
10304+
1facf9fc 10305+ arg.flags = 0;
1308ab2a 10306+ arg.whlist = &whlist;
5afbbe0d 10307+ btop = au_dbtop(dentry);
dece6358
AM
10308+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10309+ au_fset_testempty(arg.flags, SHWH);
076b876e
AM
10310+ test_empty = do_test_empty;
10311+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
10312+ test_empty = sio_test_empty;
5afbbe0d 10313+ arg.bindex = btop;
076b876e 10314+ err = test_empty(dentry, &arg);
1facf9fc 10315+ if (unlikely(err))
10316+ goto out_whlist;
10317+
10318+ au_fset_testempty(arg.flags, WHONLY);
10319+ btail = au_dbtaildir(dentry);
5afbbe0d 10320+ for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
1facf9fc 10321+ struct dentry *h_dentry;
10322+
10323+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10324+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10325+ arg.bindex = bindex;
076b876e 10326+ err = test_empty(dentry, &arg);
1facf9fc 10327+ }
10328+ }
10329+
4f0767ce 10330+out_whlist:
1308ab2a 10331+ au_nhash_wh_free(&whlist);
4f0767ce 10332+out:
1facf9fc 10333+ return err;
10334+}
10335+
10336+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
10337+{
10338+ int err;
392086de
AM
10339+ struct test_empty_arg arg = {
10340+ .ctx = {
2000de60 10341+ .actor = test_empty_cb
392086de
AM
10342+ }
10343+ };
1facf9fc 10344+ aufs_bindex_t bindex, btail;
10345+
10346+ err = 0;
1308ab2a 10347+ arg.whlist = whlist;
1facf9fc 10348+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
10349+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10350+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 10351+ btail = au_dbtaildir(dentry);
5afbbe0d 10352+ for (bindex = au_dbtop(dentry); !err && bindex <= btail; bindex++) {
1facf9fc 10353+ struct dentry *h_dentry;
10354+
10355+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10356+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10357+ arg.bindex = bindex;
10358+ err = sio_test_empty(dentry, &arg);
10359+ }
10360+ }
10361+
10362+ return err;
10363+}
10364+
10365+/* ---------------------------------------------------------------------- */
10366+
10367+const struct file_operations aufs_dir_fop = {
4a4d8108 10368+ .owner = THIS_MODULE,
027c5e7a 10369+ .llseek = default_llseek,
1facf9fc 10370+ .read = generic_read_dir,
5afbbe0d 10371+ .iterate_shared = aufs_iterate_shared,
1facf9fc 10372+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
10373+#ifdef CONFIG_COMPAT
10374+ .compat_ioctl = aufs_compat_ioctl_dir,
10375+#endif
1facf9fc 10376+ .open = aufs_open_dir,
10377+ .release = aufs_release_dir,
4a4d8108 10378+ .flush = aufs_flush_dir,
1facf9fc 10379+ .fsync = aufs_fsync_dir
10380+};
7f207e10
AM
10381diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
10382--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 10383+++ linux/fs/aufs/dir.h 2016-10-09 16:55:36.489368218 +0200
f0c0a007 10384@@ -0,0 +1,137 @@
1facf9fc 10385+/*
8cdd5066 10386+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 10387+ *
10388+ * This program, aufs is free software; you can redistribute it and/or modify
10389+ * it under the terms of the GNU General Public License as published by
10390+ * the Free Software Foundation; either version 2 of the License, or
10391+ * (at your option) any later version.
dece6358
AM
10392+ *
10393+ * This program is distributed in the hope that it will be useful,
10394+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10395+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10396+ * GNU General Public License for more details.
10397+ *
10398+ * You should have received a copy of the GNU General Public License
523b37e3 10399+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10400+ */
10401+
10402+/*
10403+ * directory operations
10404+ */
10405+
10406+#ifndef __AUFS_DIR_H__
10407+#define __AUFS_DIR_H__
10408+
10409+#ifdef __KERNEL__
10410+
10411+#include <linux/fs.h>
1facf9fc 10412+
10413+/* ---------------------------------------------------------------------- */
10414+
10415+/* need to be faster and smaller */
10416+
10417+struct au_nhash {
dece6358
AM
10418+ unsigned int nh_num;
10419+ struct hlist_head *nh_head;
1facf9fc 10420+};
10421+
10422+struct au_vdir_destr {
10423+ unsigned char len;
10424+ unsigned char name[0];
10425+} __packed;
10426+
10427+struct au_vdir_dehstr {
10428+ struct hlist_node hash;
f0c0a007
AM
10429+ union {
10430+ struct au_vdir_destr *str;
10431+ struct llist_node lnode; /* delayed free */
10432+ };
4a4d8108 10433+} ____cacheline_aligned_in_smp;
1facf9fc 10434+
10435+struct au_vdir_de {
10436+ ino_t de_ino;
10437+ unsigned char de_type;
10438+ /* caution: packed */
10439+ struct au_vdir_destr de_str;
10440+} __packed;
10441+
10442+struct au_vdir_wh {
10443+ struct hlist_node wh_hash;
dece6358
AM
10444+#ifdef CONFIG_AUFS_SHWH
10445+ ino_t wh_ino;
1facf9fc 10446+ aufs_bindex_t wh_bindex;
dece6358
AM
10447+ unsigned char wh_type;
10448+#else
10449+ aufs_bindex_t wh_bindex;
10450+#endif
10451+ /* caution: packed */
1facf9fc 10452+ struct au_vdir_destr wh_str;
10453+} __packed;
10454+
10455+union au_vdir_deblk_p {
10456+ unsigned char *deblk;
10457+ struct au_vdir_de *de;
10458+};
10459+
10460+struct au_vdir {
10461+ unsigned char **vd_deblk;
10462+ unsigned long vd_nblk;
1facf9fc 10463+ struct {
10464+ unsigned long ul;
10465+ union au_vdir_deblk_p p;
10466+ } vd_last;
10467+
10468+ unsigned long vd_version;
dece6358 10469+ unsigned int vd_deblk_sz;
f0c0a007
AM
10470+ union {
10471+ unsigned long vd_jiffy;
10472+ struct llist_node vd_lnode; /* delayed free */
10473+ };
4a4d8108 10474+} ____cacheline_aligned_in_smp;
1facf9fc 10475+
10476+/* ---------------------------------------------------------------------- */
10477+
10478+/* dir.c */
10479+extern const struct file_operations aufs_dir_fop;
10480+void au_add_nlink(struct inode *dir, struct inode *h_dir);
10481+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 10482+loff_t au_dir_size(struct file *file, struct dentry *dentry);
b912730e 10483+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
1facf9fc 10484+int au_test_empty_lower(struct dentry *dentry);
10485+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
10486+
10487+/* vdir.c */
1308ab2a 10488+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
10489+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
10490+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 10491+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
10492+ int limit);
dece6358
AM
10493+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
10494+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
10495+ unsigned int d_type, aufs_bindex_t bindex,
10496+ unsigned char shwh);
f0c0a007 10497+void au_vdir_free(struct au_vdir *vdir, int atonce);
1facf9fc 10498+int au_vdir_init(struct file *file);
392086de 10499+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
1facf9fc 10500+
10501+/* ioctl.c */
10502+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
10503+
1308ab2a 10504+#ifdef CONFIG_AUFS_RDU
10505+/* rdu.c */
10506+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10507+#ifdef CONFIG_COMPAT
10508+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
10509+ unsigned long arg);
10510+#endif
1308ab2a 10511+#else
c1595e42
JR
10512+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
10513+ unsigned int cmd, unsigned long arg)
b752ccd1 10514+#ifdef CONFIG_COMPAT
c1595e42
JR
10515+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
10516+ unsigned int cmd, unsigned long arg)
b752ccd1 10517+#endif
1308ab2a 10518+#endif
10519+
1facf9fc 10520+#endif /* __KERNEL__ */
10521+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
10522diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
10523--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
10524+++ linux/fs/aufs/dynop.c 2016-10-09 16:55:36.489368218 +0200
10525@@ -0,0 +1,371 @@
1facf9fc 10526+/*
8cdd5066 10527+ * Copyright (C) 2010-2016 Junjiro R. Okajima
1facf9fc 10528+ *
10529+ * This program, aufs is free software; you can redistribute it and/or modify
10530+ * it under the terms of the GNU General Public License as published by
10531+ * the Free Software Foundation; either version 2 of the License, or
10532+ * (at your option) any later version.
dece6358
AM
10533+ *
10534+ * This program is distributed in the hope that it will be useful,
10535+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10536+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10537+ * GNU General Public License for more details.
10538+ *
10539+ * You should have received a copy of the GNU General Public License
523b37e3 10540+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10541+ */
10542+
10543+/*
4a4d8108 10544+ * dynamically customizable operations for regular files
1facf9fc 10545+ */
10546+
1facf9fc 10547+#include "aufs.h"
10548+
4a4d8108 10549+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 10550+
4a4d8108
AM
10551+/*
10552+ * How large will these lists be?
10553+ * Usually just a few elements, 20-30 at most for each, I guess.
10554+ */
f0c0a007 10555+static struct au_sphlhead dynop[AuDyLast];
4a4d8108 10556+
f0c0a007 10557+static struct au_dykey *dy_gfind_get(struct au_sphlhead *sphl, const void *h_op)
1facf9fc 10558+{
4a4d8108 10559+ struct au_dykey *key, *tmp;
f0c0a007 10560+ struct hlist_head *head;
1facf9fc 10561+
4a4d8108 10562+ key = NULL;
f0c0a007 10563+ head = &sphl->head;
4a4d8108 10564+ rcu_read_lock();
f0c0a007 10565+ hlist_for_each_entry_rcu(tmp, head, dk_hnode)
4a4d8108
AM
10566+ if (tmp->dk_op.dy_hop == h_op) {
10567+ key = tmp;
10568+ kref_get(&key->dk_kref);
10569+ break;
10570+ }
10571+ rcu_read_unlock();
10572+
10573+ return key;
1facf9fc 10574+}
10575+
4a4d8108 10576+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 10577+{
4a4d8108
AM
10578+ struct au_dykey **k, *found;
10579+ const void *h_op = key->dk_op.dy_hop;
10580+ int i;
1facf9fc 10581+
4a4d8108
AM
10582+ found = NULL;
10583+ k = br->br_dykey;
10584+ for (i = 0; i < AuBrDynOp; i++)
10585+ if (k[i]) {
10586+ if (k[i]->dk_op.dy_hop == h_op) {
10587+ found = k[i];
10588+ break;
10589+ }
10590+ } else
10591+ break;
10592+ if (!found) {
10593+ spin_lock(&br->br_dykey_lock);
10594+ for (; i < AuBrDynOp; i++)
10595+ if (k[i]) {
10596+ if (k[i]->dk_op.dy_hop == h_op) {
10597+ found = k[i];
10598+ break;
10599+ }
10600+ } else {
10601+ k[i] = key;
10602+ break;
10603+ }
10604+ spin_unlock(&br->br_dykey_lock);
10605+ BUG_ON(i == AuBrDynOp); /* expand the array */
10606+ }
10607+
10608+ return found;
1facf9fc 10609+}
10610+
4a4d8108 10611+/* kref_get() if @key is already added */
f0c0a007 10612+static struct au_dykey *dy_gadd(struct au_sphlhead *sphl, struct au_dykey *key)
4a4d8108
AM
10613+{
10614+ struct au_dykey *tmp, *found;
f0c0a007 10615+ struct hlist_head *head;
4a4d8108 10616+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 10617+
4a4d8108 10618+ found = NULL;
f0c0a007
AM
10619+ head = &sphl->head;
10620+ spin_lock(&sphl->spin);
10621+ hlist_for_each_entry(tmp, head, dk_hnode)
4a4d8108
AM
10622+ if (tmp->dk_op.dy_hop == h_op) {
10623+ kref_get(&tmp->dk_kref);
10624+ found = tmp;
10625+ break;
10626+ }
10627+ if (!found)
f0c0a007
AM
10628+ hlist_add_head_rcu(&key->dk_hnode, head);
10629+ spin_unlock(&sphl->spin);
1facf9fc 10630+
4a4d8108
AM
10631+ if (!found)
10632+ DyPrSym(key);
10633+ return found;
10634+}
10635+
10636+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 10637+{
4a4d8108
AM
10638+ struct au_dykey *key;
10639+
10640+ key = container_of(rcu, struct au_dykey, dk_rcu);
10641+ DyPrSym(key);
f0c0a007 10642+ kfree(key); /* not delayed */
1facf9fc 10643+}
10644+
4a4d8108
AM
10645+static void dy_free(struct kref *kref)
10646+{
10647+ struct au_dykey *key;
f0c0a007 10648+ struct au_sphlhead *sphl;
1facf9fc 10649+
4a4d8108 10650+ key = container_of(kref, struct au_dykey, dk_kref);
f0c0a007
AM
10651+ sphl = dynop + key->dk_op.dy_type;
10652+ au_sphl_del_rcu(&key->dk_hnode, sphl);
4a4d8108
AM
10653+ call_rcu(&key->dk_rcu, dy_free_rcu);
10654+}
10655+
10656+void au_dy_put(struct au_dykey *key)
1facf9fc 10657+{
4a4d8108
AM
10658+ kref_put(&key->dk_kref, dy_free);
10659+}
1facf9fc 10660+
4a4d8108
AM
10661+/* ---------------------------------------------------------------------- */
10662+
10663+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
10664+
10665+#ifdef CONFIG_AUFS_DEBUG
10666+#define DyDbgDeclare(cnt) unsigned int cnt = 0
4f0767ce 10667+#define DyDbgInc(cnt) do { cnt++; } while (0)
4a4d8108
AM
10668+#else
10669+#define DyDbgDeclare(cnt) do {} while (0)
10670+#define DyDbgInc(cnt) do {} while (0)
10671+#endif
10672+
10673+#define DySet(func, dst, src, h_op, h_sb) do { \
10674+ DyDbgInc(cnt); \
10675+ if (h_op->func) { \
10676+ if (src.func) \
10677+ dst.func = src.func; \
10678+ else \
10679+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
10680+ } \
10681+} while (0)
10682+
10683+#define DySetForce(func, dst, src) do { \
10684+ AuDebugOn(!src.func); \
10685+ DyDbgInc(cnt); \
10686+ dst.func = src.func; \
10687+} while (0)
10688+
10689+#define DySetAop(func) \
10690+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
10691+#define DySetAopForce(func) \
10692+ DySetForce(func, dyaop->da_op, aufs_aop)
10693+
10694+static void dy_aop(struct au_dykey *key, const void *h_op,
10695+ struct super_block *h_sb __maybe_unused)
10696+{
10697+ struct au_dyaop *dyaop = (void *)key;
10698+ const struct address_space_operations *h_aop = h_op;
10699+ DyDbgDeclare(cnt);
10700+
10701+ AuDbg("%s\n", au_sbtype(h_sb));
10702+
10703+ DySetAop(writepage);
10704+ DySetAopForce(readpage); /* force */
4a4d8108
AM
10705+ DySetAop(writepages);
10706+ DySetAop(set_page_dirty);
10707+ DySetAop(readpages);
10708+ DySetAop(write_begin);
10709+ DySetAop(write_end);
10710+ DySetAop(bmap);
10711+ DySetAop(invalidatepage);
10712+ DySetAop(releasepage);
027c5e7a 10713+ DySetAop(freepage);
7e9cd9fe 10714+ /* this one will be changed according to an aufs mount option */
4a4d8108 10715+ DySetAop(direct_IO);
4a4d8108 10716+ DySetAop(migratepage);
e2f27e51
AM
10717+ DySetAop(isolate_page);
10718+ DySetAop(putback_page);
4a4d8108
AM
10719+ DySetAop(launder_page);
10720+ DySetAop(is_partially_uptodate);
392086de 10721+ DySetAop(is_dirty_writeback);
4a4d8108 10722+ DySetAop(error_remove_page);
b4510431
AM
10723+ DySetAop(swap_activate);
10724+ DySetAop(swap_deactivate);
4a4d8108
AM
10725+
10726+ DyDbgSize(cnt, *h_aop);
4a4d8108
AM
10727+}
10728+
4a4d8108
AM
10729+/* ---------------------------------------------------------------------- */
10730+
10731+static void dy_bug(struct kref *kref)
10732+{
10733+ BUG();
10734+}
10735+
10736+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
10737+{
10738+ struct au_dykey *key, *old;
f0c0a007 10739+ struct au_sphlhead *sphl;
b752ccd1 10740+ struct op {
4a4d8108 10741+ unsigned int sz;
b752ccd1
AM
10742+ void (*set)(struct au_dykey *key, const void *h_op,
10743+ struct super_block *h_sb __maybe_unused);
10744+ };
10745+ static const struct op a[] = {
4a4d8108
AM
10746+ [AuDy_AOP] = {
10747+ .sz = sizeof(struct au_dyaop),
b752ccd1 10748+ .set = dy_aop
4a4d8108 10749+ }
b752ccd1
AM
10750+ };
10751+ const struct op *p;
4a4d8108 10752+
f0c0a007
AM
10753+ sphl = dynop + op->dy_type;
10754+ key = dy_gfind_get(sphl, op->dy_hop);
4a4d8108
AM
10755+ if (key)
10756+ goto out_add; /* success */
10757+
10758+ p = a + op->dy_type;
10759+ key = kzalloc(p->sz, GFP_NOFS);
10760+ if (unlikely(!key)) {
10761+ key = ERR_PTR(-ENOMEM);
10762+ goto out;
10763+ }
10764+
10765+ key->dk_op.dy_hop = op->dy_hop;
10766+ kref_init(&key->dk_kref);
86dc4139 10767+ p->set(key, op->dy_hop, au_br_sb(br));
f0c0a007 10768+ old = dy_gadd(sphl, key);
4a4d8108 10769+ if (old) {
f0c0a007 10770+ au_delayed_kfree(key);
4a4d8108
AM
10771+ key = old;
10772+ }
10773+
10774+out_add:
10775+ old = dy_bradd(br, key);
10776+ if (old)
10777+ /* its ref-count should never be zero here */
10778+ kref_put(&key->dk_kref, dy_bug);
10779+out:
10780+ return key;
10781+}
10782+
10783+/* ---------------------------------------------------------------------- */
10784+/*
10785+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
c1595e42 10786+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
4a4d8108
AM
10787+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
10788+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
10789+ * See the aufs manual for details.
4a4d8108
AM
10790+ */
10791+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
10792+{
7e9cd9fe 10793+ if (!do_dx)
4a4d8108 10794+ dyaop->da_op.direct_IO = NULL;
7e9cd9fe 10795+ else
4a4d8108 10796+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
4a4d8108
AM
10797+}
10798+
10799+static struct au_dyaop *dy_aget(struct au_branch *br,
10800+ const struct address_space_operations *h_aop,
10801+ int do_dx)
10802+{
10803+ struct au_dyaop *dyaop;
10804+ struct au_dynop op;
10805+
10806+ op.dy_type = AuDy_AOP;
10807+ op.dy_haop = h_aop;
10808+ dyaop = (void *)dy_get(&op, br);
10809+ if (IS_ERR(dyaop))
10810+ goto out;
10811+ dy_adx(dyaop, do_dx);
10812+
10813+out:
10814+ return dyaop;
10815+}
10816+
10817+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10818+ struct inode *h_inode)
10819+{
10820+ int err, do_dx;
10821+ struct super_block *sb;
10822+ struct au_branch *br;
10823+ struct au_dyaop *dyaop;
10824+
10825+ AuDebugOn(!S_ISREG(h_inode->i_mode));
10826+ IiMustWriteLock(inode);
10827+
10828+ sb = inode->i_sb;
10829+ br = au_sbr(sb, bindex);
10830+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
10831+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
10832+ err = PTR_ERR(dyaop);
10833+ if (IS_ERR(dyaop))
10834+ /* unnecessary to call dy_fput() */
10835+ goto out;
10836+
10837+ err = 0;
10838+ inode->i_mapping->a_ops = &dyaop->da_op;
10839+
10840+out:
10841+ return err;
10842+}
10843+
b752ccd1
AM
10844+/*
10845+ * Is it safe to replace a_ops while the inode/file is in operation?
10846+ * Yes, I hope so.
10847+ */
10848+int au_dy_irefresh(struct inode *inode)
10849+{
10850+ int err;
5afbbe0d 10851+ aufs_bindex_t btop;
b752ccd1
AM
10852+ struct inode *h_inode;
10853+
10854+ err = 0;
10855+ if (S_ISREG(inode->i_mode)) {
5afbbe0d
AM
10856+ btop = au_ibtop(inode);
10857+ h_inode = au_h_iptr(inode, btop);
10858+ err = au_dy_iaop(inode, btop, h_inode);
b752ccd1
AM
10859+ }
10860+ return err;
10861+}
10862+
4a4d8108
AM
10863+void au_dy_arefresh(int do_dx)
10864+{
f0c0a007
AM
10865+ struct au_sphlhead *sphl;
10866+ struct hlist_head *head;
4a4d8108
AM
10867+ struct au_dykey *key;
10868+
f0c0a007
AM
10869+ sphl = dynop + AuDy_AOP;
10870+ head = &sphl->head;
10871+ spin_lock(&sphl->spin);
10872+ hlist_for_each_entry(key, head, dk_hnode)
4a4d8108 10873+ dy_adx((void *)key, do_dx);
f0c0a007 10874+ spin_unlock(&sphl->spin);
4a4d8108
AM
10875+}
10876+
4a4d8108
AM
10877+/* ---------------------------------------------------------------------- */
10878+
10879+void __init au_dy_init(void)
10880+{
10881+ int i;
10882+
10883+ /* make sure that 'struct au_dykey *' can be any type */
10884+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
10885+
10886+ for (i = 0; i < AuDyLast; i++)
f0c0a007 10887+ au_sphl_init(dynop + i);
4a4d8108
AM
10888+}
10889+
10890+void au_dy_fin(void)
10891+{
10892+ int i;
10893+
10894+ for (i = 0; i < AuDyLast; i++)
f0c0a007 10895+ WARN_ON(!hlist_empty(&dynop[i].head));
4a4d8108 10896+}
7f207e10
AM
10897diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
10898--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 10899+++ linux/fs/aufs/dynop.h 2016-10-09 16:55:36.489368218 +0200
7e9cd9fe 10900@@ -0,0 +1,74 @@
4a4d8108 10901+/*
8cdd5066 10902+ * Copyright (C) 2010-2016 Junjiro R. Okajima
4a4d8108
AM
10903+ *
10904+ * This program, aufs is free software; you can redistribute it and/or modify
10905+ * it under the terms of the GNU General Public License as published by
10906+ * the Free Software Foundation; either version 2 of the License, or
10907+ * (at your option) any later version.
10908+ *
10909+ * This program is distributed in the hope that it will be useful,
10910+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10911+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10912+ * GNU General Public License for more details.
10913+ *
10914+ * You should have received a copy of the GNU General Public License
523b37e3 10915+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10916+ */
10917+
10918+/*
10919+ * dynamically customizable operations (for regular files only)
10920+ */
10921+
10922+#ifndef __AUFS_DYNOP_H__
10923+#define __AUFS_DYNOP_H__
10924+
10925+#ifdef __KERNEL__
10926+
7e9cd9fe
AM
10927+#include <linux/fs.h>
10928+#include <linux/kref.h>
4a4d8108 10929+
2cbb1c4b 10930+enum {AuDy_AOP, AuDyLast};
4a4d8108
AM
10931+
10932+struct au_dynop {
10933+ int dy_type;
10934+ union {
10935+ const void *dy_hop;
10936+ const struct address_space_operations *dy_haop;
4a4d8108
AM
10937+ };
10938+};
10939+
10940+struct au_dykey {
10941+ union {
f0c0a007 10942+ struct hlist_node dk_hnode;
4a4d8108
AM
10943+ struct rcu_head dk_rcu;
10944+ };
10945+ struct au_dynop dk_op;
10946+
10947+ /*
10948+ * while this key is in the branch local array, a kref is held. when the
10949+ * branch is removed, the kref is put.
10950+ */
10951+ struct kref dk_kref;
10952+};
10953+
10954+/* stop unioning since their sizes are very different from each other */
10955+struct au_dyaop {
10956+ struct au_dykey da_key;
10957+ struct address_space_operations da_op; /* not const */
4a4d8108
AM
10958+};
10959+
4a4d8108
AM
10960+/* ---------------------------------------------------------------------- */
10961+
10962+/* dynop.c */
10963+struct au_branch;
10964+void au_dy_put(struct au_dykey *key);
10965+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10966+ struct inode *h_inode);
b752ccd1 10967+int au_dy_irefresh(struct inode *inode);
4a4d8108 10968+void au_dy_arefresh(int do_dio);
4a4d8108
AM
10969+
10970+void __init au_dy_init(void);
10971+void au_dy_fin(void);
10972+
4a4d8108
AM
10973+#endif /* __KERNEL__ */
10974+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
10975diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
10976--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 10977+++ linux/fs/aufs/export.c 2016-10-09 16:55:36.489368218 +0200
5afbbe0d 10978@@ -0,0 +1,837 @@
4a4d8108 10979+/*
8cdd5066 10980+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
10981+ *
10982+ * This program, aufs is free software; you can redistribute it and/or modify
10983+ * it under the terms of the GNU General Public License as published by
10984+ * the Free Software Foundation; either version 2 of the License, or
10985+ * (at your option) any later version.
10986+ *
10987+ * This program is distributed in the hope that it will be useful,
10988+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10989+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10990+ * GNU General Public License for more details.
10991+ *
10992+ * You should have received a copy of the GNU General Public License
523b37e3 10993+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10994+ */
10995+
10996+/*
10997+ * export via nfs
10998+ */
10999+
11000+#include <linux/exportfs.h>
7eafdf33 11001+#include <linux/fs_struct.h>
4a4d8108
AM
11002+#include <linux/namei.h>
11003+#include <linux/nsproxy.h>
11004+#include <linux/random.h>
11005+#include <linux/writeback.h>
7eafdf33 11006+#include "../fs/mount.h"
4a4d8108
AM
11007+#include "aufs.h"
11008+
11009+union conv {
11010+#ifdef CONFIG_AUFS_INO_T_64
11011+ __u32 a[2];
11012+#else
11013+ __u32 a[1];
11014+#endif
11015+ ino_t ino;
11016+};
11017+
11018+static ino_t decode_ino(__u32 *a)
11019+{
11020+ union conv u;
11021+
11022+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
11023+ u.a[0] = a[0];
11024+#ifdef CONFIG_AUFS_INO_T_64
11025+ u.a[1] = a[1];
11026+#endif
11027+ return u.ino;
11028+}
11029+
11030+static void encode_ino(__u32 *a, ino_t ino)
11031+{
11032+ union conv u;
11033+
11034+ u.ino = ino;
11035+ a[0] = u.a[0];
11036+#ifdef CONFIG_AUFS_INO_T_64
11037+ a[1] = u.a[1];
11038+#endif
11039+}
11040+
11041+/* NFS file handle */
11042+enum {
11043+ Fh_br_id,
11044+ Fh_sigen,
11045+#ifdef CONFIG_AUFS_INO_T_64
11046+ /* support 64bit inode number */
11047+ Fh_ino1,
11048+ Fh_ino2,
11049+ Fh_dir_ino1,
11050+ Fh_dir_ino2,
11051+#else
11052+ Fh_ino1,
11053+ Fh_dir_ino1,
11054+#endif
11055+ Fh_igen,
11056+ Fh_h_type,
11057+ Fh_tail,
11058+
11059+ Fh_ino = Fh_ino1,
11060+ Fh_dir_ino = Fh_dir_ino1
11061+};
11062+
11063+static int au_test_anon(struct dentry *dentry)
11064+{
027c5e7a 11065+ /* note: read d_flags without d_lock */
4a4d8108
AM
11066+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
11067+}
11068+
a2a7ad62
AM
11069+int au_test_nfsd(void)
11070+{
11071+ int ret;
11072+ struct task_struct *tsk = current;
11073+ char comm[sizeof(tsk->comm)];
11074+
11075+ ret = 0;
11076+ if (tsk->flags & PF_KTHREAD) {
11077+ get_task_comm(comm, tsk);
11078+ ret = !strcmp(comm, "nfsd");
11079+ }
11080+
11081+ return ret;
11082+}
11083+
4a4d8108
AM
11084+/* ---------------------------------------------------------------------- */
11085+/* inode generation external table */
11086+
b752ccd1 11087+void au_xigen_inc(struct inode *inode)
4a4d8108 11088+{
4a4d8108
AM
11089+ loff_t pos;
11090+ ssize_t sz;
11091+ __u32 igen;
11092+ struct super_block *sb;
11093+ struct au_sbinfo *sbinfo;
11094+
4a4d8108 11095+ sb = inode->i_sb;
b752ccd1 11096+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 11097+
b752ccd1 11098+ sbinfo = au_sbi(sb);
1facf9fc 11099+ pos = inode->i_ino;
11100+ pos *= sizeof(igen);
11101+ igen = inode->i_generation + 1;
1facf9fc 11102+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
11103+ sizeof(igen), &pos);
11104+ if (sz == sizeof(igen))
b752ccd1 11105+ return; /* success */
1facf9fc 11106+
b752ccd1 11107+ if (unlikely(sz >= 0))
1facf9fc 11108+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 11109+}
11110+
11111+int au_xigen_new(struct inode *inode)
11112+{
11113+ int err;
11114+ loff_t pos;
11115+ ssize_t sz;
11116+ struct super_block *sb;
11117+ struct au_sbinfo *sbinfo;
11118+ struct file *file;
11119+
11120+ err = 0;
11121+ /* todo: dirty, at mount time */
11122+ if (inode->i_ino == AUFS_ROOT_INO)
11123+ goto out;
11124+ sb = inode->i_sb;
dece6358 11125+ SiMustAnyLock(sb);
1facf9fc 11126+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11127+ goto out;
11128+
11129+ err = -EFBIG;
11130+ pos = inode->i_ino;
11131+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
11132+ AuIOErr1("too large i%lld\n", pos);
11133+ goto out;
11134+ }
11135+ pos *= sizeof(inode->i_generation);
11136+
11137+ err = 0;
11138+ sbinfo = au_sbi(sb);
11139+ file = sbinfo->si_xigen;
11140+ BUG_ON(!file);
11141+
c06a8ce3 11142+ if (vfsub_f_size_read(file)
1facf9fc 11143+ < pos + sizeof(inode->i_generation)) {
11144+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
11145+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
11146+ sizeof(inode->i_generation), &pos);
11147+ } else
11148+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
11149+ sizeof(inode->i_generation), &pos);
11150+ if (sz == sizeof(inode->i_generation))
11151+ goto out; /* success */
11152+
11153+ err = sz;
11154+ if (unlikely(sz >= 0)) {
11155+ err = -EIO;
11156+ AuIOErr("xigen error (%zd)\n", sz);
11157+ }
11158+
4f0767ce 11159+out:
1facf9fc 11160+ return err;
11161+}
11162+
11163+int au_xigen_set(struct super_block *sb, struct file *base)
11164+{
11165+ int err;
11166+ struct au_sbinfo *sbinfo;
11167+ struct file *file;
11168+
dece6358
AM
11169+ SiMustWriteLock(sb);
11170+
1facf9fc 11171+ sbinfo = au_sbi(sb);
11172+ file = au_xino_create2(base, sbinfo->si_xigen);
11173+ err = PTR_ERR(file);
11174+ if (IS_ERR(file))
11175+ goto out;
11176+ err = 0;
11177+ if (sbinfo->si_xigen)
11178+ fput(sbinfo->si_xigen);
11179+ sbinfo->si_xigen = file;
11180+
4f0767ce 11181+out:
1facf9fc 11182+ return err;
11183+}
11184+
11185+void au_xigen_clr(struct super_block *sb)
11186+{
11187+ struct au_sbinfo *sbinfo;
11188+
dece6358
AM
11189+ SiMustWriteLock(sb);
11190+
1facf9fc 11191+ sbinfo = au_sbi(sb);
11192+ if (sbinfo->si_xigen) {
11193+ fput(sbinfo->si_xigen);
11194+ sbinfo->si_xigen = NULL;
11195+ }
11196+}
11197+
11198+/* ---------------------------------------------------------------------- */
11199+
11200+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
11201+ ino_t dir_ino)
11202+{
11203+ struct dentry *dentry, *d;
11204+ struct inode *inode;
11205+ unsigned int sigen;
11206+
11207+ dentry = NULL;
11208+ inode = ilookup(sb, ino);
11209+ if (!inode)
11210+ goto out;
11211+
11212+ dentry = ERR_PTR(-ESTALE);
11213+ sigen = au_sigen(sb);
5afbbe0d 11214+ if (unlikely(au_is_bad_inode(inode)
1facf9fc 11215+ || IS_DEADDIR(inode)
537831f9 11216+ || sigen != au_iigen(inode, NULL)))
1facf9fc 11217+ goto out_iput;
11218+
11219+ dentry = NULL;
11220+ if (!dir_ino || S_ISDIR(inode->i_mode))
11221+ dentry = d_find_alias(inode);
11222+ else {
027c5e7a 11223+ spin_lock(&inode->i_lock);
c1595e42 11224+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 11225+ spin_lock(&d->d_lock);
1facf9fc 11226+ if (!au_test_anon(d)
5527c038 11227+ && d_inode(d->d_parent)->i_ino == dir_ino) {
027c5e7a
AM
11228+ dentry = dget_dlock(d);
11229+ spin_unlock(&d->d_lock);
1facf9fc 11230+ break;
11231+ }
027c5e7a
AM
11232+ spin_unlock(&d->d_lock);
11233+ }
11234+ spin_unlock(&inode->i_lock);
1facf9fc 11235+ }
027c5e7a 11236+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 11237+ /* need to refresh */
1facf9fc 11238+ dput(dentry);
2cbb1c4b 11239+ dentry = NULL;
1facf9fc 11240+ }
11241+
4f0767ce 11242+out_iput:
1facf9fc 11243+ iput(inode);
4f0767ce 11244+out:
2cbb1c4b 11245+ AuTraceErrPtr(dentry);
1facf9fc 11246+ return dentry;
11247+}
11248+
11249+/* ---------------------------------------------------------------------- */
11250+
11251+/* todo: dirty? */
11252+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
11253+
11254+struct au_compare_mnt_args {
11255+ /* input */
11256+ struct super_block *sb;
11257+
11258+ /* output */
11259+ struct vfsmount *mnt;
11260+};
11261+
11262+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
11263+{
11264+ struct au_compare_mnt_args *a = arg;
11265+
11266+ if (mnt->mnt_sb != a->sb)
11267+ return 0;
11268+ a->mnt = mntget(mnt);
11269+ return 1;
11270+}
11271+
1facf9fc 11272+static struct vfsmount *au_mnt_get(struct super_block *sb)
11273+{
4a4d8108 11274+ int err;
7eafdf33 11275+ struct path root;
4a4d8108
AM
11276+ struct au_compare_mnt_args args = {
11277+ .sb = sb
11278+ };
1facf9fc 11279+
7eafdf33 11280+ get_fs_root(current->fs, &root);
523b37e3 11281+ rcu_read_lock();
7eafdf33 11282+ err = iterate_mounts(au_compare_mnt, &args, root.mnt);
523b37e3 11283+ rcu_read_unlock();
7eafdf33 11284+ path_put(&root);
4a4d8108
AM
11285+ AuDebugOn(!err);
11286+ AuDebugOn(!args.mnt);
11287+ return args.mnt;
1facf9fc 11288+}
11289+
11290+struct au_nfsd_si_lock {
4a4d8108 11291+ unsigned int sigen;
027c5e7a 11292+ aufs_bindex_t bindex, br_id;
1facf9fc 11293+ unsigned char force_lock;
11294+};
11295+
027c5e7a
AM
11296+static int si_nfsd_read_lock(struct super_block *sb,
11297+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11298+{
027c5e7a 11299+ int err;
1facf9fc 11300+ aufs_bindex_t bindex;
11301+
11302+ si_read_lock(sb, AuLock_FLUSH);
11303+
11304+ /* branch id may be wrapped around */
027c5e7a 11305+ err = 0;
1facf9fc 11306+ bindex = au_br_index(sb, nsi_lock->br_id);
11307+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
11308+ goto out; /* success */
11309+
027c5e7a
AM
11310+ err = -ESTALE;
11311+ bindex = -1;
1facf9fc 11312+ if (!nsi_lock->force_lock)
11313+ si_read_unlock(sb);
1facf9fc 11314+
4f0767ce 11315+out:
027c5e7a
AM
11316+ nsi_lock->bindex = bindex;
11317+ return err;
1facf9fc 11318+}
11319+
11320+struct find_name_by_ino {
392086de 11321+ struct dir_context ctx;
1facf9fc 11322+ int called, found;
11323+ ino_t ino;
11324+ char *name;
11325+ int namelen;
11326+};
11327+
11328+static int
392086de
AM
11329+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
11330+ loff_t offset, u64 ino, unsigned int d_type)
1facf9fc 11331+{
392086de
AM
11332+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
11333+ ctx);
1facf9fc 11334+
11335+ a->called++;
11336+ if (a->ino != ino)
11337+ return 0;
11338+
11339+ memcpy(a->name, name, namelen);
11340+ a->namelen = namelen;
11341+ a->found = 1;
11342+ return 1;
11343+}
11344+
11345+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
11346+ struct au_nfsd_si_lock *nsi_lock)
11347+{
11348+ struct dentry *dentry, *parent;
11349+ struct file *file;
11350+ struct inode *dir;
392086de
AM
11351+ struct find_name_by_ino arg = {
11352+ .ctx = {
2000de60 11353+ .actor = find_name_by_ino
392086de
AM
11354+ }
11355+ };
1facf9fc 11356+ int err;
11357+
11358+ parent = path->dentry;
11359+ if (nsi_lock)
11360+ si_read_unlock(parent->d_sb);
4a4d8108 11361+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 11362+ dentry = (void *)file;
11363+ if (IS_ERR(file))
11364+ goto out;
11365+
11366+ dentry = ERR_PTR(-ENOMEM);
537831f9 11367+ arg.name = (void *)__get_free_page(GFP_NOFS);
1facf9fc 11368+ if (unlikely(!arg.name))
11369+ goto out_file;
11370+ arg.ino = ino;
11371+ arg.found = 0;
11372+ do {
11373+ arg.called = 0;
11374+ /* smp_mb(); */
392086de 11375+ err = vfsub_iterate_dir(file, &arg.ctx);
1facf9fc 11376+ } while (!err && !arg.found && arg.called);
11377+ dentry = ERR_PTR(err);
11378+ if (unlikely(err))
11379+ goto out_name;
1716fcea
AM
11380+ /* instead of ENOENT */
11381+ dentry = ERR_PTR(-ESTALE);
1facf9fc 11382+ if (!arg.found)
11383+ goto out_name;
11384+
b4510431 11385+ /* do not call vfsub_lkup_one() */
5527c038 11386+ dir = d_inode(parent);
febd17d6 11387+ dentry = vfsub_lookup_one_len_unlocked(arg.name, parent, arg.namelen);
1facf9fc 11388+ AuTraceErrPtr(dentry);
11389+ if (IS_ERR(dentry))
11390+ goto out_name;
11391+ AuDebugOn(au_test_anon(dentry));
5527c038 11392+ if (unlikely(d_really_is_negative(dentry))) {
1facf9fc 11393+ dput(dentry);
11394+ dentry = ERR_PTR(-ENOENT);
11395+ }
11396+
4f0767ce 11397+out_name:
f0c0a007 11398+ au_delayed_free_page((unsigned long)arg.name);
4f0767ce 11399+out_file:
1facf9fc 11400+ fput(file);
4f0767ce 11401+out:
1facf9fc 11402+ if (unlikely(nsi_lock
11403+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
11404+ if (!IS_ERR(dentry)) {
11405+ dput(dentry);
11406+ dentry = ERR_PTR(-ESTALE);
11407+ }
11408+ AuTraceErrPtr(dentry);
11409+ return dentry;
11410+}
11411+
11412+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
11413+ ino_t dir_ino,
11414+ struct au_nfsd_si_lock *nsi_lock)
11415+{
11416+ struct dentry *dentry;
11417+ struct path path;
11418+
11419+ if (dir_ino != AUFS_ROOT_INO) {
11420+ path.dentry = decode_by_ino(sb, dir_ino, 0);
11421+ dentry = path.dentry;
11422+ if (!path.dentry || IS_ERR(path.dentry))
11423+ goto out;
11424+ AuDebugOn(au_test_anon(path.dentry));
11425+ } else
11426+ path.dentry = dget(sb->s_root);
11427+
11428+ path.mnt = au_mnt_get(sb);
11429+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
11430+ path_put(&path);
11431+
4f0767ce 11432+out:
1facf9fc 11433+ AuTraceErrPtr(dentry);
11434+ return dentry;
11435+}
11436+
11437+/* ---------------------------------------------------------------------- */
11438+
11439+static int h_acceptable(void *expv, struct dentry *dentry)
11440+{
11441+ return 1;
11442+}
11443+
11444+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
11445+ char *buf, int len, struct super_block *sb)
11446+{
11447+ char *p;
11448+ int n;
11449+ struct path path;
11450+
11451+ p = d_path(h_rootpath, buf, len);
11452+ if (IS_ERR(p))
11453+ goto out;
11454+ n = strlen(p);
11455+
11456+ path.mnt = h_rootpath->mnt;
11457+ path.dentry = h_parent;
11458+ p = d_path(&path, buf, len);
11459+ if (IS_ERR(p))
11460+ goto out;
11461+ if (n != 1)
11462+ p += n;
11463+
11464+ path.mnt = au_mnt_get(sb);
11465+ path.dentry = sb->s_root;
11466+ p = d_path(&path, buf, len - strlen(p));
11467+ mntput(path.mnt);
11468+ if (IS_ERR(p))
11469+ goto out;
11470+ if (n != 1)
11471+ p[strlen(p)] = '/';
11472+
4f0767ce 11473+out:
1facf9fc 11474+ AuTraceErrPtr(p);
11475+ return p;
11476+}
11477+
11478+static
027c5e7a
AM
11479+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
11480+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11481+{
11482+ struct dentry *dentry, *h_parent, *root;
11483+ struct super_block *h_sb;
11484+ char *pathname, *p;
11485+ struct vfsmount *h_mnt;
11486+ struct au_branch *br;
11487+ int err;
11488+ struct path path;
11489+
027c5e7a 11490+ br = au_sbr(sb, nsi_lock->bindex);
86dc4139 11491+ h_mnt = au_br_mnt(br);
1facf9fc 11492+ h_sb = h_mnt->mnt_sb;
11493+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
5afbbe0d 11494+ lockdep_off();
1facf9fc 11495+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
11496+ fh_len - Fh_tail, fh[Fh_h_type],
11497+ h_acceptable, /*context*/NULL);
5afbbe0d 11498+ lockdep_on();
1facf9fc 11499+ dentry = h_parent;
11500+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
11501+ AuWarn1("%s decode_fh failed, %ld\n",
11502+ au_sbtype(h_sb), PTR_ERR(h_parent));
11503+ goto out;
11504+ }
11505+ dentry = NULL;
11506+ if (unlikely(au_test_anon(h_parent))) {
11507+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
11508+ au_sbtype(h_sb));
11509+ goto out_h_parent;
11510+ }
11511+
11512+ dentry = ERR_PTR(-ENOMEM);
11513+ pathname = (void *)__get_free_page(GFP_NOFS);
11514+ if (unlikely(!pathname))
11515+ goto out_h_parent;
11516+
11517+ root = sb->s_root;
11518+ path.mnt = h_mnt;
11519+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 11520+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 11521+ di_read_unlock(root, !AuLock_IR);
11522+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
11523+ dentry = (void *)p;
11524+ if (IS_ERR(p))
11525+ goto out_pathname;
11526+
11527+ si_read_unlock(sb);
11528+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
11529+ dentry = ERR_PTR(err);
11530+ if (unlikely(err))
11531+ goto out_relock;
11532+
11533+ dentry = ERR_PTR(-ENOENT);
11534+ AuDebugOn(au_test_anon(path.dentry));
5527c038 11535+ if (unlikely(d_really_is_negative(path.dentry)))
1facf9fc 11536+ goto out_path;
11537+
5527c038 11538+ if (ino != d_inode(path.dentry)->i_ino)
1facf9fc 11539+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
11540+ else
11541+ dentry = dget(path.dentry);
11542+
4f0767ce 11543+out_path:
1facf9fc 11544+ path_put(&path);
4f0767ce 11545+out_relock:
1facf9fc 11546+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
11547+ if (!IS_ERR(dentry)) {
11548+ dput(dentry);
11549+ dentry = ERR_PTR(-ESTALE);
11550+ }
4f0767ce 11551+out_pathname:
f0c0a007 11552+ au_delayed_free_page((unsigned long)pathname);
4f0767ce 11553+out_h_parent:
1facf9fc 11554+ dput(h_parent);
4f0767ce 11555+out:
1facf9fc 11556+ AuTraceErrPtr(dentry);
11557+ return dentry;
11558+}
11559+
11560+/* ---------------------------------------------------------------------- */
11561+
11562+static struct dentry *
11563+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
11564+ int fh_type)
11565+{
11566+ struct dentry *dentry;
11567+ __u32 *fh = fid->raw;
027c5e7a 11568+ struct au_branch *br;
1facf9fc 11569+ ino_t ino, dir_ino;
1facf9fc 11570+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 11571+ .force_lock = 0
11572+ };
11573+
1facf9fc 11574+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
11575+ /* it should never happen, but the file handle is unreliable */
11576+ if (unlikely(fh_len < Fh_tail))
11577+ goto out;
11578+ nsi_lock.sigen = fh[Fh_sigen];
11579+ nsi_lock.br_id = fh[Fh_br_id];
11580+
1facf9fc 11581+ /* branch id may be wrapped around */
027c5e7a
AM
11582+ br = NULL;
11583+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 11584+ goto out;
11585+ nsi_lock.force_lock = 1;
11586+
11587+ /* is this inode still cached? */
11588+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
11589+ /* it should never happen */
11590+ if (unlikely(ino == AUFS_ROOT_INO))
8cdd5066 11591+ goto out_unlock;
4a4d8108 11592+
1facf9fc 11593+ dir_ino = decode_ino(fh + Fh_dir_ino);
11594+ dentry = decode_by_ino(sb, ino, dir_ino);
11595+ if (IS_ERR(dentry))
11596+ goto out_unlock;
11597+ if (dentry)
11598+ goto accept;
11599+
11600+ /* is the parent dir cached? */
027c5e7a 11601+ br = au_sbr(sb, nsi_lock.bindex);
5afbbe0d 11602+ au_br_get(br);
1facf9fc 11603+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
11604+ if (IS_ERR(dentry))
11605+ goto out_unlock;
11606+ if (dentry)
11607+ goto accept;
11608+
11609+ /* lookup path */
027c5e7a 11610+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 11611+ if (IS_ERR(dentry))
11612+ goto out_unlock;
11613+ if (unlikely(!dentry))
11614+ /* todo?: make it ESTALE */
11615+ goto out_unlock;
11616+
4f0767ce 11617+accept:
027c5e7a 11618+ if (!au_digen_test(dentry, au_sigen(sb))
5527c038 11619+ && d_inode(dentry)->i_generation == fh[Fh_igen])
1facf9fc 11620+ goto out_unlock; /* success */
11621+
11622+ dput(dentry);
11623+ dentry = ERR_PTR(-ESTALE);
4f0767ce 11624+out_unlock:
027c5e7a 11625+ if (br)
5afbbe0d 11626+ au_br_put(br);
1facf9fc 11627+ si_read_unlock(sb);
4f0767ce 11628+out:
1facf9fc 11629+ AuTraceErrPtr(dentry);
11630+ return dentry;
11631+}
11632+
11633+#if 0 /* reserved for future use */
11634+/* support subtreecheck option */
11635+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
11636+ int fh_len, int fh_type)
11637+{
11638+ struct dentry *parent;
11639+ __u32 *fh = fid->raw;
11640+ ino_t dir_ino;
11641+
11642+ dir_ino = decode_ino(fh + Fh_dir_ino);
11643+ parent = decode_by_ino(sb, dir_ino, 0);
11644+ if (IS_ERR(parent))
11645+ goto out;
11646+ if (!parent)
11647+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
11648+ dir_ino, fh, fh_len);
11649+
4f0767ce 11650+out:
1facf9fc 11651+ AuTraceErrPtr(parent);
11652+ return parent;
11653+}
11654+#endif
11655+
11656+/* ---------------------------------------------------------------------- */
11657+
0c3ec466
AM
11658+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
11659+ struct inode *dir)
1facf9fc 11660+{
11661+ int err;
0c3ec466 11662+ aufs_bindex_t bindex;
1facf9fc 11663+ struct super_block *sb, *h_sb;
0c3ec466
AM
11664+ struct dentry *dentry, *parent, *h_parent;
11665+ struct inode *h_dir;
1facf9fc 11666+ struct au_branch *br;
11667+
1facf9fc 11668+ err = -ENOSPC;
11669+ if (unlikely(*max_len <= Fh_tail)) {
11670+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
11671+ goto out;
11672+ }
11673+
11674+ err = FILEID_ROOT;
0c3ec466
AM
11675+ if (inode->i_ino == AUFS_ROOT_INO) {
11676+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
1facf9fc 11677+ goto out;
11678+ }
11679+
1facf9fc 11680+ h_parent = NULL;
0c3ec466
AM
11681+ sb = inode->i_sb;
11682+ err = si_read_lock(sb, AuLock_FLUSH);
027c5e7a
AM
11683+ if (unlikely(err))
11684+ goto out;
11685+
1facf9fc 11686+#ifdef CONFIG_AUFS_DEBUG
11687+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11688+ AuWarn1("NFS-exporting requires xino\n");
11689+#endif
027c5e7a 11690+ err = -EIO;
0c3ec466
AM
11691+ parent = NULL;
11692+ ii_read_lock_child(inode);
5afbbe0d 11693+ bindex = au_ibtop(inode);
0c3ec466 11694+ if (!dir) {
c1595e42 11695+ dentry = d_find_any_alias(inode);
0c3ec466
AM
11696+ if (unlikely(!dentry))
11697+ goto out_unlock;
11698+ AuDebugOn(au_test_anon(dentry));
11699+ parent = dget_parent(dentry);
11700+ dput(dentry);
11701+ if (unlikely(!parent))
11702+ goto out_unlock;
5527c038
JR
11703+ if (d_really_is_positive(parent))
11704+ dir = d_inode(parent);
1facf9fc 11705+ }
0c3ec466
AM
11706+
11707+ ii_read_lock_parent(dir);
11708+ h_dir = au_h_iptr(dir, bindex);
11709+ ii_read_unlock(dir);
11710+ if (unlikely(!h_dir))
11711+ goto out_parent;
c1595e42 11712+ h_parent = d_find_any_alias(h_dir);
1facf9fc 11713+ if (unlikely(!h_parent))
0c3ec466 11714+ goto out_hparent;
1facf9fc 11715+
11716+ err = -EPERM;
11717+ br = au_sbr(sb, bindex);
86dc4139 11718+ h_sb = au_br_sb(br);
1facf9fc 11719+ if (unlikely(!h_sb->s_export_op)) {
11720+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
0c3ec466 11721+ goto out_hparent;
1facf9fc 11722+ }
11723+
11724+ fh[Fh_br_id] = br->br_id;
11725+ fh[Fh_sigen] = au_sigen(sb);
11726+ encode_ino(fh + Fh_ino, inode->i_ino);
0c3ec466 11727+ encode_ino(fh + Fh_dir_ino, dir->i_ino);
1facf9fc 11728+ fh[Fh_igen] = inode->i_generation;
11729+
11730+ *max_len -= Fh_tail;
11731+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
11732+ max_len,
11733+ /*connectable or subtreecheck*/0);
11734+ err = fh[Fh_h_type];
11735+ *max_len += Fh_tail;
11736+ /* todo: macros? */
1716fcea 11737+ if (err != FILEID_INVALID)
1facf9fc 11738+ err = 99;
11739+ else
11740+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
11741+
0c3ec466 11742+out_hparent:
1facf9fc 11743+ dput(h_parent);
0c3ec466 11744+out_parent:
1facf9fc 11745+ dput(parent);
0c3ec466
AM
11746+out_unlock:
11747+ ii_read_unlock(inode);
11748+ si_read_unlock(sb);
4f0767ce 11749+out:
1facf9fc 11750+ if (unlikely(err < 0))
1716fcea 11751+ err = FILEID_INVALID;
1facf9fc 11752+ return err;
11753+}
11754+
11755+/* ---------------------------------------------------------------------- */
11756+
4a4d8108
AM
11757+static int aufs_commit_metadata(struct inode *inode)
11758+{
11759+ int err;
11760+ aufs_bindex_t bindex;
11761+ struct super_block *sb;
11762+ struct inode *h_inode;
11763+ int (*f)(struct inode *inode);
11764+
11765+ sb = inode->i_sb;
e49829fe 11766+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 11767+ ii_write_lock_child(inode);
5afbbe0d 11768+ bindex = au_ibtop(inode);
4a4d8108
AM
11769+ AuDebugOn(bindex < 0);
11770+ h_inode = au_h_iptr(inode, bindex);
11771+
11772+ f = h_inode->i_sb->s_export_op->commit_metadata;
11773+ if (f)
11774+ err = f(h_inode);
11775+ else {
11776+ struct writeback_control wbc = {
11777+ .sync_mode = WB_SYNC_ALL,
11778+ .nr_to_write = 0 /* metadata only */
11779+ };
11780+
11781+ err = sync_inode(h_inode, &wbc);
11782+ }
11783+
11784+ au_cpup_attr_timesizes(inode);
11785+ ii_write_unlock(inode);
11786+ si_read_unlock(sb);
11787+ return err;
11788+}
11789+
11790+/* ---------------------------------------------------------------------- */
11791+
1facf9fc 11792+static struct export_operations aufs_export_op = {
4a4d8108 11793+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 11794+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
11795+ .encode_fh = aufs_encode_fh,
11796+ .commit_metadata = aufs_commit_metadata
1facf9fc 11797+};
11798+
11799+void au_export_init(struct super_block *sb)
11800+{
11801+ struct au_sbinfo *sbinfo;
11802+ __u32 u;
11803+
5afbbe0d
AM
11804+ BUILD_BUG_ON_MSG(IS_BUILTIN(CONFIG_AUFS_FS)
11805+ && IS_MODULE(CONFIG_EXPORTFS),
11806+ AUFS_NAME ": unsupported configuration "
11807+ "CONFIG_EXPORTFS=m and CONFIG_AUFS_FS=y");
11808+
1facf9fc 11809+ sb->s_export_op = &aufs_export_op;
11810+ sbinfo = au_sbi(sb);
11811+ sbinfo->si_xigen = NULL;
11812+ get_random_bytes(&u, sizeof(u));
11813+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
11814+ atomic_set(&sbinfo->si_xigen_next, u);
11815+}
076b876e
AM
11816diff -urN /usr/share/empty/fs/aufs/fhsm.c linux/fs/aufs/fhsm.c
11817--- /usr/share/empty/fs/aufs/fhsm.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 11818+++ linux/fs/aufs/fhsm.c 2016-10-09 16:55:36.489368218 +0200
c1595e42 11819@@ -0,0 +1,426 @@
076b876e 11820+/*
8cdd5066 11821+ * Copyright (C) 2011-2016 Junjiro R. Okajima
076b876e
AM
11822+ *
11823+ * This program, aufs is free software; you can redistribute it and/or modify
11824+ * it under the terms of the GNU General Public License as published by
11825+ * the Free Software Foundation; either version 2 of the License, or
11826+ * (at your option) any later version.
11827+ *
11828+ * This program is distributed in the hope that it will be useful,
11829+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11830+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11831+ * GNU General Public License for more details.
11832+ *
11833+ * You should have received a copy of the GNU General Public License
11834+ * along with this program; if not, write to the Free Software
11835+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11836+ */
11837+
11838+/*
11839+ * File-based Hierarchy Storage Management
11840+ */
11841+
11842+#include <linux/anon_inodes.h>
11843+#include <linux/poll.h>
11844+#include <linux/seq_file.h>
11845+#include <linux/statfs.h>
11846+#include "aufs.h"
11847+
c1595e42
JR
11848+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
11849+{
11850+ struct au_sbinfo *sbinfo;
11851+ struct au_fhsm *fhsm;
11852+
11853+ SiMustAnyLock(sb);
11854+
11855+ sbinfo = au_sbi(sb);
11856+ fhsm = &sbinfo->si_fhsm;
11857+ AuDebugOn(!fhsm);
11858+ return fhsm->fhsm_bottom;
11859+}
11860+
11861+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
11862+{
11863+ struct au_sbinfo *sbinfo;
11864+ struct au_fhsm *fhsm;
11865+
11866+ SiMustWriteLock(sb);
11867+
11868+ sbinfo = au_sbi(sb);
11869+ fhsm = &sbinfo->si_fhsm;
11870+ AuDebugOn(!fhsm);
11871+ fhsm->fhsm_bottom = bindex;
11872+}
11873+
11874+/* ---------------------------------------------------------------------- */
11875+
076b876e
AM
11876+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
11877+{
11878+ struct au_br_fhsm *bf;
11879+
11880+ bf = br->br_fhsm;
11881+ MtxMustLock(&bf->bf_lock);
11882+
11883+ return !bf->bf_readable
11884+ || time_after(jiffies,
11885+ bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
11886+}
11887+
11888+/* ---------------------------------------------------------------------- */
11889+
11890+static void au_fhsm_notify(struct super_block *sb, int val)
11891+{
11892+ struct au_sbinfo *sbinfo;
11893+ struct au_fhsm *fhsm;
11894+
11895+ SiMustAnyLock(sb);
11896+
11897+ sbinfo = au_sbi(sb);
11898+ fhsm = &sbinfo->si_fhsm;
11899+ if (au_fhsm_pid(fhsm)
11900+ && atomic_read(&fhsm->fhsm_readable) != -1) {
11901+ atomic_set(&fhsm->fhsm_readable, val);
11902+ if (val)
11903+ wake_up(&fhsm->fhsm_wqh);
11904+ }
11905+}
11906+
11907+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
11908+ struct aufs_stfs *rstfs, int do_lock, int do_notify)
11909+{
11910+ int err;
11911+ struct au_branch *br;
11912+ struct au_br_fhsm *bf;
11913+
11914+ br = au_sbr(sb, bindex);
11915+ AuDebugOn(au_br_rdonly(br));
11916+ bf = br->br_fhsm;
11917+ AuDebugOn(!bf);
11918+
11919+ if (do_lock)
11920+ mutex_lock(&bf->bf_lock);
11921+ else
11922+ MtxMustLock(&bf->bf_lock);
11923+
11924+ /* sb->s_root for NFS is unreliable */
11925+ err = au_br_stfs(br, &bf->bf_stfs);
11926+ if (unlikely(err)) {
11927+ AuErr1("FHSM failed (%d), b%d, ignored.\n", bindex, err);
11928+ goto out;
11929+ }
11930+
11931+ bf->bf_jiffy = jiffies;
11932+ bf->bf_readable = 1;
11933+ if (do_notify)
11934+ au_fhsm_notify(sb, /*val*/1);
11935+ if (rstfs)
11936+ *rstfs = bf->bf_stfs;
11937+
11938+out:
11939+ if (do_lock)
11940+ mutex_unlock(&bf->bf_lock);
11941+ au_fhsm_notify(sb, /*val*/1);
11942+
11943+ return err;
11944+}
11945+
11946+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
11947+{
11948+ int err;
076b876e
AM
11949+ struct au_sbinfo *sbinfo;
11950+ struct au_fhsm *fhsm;
11951+ struct au_branch *br;
11952+ struct au_br_fhsm *bf;
11953+
11954+ AuDbg("b%d, force %d\n", bindex, force);
11955+ SiMustAnyLock(sb);
11956+
11957+ sbinfo = au_sbi(sb);
11958+ fhsm = &sbinfo->si_fhsm;
c1595e42
JR
11959+ if (!au_ftest_si(sbinfo, FHSM)
11960+ || fhsm->fhsm_bottom == bindex)
076b876e
AM
11961+ return;
11962+
11963+ br = au_sbr(sb, bindex);
11964+ bf = br->br_fhsm;
11965+ AuDebugOn(!bf);
11966+ mutex_lock(&bf->bf_lock);
11967+ if (force
11968+ || au_fhsm_pid(fhsm)
11969+ || au_fhsm_test_jiffy(sbinfo, br))
11970+ err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
11971+ /*do_notify*/1);
11972+ mutex_unlock(&bf->bf_lock);
11973+}
11974+
11975+void au_fhsm_wrote_all(struct super_block *sb, int force)
11976+{
5afbbe0d 11977+ aufs_bindex_t bindex, bbot;
076b876e
AM
11978+ struct au_branch *br;
11979+
11980+ /* exclude the bottom */
5afbbe0d
AM
11981+ bbot = au_fhsm_bottom(sb);
11982+ for (bindex = 0; bindex < bbot; bindex++) {
076b876e
AM
11983+ br = au_sbr(sb, bindex);
11984+ if (au_br_fhsm(br->br_perm))
11985+ au_fhsm_wrote(sb, bindex, force);
11986+ }
11987+}
11988+
11989+/* ---------------------------------------------------------------------- */
11990+
11991+static unsigned int au_fhsm_poll(struct file *file,
11992+ struct poll_table_struct *wait)
11993+{
11994+ unsigned int mask;
11995+ struct au_sbinfo *sbinfo;
11996+ struct au_fhsm *fhsm;
11997+
11998+ mask = 0;
11999+ sbinfo = file->private_data;
12000+ fhsm = &sbinfo->si_fhsm;
12001+ poll_wait(file, &fhsm->fhsm_wqh, wait);
12002+ if (atomic_read(&fhsm->fhsm_readable))
12003+ mask = POLLIN /* | POLLRDNORM */;
12004+
12005+ AuTraceErr((int)mask);
12006+ return mask;
12007+}
12008+
12009+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
12010+ struct aufs_stfs *stfs, __s16 brid)
12011+{
12012+ int err;
12013+
12014+ err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
12015+ if (!err)
12016+ err = __put_user(brid, &stbr->brid);
12017+ if (unlikely(err))
12018+ err = -EFAULT;
12019+
12020+ return err;
12021+}
12022+
12023+static ssize_t au_fhsm_do_read(struct super_block *sb,
12024+ struct aufs_stbr __user *stbr, size_t count)
12025+{
12026+ ssize_t err;
12027+ int nstbr;
5afbbe0d 12028+ aufs_bindex_t bindex, bbot;
076b876e
AM
12029+ struct au_branch *br;
12030+ struct au_br_fhsm *bf;
12031+
12032+ /* except the bottom branch */
12033+ err = 0;
12034+ nstbr = 0;
5afbbe0d
AM
12035+ bbot = au_fhsm_bottom(sb);
12036+ for (bindex = 0; !err && bindex < bbot; bindex++) {
076b876e
AM
12037+ br = au_sbr(sb, bindex);
12038+ if (!au_br_fhsm(br->br_perm))
12039+ continue;
12040+
12041+ bf = br->br_fhsm;
12042+ mutex_lock(&bf->bf_lock);
12043+ if (bf->bf_readable) {
12044+ err = -EFAULT;
12045+ if (count >= sizeof(*stbr))
12046+ err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
12047+ br->br_id);
12048+ if (!err) {
12049+ bf->bf_readable = 0;
12050+ count -= sizeof(*stbr);
12051+ nstbr++;
12052+ }
12053+ }
12054+ mutex_unlock(&bf->bf_lock);
12055+ }
12056+ if (!err)
12057+ err = sizeof(*stbr) * nstbr;
12058+
12059+ return err;
12060+}
12061+
12062+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
12063+ loff_t *pos)
12064+{
12065+ ssize_t err;
12066+ int readable;
5afbbe0d 12067+ aufs_bindex_t nfhsm, bindex, bbot;
076b876e
AM
12068+ struct au_sbinfo *sbinfo;
12069+ struct au_fhsm *fhsm;
12070+ struct au_branch *br;
12071+ struct super_block *sb;
12072+
12073+ err = 0;
12074+ sbinfo = file->private_data;
12075+ fhsm = &sbinfo->si_fhsm;
12076+need_data:
12077+ spin_lock_irq(&fhsm->fhsm_wqh.lock);
12078+ if (!atomic_read(&fhsm->fhsm_readable)) {
12079+ if (vfsub_file_flags(file) & O_NONBLOCK)
12080+ err = -EAGAIN;
12081+ else
12082+ err = wait_event_interruptible_locked_irq
12083+ (fhsm->fhsm_wqh,
12084+ atomic_read(&fhsm->fhsm_readable));
12085+ }
12086+ spin_unlock_irq(&fhsm->fhsm_wqh.lock);
12087+ if (unlikely(err))
12088+ goto out;
12089+
12090+ /* sb may already be dead */
12091+ au_rw_read_lock(&sbinfo->si_rwsem);
12092+ readable = atomic_read(&fhsm->fhsm_readable);
12093+ if (readable > 0) {
12094+ sb = sbinfo->si_sb;
12095+ AuDebugOn(!sb);
12096+ /* exclude the bottom branch */
12097+ nfhsm = 0;
5afbbe0d
AM
12098+ bbot = au_fhsm_bottom(sb);
12099+ for (bindex = 0; bindex < bbot; bindex++) {
076b876e
AM
12100+ br = au_sbr(sb, bindex);
12101+ if (au_br_fhsm(br->br_perm))
12102+ nfhsm++;
12103+ }
12104+ err = -EMSGSIZE;
12105+ if (nfhsm * sizeof(struct aufs_stbr) <= count) {
12106+ atomic_set(&fhsm->fhsm_readable, 0);
12107+ err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
12108+ count);
12109+ }
12110+ }
12111+ au_rw_read_unlock(&sbinfo->si_rwsem);
12112+ if (!readable)
12113+ goto need_data;
12114+
12115+out:
12116+ return err;
12117+}
12118+
12119+static int au_fhsm_release(struct inode *inode, struct file *file)
12120+{
12121+ struct au_sbinfo *sbinfo;
12122+ struct au_fhsm *fhsm;
12123+
12124+ /* sb may already be dead */
12125+ sbinfo = file->private_data;
12126+ fhsm = &sbinfo->si_fhsm;
12127+ spin_lock(&fhsm->fhsm_spin);
12128+ fhsm->fhsm_pid = 0;
12129+ spin_unlock(&fhsm->fhsm_spin);
12130+ kobject_put(&sbinfo->si_kobj);
12131+
12132+ return 0;
12133+}
12134+
12135+static const struct file_operations au_fhsm_fops = {
12136+ .owner = THIS_MODULE,
12137+ .llseek = noop_llseek,
12138+ .read = au_fhsm_read,
12139+ .poll = au_fhsm_poll,
12140+ .release = au_fhsm_release
12141+};
12142+
12143+int au_fhsm_fd(struct super_block *sb, int oflags)
12144+{
12145+ int err, fd;
12146+ struct au_sbinfo *sbinfo;
12147+ struct au_fhsm *fhsm;
12148+
12149+ err = -EPERM;
12150+ if (unlikely(!capable(CAP_SYS_ADMIN)))
12151+ goto out;
12152+
12153+ err = -EINVAL;
12154+ if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
12155+ goto out;
12156+
12157+ err = 0;
12158+ sbinfo = au_sbi(sb);
12159+ fhsm = &sbinfo->si_fhsm;
12160+ spin_lock(&fhsm->fhsm_spin);
12161+ if (!fhsm->fhsm_pid)
12162+ fhsm->fhsm_pid = current->pid;
12163+ else
12164+ err = -EBUSY;
12165+ spin_unlock(&fhsm->fhsm_spin);
12166+ if (unlikely(err))
12167+ goto out;
12168+
12169+ oflags |= O_RDONLY;
12170+ /* oflags |= FMODE_NONOTIFY; */
12171+ fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
12172+ err = fd;
12173+ if (unlikely(fd < 0))
12174+ goto out_pid;
12175+
12176+ /* succeed regardless of 'fhsm' status */
12177+ kobject_get(&sbinfo->si_kobj);
12178+ si_noflush_read_lock(sb);
12179+ if (au_ftest_si(sbinfo, FHSM))
12180+ au_fhsm_wrote_all(sb, /*force*/0);
12181+ si_read_unlock(sb);
12182+ goto out; /* success */
12183+
12184+out_pid:
12185+ spin_lock(&fhsm->fhsm_spin);
12186+ fhsm->fhsm_pid = 0;
12187+ spin_unlock(&fhsm->fhsm_spin);
12188+out:
12189+ AuTraceErr(err);
12190+ return err;
12191+}
12192+
12193+/* ---------------------------------------------------------------------- */
12194+
12195+int au_fhsm_br_alloc(struct au_branch *br)
12196+{
12197+ int err;
12198+
12199+ err = 0;
12200+ br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
12201+ if (br->br_fhsm)
12202+ au_br_fhsm_init(br->br_fhsm);
12203+ else
12204+ err = -ENOMEM;
12205+
12206+ return err;
12207+}
12208+
12209+/* ---------------------------------------------------------------------- */
12210+
12211+void au_fhsm_fin(struct super_block *sb)
12212+{
12213+ au_fhsm_notify(sb, /*val*/-1);
12214+}
12215+
12216+void au_fhsm_init(struct au_sbinfo *sbinfo)
12217+{
12218+ struct au_fhsm *fhsm;
12219+
12220+ fhsm = &sbinfo->si_fhsm;
12221+ spin_lock_init(&fhsm->fhsm_spin);
12222+ init_waitqueue_head(&fhsm->fhsm_wqh);
12223+ atomic_set(&fhsm->fhsm_readable, 0);
12224+ fhsm->fhsm_expire
12225+ = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
c1595e42 12226+ fhsm->fhsm_bottom = -1;
076b876e
AM
12227+}
12228+
12229+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
12230+{
12231+ sbinfo->si_fhsm.fhsm_expire
12232+ = msecs_to_jiffies(sec * MSEC_PER_SEC);
12233+}
12234+
12235+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
12236+{
12237+ unsigned int u;
12238+
12239+ if (!au_ftest_si(sbinfo, FHSM))
12240+ return;
12241+
12242+ u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
12243+ if (u != AUFS_FHSM_CACHE_DEF_SEC)
12244+ seq_printf(seq, ",fhsm_sec=%u", u);
12245+}
7f207e10
AM
12246diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
12247--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
12248+++ linux/fs/aufs/file.c 2016-10-09 16:55:38.889431135 +0200
12249@@ -0,0 +1,857 @@
1facf9fc 12250+/*
8cdd5066 12251+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 12252+ *
12253+ * This program, aufs is free software; you can redistribute it and/or modify
12254+ * it under the terms of the GNU General Public License as published by
12255+ * the Free Software Foundation; either version 2 of the License, or
12256+ * (at your option) any later version.
dece6358
AM
12257+ *
12258+ * This program is distributed in the hope that it will be useful,
12259+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12260+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12261+ * GNU General Public License for more details.
12262+ *
12263+ * You should have received a copy of the GNU General Public License
523b37e3 12264+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 12265+ */
12266+
12267+/*
4a4d8108 12268+ * handling file/dir, and address_space operation
1facf9fc 12269+ */
12270+
7eafdf33
AM
12271+#ifdef CONFIG_AUFS_DEBUG
12272+#include <linux/migrate.h>
12273+#endif
4a4d8108 12274+#include <linux/pagemap.h>
1facf9fc 12275+#include "aufs.h"
12276+
4a4d8108
AM
12277+/* drop flags for writing */
12278+unsigned int au_file_roflags(unsigned int flags)
12279+{
12280+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
12281+ flags |= O_RDONLY | O_NOATIME;
12282+ return flags;
12283+}
12284+
12285+/* common functions to regular file and dir */
12286+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 12287+ struct file *file, int force_wr)
1facf9fc 12288+{
1308ab2a 12289+ struct file *h_file;
4a4d8108
AM
12290+ struct dentry *h_dentry;
12291+ struct inode *h_inode;
12292+ struct super_block *sb;
12293+ struct au_branch *br;
12294+ struct path h_path;
b912730e 12295+ int err;
1facf9fc 12296+
4a4d8108
AM
12297+ /* a race condition can happen between open and unlink/rmdir */
12298+ h_file = ERR_PTR(-ENOENT);
12299+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 12300+ if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
4a4d8108 12301+ goto out;
5527c038 12302+ h_inode = d_inode(h_dentry);
027c5e7a
AM
12303+ spin_lock(&h_dentry->d_lock);
12304+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
5527c038 12305+ /* || !d_inode(dentry)->i_nlink */
027c5e7a
AM
12306+ ;
12307+ spin_unlock(&h_dentry->d_lock);
12308+ if (unlikely(err))
4a4d8108 12309+ goto out;
1facf9fc 12310+
4a4d8108
AM
12311+ sb = dentry->d_sb;
12312+ br = au_sbr(sb, bindex);
b912730e
AM
12313+ err = au_br_test_oflag(flags, br);
12314+ h_file = ERR_PTR(err);
12315+ if (unlikely(err))
027c5e7a 12316+ goto out;
1facf9fc 12317+
4a4d8108 12318+ /* drop flags for writing */
5527c038 12319+ if (au_test_ro(sb, bindex, d_inode(dentry))) {
392086de
AM
12320+ if (force_wr && !(flags & O_WRONLY))
12321+ force_wr = 0;
4a4d8108 12322+ flags = au_file_roflags(flags);
392086de
AM
12323+ if (force_wr) {
12324+ h_file = ERR_PTR(-EROFS);
12325+ flags = au_file_roflags(flags);
12326+ if (unlikely(vfsub_native_ro(h_inode)
12327+ || IS_APPEND(h_inode)))
12328+ goto out;
12329+ flags &= ~O_ACCMODE;
12330+ flags |= O_WRONLY;
12331+ }
12332+ }
4a4d8108 12333+ flags &= ~O_CREAT;
5afbbe0d 12334+ au_br_get(br);
4a4d8108 12335+ h_path.dentry = h_dentry;
86dc4139 12336+ h_path.mnt = au_br_mnt(br);
38d290e6 12337+ h_file = vfsub_dentry_open(&h_path, flags);
4a4d8108
AM
12338+ if (IS_ERR(h_file))
12339+ goto out_br;
dece6358 12340+
b912730e 12341+ if (flags & __FMODE_EXEC) {
4a4d8108
AM
12342+ err = deny_write_access(h_file);
12343+ if (unlikely(err)) {
12344+ fput(h_file);
12345+ h_file = ERR_PTR(err);
12346+ goto out_br;
12347+ }
12348+ }
953406b4 12349+ fsnotify_open(h_file);
4a4d8108 12350+ goto out; /* success */
1facf9fc 12351+
4f0767ce 12352+out_br:
5afbbe0d 12353+ au_br_put(br);
4f0767ce 12354+out:
4a4d8108
AM
12355+ return h_file;
12356+}
1308ab2a 12357+
076b876e
AM
12358+static int au_cmoo(struct dentry *dentry)
12359+{
12360+ int err, cmoo;
12361+ unsigned int udba;
12362+ struct path h_path;
12363+ struct au_pin pin;
12364+ struct au_cp_generic cpg = {
12365+ .dentry = dentry,
12366+ .bdst = -1,
12367+ .bsrc = -1,
12368+ .len = -1,
12369+ .pin = &pin,
12370+ .flags = AuCpup_DTIME | AuCpup_HOPEN
12371+ };
7e9cd9fe 12372+ struct inode *delegated;
076b876e
AM
12373+ struct super_block *sb;
12374+ struct au_sbinfo *sbinfo;
12375+ struct au_fhsm *fhsm;
12376+ pid_t pid;
12377+ struct au_branch *br;
12378+ struct dentry *parent;
12379+ struct au_hinode *hdir;
12380+
12381+ DiMustWriteLock(dentry);
5527c038 12382+ IiMustWriteLock(d_inode(dentry));
076b876e
AM
12383+
12384+ err = 0;
12385+ if (IS_ROOT(dentry))
12386+ goto out;
5afbbe0d 12387+ cpg.bsrc = au_dbtop(dentry);
076b876e
AM
12388+ if (!cpg.bsrc)
12389+ goto out;
12390+
12391+ sb = dentry->d_sb;
12392+ sbinfo = au_sbi(sb);
12393+ fhsm = &sbinfo->si_fhsm;
12394+ pid = au_fhsm_pid(fhsm);
12395+ if (pid
12396+ && (current->pid == pid
12397+ || current->real_parent->pid == pid))
12398+ goto out;
12399+
12400+ br = au_sbr(sb, cpg.bsrc);
12401+ cmoo = au_br_cmoo(br->br_perm);
12402+ if (!cmoo)
12403+ goto out;
7e9cd9fe 12404+ if (!d_is_reg(dentry))
076b876e
AM
12405+ cmoo &= AuBrAttr_COO_ALL;
12406+ if (!cmoo)
12407+ goto out;
12408+
12409+ parent = dget_parent(dentry);
12410+ di_write_lock_parent(parent);
12411+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
12412+ cpg.bdst = err;
12413+ if (unlikely(err < 0)) {
12414+ err = 0; /* there is no upper writable branch */
12415+ goto out_dgrade;
12416+ }
12417+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
12418+
12419+ /* do not respect the coo attrib for the target branch */
12420+ err = au_cpup_dirs(dentry, cpg.bdst);
12421+ if (unlikely(err))
12422+ goto out_dgrade;
12423+
12424+ di_downgrade_lock(parent, AuLock_IR);
12425+ udba = au_opt_udba(sb);
12426+ err = au_pin(&pin, dentry, cpg.bdst, udba,
12427+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12428+ if (unlikely(err))
12429+ goto out_parent;
12430+
12431+ err = au_sio_cpup_simple(&cpg);
12432+ au_unpin(&pin);
12433+ if (unlikely(err))
12434+ goto out_parent;
12435+ if (!(cmoo & AuBrWAttr_MOO))
12436+ goto out_parent; /* success */
12437+
12438+ err = au_pin(&pin, dentry, cpg.bsrc, udba,
12439+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12440+ if (unlikely(err))
12441+ goto out_parent;
12442+
12443+ h_path.mnt = au_br_mnt(br);
12444+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
5527c038 12445+ hdir = au_hi(d_inode(parent), cpg.bsrc);
076b876e
AM
12446+ delegated = NULL;
12447+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
12448+ au_unpin(&pin);
12449+ /* todo: keep h_dentry or not? */
12450+ if (unlikely(err == -EWOULDBLOCK)) {
12451+ pr_warn("cannot retry for NFSv4 delegation"
12452+ " for an internal unlink\n");
12453+ iput(delegated);
12454+ }
12455+ if (unlikely(err)) {
12456+ pr_err("unlink %pd after coo failed (%d), ignored\n",
12457+ dentry, err);
12458+ err = 0;
12459+ }
12460+ goto out_parent; /* success */
12461+
12462+out_dgrade:
12463+ di_downgrade_lock(parent, AuLock_IR);
12464+out_parent:
12465+ di_read_unlock(parent, AuLock_IR);
12466+ dput(parent);
12467+out:
12468+ AuTraceErr(err);
12469+ return err;
12470+}
12471+
b912730e 12472+int au_do_open(struct file *file, struct au_do_open_args *args)
1facf9fc 12473+{
b912730e 12474+ int err, no_lock = args->no_lock;
1facf9fc 12475+ struct dentry *dentry;
076b876e 12476+ struct au_finfo *finfo;
1308ab2a 12477+
b912730e
AM
12478+ if (!no_lock)
12479+ err = au_finfo_init(file, args->fidir);
12480+ else {
12481+ lockdep_off();
12482+ err = au_finfo_init(file, args->fidir);
12483+ lockdep_on();
12484+ }
4a4d8108
AM
12485+ if (unlikely(err))
12486+ goto out;
1facf9fc 12487+
2000de60 12488+ dentry = file->f_path.dentry;
b912730e
AM
12489+ AuDebugOn(IS_ERR_OR_NULL(dentry));
12490+ if (!no_lock) {
12491+ di_write_lock_child(dentry);
12492+ err = au_cmoo(dentry);
12493+ di_downgrade_lock(dentry, AuLock_IR);
12494+ if (!err)
12495+ err = args->open(file, vfsub_file_flags(file), NULL);
12496+ di_read_unlock(dentry, AuLock_IR);
12497+ } else {
12498+ err = au_cmoo(dentry);
12499+ if (!err)
12500+ err = args->open(file, vfsub_file_flags(file),
12501+ args->h_file);
5afbbe0d 12502+ if (!err && au_fbtop(file) != au_dbtop(dentry))
b912730e
AM
12503+ /*
12504+ * cmoo happens after h_file was opened.
12505+ * need to refresh file later.
12506+ */
12507+ atomic_dec(&au_fi(file)->fi_generation);
12508+ }
1facf9fc 12509+
076b876e
AM
12510+ finfo = au_fi(file);
12511+ if (!err) {
12512+ finfo->fi_file = file;
12513+ au_sphl_add(&finfo->fi_hlist,
2000de60 12514+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
076b876e 12515+ }
b912730e
AM
12516+ if (!no_lock)
12517+ fi_write_unlock(file);
12518+ else {
12519+ lockdep_off();
12520+ fi_write_unlock(file);
12521+ lockdep_on();
12522+ }
4a4d8108 12523+ if (unlikely(err)) {
076b876e 12524+ finfo->fi_hdir = NULL;
f0c0a007 12525+ au_finfo_fin(file, /*atonce*/0);
1308ab2a 12526+ }
4a4d8108 12527+
4f0767ce 12528+out:
1308ab2a 12529+ return err;
12530+}
dece6358 12531+
4a4d8108 12532+int au_reopen_nondir(struct file *file)
1308ab2a 12533+{
4a4d8108 12534+ int err;
5afbbe0d 12535+ aufs_bindex_t btop;
4a4d8108
AM
12536+ struct dentry *dentry;
12537+ struct file *h_file, *h_file_tmp;
1308ab2a 12538+
2000de60 12539+ dentry = file->f_path.dentry;
5afbbe0d 12540+ btop = au_dbtop(dentry);
4a4d8108 12541+ h_file_tmp = NULL;
5afbbe0d 12542+ if (au_fbtop(file) == btop) {
4a4d8108
AM
12543+ h_file = au_hf_top(file);
12544+ if (file->f_mode == h_file->f_mode)
12545+ return 0; /* success */
12546+ h_file_tmp = h_file;
12547+ get_file(h_file_tmp);
5afbbe0d 12548+ au_set_h_fptr(file, btop, NULL);
4a4d8108
AM
12549+ }
12550+ AuDebugOn(au_fi(file)->fi_hdir);
86dc4139
AM
12551+ /*
12552+ * it can happen
12553+ * file exists on both of rw and ro
5afbbe0d 12554+ * open --> dbtop and fbtop are both 0
86dc4139
AM
12555+ * prepend a branch as rw, "rw" becomes ro
12556+ * remove rw/file
12557+ * delete the top branch, "rw" becomes rw again
5afbbe0d
AM
12558+ * --> dbtop is 1, fbtop is still 0
12559+ * write --> fbtop is 0 but dbtop is 1
86dc4139 12560+ */
5afbbe0d 12561+ /* AuDebugOn(au_fbtop(file) < btop); */
1308ab2a 12562+
5afbbe0d 12563+ h_file = au_h_open(dentry, btop, vfsub_file_flags(file) & ~O_TRUNC,
392086de 12564+ file, /*force_wr*/0);
4a4d8108 12565+ err = PTR_ERR(h_file);
86dc4139
AM
12566+ if (IS_ERR(h_file)) {
12567+ if (h_file_tmp) {
5afbbe0d
AM
12568+ au_sbr_get(dentry->d_sb, btop);
12569+ au_set_h_fptr(file, btop, h_file_tmp);
86dc4139
AM
12570+ h_file_tmp = NULL;
12571+ }
4a4d8108 12572+ goto out; /* todo: close all? */
86dc4139 12573+ }
4a4d8108
AM
12574+
12575+ err = 0;
5afbbe0d
AM
12576+ au_set_fbtop(file, btop);
12577+ au_set_h_fptr(file, btop, h_file);
4a4d8108
AM
12578+ au_update_figen(file);
12579+ /* todo: necessary? */
12580+ /* file->f_ra = h_file->f_ra; */
12581+
4f0767ce 12582+out:
4a4d8108
AM
12583+ if (h_file_tmp)
12584+ fput(h_file_tmp);
12585+ return err;
1facf9fc 12586+}
12587+
1308ab2a 12588+/* ---------------------------------------------------------------------- */
12589+
4a4d8108
AM
12590+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
12591+ struct dentry *hi_wh)
1facf9fc 12592+{
4a4d8108 12593+ int err;
5afbbe0d 12594+ aufs_bindex_t btop;
4a4d8108
AM
12595+ struct au_dinfo *dinfo;
12596+ struct dentry *h_dentry;
12597+ struct au_hdentry *hdp;
1facf9fc 12598+
2000de60 12599+ dinfo = au_di(file->f_path.dentry);
4a4d8108 12600+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 12601+
5afbbe0d
AM
12602+ btop = dinfo->di_btop;
12603+ dinfo->di_btop = btgt;
12604+ hdp = au_hdentry(dinfo, btgt);
12605+ h_dentry = hdp->hd_dentry;
12606+ hdp->hd_dentry = hi_wh;
4a4d8108 12607+ err = au_reopen_nondir(file);
5afbbe0d
AM
12608+ hdp->hd_dentry = h_dentry;
12609+ dinfo->di_btop = btop;
1facf9fc 12610+
1facf9fc 12611+ return err;
12612+}
12613+
4a4d8108 12614+static int au_ready_to_write_wh(struct file *file, loff_t len,
86dc4139 12615+ aufs_bindex_t bcpup, struct au_pin *pin)
1facf9fc 12616+{
4a4d8108 12617+ int err;
027c5e7a 12618+ struct inode *inode, *h_inode;
c2b27bf2
AM
12619+ struct dentry *h_dentry, *hi_wh;
12620+ struct au_cp_generic cpg = {
2000de60 12621+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12622+ .bdst = bcpup,
12623+ .bsrc = -1,
12624+ .len = len,
12625+ .pin = pin
12626+ };
1facf9fc 12627+
5afbbe0d 12628+ au_update_dbtop(cpg.dentry);
5527c038 12629+ inode = d_inode(cpg.dentry);
027c5e7a 12630+ h_inode = NULL;
5afbbe0d
AM
12631+ if (au_dbtop(cpg.dentry) <= bcpup
12632+ && au_dbbot(cpg.dentry) >= bcpup) {
c2b27bf2 12633+ h_dentry = au_h_dptr(cpg.dentry, bcpup);
5527c038
JR
12634+ if (h_dentry && d_is_positive(h_dentry))
12635+ h_inode = d_inode(h_dentry);
027c5e7a 12636+ }
4a4d8108 12637+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 12638+ if (!hi_wh && !h_inode)
c2b27bf2 12639+ err = au_sio_cpup_wh(&cpg, file);
4a4d8108
AM
12640+ else
12641+ /* already copied-up after unlink */
12642+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 12643+
4a4d8108 12644+ if (!err
38d290e6
JR
12645+ && (inode->i_nlink > 1
12646+ || (inode->i_state & I_LINKABLE))
c2b27bf2
AM
12647+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
12648+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
1308ab2a 12649+
dece6358 12650+ return err;
1facf9fc 12651+}
12652+
4a4d8108
AM
12653+/*
12654+ * prepare the @file for writing.
12655+ */
12656+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 12657+{
4a4d8108 12658+ int err;
5afbbe0d 12659+ aufs_bindex_t dbtop;
c1595e42 12660+ struct dentry *parent;
86dc4139 12661+ struct inode *inode;
1facf9fc 12662+ struct super_block *sb;
4a4d8108 12663+ struct file *h_file;
c2b27bf2 12664+ struct au_cp_generic cpg = {
2000de60 12665+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12666+ .bdst = -1,
12667+ .bsrc = -1,
12668+ .len = len,
12669+ .pin = pin,
12670+ .flags = AuCpup_DTIME
12671+ };
1facf9fc 12672+
c2b27bf2 12673+ sb = cpg.dentry->d_sb;
5527c038 12674+ inode = d_inode(cpg.dentry);
5afbbe0d 12675+ cpg.bsrc = au_fbtop(file);
c2b27bf2 12676+ err = au_test_ro(sb, cpg.bsrc, inode);
4a4d8108 12677+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
c2b27bf2
AM
12678+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
12679+ /*flags*/0);
1facf9fc 12680+ goto out;
4a4d8108 12681+ }
1facf9fc 12682+
027c5e7a 12683+ /* need to cpup or reopen */
c2b27bf2 12684+ parent = dget_parent(cpg.dentry);
4a4d8108 12685+ di_write_lock_parent(parent);
c2b27bf2
AM
12686+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12687+ cpg.bdst = err;
4a4d8108
AM
12688+ if (unlikely(err < 0))
12689+ goto out_dgrade;
12690+ err = 0;
12691+
c2b27bf2
AM
12692+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
12693+ err = au_cpup_dirs(cpg.dentry, cpg.bdst);
1facf9fc 12694+ if (unlikely(err))
4a4d8108
AM
12695+ goto out_dgrade;
12696+ }
12697+
c2b27bf2 12698+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108
AM
12699+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12700+ if (unlikely(err))
12701+ goto out_dgrade;
12702+
5afbbe0d
AM
12703+ dbtop = au_dbtop(cpg.dentry);
12704+ if (dbtop <= cpg.bdst)
c2b27bf2 12705+ cpg.bsrc = cpg.bdst;
027c5e7a 12706+
5afbbe0d 12707+ if (dbtop <= cpg.bdst /* just reopen */
c2b27bf2 12708+ || !d_unhashed(cpg.dentry) /* copyup and reopen */
027c5e7a 12709+ ) {
392086de 12710+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
86dc4139 12711+ if (IS_ERR(h_file))
027c5e7a 12712+ err = PTR_ERR(h_file);
86dc4139 12713+ else {
027c5e7a 12714+ di_downgrade_lock(parent, AuLock_IR);
5afbbe0d 12715+ if (dbtop > cpg.bdst)
c2b27bf2 12716+ err = au_sio_cpup_simple(&cpg);
027c5e7a
AM
12717+ if (!err)
12718+ err = au_reopen_nondir(file);
c2b27bf2 12719+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
027c5e7a 12720+ }
027c5e7a
AM
12721+ } else { /* copyup as wh and reopen */
12722+ /*
12723+ * since writable hfsplus branch is not supported,
12724+ * h_open_pre/post() are unnecessary.
12725+ */
c2b27bf2 12726+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
4a4d8108 12727+ di_downgrade_lock(parent, AuLock_IR);
4a4d8108 12728+ }
4a4d8108
AM
12729+
12730+ if (!err) {
12731+ au_pin_set_parent_lflag(pin, /*lflag*/0);
12732+ goto out_dput; /* success */
12733+ }
12734+ au_unpin(pin);
12735+ goto out_unlock;
1facf9fc 12736+
4f0767ce 12737+out_dgrade:
4a4d8108 12738+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 12739+out_unlock:
4a4d8108 12740+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12741+out_dput:
4a4d8108 12742+ dput(parent);
4f0767ce 12743+out:
1facf9fc 12744+ return err;
12745+}
12746+
4a4d8108
AM
12747+/* ---------------------------------------------------------------------- */
12748+
12749+int au_do_flush(struct file *file, fl_owner_t id,
12750+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 12751+{
4a4d8108 12752+ int err;
1facf9fc 12753+ struct super_block *sb;
4a4d8108 12754+ struct inode *inode;
1facf9fc 12755+
c06a8ce3
AM
12756+ inode = file_inode(file);
12757+ sb = inode->i_sb;
4a4d8108
AM
12758+ si_noflush_read_lock(sb);
12759+ fi_read_lock(file);
b752ccd1 12760+ ii_read_lock_child(inode);
1facf9fc 12761+
4a4d8108
AM
12762+ err = flush(file, id);
12763+ au_cpup_attr_timesizes(inode);
1facf9fc 12764+
b752ccd1 12765+ ii_read_unlock(inode);
4a4d8108 12766+ fi_read_unlock(file);
1308ab2a 12767+ si_read_unlock(sb);
dece6358 12768+ return err;
1facf9fc 12769+}
12770+
4a4d8108
AM
12771+/* ---------------------------------------------------------------------- */
12772+
12773+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 12774+{
4a4d8108 12775+ int err;
4a4d8108
AM
12776+ struct au_pin pin;
12777+ struct au_finfo *finfo;
c2b27bf2 12778+ struct dentry *parent, *hi_wh;
4a4d8108 12779+ struct inode *inode;
1facf9fc 12780+ struct super_block *sb;
c2b27bf2 12781+ struct au_cp_generic cpg = {
2000de60 12782+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12783+ .bdst = -1,
12784+ .bsrc = -1,
12785+ .len = -1,
12786+ .pin = &pin,
12787+ .flags = AuCpup_DTIME
12788+ };
1facf9fc 12789+
4a4d8108
AM
12790+ FiMustWriteLock(file);
12791+
12792+ err = 0;
12793+ finfo = au_fi(file);
c2b27bf2 12794+ sb = cpg.dentry->d_sb;
5527c038 12795+ inode = d_inode(cpg.dentry);
5afbbe0d 12796+ cpg.bdst = au_ibtop(inode);
c2b27bf2 12797+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
1308ab2a 12798+ goto out;
dece6358 12799+
c2b27bf2
AM
12800+ parent = dget_parent(cpg.dentry);
12801+ if (au_test_ro(sb, cpg.bdst, inode)) {
4a4d8108 12802+ di_read_lock_parent(parent, !AuLock_IR);
c2b27bf2
AM
12803+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12804+ cpg.bdst = err;
4a4d8108
AM
12805+ di_read_unlock(parent, !AuLock_IR);
12806+ if (unlikely(err < 0))
12807+ goto out_parent;
12808+ err = 0;
1facf9fc 12809+ }
1facf9fc 12810+
4a4d8108 12811+ di_read_lock_parent(parent, AuLock_IR);
c2b27bf2 12812+ hi_wh = au_hi_wh(inode, cpg.bdst);
7f207e10
AM
12813+ if (!S_ISDIR(inode->i_mode)
12814+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108 12815+ && au_plink_test(inode)
c2b27bf2 12816+ && !d_unhashed(cpg.dentry)
5afbbe0d 12817+ && cpg.bdst < au_dbtop(cpg.dentry)) {
c2b27bf2 12818+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
4a4d8108
AM
12819+ if (unlikely(err))
12820+ goto out_unlock;
12821+
12822+ /* always superio. */
c2b27bf2 12823+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108 12824+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 12825+ if (!err) {
c2b27bf2 12826+ err = au_sio_cpup_simple(&cpg);
367653fa
AM
12827+ au_unpin(&pin);
12828+ }
4a4d8108
AM
12829+ } else if (hi_wh) {
12830+ /* already copied-up after unlink */
c2b27bf2 12831+ err = au_reopen_wh(file, cpg.bdst, hi_wh);
4a4d8108
AM
12832+ *need_reopen = 0;
12833+ }
1facf9fc 12834+
4f0767ce 12835+out_unlock:
4a4d8108 12836+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12837+out_parent:
4a4d8108 12838+ dput(parent);
4f0767ce 12839+out:
1308ab2a 12840+ return err;
dece6358 12841+}
1facf9fc 12842+
4a4d8108 12843+static void au_do_refresh_dir(struct file *file)
dece6358 12844+{
f0c0a007 12845+ int execed;
5afbbe0d 12846+ aufs_bindex_t bindex, bbot, new_bindex, brid;
4a4d8108
AM
12847+ struct au_hfile *p, tmp, *q;
12848+ struct au_finfo *finfo;
1308ab2a 12849+ struct super_block *sb;
4a4d8108 12850+ struct au_fidir *fidir;
1facf9fc 12851+
4a4d8108 12852+ FiMustWriteLock(file);
1facf9fc 12853+
2000de60 12854+ sb = file->f_path.dentry->d_sb;
4a4d8108
AM
12855+ finfo = au_fi(file);
12856+ fidir = finfo->fi_hdir;
12857+ AuDebugOn(!fidir);
12858+ p = fidir->fd_hfile + finfo->fi_btop;
12859+ brid = p->hf_br->br_id;
5afbbe0d
AM
12860+ bbot = fidir->fd_bbot;
12861+ for (bindex = finfo->fi_btop; bindex <= bbot; bindex++, p++) {
4a4d8108
AM
12862+ if (!p->hf_file)
12863+ continue;
1308ab2a 12864+
4a4d8108
AM
12865+ new_bindex = au_br_index(sb, p->hf_br->br_id);
12866+ if (new_bindex == bindex)
12867+ continue;
12868+ if (new_bindex < 0) {
12869+ au_set_h_fptr(file, bindex, NULL);
12870+ continue;
12871+ }
1308ab2a 12872+
4a4d8108
AM
12873+ /* swap two lower inode, and loop again */
12874+ q = fidir->fd_hfile + new_bindex;
12875+ tmp = *q;
12876+ *q = *p;
12877+ *p = tmp;
12878+ if (tmp.hf_file) {
12879+ bindex--;
12880+ p--;
12881+ }
12882+ }
1308ab2a 12883+
f0c0a007 12884+ execed = vfsub_file_execed(file);
4a4d8108 12885+ p = fidir->fd_hfile;
2000de60 12886+ if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
5afbbe0d
AM
12887+ bbot = au_sbbot(sb);
12888+ for (finfo->fi_btop = 0; finfo->fi_btop <= bbot;
4a4d8108
AM
12889+ finfo->fi_btop++, p++)
12890+ if (p->hf_file) {
c06a8ce3 12891+ if (file_inode(p->hf_file))
4a4d8108 12892+ break;
f0c0a007 12893+ au_hfput(p, execed);
4a4d8108
AM
12894+ }
12895+ } else {
5afbbe0d
AM
12896+ bbot = au_br_index(sb, brid);
12897+ for (finfo->fi_btop = 0; finfo->fi_btop < bbot;
4a4d8108
AM
12898+ finfo->fi_btop++, p++)
12899+ if (p->hf_file)
f0c0a007 12900+ au_hfput(p, execed);
5afbbe0d 12901+ bbot = au_sbbot(sb);
4a4d8108 12902+ }
1308ab2a 12903+
5afbbe0d
AM
12904+ p = fidir->fd_hfile + bbot;
12905+ for (fidir->fd_bbot = bbot; fidir->fd_bbot >= finfo->fi_btop;
4a4d8108
AM
12906+ fidir->fd_bbot--, p--)
12907+ if (p->hf_file) {
c06a8ce3 12908+ if (file_inode(p->hf_file))
4a4d8108 12909+ break;
f0c0a007 12910+ au_hfput(p, execed);
4a4d8108
AM
12911+ }
12912+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 12913+}
12914+
4a4d8108
AM
12915+/*
12916+ * after branch manipulating, refresh the file.
12917+ */
12918+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 12919+{
e2f27e51 12920+ int err, need_reopen, nbr;
5afbbe0d 12921+ aufs_bindex_t bbot, bindex;
4a4d8108 12922+ struct dentry *dentry;
e2f27e51 12923+ struct super_block *sb;
1308ab2a 12924+ struct au_finfo *finfo;
4a4d8108 12925+ struct au_hfile *hfile;
1facf9fc 12926+
2000de60 12927+ dentry = file->f_path.dentry;
e2f27e51
AM
12928+ sb = dentry->d_sb;
12929+ nbr = au_sbbot(sb) + 1;
1308ab2a 12930+ finfo = au_fi(file);
4a4d8108
AM
12931+ if (!finfo->fi_hdir) {
12932+ hfile = &finfo->fi_htop;
12933+ AuDebugOn(!hfile->hf_file);
e2f27e51 12934+ bindex = au_br_index(sb, hfile->hf_br->br_id);
4a4d8108
AM
12935+ AuDebugOn(bindex < 0);
12936+ if (bindex != finfo->fi_btop)
5afbbe0d 12937+ au_set_fbtop(file, bindex);
4a4d8108 12938+ } else {
e2f27e51 12939+ err = au_fidir_realloc(finfo, nbr, /*may_shrink*/0);
4a4d8108
AM
12940+ if (unlikely(err))
12941+ goto out;
12942+ au_do_refresh_dir(file);
12943+ }
1facf9fc 12944+
4a4d8108
AM
12945+ err = 0;
12946+ need_reopen = 1;
12947+ if (!au_test_mmapped(file))
12948+ err = au_file_refresh_by_inode(file, &need_reopen);
e2f27e51
AM
12949+ if (finfo->fi_hdir)
12950+ /* harmless if err */
12951+ au_fidir_realloc(finfo, nbr, /*may_shrink*/1);
027c5e7a 12952+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
12953+ err = reopen(file);
12954+ if (!err) {
12955+ au_update_figen(file);
12956+ goto out; /* success */
12957+ }
12958+
12959+ /* error, close all lower files */
12960+ if (finfo->fi_hdir) {
5afbbe0d
AM
12961+ bbot = au_fbbot_dir(file);
12962+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++)
4a4d8108
AM
12963+ au_set_h_fptr(file, bindex, NULL);
12964+ }
1facf9fc 12965+
4f0767ce 12966+out:
1facf9fc 12967+ return err;
12968+}
12969+
4a4d8108
AM
12970+/* common function to regular file and dir */
12971+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12972+ int wlock)
dece6358 12973+{
1308ab2a 12974+ int err;
4a4d8108 12975+ unsigned int sigen, figen;
5afbbe0d 12976+ aufs_bindex_t btop;
4a4d8108
AM
12977+ unsigned char pseudo_link;
12978+ struct dentry *dentry;
12979+ struct inode *inode;
1facf9fc 12980+
4a4d8108 12981+ err = 0;
2000de60 12982+ dentry = file->f_path.dentry;
5527c038 12983+ inode = d_inode(dentry);
4a4d8108
AM
12984+ sigen = au_sigen(dentry->d_sb);
12985+ fi_write_lock(file);
12986+ figen = au_figen(file);
12987+ di_write_lock_child(dentry);
5afbbe0d
AM
12988+ btop = au_dbtop(dentry);
12989+ pseudo_link = (btop != au_ibtop(inode));
12990+ if (sigen == figen && !pseudo_link && au_fbtop(file) == btop) {
4a4d8108
AM
12991+ if (!wlock) {
12992+ di_downgrade_lock(dentry, AuLock_IR);
12993+ fi_downgrade_lock(file);
12994+ }
12995+ goto out; /* success */
12996+ }
dece6358 12997+
4a4d8108 12998+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 12999+ if (au_digen_test(dentry, sigen)) {
4a4d8108 13000+ err = au_reval_dpath(dentry, sigen);
027c5e7a 13001+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 13002+ }
dece6358 13003+
027c5e7a
AM
13004+ if (!err)
13005+ err = refresh_file(file, reopen);
4a4d8108
AM
13006+ if (!err) {
13007+ if (!wlock) {
13008+ di_downgrade_lock(dentry, AuLock_IR);
13009+ fi_downgrade_lock(file);
13010+ }
13011+ } else {
13012+ di_write_unlock(dentry);
13013+ fi_write_unlock(file);
13014+ }
1facf9fc 13015+
4f0767ce 13016+out:
1308ab2a 13017+ return err;
13018+}
1facf9fc 13019+
4a4d8108
AM
13020+/* ---------------------------------------------------------------------- */
13021+
13022+/* cf. aufs_nopage() */
13023+/* for madvise(2) */
13024+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 13025+{
4a4d8108
AM
13026+ unlock_page(page);
13027+ return 0;
13028+}
1facf9fc 13029+
4a4d8108 13030+/* it will never be called, but necessary to support O_DIRECT */
5afbbe0d 13031+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
4a4d8108 13032+{ BUG(); return 0; }
1facf9fc 13033+
4a4d8108
AM
13034+/* they will never be called. */
13035+#ifdef CONFIG_AUFS_DEBUG
13036+static int aufs_write_begin(struct file *file, struct address_space *mapping,
13037+ loff_t pos, unsigned len, unsigned flags,
13038+ struct page **pagep, void **fsdata)
13039+{ AuUnsupport(); return 0; }
13040+static int aufs_write_end(struct file *file, struct address_space *mapping,
13041+ loff_t pos, unsigned len, unsigned copied,
13042+ struct page *page, void *fsdata)
13043+{ AuUnsupport(); return 0; }
13044+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
13045+{ AuUnsupport(); return 0; }
1308ab2a 13046+
4a4d8108
AM
13047+static int aufs_set_page_dirty(struct page *page)
13048+{ AuUnsupport(); return 0; }
392086de
AM
13049+static void aufs_invalidatepage(struct page *page, unsigned int offset,
13050+ unsigned int length)
4a4d8108
AM
13051+{ AuUnsupport(); }
13052+static int aufs_releasepage(struct page *page, gfp_t gfp)
13053+{ AuUnsupport(); return 0; }
79b8bda9 13054+#if 0 /* called by memory compaction regardless file */
4a4d8108 13055+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
7eafdf33 13056+ struct page *page, enum migrate_mode mode)
4a4d8108 13057+{ AuUnsupport(); return 0; }
79b8bda9 13058+#endif
e2f27e51
AM
13059+static bool aufs_isolate_page(struct page *page, isolate_mode_t mode)
13060+{ AuUnsupport(); return true; }
13061+static void aufs_putback_page(struct page *page)
13062+{ AuUnsupport(); }
4a4d8108
AM
13063+static int aufs_launder_page(struct page *page)
13064+{ AuUnsupport(); return 0; }
13065+static int aufs_is_partially_uptodate(struct page *page,
38d290e6
JR
13066+ unsigned long from,
13067+ unsigned long count)
4a4d8108 13068+{ AuUnsupport(); return 0; }
392086de
AM
13069+static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
13070+ bool *writeback)
13071+{ AuUnsupport(); }
4a4d8108
AM
13072+static int aufs_error_remove_page(struct address_space *mapping,
13073+ struct page *page)
13074+{ AuUnsupport(); return 0; }
b4510431
AM
13075+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
13076+ sector_t *span)
13077+{ AuUnsupport(); return 0; }
13078+static void aufs_swap_deactivate(struct file *file)
13079+{ AuUnsupport(); }
4a4d8108
AM
13080+#endif /* CONFIG_AUFS_DEBUG */
13081+
13082+const struct address_space_operations aufs_aop = {
13083+ .readpage = aufs_readpage,
13084+ .direct_IO = aufs_direct_IO,
4a4d8108
AM
13085+#ifdef CONFIG_AUFS_DEBUG
13086+ .writepage = aufs_writepage,
4a4d8108
AM
13087+ /* no writepages, because of writepage */
13088+ .set_page_dirty = aufs_set_page_dirty,
13089+ /* no readpages, because of readpage */
13090+ .write_begin = aufs_write_begin,
13091+ .write_end = aufs_write_end,
13092+ /* no bmap, no block device */
13093+ .invalidatepage = aufs_invalidatepage,
13094+ .releasepage = aufs_releasepage,
79b8bda9
AM
13095+ /* is fallback_migrate_page ok? */
13096+ /* .migratepage = aufs_migratepage, */
e2f27e51
AM
13097+ .isolate_page = aufs_isolate_page,
13098+ .putback_page = aufs_putback_page,
4a4d8108
AM
13099+ .launder_page = aufs_launder_page,
13100+ .is_partially_uptodate = aufs_is_partially_uptodate,
392086de 13101+ .is_dirty_writeback = aufs_is_dirty_writeback,
b4510431
AM
13102+ .error_remove_page = aufs_error_remove_page,
13103+ .swap_activate = aufs_swap_activate,
13104+ .swap_deactivate = aufs_swap_deactivate
4a4d8108 13105+#endif /* CONFIG_AUFS_DEBUG */
dece6358 13106+};
7f207e10
AM
13107diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
13108--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 13109+++ linux/fs/aufs/file.h 2016-10-09 16:55:38.889431135 +0200
f0c0a007 13110@@ -0,0 +1,294 @@
4a4d8108 13111+/*
8cdd5066 13112+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
13113+ *
13114+ * This program, aufs is free software; you can redistribute it and/or modify
13115+ * it under the terms of the GNU General Public License as published by
13116+ * the Free Software Foundation; either version 2 of the License, or
13117+ * (at your option) any later version.
13118+ *
13119+ * This program is distributed in the hope that it will be useful,
13120+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13121+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13122+ * GNU General Public License for more details.
13123+ *
13124+ * You should have received a copy of the GNU General Public License
523b37e3 13125+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 13126+ */
1facf9fc 13127+
4a4d8108
AM
13128+/*
13129+ * file operations
13130+ */
1facf9fc 13131+
4a4d8108
AM
13132+#ifndef __AUFS_FILE_H__
13133+#define __AUFS_FILE_H__
1facf9fc 13134+
4a4d8108 13135+#ifdef __KERNEL__
1facf9fc 13136+
2cbb1c4b 13137+#include <linux/file.h>
4a4d8108
AM
13138+#include <linux/fs.h>
13139+#include <linux/poll.h>
4a4d8108 13140+#include "rwsem.h"
1facf9fc 13141+
4a4d8108
AM
13142+struct au_branch;
13143+struct au_hfile {
13144+ struct file *hf_file;
13145+ struct au_branch *hf_br;
13146+};
1facf9fc 13147+
4a4d8108
AM
13148+struct au_vdir;
13149+struct au_fidir {
13150+ aufs_bindex_t fd_bbot;
13151+ aufs_bindex_t fd_nent;
13152+ struct au_vdir *fd_vdir_cache;
13153+ struct au_hfile fd_hfile[];
13154+};
1facf9fc 13155+
4a4d8108 13156+static inline int au_fidir_sz(int nent)
dece6358 13157+{
4f0767ce
JR
13158+ AuDebugOn(nent < 0);
13159+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 13160+}
1facf9fc 13161+
4a4d8108
AM
13162+struct au_finfo {
13163+ atomic_t fi_generation;
dece6358 13164+
4a4d8108
AM
13165+ struct au_rwsem fi_rwsem;
13166+ aufs_bindex_t fi_btop;
13167+
13168+ /* do not union them */
13169+ struct { /* for non-dir */
13170+ struct au_hfile fi_htop;
2cbb1c4b 13171+ atomic_t fi_mmapped;
4a4d8108
AM
13172+ };
13173+ struct au_fidir *fi_hdir; /* for dir only */
523b37e3
AM
13174+
13175+ struct hlist_node fi_hlist;
f0c0a007
AM
13176+ union {
13177+ struct file *fi_file; /* very ugly */
13178+ struct llist_node fi_lnode; /* delayed free */
13179+ };
4a4d8108 13180+} ____cacheline_aligned_in_smp;
1facf9fc 13181+
4a4d8108 13182+/* ---------------------------------------------------------------------- */
1facf9fc 13183+
4a4d8108
AM
13184+/* file.c */
13185+extern const struct address_space_operations aufs_aop;
13186+unsigned int au_file_roflags(unsigned int flags);
13187+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 13188+ struct file *file, int force_wr);
b912730e
AM
13189+struct au_do_open_args {
13190+ int no_lock;
13191+ int (*open)(struct file *file, int flags,
13192+ struct file *h_file);
13193+ struct au_fidir *fidir;
13194+ struct file *h_file;
13195+};
13196+int au_do_open(struct file *file, struct au_do_open_args *args);
4a4d8108
AM
13197+int au_reopen_nondir(struct file *file);
13198+struct au_pin;
13199+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
13200+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
13201+ int wlock);
13202+int au_do_flush(struct file *file, fl_owner_t id,
13203+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 13204+
4a4d8108
AM
13205+/* poll.c */
13206+#ifdef CONFIG_AUFS_POLL
13207+unsigned int aufs_poll(struct file *file, poll_table *wait);
13208+#endif
1facf9fc 13209+
4a4d8108
AM
13210+#ifdef CONFIG_AUFS_BR_HFSPLUS
13211+/* hfsplus.c */
392086de
AM
13212+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
13213+ int force_wr);
4a4d8108
AM
13214+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
13215+ struct file *h_file);
13216+#else
c1595e42
JR
13217+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
13218+ aufs_bindex_t bindex, int force_wr)
4a4d8108
AM
13219+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
13220+ struct file *h_file);
13221+#endif
1facf9fc 13222+
4a4d8108
AM
13223+/* f_op.c */
13224+extern const struct file_operations aufs_file_fop;
b912730e 13225+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
4a4d8108 13226+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
b912730e 13227+struct file *au_read_pre(struct file *file, int keep_fi);
4a4d8108 13228+
4a4d8108 13229+/* finfo.c */
f0c0a007 13230+void au_hfput(struct au_hfile *hf, int execed);
4a4d8108
AM
13231+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
13232+ struct file *h_file);
1facf9fc 13233+
4a4d8108 13234+void au_update_figen(struct file *file);
4a4d8108 13235+struct au_fidir *au_fidir_alloc(struct super_block *sb);
e2f27e51 13236+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink);
1facf9fc 13237+
4a4d8108 13238+void au_fi_init_once(void *_fi);
f0c0a007 13239+void au_finfo_fin(struct file *file, int atonce);
4a4d8108 13240+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 13241+
4a4d8108
AM
13242+/* ioctl.c */
13243+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
13244+#ifdef CONFIG_COMPAT
13245+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
13246+ unsigned long arg);
c2b27bf2
AM
13247+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
13248+ unsigned long arg);
b752ccd1 13249+#endif
1facf9fc 13250+
4a4d8108 13251+/* ---------------------------------------------------------------------- */
1facf9fc 13252+
4a4d8108
AM
13253+static inline struct au_finfo *au_fi(struct file *file)
13254+{
38d290e6 13255+ return file->private_data;
4a4d8108 13256+}
1facf9fc 13257+
4a4d8108 13258+/* ---------------------------------------------------------------------- */
1facf9fc 13259+
4a4d8108
AM
13260+/*
13261+ * fi_read_lock, fi_write_lock,
13262+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
13263+ */
13264+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 13265+
4a4d8108
AM
13266+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
13267+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
13268+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 13269+
1308ab2a 13270+/* ---------------------------------------------------------------------- */
13271+
4a4d8108 13272+/* todo: hard/soft set? */
5afbbe0d 13273+static inline aufs_bindex_t au_fbtop(struct file *file)
dece6358 13274+{
4a4d8108
AM
13275+ FiMustAnyLock(file);
13276+ return au_fi(file)->fi_btop;
13277+}
dece6358 13278+
5afbbe0d 13279+static inline aufs_bindex_t au_fbbot_dir(struct file *file)
4a4d8108
AM
13280+{
13281+ FiMustAnyLock(file);
13282+ AuDebugOn(!au_fi(file)->fi_hdir);
13283+ return au_fi(file)->fi_hdir->fd_bbot;
13284+}
1facf9fc 13285+
4a4d8108
AM
13286+static inline struct au_vdir *au_fvdir_cache(struct file *file)
13287+{
13288+ FiMustAnyLock(file);
13289+ AuDebugOn(!au_fi(file)->fi_hdir);
13290+ return au_fi(file)->fi_hdir->fd_vdir_cache;
13291+}
1facf9fc 13292+
5afbbe0d 13293+static inline void au_set_fbtop(struct file *file, aufs_bindex_t bindex)
4a4d8108
AM
13294+{
13295+ FiMustWriteLock(file);
13296+ au_fi(file)->fi_btop = bindex;
13297+}
1facf9fc 13298+
5afbbe0d 13299+static inline void au_set_fbbot_dir(struct file *file, aufs_bindex_t bindex)
4a4d8108
AM
13300+{
13301+ FiMustWriteLock(file);
13302+ AuDebugOn(!au_fi(file)->fi_hdir);
13303+ au_fi(file)->fi_hdir->fd_bbot = bindex;
13304+}
1308ab2a 13305+
4a4d8108
AM
13306+static inline void au_set_fvdir_cache(struct file *file,
13307+ struct au_vdir *vdir_cache)
13308+{
13309+ FiMustWriteLock(file);
13310+ AuDebugOn(!au_fi(file)->fi_hdir);
13311+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
13312+}
dece6358 13313+
4a4d8108
AM
13314+static inline struct file *au_hf_top(struct file *file)
13315+{
13316+ FiMustAnyLock(file);
13317+ AuDebugOn(au_fi(file)->fi_hdir);
13318+ return au_fi(file)->fi_htop.hf_file;
13319+}
1facf9fc 13320+
4a4d8108
AM
13321+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
13322+{
13323+ FiMustAnyLock(file);
13324+ AuDebugOn(!au_fi(file)->fi_hdir);
13325+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
13326+}
13327+
4a4d8108
AM
13328+/* todo: memory barrier? */
13329+static inline unsigned int au_figen(struct file *f)
dece6358 13330+{
4a4d8108
AM
13331+ return atomic_read(&au_fi(f)->fi_generation);
13332+}
dece6358 13333+
2cbb1c4b
JR
13334+static inline void au_set_mmapped(struct file *f)
13335+{
13336+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
13337+ return;
0c3ec466 13338+ pr_warn("fi_mmapped wrapped around\n");
2cbb1c4b
JR
13339+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
13340+ ;
13341+}
13342+
13343+static inline void au_unset_mmapped(struct file *f)
13344+{
13345+ atomic_dec(&au_fi(f)->fi_mmapped);
13346+}
13347+
4a4d8108
AM
13348+static inline int au_test_mmapped(struct file *f)
13349+{
2cbb1c4b
JR
13350+ return atomic_read(&au_fi(f)->fi_mmapped);
13351+}
13352+
13353+/* customize vma->vm_file */
13354+
13355+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
13356+ struct file *file)
13357+{
53392da6
AM
13358+ struct file *f;
13359+
13360+ f = vma->vm_file;
2cbb1c4b
JR
13361+ get_file(file);
13362+ vma->vm_file = file;
53392da6 13363+ fput(f);
2cbb1c4b
JR
13364+}
13365+
13366+#ifdef CONFIG_MMU
13367+#define AuDbgVmRegion(file, vma) do {} while (0)
13368+
13369+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13370+ struct file *file)
13371+{
13372+ au_do_vm_file_reset(vma, file);
13373+}
13374+#else
13375+#define AuDbgVmRegion(file, vma) \
13376+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
13377+
13378+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13379+ struct file *file)
13380+{
53392da6
AM
13381+ struct file *f;
13382+
2cbb1c4b 13383+ au_do_vm_file_reset(vma, file);
53392da6 13384+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
13385+ get_file(file);
13386+ vma->vm_region->vm_file = file;
53392da6 13387+ fput(f);
2cbb1c4b
JR
13388+}
13389+#endif /* CONFIG_MMU */
13390+
13391+/* handle vma->vm_prfile */
fb47a38f 13392+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
2cbb1c4b
JR
13393+ struct file *file)
13394+{
2cbb1c4b
JR
13395+ get_file(file);
13396+ vma->vm_prfile = file;
13397+#ifndef CONFIG_MMU
13398+ get_file(file);
13399+ vma->vm_region->vm_prfile = file;
13400+#endif
fb47a38f 13401+}
1308ab2a 13402+
4a4d8108
AM
13403+#endif /* __KERNEL__ */
13404+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
13405diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
13406--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 13407+++ linux/fs/aufs/finfo.c 2016-10-09 16:55:38.889431135 +0200
f0c0a007 13408@@ -0,0 +1,151 @@
4a4d8108 13409+/*
8cdd5066 13410+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
13411+ *
13412+ * This program, aufs is free software; you can redistribute it and/or modify
13413+ * it under the terms of the GNU General Public License as published by
13414+ * the Free Software Foundation; either version 2 of the License, or
13415+ * (at your option) any later version.
13416+ *
13417+ * This program is distributed in the hope that it will be useful,
13418+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13419+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13420+ * GNU General Public License for more details.
13421+ *
13422+ * You should have received a copy of the GNU General Public License
523b37e3 13423+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 13424+ */
1308ab2a 13425+
4a4d8108
AM
13426+/*
13427+ * file private data
13428+ */
1facf9fc 13429+
4a4d8108 13430+#include "aufs.h"
1facf9fc 13431+
f0c0a007 13432+void au_hfput(struct au_hfile *hf, int execed)
4a4d8108 13433+{
f0c0a007 13434+ if (execed)
4a4d8108
AM
13435+ allow_write_access(hf->hf_file);
13436+ fput(hf->hf_file);
13437+ hf->hf_file = NULL;
5afbbe0d 13438+ au_br_put(hf->hf_br);
4a4d8108
AM
13439+ hf->hf_br = NULL;
13440+}
1facf9fc 13441+
4a4d8108
AM
13442+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
13443+{
13444+ struct au_finfo *finfo = au_fi(file);
13445+ struct au_hfile *hf;
13446+ struct au_fidir *fidir;
13447+
13448+ fidir = finfo->fi_hdir;
13449+ if (!fidir) {
13450+ AuDebugOn(finfo->fi_btop != bindex);
13451+ hf = &finfo->fi_htop;
13452+ } else
13453+ hf = fidir->fd_hfile + bindex;
13454+
13455+ if (hf && hf->hf_file)
f0c0a007 13456+ au_hfput(hf, vfsub_file_execed(file));
4a4d8108
AM
13457+ if (val) {
13458+ FiMustWriteLock(file);
b912730e 13459+ AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
4a4d8108 13460+ hf->hf_file = val;
2000de60 13461+ hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
1308ab2a 13462+ }
4a4d8108 13463+}
1facf9fc 13464+
4a4d8108
AM
13465+void au_update_figen(struct file *file)
13466+{
2000de60 13467+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
4a4d8108 13468+ /* smp_mb(); */ /* atomic_set */
1facf9fc 13469+}
13470+
4a4d8108
AM
13471+/* ---------------------------------------------------------------------- */
13472+
4a4d8108
AM
13473+struct au_fidir *au_fidir_alloc(struct super_block *sb)
13474+{
13475+ struct au_fidir *fidir;
13476+ int nbr;
13477+
5afbbe0d 13478+ nbr = au_sbbot(sb) + 1;
4a4d8108
AM
13479+ if (nbr < 2)
13480+ nbr = 2; /* initial allocate for 2 branches */
13481+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
13482+ if (fidir) {
13483+ fidir->fd_bbot = -1;
13484+ fidir->fd_nent = nbr;
4a4d8108
AM
13485+ }
13486+
13487+ return fidir;
13488+}
13489+
e2f27e51 13490+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink)
4a4d8108
AM
13491+{
13492+ int err;
13493+ struct au_fidir *fidir, *p;
13494+
13495+ AuRwMustWriteLock(&finfo->fi_rwsem);
13496+ fidir = finfo->fi_hdir;
13497+ AuDebugOn(!fidir);
13498+
13499+ err = -ENOMEM;
13500+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
e2f27e51 13501+ GFP_NOFS, may_shrink);
4a4d8108
AM
13502+ if (p) {
13503+ p->fd_nent = nbr;
13504+ finfo->fi_hdir = p;
13505+ err = 0;
13506+ }
1facf9fc 13507+
dece6358 13508+ return err;
1facf9fc 13509+}
1308ab2a 13510+
13511+/* ---------------------------------------------------------------------- */
13512+
f0c0a007 13513+void au_finfo_fin(struct file *file, int atonce)
1308ab2a 13514+{
4a4d8108
AM
13515+ struct au_finfo *finfo;
13516+
2000de60 13517+ au_nfiles_dec(file->f_path.dentry->d_sb);
7f207e10 13518+
4a4d8108
AM
13519+ finfo = au_fi(file);
13520+ AuDebugOn(finfo->fi_hdir);
13521+ AuRwDestroy(&finfo->fi_rwsem);
f0c0a007
AM
13522+ if (!atonce)
13523+ au_cache_dfree_finfo(finfo);
13524+ else
13525+ au_cache_free_finfo(finfo);
1308ab2a 13526+}
1308ab2a 13527+
e49829fe 13528+void au_fi_init_once(void *_finfo)
4a4d8108 13529+{
e49829fe 13530+ struct au_finfo *finfo = _finfo;
1308ab2a 13531+
e49829fe 13532+ au_rw_init(&finfo->fi_rwsem);
4a4d8108 13533+}
1308ab2a 13534+
4a4d8108
AM
13535+int au_finfo_init(struct file *file, struct au_fidir *fidir)
13536+{
1716fcea 13537+ int err;
4a4d8108
AM
13538+ struct au_finfo *finfo;
13539+ struct dentry *dentry;
13540+
13541+ err = -ENOMEM;
2000de60 13542+ dentry = file->f_path.dentry;
4a4d8108
AM
13543+ finfo = au_cache_alloc_finfo();
13544+ if (unlikely(!finfo))
13545+ goto out;
13546+
13547+ err = 0;
7f207e10 13548+ au_nfiles_inc(dentry->d_sb);
4a4d8108
AM
13549+ au_rw_write_lock(&finfo->fi_rwsem);
13550+ finfo->fi_btop = -1;
13551+ finfo->fi_hdir = fidir;
13552+ atomic_set(&finfo->fi_generation, au_digen(dentry));
13553+ /* smp_mb(); */ /* atomic_set */
13554+
13555+ file->private_data = finfo;
13556+
13557+out:
13558+ return err;
13559+}
7f207e10
AM
13560diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
13561--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 13562+++ linux/fs/aufs/f_op.c 2016-10-09 16:55:36.489368218 +0200
f0c0a007 13563@@ -0,0 +1,772 @@
dece6358 13564+/*
8cdd5066 13565+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
13566+ *
13567+ * This program, aufs is free software; you can redistribute it and/or modify
13568+ * it under the terms of the GNU General Public License as published by
13569+ * the Free Software Foundation; either version 2 of the License, or
13570+ * (at your option) any later version.
13571+ *
13572+ * This program is distributed in the hope that it will be useful,
13573+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13574+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13575+ * GNU General Public License for more details.
13576+ *
13577+ * You should have received a copy of the GNU General Public License
523b37e3 13578+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 13579+ */
1facf9fc 13580+
13581+/*
4a4d8108 13582+ * file and vm operations
1facf9fc 13583+ */
dece6358 13584+
86dc4139 13585+#include <linux/aio.h>
4a4d8108
AM
13586+#include <linux/fs_stack.h>
13587+#include <linux/mman.h>
4a4d8108 13588+#include <linux/security.h>
dece6358
AM
13589+#include "aufs.h"
13590+
b912730e 13591+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
1facf9fc 13592+{
4a4d8108
AM
13593+ int err;
13594+ aufs_bindex_t bindex;
8cdd5066 13595+ struct dentry *dentry, *h_dentry;
4a4d8108 13596+ struct au_finfo *finfo;
38d290e6 13597+ struct inode *h_inode;
4a4d8108
AM
13598+
13599+ FiMustWriteLock(file);
13600+
523b37e3 13601+ err = 0;
2000de60 13602+ dentry = file->f_path.dentry;
b912730e 13603+ AuDebugOn(IS_ERR_OR_NULL(dentry));
4a4d8108
AM
13604+ finfo = au_fi(file);
13605+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 13606+ atomic_set(&finfo->fi_mmapped, 0);
5afbbe0d 13607+ bindex = au_dbtop(dentry);
8cdd5066
JR
13608+ if (!h_file) {
13609+ h_dentry = au_h_dptr(dentry, bindex);
13610+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13611+ if (unlikely(err))
13612+ goto out;
b912730e 13613+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
8cdd5066
JR
13614+ } else {
13615+ h_dentry = h_file->f_path.dentry;
13616+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13617+ if (unlikely(err))
13618+ goto out;
b912730e 13619+ get_file(h_file);
8cdd5066 13620+ }
4a4d8108
AM
13621+ if (IS_ERR(h_file))
13622+ err = PTR_ERR(h_file);
13623+ else {
38d290e6
JR
13624+ if ((flags & __O_TMPFILE)
13625+ && !(flags & O_EXCL)) {
13626+ h_inode = file_inode(h_file);
13627+ spin_lock(&h_inode->i_lock);
13628+ h_inode->i_state |= I_LINKABLE;
13629+ spin_unlock(&h_inode->i_lock);
13630+ }
5afbbe0d 13631+ au_set_fbtop(file, bindex);
4a4d8108
AM
13632+ au_set_h_fptr(file, bindex, h_file);
13633+ au_update_figen(file);
13634+ /* todo: necessary? */
13635+ /* file->f_ra = h_file->f_ra; */
13636+ }
027c5e7a 13637+
8cdd5066 13638+out:
4a4d8108 13639+ return err;
1facf9fc 13640+}
13641+
4a4d8108
AM
13642+static int aufs_open_nondir(struct inode *inode __maybe_unused,
13643+ struct file *file)
1facf9fc 13644+{
4a4d8108 13645+ int err;
1308ab2a 13646+ struct super_block *sb;
b912730e
AM
13647+ struct au_do_open_args args = {
13648+ .open = au_do_open_nondir
13649+ };
1facf9fc 13650+
523b37e3
AM
13651+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
13652+ file, vfsub_file_flags(file), file->f_mode);
1facf9fc 13653+
2000de60 13654+ sb = file->f_path.dentry->d_sb;
4a4d8108 13655+ si_read_lock(sb, AuLock_FLUSH);
b912730e 13656+ err = au_do_open(file, &args);
4a4d8108
AM
13657+ si_read_unlock(sb);
13658+ return err;
13659+}
1facf9fc 13660+
4a4d8108
AM
13661+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
13662+{
13663+ struct au_finfo *finfo;
13664+ aufs_bindex_t bindex;
f0c0a007 13665+ int delayed;
1facf9fc 13666+
4a4d8108 13667+ finfo = au_fi(file);
2000de60
JR
13668+ au_sphl_del(&finfo->fi_hlist,
13669+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108 13670+ bindex = finfo->fi_btop;
b4510431 13671+ if (bindex >= 0)
4a4d8108 13672+ au_set_h_fptr(file, bindex, NULL);
7f207e10 13673+
f0c0a007
AM
13674+ delayed = (current->flags & PF_KTHREAD) || in_interrupt();
13675+ au_finfo_fin(file, delayed);
4a4d8108 13676+ return 0;
1facf9fc 13677+}
13678+
4a4d8108
AM
13679+/* ---------------------------------------------------------------------- */
13680+
13681+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 13682+{
1308ab2a 13683+ int err;
4a4d8108
AM
13684+ struct file *h_file;
13685+
13686+ err = 0;
13687+ h_file = au_hf_top(file);
13688+ if (h_file)
13689+ err = vfsub_flush(h_file, id);
13690+ return err;
13691+}
13692+
13693+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
13694+{
13695+ return au_do_flush(file, id, au_do_flush_nondir);
13696+}
13697+
13698+/* ---------------------------------------------------------------------- */
9dbd164d
AM
13699+/*
13700+ * read and write functions acquire [fdi]_rwsem once, but release before
13701+ * mmap_sem. This is to prevent a race condition with mmap(2).
13702+ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping
13703+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
13704+ * read functions after [fdi]_rwsem are released, but it should be harmless.
13705+ */
4a4d8108 13706+
b912730e
AM
13707+/* Callers should call au_read_post() or fput() in the end */
13708+struct file *au_read_pre(struct file *file, int keep_fi)
4a4d8108 13709+{
4a4d8108 13710+ struct file *h_file;
b912730e 13711+ int err;
1facf9fc 13712+
4a4d8108 13713+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
b912730e
AM
13714+ if (!err) {
13715+ di_read_unlock(file->f_path.dentry, AuLock_IR);
13716+ h_file = au_hf_top(file);
13717+ get_file(h_file);
13718+ if (!keep_fi)
13719+ fi_read_unlock(file);
13720+ } else
13721+ h_file = ERR_PTR(err);
13722+
13723+ return h_file;
13724+}
13725+
13726+static void au_read_post(struct inode *inode, struct file *h_file)
13727+{
13728+ /* update without lock, I don't think it a problem */
13729+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13730+ fput(h_file);
13731+}
13732+
13733+struct au_write_pre {
13734+ blkcnt_t blks;
5afbbe0d 13735+ aufs_bindex_t btop;
b912730e
AM
13736+};
13737+
13738+/*
13739+ * return with iinfo is write-locked
13740+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
13741+ * end
13742+ */
13743+static struct file *au_write_pre(struct file *file, int do_ready,
13744+ struct au_write_pre *wpre)
13745+{
13746+ struct file *h_file;
13747+ struct dentry *dentry;
13748+ int err;
13749+ struct au_pin pin;
13750+
13751+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
13752+ h_file = ERR_PTR(err);
dece6358
AM
13753+ if (unlikely(err))
13754+ goto out;
1facf9fc 13755+
b912730e
AM
13756+ dentry = file->f_path.dentry;
13757+ if (do_ready) {
13758+ err = au_ready_to_write(file, -1, &pin);
13759+ if (unlikely(err)) {
13760+ h_file = ERR_PTR(err);
13761+ di_write_unlock(dentry);
13762+ goto out_fi;
13763+ }
13764+ }
13765+
13766+ di_downgrade_lock(dentry, /*flags*/0);
13767+ if (wpre)
5afbbe0d 13768+ wpre->btop = au_fbtop(file);
4a4d8108 13769+ h_file = au_hf_top(file);
9dbd164d 13770+ get_file(h_file);
b912730e
AM
13771+ if (wpre)
13772+ wpre->blks = file_inode(h_file)->i_blocks;
13773+ if (do_ready)
13774+ au_unpin(&pin);
13775+ di_read_unlock(dentry, /*flags*/0);
13776+
13777+out_fi:
13778+ fi_write_unlock(file);
13779+out:
13780+ return h_file;
13781+}
13782+
13783+static void au_write_post(struct inode *inode, struct file *h_file,
13784+ struct au_write_pre *wpre, ssize_t written)
13785+{
13786+ struct inode *h_inode;
13787+
13788+ au_cpup_attr_timesizes(inode);
5afbbe0d 13789+ AuDebugOn(au_ibtop(inode) != wpre->btop);
b912730e
AM
13790+ h_inode = file_inode(h_file);
13791+ inode->i_mode = h_inode->i_mode;
13792+ ii_write_unlock(inode);
13793+ fput(h_file);
13794+
13795+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
13796+ if (written > 0)
5afbbe0d 13797+ au_fhsm_wrote(inode->i_sb, wpre->btop,
b912730e
AM
13798+ /*force*/h_inode->i_blocks > wpre->blks);
13799+}
13800+
13801+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
13802+ loff_t *ppos)
13803+{
13804+ ssize_t err;
13805+ struct inode *inode;
13806+ struct file *h_file;
13807+ struct super_block *sb;
13808+
13809+ inode = file_inode(file);
13810+ sb = inode->i_sb;
13811+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13812+
13813+ h_file = au_read_pre(file, /*keep_fi*/0);
13814+ err = PTR_ERR(h_file);
13815+ if (IS_ERR(h_file))
13816+ goto out;
9dbd164d
AM
13817+
13818+ /* filedata may be obsoleted by concurrent copyup, but no problem */
4a4d8108
AM
13819+ err = vfsub_read_u(h_file, buf, count, ppos);
13820+ /* todo: necessary? */
13821+ /* file->f_ra = h_file->f_ra; */
b912730e 13822+ au_read_post(inode, h_file);
1308ab2a 13823+
4f0767ce 13824+out:
dece6358
AM
13825+ si_read_unlock(sb);
13826+ return err;
13827+}
1facf9fc 13828+
e49829fe
JR
13829+/*
13830+ * todo: very ugly
13831+ * it safely locks both i_mutex and si_rwsem (the latter for read).
13832+ * if the plink maintenance mode continues forever (that is the problem),
13833+ * it may loop forever.
13834+ */
13835+static void au_mtx_and_read_lock(struct inode *inode)
13836+{
13837+ int err;
13838+ struct super_block *sb = inode->i_sb;
13839+
13840+ while (1) {
febd17d6 13841+ inode_lock(inode);
e49829fe
JR
13842+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
13843+ if (!err)
13844+ break;
febd17d6 13845+ inode_unlock(inode);
e49829fe
JR
13846+ si_read_lock(sb, AuLock_NOPLMW);
13847+ si_read_unlock(sb);
13848+ }
13849+}
13850+
4a4d8108
AM
13851+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
13852+ size_t count, loff_t *ppos)
dece6358 13853+{
4a4d8108 13854+ ssize_t err;
b912730e
AM
13855+ struct au_write_pre wpre;
13856+ struct inode *inode;
4a4d8108
AM
13857+ struct file *h_file;
13858+ char __user *buf = (char __user *)ubuf;
1facf9fc 13859+
b912730e 13860+ inode = file_inode(file);
e49829fe 13861+ au_mtx_and_read_lock(inode);
1facf9fc 13862+
b912730e
AM
13863+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13864+ err = PTR_ERR(h_file);
13865+ if (IS_ERR(h_file))
9dbd164d 13866+ goto out;
9dbd164d 13867+
4a4d8108 13868+ err = vfsub_write_u(h_file, buf, count, ppos);
b912730e 13869+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13870+
4f0767ce 13871+out:
b912730e 13872+ si_read_unlock(inode->i_sb);
febd17d6 13873+ inode_unlock(inode);
dece6358
AM
13874+ return err;
13875+}
1facf9fc 13876+
076b876e
AM
13877+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
13878+ struct iov_iter *iov_iter)
dece6358 13879+{
4a4d8108
AM
13880+ ssize_t err;
13881+ struct file *file;
076b876e 13882+ ssize_t (*iter)(struct kiocb *, struct iov_iter *);
1facf9fc 13883+
4a4d8108
AM
13884+ err = security_file_permission(h_file, rw);
13885+ if (unlikely(err))
13886+ goto out;
1facf9fc 13887+
4a4d8108 13888+ err = -ENOSYS;
076b876e 13889+ iter = NULL;
5527c038 13890+ if (rw == MAY_READ)
076b876e 13891+ iter = h_file->f_op->read_iter;
5527c038 13892+ else if (rw == MAY_WRITE)
076b876e 13893+ iter = h_file->f_op->write_iter;
076b876e
AM
13894+
13895+ file = kio->ki_filp;
13896+ kio->ki_filp = h_file;
13897+ if (iter) {
2cbb1c4b 13898+ lockdep_off();
076b876e
AM
13899+ err = iter(kio, iov_iter);
13900+ lockdep_on();
4a4d8108
AM
13901+ } else
13902+ /* currently there is no such fs */
13903+ WARN_ON_ONCE(1);
076b876e 13904+ kio->ki_filp = file;
1facf9fc 13905+
4f0767ce 13906+out:
dece6358
AM
13907+ return err;
13908+}
1facf9fc 13909+
076b876e 13910+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1facf9fc 13911+{
4a4d8108
AM
13912+ ssize_t err;
13913+ struct file *file, *h_file;
b912730e 13914+ struct inode *inode;
dece6358 13915+ struct super_block *sb;
1facf9fc 13916+
4a4d8108 13917+ file = kio->ki_filp;
b912730e
AM
13918+ inode = file_inode(file);
13919+ sb = inode->i_sb;
e49829fe 13920+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 13921+
5afbbe0d 13922+ h_file = au_read_pre(file, /*keep_fi*/1);
b912730e
AM
13923+ err = PTR_ERR(h_file);
13924+ if (IS_ERR(h_file))
13925+ goto out;
9dbd164d 13926+
5afbbe0d
AM
13927+ if (au_test_loopback_kthread()) {
13928+ au_warn_loopback(h_file->f_path.dentry->d_sb);
13929+ if (file->f_mapping != h_file->f_mapping) {
13930+ file->f_mapping = h_file->f_mapping;
13931+ smp_mb(); /* unnecessary? */
13932+ }
13933+ }
13934+ fi_read_unlock(file);
13935+
076b876e 13936+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
4a4d8108
AM
13937+ /* todo: necessary? */
13938+ /* file->f_ra = h_file->f_ra; */
b912730e 13939+ au_read_post(inode, h_file);
1facf9fc 13940+
4f0767ce 13941+out:
4a4d8108 13942+ si_read_unlock(sb);
1308ab2a 13943+ return err;
13944+}
1facf9fc 13945+
076b876e 13946+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1308ab2a 13947+{
4a4d8108 13948+ ssize_t err;
b912730e
AM
13949+ struct au_write_pre wpre;
13950+ struct inode *inode;
4a4d8108 13951+ struct file *file, *h_file;
1308ab2a 13952+
4a4d8108 13953+ file = kio->ki_filp;
b912730e 13954+ inode = file_inode(file);
e49829fe
JR
13955+ au_mtx_and_read_lock(inode);
13956+
b912730e
AM
13957+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13958+ err = PTR_ERR(h_file);
13959+ if (IS_ERR(h_file))
9dbd164d 13960+ goto out;
9dbd164d 13961+
076b876e 13962+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
b912730e 13963+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13964+
4f0767ce 13965+out:
b912730e 13966+ si_read_unlock(inode->i_sb);
febd17d6 13967+ inode_unlock(inode);
dece6358 13968+ return err;
1facf9fc 13969+}
13970+
4a4d8108
AM
13971+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
13972+ struct pipe_inode_info *pipe, size_t len,
13973+ unsigned int flags)
1facf9fc 13974+{
4a4d8108
AM
13975+ ssize_t err;
13976+ struct file *h_file;
b912730e 13977+ struct inode *inode;
dece6358 13978+ struct super_block *sb;
1facf9fc 13979+
b912730e
AM
13980+ inode = file_inode(file);
13981+ sb = inode->i_sb;
e49829fe 13982+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e 13983+
5afbbe0d 13984+ h_file = au_read_pre(file, /*keep_fi*/0);
b912730e
AM
13985+ err = PTR_ERR(h_file);
13986+ if (IS_ERR(h_file))
dece6358 13987+ goto out;
1facf9fc 13988+
4a4d8108
AM
13989+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
13990+ /* todo: necessary? */
13991+ /* file->f_ra = h_file->f_ra; */
b912730e 13992+ au_read_post(inode, h_file);
1facf9fc 13993+
4f0767ce 13994+out:
4a4d8108 13995+ si_read_unlock(sb);
dece6358 13996+ return err;
1facf9fc 13997+}
13998+
4a4d8108
AM
13999+static ssize_t
14000+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
14001+ size_t len, unsigned int flags)
1facf9fc 14002+{
4a4d8108 14003+ ssize_t err;
b912730e
AM
14004+ struct au_write_pre wpre;
14005+ struct inode *inode;
076b876e 14006+ struct file *h_file;
1facf9fc 14007+
b912730e 14008+ inode = file_inode(file);
e49829fe 14009+ au_mtx_and_read_lock(inode);
9dbd164d 14010+
b912730e
AM
14011+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14012+ err = PTR_ERR(h_file);
14013+ if (IS_ERR(h_file))
9dbd164d 14014+ goto out;
9dbd164d 14015+
4a4d8108 14016+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
b912730e 14017+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 14018+
4f0767ce 14019+out:
b912730e 14020+ si_read_unlock(inode->i_sb);
febd17d6 14021+ inode_unlock(inode);
4a4d8108
AM
14022+ return err;
14023+}
1facf9fc 14024+
38d290e6
JR
14025+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
14026+ loff_t len)
14027+{
14028+ long err;
b912730e 14029+ struct au_write_pre wpre;
38d290e6
JR
14030+ struct inode *inode;
14031+ struct file *h_file;
14032+
b912730e 14033+ inode = file_inode(file);
38d290e6
JR
14034+ au_mtx_and_read_lock(inode);
14035+
b912730e
AM
14036+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14037+ err = PTR_ERR(h_file);
14038+ if (IS_ERR(h_file))
38d290e6 14039+ goto out;
38d290e6
JR
14040+
14041+ lockdep_off();
03673fb0 14042+ err = vfs_fallocate(h_file, mode, offset, len);
38d290e6 14043+ lockdep_on();
b912730e 14044+ au_write_post(inode, h_file, &wpre, /*written*/1);
38d290e6
JR
14045+
14046+out:
b912730e 14047+ si_read_unlock(inode->i_sb);
febd17d6 14048+ inode_unlock(inode);
38d290e6
JR
14049+ return err;
14050+}
14051+
4a4d8108
AM
14052+/* ---------------------------------------------------------------------- */
14053+
9dbd164d
AM
14054+/*
14055+ * The locking order around current->mmap_sem.
14056+ * - in most and regular cases
14057+ * file I/O syscall -- aufs_read() or something
14058+ * -- si_rwsem for read -- mmap_sem
14059+ * (Note that [fdi]i_rwsem are released before mmap_sem).
14060+ * - in mmap case
14061+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
14062+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
14063+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in
14064+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
14065+ * It means that when aufs acquires si_rwsem for write, the process should never
14066+ * acquire mmap_sem.
14067+ *
392086de 14068+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
9dbd164d
AM
14069+ * problem either since no directory can be mmap-ed.
14070+ * The similar scenario is applied to aufs_readlink() too.
14071+ */
14072+
38d290e6 14073+#if 0 /* stop calling security_file_mmap() */
2dfbb274
AM
14074+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
14075+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
14076+
14077+static unsigned long au_arch_prot_conv(unsigned long flags)
14078+{
14079+ /* currently ppc64 only */
14080+#ifdef CONFIG_PPC64
14081+ /* cf. linux/arch/powerpc/include/asm/mman.h */
14082+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
14083+ return AuConv_VM_PROT(flags, SAO);
14084+#else
14085+ AuDebugOn(arch_calc_vm_prot_bits(-1));
14086+ return 0;
14087+#endif
14088+}
14089+
14090+static unsigned long au_prot_conv(unsigned long flags)
14091+{
14092+ return AuConv_VM_PROT(flags, READ)
14093+ | AuConv_VM_PROT(flags, WRITE)
14094+ | AuConv_VM_PROT(flags, EXEC)
14095+ | au_arch_prot_conv(flags);
14096+}
14097+
14098+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
14099+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
14100+
14101+static unsigned long au_flag_conv(unsigned long flags)
14102+{
14103+ return AuConv_VM_MAP(flags, GROWSDOWN)
14104+ | AuConv_VM_MAP(flags, DENYWRITE)
2dfbb274
AM
14105+ | AuConv_VM_MAP(flags, LOCKED);
14106+}
38d290e6 14107+#endif
2dfbb274 14108+
9dbd164d 14109+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 14110+{
4a4d8108 14111+ int err;
4a4d8108 14112+ const unsigned char wlock
9dbd164d 14113+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
4a4d8108 14114+ struct super_block *sb;
9dbd164d 14115+ struct file *h_file;
b912730e 14116+ struct inode *inode;
9dbd164d
AM
14117+
14118+ AuDbgVmRegion(file, vma);
1308ab2a 14119+
b912730e
AM
14120+ inode = file_inode(file);
14121+ sb = inode->i_sb;
9dbd164d 14122+ lockdep_off();
e49829fe 14123+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108 14124+
b912730e 14125+ h_file = au_write_pre(file, wlock, /*wpre*/NULL);
9dbd164d 14126+ lockdep_on();
b912730e
AM
14127+ err = PTR_ERR(h_file);
14128+ if (IS_ERR(h_file))
14129+ goto out;
1308ab2a 14130+
b912730e
AM
14131+ err = 0;
14132+ au_set_mmapped(file);
9dbd164d 14133+ au_vm_file_reset(vma, h_file);
38d290e6
JR
14134+ /*
14135+ * we cannot call security_mmap_file() here since it may acquire
14136+ * mmap_sem or i_mutex.
14137+ *
14138+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
14139+ * au_flag_conv(vma->vm_flags));
14140+ */
9dbd164d
AM
14141+ if (!err)
14142+ err = h_file->f_op->mmap(h_file, vma);
b912730e
AM
14143+ if (!err) {
14144+ au_vm_prfile_set(vma, file);
14145+ fsstack_copy_attr_atime(inode, file_inode(h_file));
14146+ goto out_fput; /* success */
14147+ }
2cbb1c4b
JR
14148+ au_unset_mmapped(file);
14149+ au_vm_file_reset(vma, file);
b912730e 14150+
2cbb1c4b 14151+out_fput:
9dbd164d 14152+ lockdep_off();
b912730e
AM
14153+ ii_write_unlock(inode);
14154+ lockdep_on();
14155+ fput(h_file);
4f0767ce 14156+out:
b912730e 14157+ lockdep_off();
9dbd164d
AM
14158+ si_read_unlock(sb);
14159+ lockdep_on();
14160+ AuTraceErr(err);
4a4d8108
AM
14161+ return err;
14162+}
14163+
14164+/* ---------------------------------------------------------------------- */
14165+
1e00d052
AM
14166+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
14167+ int datasync)
4a4d8108
AM
14168+{
14169+ int err;
b912730e 14170+ struct au_write_pre wpre;
4a4d8108
AM
14171+ struct inode *inode;
14172+ struct file *h_file;
4a4d8108
AM
14173+
14174+ err = 0; /* -EBADF; */ /* posix? */
14175+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
b912730e 14176+ goto out;
4a4d8108 14177+
b912730e
AM
14178+ inode = file_inode(file);
14179+ au_mtx_and_read_lock(inode);
14180+
14181+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14182+ err = PTR_ERR(h_file);
14183+ if (IS_ERR(h_file))
4a4d8108 14184+ goto out_unlock;
4a4d8108 14185+
53392da6 14186+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
b912730e 14187+ au_write_post(inode, h_file, &wpre, /*written*/0);
4a4d8108 14188+
4f0767ce 14189+out_unlock:
b912730e 14190+ si_read_unlock(inode->i_sb);
febd17d6 14191+ inode_unlock(inode);
b912730e 14192+out:
4a4d8108 14193+ return err;
dece6358
AM
14194+}
14195+
4a4d8108
AM
14196+/* no one supports this operation, currently */
14197+#if 0
14198+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
dece6358 14199+{
4a4d8108 14200+ int err;
b912730e 14201+ struct au_write_pre wpre;
febd17d6 14202+ struct inode *inode, *h_inode;
4a4d8108 14203+ struct file *file, *h_file;
1308ab2a 14204+
4a4d8108
AM
14205+ err = 0; /* -EBADF; */ /* posix? */
14206+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
14207+ goto out;
1308ab2a 14208+
b912730e
AM
14209+ file = kio->ki_filp;
14210+ inode = file_inode(file);
14211+ au_mtx_and_read_lock(inode);
14212+
14213+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14214+ err = PTR_ERR(h_file);
14215+ if (IS_ERR(h_file))
4a4d8108 14216+ goto out_unlock;
1308ab2a 14217+
4a4d8108
AM
14218+ err = -ENOSYS;
14219+ h_file = au_hf_top(file);
523b37e3 14220+ if (h_file->f_op->aio_fsync) {
febd17d6 14221+ h_inode = file_inode(h_file);
4a4d8108
AM
14222+ if (!is_sync_kiocb(kio)) {
14223+ get_file(h_file);
14224+ fput(file);
14225+ }
14226+ kio->ki_filp = h_file;
14227+ err = h_file->f_op->aio_fsync(kio, datasync);
febd17d6 14228+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108
AM
14229+ if (!err)
14230+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
14231+ /*ignore*/
febd17d6 14232+ inode_unlock(h_inode);
4a4d8108 14233+ }
b912730e 14234+ au_write_post(inode, h_file, &wpre, /*written*/0);
1308ab2a 14235+
4f0767ce 14236+out_unlock:
e49829fe 14237+ si_read_unlock(inode->sb);
febd17d6 14238+ inode_unlock(inode);
b912730e 14239+out:
4a4d8108 14240+ return err;
dece6358 14241+}
4a4d8108 14242+#endif
dece6358 14243+
4a4d8108 14244+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 14245+{
4a4d8108
AM
14246+ int err;
14247+ struct file *h_file;
4a4d8108 14248+ struct super_block *sb;
1308ab2a 14249+
b912730e 14250+ sb = file->f_path.dentry->d_sb;
e49829fe 14251+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
14252+
14253+ h_file = au_read_pre(file, /*keep_fi*/0);
14254+ err = PTR_ERR(h_file);
14255+ if (IS_ERR(h_file))
4a4d8108
AM
14256+ goto out;
14257+
523b37e3 14258+ if (h_file->f_op->fasync)
4a4d8108 14259+ err = h_file->f_op->fasync(fd, h_file, flag);
b912730e 14260+ fput(h_file); /* instead of au_read_post() */
1308ab2a 14261+
4f0767ce 14262+out:
4a4d8108 14263+ si_read_unlock(sb);
1308ab2a 14264+ return err;
dece6358 14265+}
4a4d8108 14266+
febd17d6
JR
14267+static int aufs_setfl(struct file *file, unsigned long arg)
14268+{
14269+ int err;
14270+ struct file *h_file;
14271+ struct super_block *sb;
14272+
14273+ sb = file->f_path.dentry->d_sb;
14274+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
14275+
14276+ h_file = au_read_pre(file, /*keep_fi*/0);
14277+ err = PTR_ERR(h_file);
14278+ if (IS_ERR(h_file))
14279+ goto out;
14280+
14281+ arg |= vfsub_file_flags(file) & FASYNC; /* stop calling h_file->fasync */
14282+ err = setfl(/*unused fd*/-1, h_file, arg);
14283+ fput(h_file); /* instead of au_read_post() */
14284+
14285+out:
14286+ si_read_unlock(sb);
14287+ return err;
14288+}
14289+
4a4d8108
AM
14290+/* ---------------------------------------------------------------------- */
14291+
14292+/* no one supports this operation, currently */
14293+#if 0
14294+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
2000de60 14295+ size_t len, loff_t *pos, int more)
4a4d8108
AM
14296+{
14297+}
14298+#endif
14299+
14300+/* ---------------------------------------------------------------------- */
14301+
14302+const struct file_operations aufs_file_fop = {
14303+ .owner = THIS_MODULE,
2cbb1c4b 14304+
027c5e7a 14305+ .llseek = default_llseek,
4a4d8108
AM
14306+
14307+ .read = aufs_read,
14308+ .write = aufs_write,
076b876e
AM
14309+ .read_iter = aufs_read_iter,
14310+ .write_iter = aufs_write_iter,
14311+
4a4d8108
AM
14312+#ifdef CONFIG_AUFS_POLL
14313+ .poll = aufs_poll,
14314+#endif
14315+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1 14316+#ifdef CONFIG_COMPAT
c2b27bf2 14317+ .compat_ioctl = aufs_compat_ioctl_nondir,
b752ccd1 14318+#endif
4a4d8108
AM
14319+ .mmap = aufs_mmap,
14320+ .open = aufs_open_nondir,
14321+ .flush = aufs_flush_nondir,
14322+ .release = aufs_release_nondir,
14323+ .fsync = aufs_fsync_nondir,
14324+ /* .aio_fsync = aufs_aio_fsync_nondir, */
14325+ .fasync = aufs_fasync,
14326+ /* .sendpage = aufs_sendpage, */
febd17d6 14327+ .setfl = aufs_setfl,
4a4d8108
AM
14328+ .splice_write = aufs_splice_write,
14329+ .splice_read = aufs_splice_read,
14330+#if 0
14331+ .aio_splice_write = aufs_aio_splice_write,
38d290e6 14332+ .aio_splice_read = aufs_aio_splice_read,
4a4d8108 14333+#endif
38d290e6 14334+ .fallocate = aufs_fallocate
4a4d8108 14335+};
7f207e10
AM
14336diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
14337--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 14338+++ linux/fs/aufs/fstype.h 2016-10-09 16:55:36.492701639 +0200
b912730e 14339@@ -0,0 +1,400 @@
4a4d8108 14340+/*
8cdd5066 14341+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
14342+ *
14343+ * This program, aufs is free software; you can redistribute it and/or modify
14344+ * it under the terms of the GNU General Public License as published by
14345+ * the Free Software Foundation; either version 2 of the License, or
14346+ * (at your option) any later version.
14347+ *
14348+ * This program is distributed in the hope that it will be useful,
14349+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14350+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14351+ * GNU General Public License for more details.
14352+ *
14353+ * You should have received a copy of the GNU General Public License
523b37e3 14354+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
14355+ */
14356+
14357+/*
14358+ * judging filesystem type
14359+ */
14360+
14361+#ifndef __AUFS_FSTYPE_H__
14362+#define __AUFS_FSTYPE_H__
14363+
14364+#ifdef __KERNEL__
14365+
14366+#include <linux/fs.h>
14367+#include <linux/magic.h>
b912730e 14368+#include <linux/nfs_fs.h>
b95c5147 14369+#include <linux/romfs_fs.h>
4a4d8108
AM
14370+
14371+static inline int au_test_aufs(struct super_block *sb)
14372+{
14373+ return sb->s_magic == AUFS_SUPER_MAGIC;
14374+}
14375+
14376+static inline const char *au_sbtype(struct super_block *sb)
14377+{
14378+ return sb->s_type->name;
14379+}
1308ab2a 14380+
14381+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
14382+{
f0c0a007 14383+#if IS_ENABLED(CONFIG_ISO9660_FS)
2000de60 14384+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
14385+#else
14386+ return 0;
14387+#endif
14388+}
14389+
1308ab2a 14390+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 14391+{
f0c0a007 14392+#if IS_ENABLED(CONFIG_ROMFS_FS)
2000de60 14393+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
14394+#else
14395+ return 0;
14396+#endif
14397+}
14398+
1308ab2a 14399+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 14400+{
f0c0a007 14401+#if IS_ENABLED(CONFIG_CRAMFS)
1308ab2a 14402+ return sb->s_magic == CRAMFS_MAGIC;
14403+#endif
14404+ return 0;
14405+}
14406+
14407+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
14408+{
f0c0a007 14409+#if IS_ENABLED(CONFIG_NFS_FS)
1308ab2a 14410+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
14411+#else
14412+ return 0;
14413+#endif
14414+}
14415+
1308ab2a 14416+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 14417+{
f0c0a007 14418+#if IS_ENABLED(CONFIG_FUSE_FS)
1308ab2a 14419+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
14420+#else
14421+ return 0;
14422+#endif
14423+}
14424+
1308ab2a 14425+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 14426+{
f0c0a007 14427+#if IS_ENABLED(CONFIG_XFS_FS)
1308ab2a 14428+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
14429+#else
14430+ return 0;
14431+#endif
14432+}
14433+
1308ab2a 14434+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 14435+{
1308ab2a 14436+#ifdef CONFIG_TMPFS
14437+ return sb->s_magic == TMPFS_MAGIC;
14438+#else
14439+ return 0;
dece6358 14440+#endif
dece6358
AM
14441+}
14442+
1308ab2a 14443+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 14444+{
f0c0a007 14445+#if IS_ENABLED(CONFIG_ECRYPT_FS)
1308ab2a 14446+ return !strcmp(au_sbtype(sb), "ecryptfs");
14447+#else
14448+ return 0;
14449+#endif
1facf9fc 14450+}
14451+
1308ab2a 14452+static inline int au_test_ramfs(struct super_block *sb)
14453+{
14454+ return sb->s_magic == RAMFS_MAGIC;
14455+}
14456+
14457+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
14458+{
f0c0a007 14459+#if IS_ENABLED(CONFIG_UBIFS_FS)
1308ab2a 14460+ return sb->s_magic == UBIFS_SUPER_MAGIC;
14461+#else
14462+ return 0;
14463+#endif
14464+}
14465+
14466+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
14467+{
14468+#ifdef CONFIG_PROC_FS
14469+ return sb->s_magic == PROC_SUPER_MAGIC;
14470+#else
14471+ return 0;
14472+#endif
14473+}
14474+
14475+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
14476+{
14477+#ifdef CONFIG_SYSFS
14478+ return sb->s_magic == SYSFS_MAGIC;
14479+#else
14480+ return 0;
14481+#endif
14482+}
14483+
14484+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
14485+{
f0c0a007 14486+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
1308ab2a 14487+ return sb->s_magic == CONFIGFS_MAGIC;
14488+#else
14489+ return 0;
14490+#endif
14491+}
14492+
14493+static inline int au_test_minix(struct super_block *sb __maybe_unused)
14494+{
f0c0a007 14495+#if IS_ENABLED(CONFIG_MINIX_FS)
1308ab2a 14496+ return sb->s_magic == MINIX3_SUPER_MAGIC
14497+ || sb->s_magic == MINIX2_SUPER_MAGIC
14498+ || sb->s_magic == MINIX2_SUPER_MAGIC2
14499+ || sb->s_magic == MINIX_SUPER_MAGIC
14500+ || sb->s_magic == MINIX_SUPER_MAGIC2;
14501+#else
14502+ return 0;
14503+#endif
14504+}
14505+
1308ab2a 14506+static inline int au_test_fat(struct super_block *sb __maybe_unused)
14507+{
f0c0a007 14508+#if IS_ENABLED(CONFIG_FAT_FS)
1308ab2a 14509+ return sb->s_magic == MSDOS_SUPER_MAGIC;
14510+#else
14511+ return 0;
14512+#endif
14513+}
14514+
14515+static inline int au_test_msdos(struct super_block *sb)
14516+{
14517+ return au_test_fat(sb);
14518+}
14519+
14520+static inline int au_test_vfat(struct super_block *sb)
14521+{
14522+ return au_test_fat(sb);
14523+}
14524+
14525+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
14526+{
14527+#ifdef CONFIG_SECURITYFS
14528+ return sb->s_magic == SECURITYFS_MAGIC;
14529+#else
14530+ return 0;
14531+#endif
14532+}
14533+
14534+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
14535+{
f0c0a007 14536+#if IS_ENABLED(CONFIG_SQUASHFS)
1308ab2a 14537+ return sb->s_magic == SQUASHFS_MAGIC;
14538+#else
14539+ return 0;
14540+#endif
14541+}
14542+
14543+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
14544+{
f0c0a007 14545+#if IS_ENABLED(CONFIG_BTRFS_FS)
1308ab2a 14546+ return sb->s_magic == BTRFS_SUPER_MAGIC;
14547+#else
14548+ return 0;
14549+#endif
14550+}
14551+
14552+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
14553+{
f0c0a007 14554+#if IS_ENABLED(CONFIG_XENFS)
1308ab2a 14555+ return sb->s_magic == XENFS_SUPER_MAGIC;
14556+#else
14557+ return 0;
14558+#endif
14559+}
14560+
14561+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
14562+{
14563+#ifdef CONFIG_DEBUG_FS
14564+ return sb->s_magic == DEBUGFS_MAGIC;
14565+#else
14566+ return 0;
14567+#endif
14568+}
14569+
14570+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
14571+{
f0c0a007 14572+#if IS_ENABLED(CONFIG_NILFS)
1308ab2a 14573+ return sb->s_magic == NILFS_SUPER_MAGIC;
14574+#else
14575+ return 0;
14576+#endif
14577+}
14578+
4a4d8108
AM
14579+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
14580+{
f0c0a007 14581+#if IS_ENABLED(CONFIG_HFSPLUS_FS)
4a4d8108
AM
14582+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
14583+#else
14584+ return 0;
14585+#endif
14586+}
14587+
1308ab2a 14588+/* ---------------------------------------------------------------------- */
14589+/*
14590+ * they can't be an aufs branch.
14591+ */
14592+static inline int au_test_fs_unsuppoted(struct super_block *sb)
14593+{
14594+ return
14595+#ifndef CONFIG_AUFS_BR_RAMFS
14596+ au_test_ramfs(sb) ||
14597+#endif
14598+ au_test_procfs(sb)
14599+ || au_test_sysfs(sb)
14600+ || au_test_configfs(sb)
14601+ || au_test_debugfs(sb)
14602+ || au_test_securityfs(sb)
14603+ || au_test_xenfs(sb)
14604+ || au_test_ecryptfs(sb)
14605+ /* || !strcmp(au_sbtype(sb), "unionfs") */
14606+ || au_test_aufs(sb); /* will be supported in next version */
14607+}
14608+
1308ab2a 14609+static inline int au_test_fs_remote(struct super_block *sb)
14610+{
14611+ return !au_test_tmpfs(sb)
14612+#ifdef CONFIG_AUFS_BR_RAMFS
14613+ && !au_test_ramfs(sb)
14614+#endif
14615+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
14616+}
14617+
14618+/* ---------------------------------------------------------------------- */
14619+
14620+/*
14621+ * Note: these functions (below) are created after reading ->getattr() in all
14622+ * filesystems under linux/fs. it means we have to do so in every update...
14623+ */
14624+
14625+/*
14626+ * some filesystems require getattr to refresh the inode attributes before
14627+ * referencing.
14628+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
14629+ * and leave the work for d_revalidate()
14630+ */
14631+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
14632+{
14633+ return au_test_nfs(sb)
14634+ || au_test_fuse(sb)
1308ab2a 14635+ /* || au_test_btrfs(sb) */ /* untested */
1308ab2a 14636+ ;
14637+}
14638+
14639+/*
14640+ * filesystems which don't maintain i_size or i_blocks.
14641+ */
14642+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
14643+{
14644+ return au_test_xfs(sb)
4a4d8108
AM
14645+ || au_test_btrfs(sb)
14646+ || au_test_ubifs(sb)
14647+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 14648+ /* || au_test_minix(sb) */ /* untested */
14649+ ;
14650+}
14651+
14652+/*
14653+ * filesystems which don't store the correct value in some of their inode
14654+ * attributes.
14655+ */
14656+static inline int au_test_fs_bad_iattr(struct super_block *sb)
14657+{
14658+ return au_test_fs_bad_iattr_size(sb)
1308ab2a 14659+ || au_test_fat(sb)
14660+ || au_test_msdos(sb)
14661+ || au_test_vfat(sb);
1facf9fc 14662+}
14663+
14664+/* they don't check i_nlink in link(2) */
14665+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
14666+{
14667+ return au_test_tmpfs(sb)
14668+#ifdef CONFIG_AUFS_BR_RAMFS
14669+ || au_test_ramfs(sb)
14670+#endif
4a4d8108 14671+ || au_test_ubifs(sb)
4a4d8108 14672+ || au_test_hfsplus(sb);
1facf9fc 14673+}
14674+
14675+/*
14676+ * filesystems which set S_NOATIME and S_NOCMTIME.
14677+ */
14678+static inline int au_test_fs_notime(struct super_block *sb)
14679+{
14680+ return au_test_nfs(sb)
14681+ || au_test_fuse(sb)
dece6358 14682+ || au_test_ubifs(sb)
1facf9fc 14683+ ;
14684+}
14685+
1facf9fc 14686+/* temporary support for i#1 in cramfs */
14687+static inline int au_test_fs_unique_ino(struct inode *inode)
14688+{
14689+ if (au_test_cramfs(inode->i_sb))
14690+ return inode->i_ino != 1;
14691+ return 1;
14692+}
14693+
14694+/* ---------------------------------------------------------------------- */
14695+
14696+/*
14697+ * the filesystem where the xino files placed must support i/o after unlink and
14698+ * maintain i_size and i_blocks.
14699+ */
14700+static inline int au_test_fs_bad_xino(struct super_block *sb)
14701+{
14702+ return au_test_fs_remote(sb)
14703+ || au_test_fs_bad_iattr_size(sb)
1facf9fc 14704+ /* don't want unnecessary work for xino */
14705+ || au_test_aufs(sb)
1308ab2a 14706+ || au_test_ecryptfs(sb)
14707+ || au_test_nilfs(sb);
1facf9fc 14708+}
14709+
14710+static inline int au_test_fs_trunc_xino(struct super_block *sb)
14711+{
14712+ return au_test_tmpfs(sb)
14713+ || au_test_ramfs(sb);
14714+}
14715+
14716+/*
14717+ * test if the @sb is real-readonly.
14718+ */
14719+static inline int au_test_fs_rr(struct super_block *sb)
14720+{
14721+ return au_test_squashfs(sb)
14722+ || au_test_iso9660(sb)
14723+ || au_test_cramfs(sb)
14724+ || au_test_romfs(sb);
14725+}
14726+
b912730e
AM
14727+/*
14728+ * test if the @inode is nfs with 'noacl' option
14729+ * NFS always sets MS_POSIXACL regardless of its mount option 'noacl'.
14730+ */
14731+static inline int au_test_nfs_noacl(struct inode *inode)
14732+{
14733+ return au_test_nfs(inode->i_sb)
14734+ /* && IS_POSIXACL(inode) */
14735+ && !nfs_server_capable(inode, NFS_CAP_ACLS);
14736+}
14737+
1facf9fc 14738+#endif /* __KERNEL__ */
14739+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
14740diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
14741--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 14742+++ linux/fs/aufs/hfsnotify.c 2016-10-09 16:55:36.492701639 +0200
5afbbe0d 14743@@ -0,0 +1,287 @@
1facf9fc 14744+/*
8cdd5066 14745+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 14746+ *
14747+ * This program, aufs is free software; you can redistribute it and/or modify
14748+ * it under the terms of the GNU General Public License as published by
14749+ * the Free Software Foundation; either version 2 of the License, or
14750+ * (at your option) any later version.
dece6358
AM
14751+ *
14752+ * This program is distributed in the hope that it will be useful,
14753+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14754+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14755+ * GNU General Public License for more details.
14756+ *
14757+ * You should have received a copy of the GNU General Public License
523b37e3 14758+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 14759+ */
14760+
14761+/*
4a4d8108 14762+ * fsnotify for the lower directories
1facf9fc 14763+ */
14764+
14765+#include "aufs.h"
14766+
4a4d8108
AM
14767+/* FS_IN_IGNORED is unnecessary */
14768+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
14769+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 14770+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
7eafdf33 14771+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
1facf9fc 14772+
0c5527e5 14773+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 14774+{
0c5527e5
AM
14775+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
14776+ hn_mark);
5afbbe0d 14777+ /* AuDbg("here\n"); */
f0c0a007 14778+ au_cache_dfree_hnotify(hn);
076b876e 14779+ smp_mb__before_atomic();
1716fcea
AM
14780+ if (atomic64_dec_and_test(&au_hfsn_ifree))
14781+ wake_up(&au_hfsn_wq);
4a4d8108 14782+}
1facf9fc 14783+
027c5e7a 14784+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 14785+{
1716fcea 14786+ int err;
027c5e7a
AM
14787+ struct au_hnotify *hn;
14788+ struct super_block *sb;
14789+ struct au_branch *br;
0c5527e5 14790+ struct fsnotify_mark *mark;
027c5e7a 14791+ aufs_bindex_t bindex;
1facf9fc 14792+
027c5e7a
AM
14793+ hn = hinode->hi_notify;
14794+ sb = hn->hn_aufs_inode->i_sb;
14795+ bindex = au_br_index(sb, hinode->hi_id);
14796+ br = au_sbr(sb, bindex);
1716fcea
AM
14797+ AuDebugOn(!br->br_hfsn);
14798+
0c5527e5
AM
14799+ mark = &hn->hn_mark;
14800+ fsnotify_init_mark(mark, au_hfsn_free_mark);
14801+ mark->mask = AuHfsnMask;
7f207e10
AM
14802+ /*
14803+ * by udba rename or rmdir, aufs assigns a new inode to the known
14804+ * h_inode, so specify 1 to allow dups.
14805+ */
c1595e42 14806+ lockdep_off();
1716fcea 14807+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
027c5e7a 14808+ /*mnt*/NULL, /*allow_dups*/1);
c1595e42 14809+ lockdep_on();
1716fcea
AM
14810+
14811+ return err;
1facf9fc 14812+}
14813+
7eafdf33 14814+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
1facf9fc 14815+{
0c5527e5 14816+ struct fsnotify_mark *mark;
7eafdf33 14817+ unsigned long long ull;
1716fcea 14818+ struct fsnotify_group *group;
7eafdf33
AM
14819+
14820+ ull = atomic64_inc_return(&au_hfsn_ifree);
14821+ BUG_ON(!ull);
953406b4 14822+
0c5527e5 14823+ mark = &hn->hn_mark;
1716fcea
AM
14824+ spin_lock(&mark->lock);
14825+ group = mark->group;
14826+ fsnotify_get_group(group);
14827+ spin_unlock(&mark->lock);
c1595e42 14828+ lockdep_off();
1716fcea 14829+ fsnotify_destroy_mark(mark, group);
5afbbe0d 14830+ fsnotify_put_mark(mark);
1716fcea 14831+ fsnotify_put_group(group);
c1595e42 14832+ lockdep_on();
7f207e10 14833+
7eafdf33
AM
14834+ /* free hn by myself */
14835+ return 0;
1facf9fc 14836+}
14837+
14838+/* ---------------------------------------------------------------------- */
14839+
4a4d8108 14840+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 14841+{
0c5527e5 14842+ struct fsnotify_mark *mark;
1facf9fc 14843+
0c5527e5
AM
14844+ mark = &hinode->hi_notify->hn_mark;
14845+ spin_lock(&mark->lock);
1facf9fc 14846+ if (do_set) {
0c5527e5
AM
14847+ AuDebugOn(mark->mask & AuHfsnMask);
14848+ mark->mask |= AuHfsnMask;
1facf9fc 14849+ } else {
0c5527e5
AM
14850+ AuDebugOn(!(mark->mask & AuHfsnMask));
14851+ mark->mask &= ~AuHfsnMask;
1facf9fc 14852+ }
0c5527e5 14853+ spin_unlock(&mark->lock);
4a4d8108 14854+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 14855+}
14856+
4a4d8108 14857+/* ---------------------------------------------------------------------- */
1facf9fc 14858+
4a4d8108
AM
14859+/* #define AuDbgHnotify */
14860+#ifdef AuDbgHnotify
14861+static char *au_hfsn_name(u32 mask)
14862+{
14863+#ifdef CONFIG_AUFS_DEBUG
c06a8ce3
AM
14864+#define test_ret(flag) \
14865+ do { \
14866+ if (mask & flag) \
14867+ return #flag; \
14868+ } while (0)
4a4d8108
AM
14869+ test_ret(FS_ACCESS);
14870+ test_ret(FS_MODIFY);
14871+ test_ret(FS_ATTRIB);
14872+ test_ret(FS_CLOSE_WRITE);
14873+ test_ret(FS_CLOSE_NOWRITE);
14874+ test_ret(FS_OPEN);
14875+ test_ret(FS_MOVED_FROM);
14876+ test_ret(FS_MOVED_TO);
14877+ test_ret(FS_CREATE);
14878+ test_ret(FS_DELETE);
14879+ test_ret(FS_DELETE_SELF);
14880+ test_ret(FS_MOVE_SELF);
14881+ test_ret(FS_UNMOUNT);
14882+ test_ret(FS_Q_OVERFLOW);
14883+ test_ret(FS_IN_IGNORED);
b912730e 14884+ test_ret(FS_ISDIR);
4a4d8108
AM
14885+ test_ret(FS_IN_ONESHOT);
14886+ test_ret(FS_EVENT_ON_CHILD);
14887+ return "";
14888+#undef test_ret
14889+#else
14890+ return "??";
14891+#endif
1facf9fc 14892+}
4a4d8108 14893+#endif
1facf9fc 14894+
14895+/* ---------------------------------------------------------------------- */
14896+
1716fcea
AM
14897+static void au_hfsn_free_group(struct fsnotify_group *group)
14898+{
14899+ struct au_br_hfsnotify *hfsn = group->private;
14900+
5afbbe0d 14901+ /* AuDbg("here\n"); */
f0c0a007 14902+ au_delayed_kfree(hfsn);
1716fcea
AM
14903+}
14904+
4a4d8108 14905+static int au_hfsn_handle_event(struct fsnotify_group *group,
fb47a38f 14906+ struct inode *inode,
0c5527e5
AM
14907+ struct fsnotify_mark *inode_mark,
14908+ struct fsnotify_mark *vfsmount_mark,
fb47a38f
JR
14909+ u32 mask, void *data, int data_type,
14910+ const unsigned char *file_name, u32 cookie)
1facf9fc 14911+{
14912+ int err;
4a4d8108
AM
14913+ struct au_hnotify *hnotify;
14914+ struct inode *h_dir, *h_inode;
fb47a38f 14915+ struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
4a4d8108 14916+
fb47a38f 14917+ AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 14918+
14919+ err = 0;
0c5527e5 14920+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108 14921+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 14922+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 14923+ goto out;
1facf9fc 14924+
fb47a38f
JR
14925+ h_dir = inode;
14926+ h_inode = NULL;
4a4d8108 14927+#ifdef AuDbgHnotify
392086de 14928+ au_debug_on();
4a4d8108
AM
14929+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
14930+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
14931+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
14932+ h_dir->i_ino, mask, au_hfsn_name(mask),
14933+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
14934+ /* WARN_ON(1); */
1facf9fc 14935+ }
392086de 14936+ au_debug_off();
1facf9fc 14937+#endif
4a4d8108 14938+
0c5527e5
AM
14939+ AuDebugOn(!inode_mark);
14940+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
14941+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 14942+
4a4d8108
AM
14943+out:
14944+ return err;
14945+}
1facf9fc 14946+
4a4d8108 14947+static struct fsnotify_ops au_hfsn_ops = {
1716fcea
AM
14948+ .handle_event = au_hfsn_handle_event,
14949+ .free_group_priv = au_hfsn_free_group
4a4d8108
AM
14950+};
14951+
14952+/* ---------------------------------------------------------------------- */
14953+
027c5e7a
AM
14954+static void au_hfsn_fin_br(struct au_branch *br)
14955+{
1716fcea 14956+ struct au_br_hfsnotify *hfsn;
027c5e7a 14957+
1716fcea 14958+ hfsn = br->br_hfsn;
c1595e42
JR
14959+ if (hfsn) {
14960+ lockdep_off();
1716fcea 14961+ fsnotify_put_group(hfsn->hfsn_group);
c1595e42
JR
14962+ lockdep_on();
14963+ }
027c5e7a
AM
14964+}
14965+
1716fcea 14966+static int au_hfsn_init_br(struct au_branch *br, int perm)
4a4d8108
AM
14967+{
14968+ int err;
1716fcea
AM
14969+ struct fsnotify_group *group;
14970+ struct au_br_hfsnotify *hfsn;
1facf9fc 14971+
4a4d8108 14972+ err = 0;
1716fcea
AM
14973+ br->br_hfsn = NULL;
14974+ if (!au_br_hnotifyable(perm))
027c5e7a 14975+ goto out;
027c5e7a 14976+
1716fcea
AM
14977+ err = -ENOMEM;
14978+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
14979+ if (unlikely(!hfsn))
027c5e7a
AM
14980+ goto out;
14981+
1716fcea
AM
14982+ err = 0;
14983+ group = fsnotify_alloc_group(&au_hfsn_ops);
14984+ if (IS_ERR(group)) {
14985+ err = PTR_ERR(group);
0c5527e5 14986+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
1716fcea 14987+ goto out_hfsn;
4a4d8108 14988+ }
1facf9fc 14989+
1716fcea
AM
14990+ group->private = hfsn;
14991+ hfsn->hfsn_group = group;
14992+ br->br_hfsn = hfsn;
14993+ goto out; /* success */
14994+
14995+out_hfsn:
f0c0a007 14996+ au_delayed_kfree(hfsn);
027c5e7a 14997+out:
1716fcea
AM
14998+ return err;
14999+}
15000+
15001+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
15002+{
15003+ int err;
15004+
15005+ err = 0;
15006+ if (!br->br_hfsn)
15007+ err = au_hfsn_init_br(br, perm);
15008+
1facf9fc 15009+ return err;
15010+}
15011+
7eafdf33
AM
15012+/* ---------------------------------------------------------------------- */
15013+
15014+static void au_hfsn_fin(void)
15015+{
15016+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
15017+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
15018+}
15019+
4a4d8108
AM
15020+const struct au_hnotify_op au_hnotify_op = {
15021+ .ctl = au_hfsn_ctl,
15022+ .alloc = au_hfsn_alloc,
15023+ .free = au_hfsn_free,
1facf9fc 15024+
7eafdf33
AM
15025+ .fin = au_hfsn_fin,
15026+
027c5e7a
AM
15027+ .reset_br = au_hfsn_reset_br,
15028+ .fin_br = au_hfsn_fin_br,
15029+ .init_br = au_hfsn_init_br
4a4d8108 15030+};
7f207e10
AM
15031diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
15032--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 15033+++ linux/fs/aufs/hfsplus.c 2016-10-09 16:55:36.492701639 +0200
523b37e3 15034@@ -0,0 +1,56 @@
4a4d8108 15035+/*
8cdd5066 15036+ * Copyright (C) 2010-2016 Junjiro R. Okajima
4a4d8108
AM
15037+ *
15038+ * This program, aufs is free software; you can redistribute it and/or modify
15039+ * it under the terms of the GNU General Public License as published by
15040+ * the Free Software Foundation; either version 2 of the License, or
15041+ * (at your option) any later version.
15042+ *
15043+ * This program is distributed in the hope that it will be useful,
15044+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15045+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15046+ * GNU General Public License for more details.
15047+ *
15048+ * You should have received a copy of the GNU General Public License
523b37e3 15049+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 15050+ */
1facf9fc 15051+
4a4d8108
AM
15052+/*
15053+ * special support for filesystems which aqucires an inode mutex
15054+ * at final closing a file, eg, hfsplus.
15055+ *
15056+ * This trick is very simple and stupid, just to open the file before really
15057+ * necessary open to tell hfsplus that this is not the final closing.
15058+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
15059+ * and au_h_open_post() after releasing it.
15060+ */
1facf9fc 15061+
4a4d8108 15062+#include "aufs.h"
1facf9fc 15063+
392086de
AM
15064+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
15065+ int force_wr)
4a4d8108
AM
15066+{
15067+ struct file *h_file;
15068+ struct dentry *h_dentry;
1facf9fc 15069+
4a4d8108
AM
15070+ h_dentry = au_h_dptr(dentry, bindex);
15071+ AuDebugOn(!h_dentry);
5527c038 15072+ AuDebugOn(d_is_negative(h_dentry));
4a4d8108
AM
15073+
15074+ h_file = NULL;
15075+ if (au_test_hfsplus(h_dentry->d_sb)
7e9cd9fe 15076+ && d_is_reg(h_dentry))
4a4d8108
AM
15077+ h_file = au_h_open(dentry, bindex,
15078+ O_RDONLY | O_NOATIME | O_LARGEFILE,
392086de 15079+ /*file*/NULL, force_wr);
4a4d8108 15080+ return h_file;
1facf9fc 15081+}
15082+
4a4d8108
AM
15083+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
15084+ struct file *h_file)
15085+{
15086+ if (h_file) {
15087+ fput(h_file);
15088+ au_sbr_put(dentry->d_sb, bindex);
15089+ }
15090+}
7f207e10
AM
15091diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
15092--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 15093+++ linux/fs/aufs/hnotify.c 2016-10-09 16:55:36.492701639 +0200
f0c0a007 15094@@ -0,0 +1,723 @@
e49829fe 15095+/*
8cdd5066 15096+ * Copyright (C) 2005-2016 Junjiro R. Okajima
e49829fe
JR
15097+ *
15098+ * This program, aufs is free software; you can redistribute it and/or modify
15099+ * it under the terms of the GNU General Public License as published by
15100+ * the Free Software Foundation; either version 2 of the License, or
15101+ * (at your option) any later version.
15102+ *
15103+ * This program is distributed in the hope that it will be useful,
15104+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15105+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15106+ * GNU General Public License for more details.
15107+ *
15108+ * You should have received a copy of the GNU General Public License
523b37e3 15109+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
15110+ */
15111+
15112+/*
7f207e10 15113+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
15114+ */
15115+
15116+#include "aufs.h"
15117+
027c5e7a 15118+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
15119+{
15120+ int err;
7f207e10 15121+ struct au_hnotify *hn;
1facf9fc 15122+
4a4d8108
AM
15123+ err = -ENOMEM;
15124+ hn = au_cache_alloc_hnotify();
15125+ if (hn) {
15126+ hn->hn_aufs_inode = inode;
027c5e7a
AM
15127+ hinode->hi_notify = hn;
15128+ err = au_hnotify_op.alloc(hinode);
15129+ AuTraceErr(err);
15130+ if (unlikely(err)) {
15131+ hinode->hi_notify = NULL;
f0c0a007 15132+ au_cache_dfree_hnotify(hn);
4a4d8108
AM
15133+ /*
15134+ * The upper dir was removed by udba, but the same named
15135+ * dir left. In this case, aufs assigns a new inode
15136+ * number and sets the monitor again.
15137+ * For the lower dir, the old monitor is still left.
15138+ */
15139+ if (err == -EEXIST)
15140+ err = 0;
15141+ }
1308ab2a 15142+ }
1308ab2a 15143+
027c5e7a 15144+ AuTraceErr(err);
1308ab2a 15145+ return err;
dece6358 15146+}
1facf9fc 15147+
4a4d8108 15148+void au_hn_free(struct au_hinode *hinode)
dece6358 15149+{
4a4d8108 15150+ struct au_hnotify *hn;
1facf9fc 15151+
4a4d8108
AM
15152+ hn = hinode->hi_notify;
15153+ if (hn) {
4a4d8108 15154+ hinode->hi_notify = NULL;
7eafdf33 15155+ if (au_hnotify_op.free(hinode, hn))
f0c0a007 15156+ au_cache_dfree_hnotify(hn);
4a4d8108
AM
15157+ }
15158+}
dece6358 15159+
4a4d8108 15160+/* ---------------------------------------------------------------------- */
dece6358 15161+
4a4d8108
AM
15162+void au_hn_ctl(struct au_hinode *hinode, int do_set)
15163+{
15164+ if (hinode->hi_notify)
15165+ au_hnotify_op.ctl(hinode, do_set);
15166+}
15167+
15168+void au_hn_reset(struct inode *inode, unsigned int flags)
15169+{
5afbbe0d 15170+ aufs_bindex_t bindex, bbot;
4a4d8108
AM
15171+ struct inode *hi;
15172+ struct dentry *iwhdentry;
1facf9fc 15173+
5afbbe0d
AM
15174+ bbot = au_ibbot(inode);
15175+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
4a4d8108
AM
15176+ hi = au_h_iptr(inode, bindex);
15177+ if (!hi)
15178+ continue;
1308ab2a 15179+
febd17d6 15180+ /* inode_lock_nested(hi, AuLsc_I_CHILD); */
4a4d8108
AM
15181+ iwhdentry = au_hi_wh(inode, bindex);
15182+ if (iwhdentry)
15183+ dget(iwhdentry);
15184+ au_igrab(hi);
15185+ au_set_h_iptr(inode, bindex, NULL, 0);
15186+ au_set_h_iptr(inode, bindex, au_igrab(hi),
15187+ flags & ~AuHi_XINO);
15188+ iput(hi);
15189+ dput(iwhdentry);
febd17d6 15190+ /* inode_unlock(hi); */
1facf9fc 15191+ }
1facf9fc 15192+}
15193+
1308ab2a 15194+/* ---------------------------------------------------------------------- */
1facf9fc 15195+
4a4d8108 15196+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 15197+{
4a4d8108 15198+ int err;
5afbbe0d 15199+ aufs_bindex_t bindex, bbot, bfound, btop;
4a4d8108 15200+ struct inode *h_i;
1facf9fc 15201+
4a4d8108
AM
15202+ err = 0;
15203+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15204+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15205+ goto out;
15206+ }
1facf9fc 15207+
4a4d8108 15208+ bfound = -1;
5afbbe0d
AM
15209+ bbot = au_ibbot(inode);
15210+ btop = au_ibtop(inode);
4a4d8108 15211+#if 0 /* reserved for future use */
5afbbe0d 15212+ if (bindex == bbot) {
4a4d8108
AM
15213+ /* keep this ino in rename case */
15214+ goto out;
15215+ }
15216+#endif
5afbbe0d 15217+ for (bindex = btop; bindex <= bbot; bindex++)
4a4d8108
AM
15218+ if (au_h_iptr(inode, bindex) == h_inode) {
15219+ bfound = bindex;
15220+ break;
15221+ }
15222+ if (bfound < 0)
1308ab2a 15223+ goto out;
1facf9fc 15224+
5afbbe0d 15225+ for (bindex = btop; bindex <= bbot; bindex++) {
4a4d8108
AM
15226+ h_i = au_h_iptr(inode, bindex);
15227+ if (!h_i)
15228+ continue;
1facf9fc 15229+
4a4d8108
AM
15230+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
15231+ /* ignore this error */
15232+ /* bad action? */
1facf9fc 15233+ }
1facf9fc 15234+
4a4d8108 15235+ /* children inode number will be broken */
1facf9fc 15236+
4f0767ce 15237+out:
4a4d8108
AM
15238+ AuTraceErr(err);
15239+ return err;
1facf9fc 15240+}
15241+
4a4d8108 15242+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 15243+{
4a4d8108
AM
15244+ int err, i, j, ndentry;
15245+ struct au_dcsub_pages dpages;
15246+ struct au_dpage *dpage;
15247+ struct dentry **dentries;
1facf9fc 15248+
4a4d8108
AM
15249+ err = au_dpages_init(&dpages, GFP_NOFS);
15250+ if (unlikely(err))
15251+ goto out;
15252+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
15253+ if (unlikely(err))
15254+ goto out_dpages;
1facf9fc 15255+
4a4d8108
AM
15256+ for (i = 0; i < dpages.ndpage; i++) {
15257+ dpage = dpages.dpages + i;
15258+ dentries = dpage->dentries;
15259+ ndentry = dpage->ndentry;
15260+ for (j = 0; j < ndentry; j++) {
15261+ struct dentry *d;
15262+
15263+ d = dentries[j];
15264+ if (IS_ROOT(d))
15265+ continue;
15266+
4a4d8108 15267+ au_digen_dec(d);
5527c038 15268+ if (d_really_is_positive(d))
4a4d8108
AM
15269+ /* todo: reset children xino?
15270+ cached children only? */
5527c038 15271+ au_iigen_dec(d_inode(d));
1308ab2a 15272+ }
dece6358 15273+ }
1facf9fc 15274+
4f0767ce 15275+out_dpages:
4a4d8108 15276+ au_dpages_free(&dpages);
dece6358 15277+
027c5e7a 15278+#if 0
4a4d8108
AM
15279+ /* discard children */
15280+ dentry_unhash(dentry);
15281+ dput(dentry);
027c5e7a 15282+#endif
4f0767ce 15283+out:
dece6358
AM
15284+ return err;
15285+}
15286+
1308ab2a 15287+/*
4a4d8108 15288+ * return 0 if processed.
1308ab2a 15289+ */
4a4d8108
AM
15290+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
15291+ const unsigned int isdir)
dece6358 15292+{
1308ab2a 15293+ int err;
4a4d8108
AM
15294+ struct dentry *d;
15295+ struct qstr *dname;
1facf9fc 15296+
4a4d8108
AM
15297+ err = 1;
15298+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15299+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15300+ err = 0;
15301+ goto out;
15302+ }
dece6358 15303+
4a4d8108
AM
15304+ if (!isdir) {
15305+ AuDebugOn(!name);
15306+ au_iigen_dec(inode);
027c5e7a 15307+ spin_lock(&inode->i_lock);
c1595e42 15308+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 15309+ spin_lock(&d->d_lock);
4a4d8108
AM
15310+ dname = &d->d_name;
15311+ if (dname->len != nlen
027c5e7a
AM
15312+ && memcmp(dname->name, name, nlen)) {
15313+ spin_unlock(&d->d_lock);
4a4d8108 15314+ continue;
027c5e7a 15315+ }
4a4d8108 15316+ err = 0;
4a4d8108
AM
15317+ au_digen_dec(d);
15318+ spin_unlock(&d->d_lock);
15319+ break;
1facf9fc 15320+ }
027c5e7a 15321+ spin_unlock(&inode->i_lock);
1308ab2a 15322+ } else {
027c5e7a 15323+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
c1595e42 15324+ d = d_find_any_alias(inode);
4a4d8108
AM
15325+ if (!d) {
15326+ au_iigen_dec(inode);
15327+ goto out;
15328+ }
1facf9fc 15329+
027c5e7a 15330+ spin_lock(&d->d_lock);
4a4d8108 15331+ dname = &d->d_name;
027c5e7a
AM
15332+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
15333+ spin_unlock(&d->d_lock);
4a4d8108 15334+ err = hn_gen_tree(d);
027c5e7a
AM
15335+ spin_lock(&d->d_lock);
15336+ }
15337+ spin_unlock(&d->d_lock);
4a4d8108
AM
15338+ dput(d);
15339+ }
1facf9fc 15340+
4f0767ce 15341+out:
4a4d8108 15342+ AuTraceErr(err);
1308ab2a 15343+ return err;
15344+}
dece6358 15345+
4a4d8108 15346+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 15347+{
4a4d8108 15348+ int err;
1facf9fc 15349+
5527c038 15350+ if (IS_ROOT(dentry)) {
0c3ec466 15351+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15352+ return 0;
15353+ }
1308ab2a 15354+
4a4d8108
AM
15355+ err = 0;
15356+ if (!isdir) {
4a4d8108 15357+ au_digen_dec(dentry);
5527c038
JR
15358+ if (d_really_is_positive(dentry))
15359+ au_iigen_dec(d_inode(dentry));
4a4d8108 15360+ } else {
027c5e7a 15361+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
5527c038 15362+ if (d_really_is_positive(dentry))
4a4d8108
AM
15363+ err = hn_gen_tree(dentry);
15364+ }
15365+
15366+ AuTraceErr(err);
15367+ return err;
1facf9fc 15368+}
15369+
4a4d8108 15370+/* ---------------------------------------------------------------------- */
1facf9fc 15371+
4a4d8108
AM
15372+/* hnotify job flags */
15373+#define AuHnJob_XINO0 1
15374+#define AuHnJob_GEN (1 << 1)
15375+#define AuHnJob_DIRENT (1 << 2)
15376+#define AuHnJob_ISDIR (1 << 3)
15377+#define AuHnJob_TRYXINO0 (1 << 4)
15378+#define AuHnJob_MNTPNT (1 << 5)
15379+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
15380+#define au_fset_hnjob(flags, name) \
15381+ do { (flags) |= AuHnJob_##name; } while (0)
15382+#define au_fclr_hnjob(flags, name) \
15383+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 15384+
4a4d8108
AM
15385+enum {
15386+ AuHn_CHILD,
15387+ AuHn_PARENT,
15388+ AuHnLast
15389+};
1facf9fc 15390+
4a4d8108
AM
15391+struct au_hnotify_args {
15392+ struct inode *h_dir, *dir, *h_child_inode;
15393+ u32 mask;
15394+ unsigned int flags[AuHnLast];
15395+ unsigned int h_child_nlen;
15396+ char h_child_name[];
15397+};
1facf9fc 15398+
4a4d8108
AM
15399+struct hn_job_args {
15400+ unsigned int flags;
15401+ struct inode *inode, *h_inode, *dir, *h_dir;
15402+ struct dentry *dentry;
15403+ char *h_name;
15404+ int h_nlen;
15405+};
1308ab2a 15406+
4a4d8108
AM
15407+static int hn_job(struct hn_job_args *a)
15408+{
15409+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
076b876e 15410+ int e;
1308ab2a 15411+
4a4d8108
AM
15412+ /* reset xino */
15413+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
15414+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 15415+
4a4d8108
AM
15416+ if (au_ftest_hnjob(a->flags, TRYXINO0)
15417+ && a->inode
15418+ && a->h_inode) {
febd17d6 15419+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
38d290e6
JR
15420+ if (!a->h_inode->i_nlink
15421+ && !(a->h_inode->i_state & I_LINKABLE))
4a4d8108 15422+ hn_xino(a->inode, a->h_inode); /* ignore this error */
febd17d6 15423+ inode_unlock(a->h_inode);
1308ab2a 15424+ }
1facf9fc 15425+
4a4d8108
AM
15426+ /* make the generation obsolete */
15427+ if (au_ftest_hnjob(a->flags, GEN)) {
076b876e 15428+ e = -1;
4a4d8108 15429+ if (a->inode)
076b876e 15430+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
4a4d8108 15431+ isdir);
076b876e 15432+ if (e && a->dentry)
4a4d8108
AM
15433+ hn_gen_by_name(a->dentry, isdir);
15434+ /* ignore this error */
1facf9fc 15435+ }
1facf9fc 15436+
4a4d8108
AM
15437+ /* make dir entries obsolete */
15438+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
15439+ struct au_vdir *vdir;
1facf9fc 15440+
4a4d8108
AM
15441+ vdir = au_ivdir(a->inode);
15442+ if (vdir)
15443+ vdir->vd_jiffy = 0;
15444+ /* IMustLock(a->inode); */
15445+ /* a->inode->i_version++; */
15446+ }
1facf9fc 15447+
4a4d8108
AM
15448+ /* can do nothing but warn */
15449+ if (au_ftest_hnjob(a->flags, MNTPNT)
15450+ && a->dentry
15451+ && d_mountpoint(a->dentry))
523b37e3 15452+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
1facf9fc 15453+
4a4d8108 15454+ return 0;
1308ab2a 15455+}
1facf9fc 15456+
1308ab2a 15457+/* ---------------------------------------------------------------------- */
1facf9fc 15458+
4a4d8108
AM
15459+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
15460+ struct inode *dir)
1308ab2a 15461+{
4a4d8108
AM
15462+ struct dentry *dentry, *d, *parent;
15463+ struct qstr *dname;
1308ab2a 15464+
c1595e42 15465+ parent = d_find_any_alias(dir);
4a4d8108
AM
15466+ if (!parent)
15467+ return NULL;
1308ab2a 15468+
4a4d8108 15469+ dentry = NULL;
027c5e7a 15470+ spin_lock(&parent->d_lock);
c1595e42 15471+ list_for_each_entry(d, &parent->d_subdirs, d_child) {
523b37e3 15472+ /* AuDbg("%pd\n", d); */
027c5e7a 15473+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
15474+ dname = &d->d_name;
15475+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
15476+ goto cont_unlock;
15477+ if (au_di(d))
15478+ au_digen_dec(d);
15479+ else
15480+ goto cont_unlock;
c1595e42 15481+ if (au_dcount(d) > 0) {
027c5e7a 15482+ dentry = dget_dlock(d);
4a4d8108 15483+ spin_unlock(&d->d_lock);
027c5e7a 15484+ break;
dece6358 15485+ }
1facf9fc 15486+
f6b6e03d 15487+cont_unlock:
027c5e7a 15488+ spin_unlock(&d->d_lock);
1308ab2a 15489+ }
027c5e7a 15490+ spin_unlock(&parent->d_lock);
4a4d8108 15491+ dput(parent);
1facf9fc 15492+
4a4d8108
AM
15493+ if (dentry)
15494+ di_write_lock_child(dentry);
1308ab2a 15495+
4a4d8108
AM
15496+ return dentry;
15497+}
dece6358 15498+
4a4d8108
AM
15499+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
15500+ aufs_bindex_t bindex, ino_t h_ino)
15501+{
15502+ struct inode *inode;
15503+ ino_t ino;
15504+ int err;
15505+
15506+ inode = NULL;
15507+ err = au_xino_read(sb, bindex, h_ino, &ino);
15508+ if (!err && ino)
15509+ inode = ilookup(sb, ino);
15510+ if (!inode)
15511+ goto out;
15512+
15513+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15514+ pr_warn("wrong root branch\n");
4a4d8108
AM
15515+ iput(inode);
15516+ inode = NULL;
15517+ goto out;
1308ab2a 15518+ }
15519+
4a4d8108 15520+ ii_write_lock_child(inode);
1308ab2a 15521+
4f0767ce 15522+out:
4a4d8108 15523+ return inode;
dece6358
AM
15524+}
15525+
4a4d8108 15526+static void au_hn_bh(void *_args)
1facf9fc 15527+{
4a4d8108
AM
15528+ struct au_hnotify_args *a = _args;
15529+ struct super_block *sb;
5afbbe0d 15530+ aufs_bindex_t bindex, bbot, bfound;
4a4d8108 15531+ unsigned char xino, try_iput;
1facf9fc 15532+ int err;
1308ab2a 15533+ struct inode *inode;
4a4d8108
AM
15534+ ino_t h_ino;
15535+ struct hn_job_args args;
15536+ struct dentry *dentry;
15537+ struct au_sbinfo *sbinfo;
1facf9fc 15538+
4a4d8108
AM
15539+ AuDebugOn(!_args);
15540+ AuDebugOn(!a->h_dir);
15541+ AuDebugOn(!a->dir);
15542+ AuDebugOn(!a->mask);
15543+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
15544+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
15545+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 15546+
4a4d8108
AM
15547+ inode = NULL;
15548+ dentry = NULL;
15549+ /*
15550+ * do not lock a->dir->i_mutex here
15551+ * because of d_revalidate() may cause a deadlock.
15552+ */
15553+ sb = a->dir->i_sb;
15554+ AuDebugOn(!sb);
15555+ sbinfo = au_sbi(sb);
15556+ AuDebugOn(!sbinfo);
7f207e10 15557+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 15558+
4a4d8108
AM
15559+ ii_read_lock_parent(a->dir);
15560+ bfound = -1;
5afbbe0d
AM
15561+ bbot = au_ibbot(a->dir);
15562+ for (bindex = au_ibtop(a->dir); bindex <= bbot; bindex++)
4a4d8108
AM
15563+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
15564+ bfound = bindex;
15565+ break;
15566+ }
15567+ ii_read_unlock(a->dir);
15568+ if (unlikely(bfound < 0))
15569+ goto out;
1facf9fc 15570+
4a4d8108
AM
15571+ xino = !!au_opt_test(au_mntflags(sb), XINO);
15572+ h_ino = 0;
15573+ if (a->h_child_inode)
15574+ h_ino = a->h_child_inode->i_ino;
1facf9fc 15575+
4a4d8108
AM
15576+ if (a->h_child_nlen
15577+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
15578+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
15579+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
15580+ a->dir);
15581+ try_iput = 0;
5527c038
JR
15582+ if (dentry && d_really_is_positive(dentry))
15583+ inode = d_inode(dentry);
4a4d8108
AM
15584+ if (xino && !inode && h_ino
15585+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
15586+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
15587+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
15588+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
15589+ try_iput = 1;
f0c0a007 15590+ }
1facf9fc 15591+
4a4d8108
AM
15592+ args.flags = a->flags[AuHn_CHILD];
15593+ args.dentry = dentry;
15594+ args.inode = inode;
15595+ args.h_inode = a->h_child_inode;
15596+ args.dir = a->dir;
15597+ args.h_dir = a->h_dir;
15598+ args.h_name = a->h_child_name;
15599+ args.h_nlen = a->h_child_nlen;
15600+ err = hn_job(&args);
15601+ if (dentry) {
027c5e7a 15602+ if (au_di(dentry))
4a4d8108
AM
15603+ di_write_unlock(dentry);
15604+ dput(dentry);
15605+ }
15606+ if (inode && try_iput) {
15607+ ii_write_unlock(inode);
15608+ iput(inode);
15609+ }
1facf9fc 15610+
4a4d8108
AM
15611+ ii_write_lock_parent(a->dir);
15612+ args.flags = a->flags[AuHn_PARENT];
15613+ args.dentry = NULL;
15614+ args.inode = a->dir;
15615+ args.h_inode = a->h_dir;
15616+ args.dir = NULL;
15617+ args.h_dir = NULL;
15618+ args.h_name = NULL;
15619+ args.h_nlen = 0;
15620+ err = hn_job(&args);
15621+ ii_write_unlock(a->dir);
1facf9fc 15622+
4f0767ce 15623+out:
4a4d8108
AM
15624+ iput(a->h_child_inode);
15625+ iput(a->h_dir);
15626+ iput(a->dir);
027c5e7a
AM
15627+ si_write_unlock(sb);
15628+ au_nwt_done(&sbinfo->si_nowait);
f0c0a007 15629+ au_delayed_kfree(a);
dece6358 15630+}
1facf9fc 15631+
4a4d8108
AM
15632+/* ---------------------------------------------------------------------- */
15633+
15634+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
15635+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 15636+{
4a4d8108 15637+ int err, len;
53392da6 15638+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
15639+ unsigned char isdir, isroot, wh;
15640+ struct inode *dir;
15641+ struct au_hnotify_args *args;
15642+ char *p, *h_child_name;
dece6358 15643+
1308ab2a 15644+ err = 0;
4a4d8108
AM
15645+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
15646+ dir = igrab(hnotify->hn_aufs_inode);
15647+ if (!dir)
15648+ goto out;
1facf9fc 15649+
4a4d8108
AM
15650+ isroot = (dir->i_ino == AUFS_ROOT_INO);
15651+ wh = 0;
15652+ h_child_name = (void *)h_child_qstr->name;
15653+ len = h_child_qstr->len;
15654+ if (h_child_name) {
15655+ if (len > AUFS_WH_PFX_LEN
15656+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
15657+ h_child_name += AUFS_WH_PFX_LEN;
15658+ len -= AUFS_WH_PFX_LEN;
15659+ wh = 1;
15660+ }
1facf9fc 15661+ }
dece6358 15662+
4a4d8108
AM
15663+ isdir = 0;
15664+ if (h_child_inode)
15665+ isdir = !!S_ISDIR(h_child_inode->i_mode);
15666+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
15667+ flags[AuHn_CHILD] = 0;
15668+ if (isdir)
15669+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
15670+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
15671+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
15672+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
15673+ case FS_MOVED_FROM:
15674+ case FS_MOVED_TO:
15675+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
15676+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15677+ /*FALLTHROUGH*/
15678+ case FS_CREATE:
fb47a38f 15679+ AuDebugOn(!h_child_name);
4a4d8108 15680+ break;
1facf9fc 15681+
4a4d8108
AM
15682+ case FS_DELETE:
15683+ /*
15684+ * aufs never be able to get this child inode.
15685+ * revalidation should be in d_revalidate()
15686+ * by checking i_nlink, i_generation or d_unhashed().
15687+ */
15688+ AuDebugOn(!h_child_name);
15689+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
15690+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15691+ break;
dece6358 15692+
4a4d8108
AM
15693+ default:
15694+ AuDebugOn(1);
15695+ }
1308ab2a 15696+
4a4d8108
AM
15697+ if (wh)
15698+ h_child_inode = NULL;
1308ab2a 15699+
4a4d8108
AM
15700+ err = -ENOMEM;
15701+ /* iput() and kfree() will be called in au_hnotify() */
4a4d8108 15702+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
15703+ if (unlikely(!args)) {
15704+ AuErr1("no memory\n");
15705+ iput(dir);
15706+ goto out;
15707+ }
15708+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
15709+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
15710+ args->mask = mask;
15711+ args->dir = dir;
15712+ args->h_dir = igrab(h_dir);
15713+ if (h_child_inode)
15714+ h_child_inode = igrab(h_child_inode); /* can be NULL */
15715+ args->h_child_inode = h_child_inode;
15716+ args->h_child_nlen = len;
15717+ if (len) {
15718+ p = (void *)args;
15719+ p += sizeof(*args);
15720+ memcpy(p, h_child_name, len);
15721+ p[len] = 0;
1308ab2a 15722+ }
1308ab2a 15723+
38d290e6 15724+ /* NFS fires the event for silly-renamed one from kworker */
53392da6 15725+ f = 0;
38d290e6
JR
15726+ if (!dir->i_nlink
15727+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
53392da6
AM
15728+ f = AuWkq_NEST;
15729+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
15730+ if (unlikely(err)) {
15731+ pr_err("wkq %d\n", err);
15732+ iput(args->h_child_inode);
15733+ iput(args->h_dir);
15734+ iput(args->dir);
f0c0a007 15735+ au_delayed_kfree(args);
1facf9fc 15736+ }
1facf9fc 15737+
4a4d8108 15738+out:
1facf9fc 15739+ return err;
15740+}
15741+
027c5e7a
AM
15742+/* ---------------------------------------------------------------------- */
15743+
15744+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
15745+{
15746+ int err;
15747+
15748+ AuDebugOn(!(udba & AuOptMask_UDBA));
15749+
15750+ err = 0;
15751+ if (au_hnotify_op.reset_br)
15752+ err = au_hnotify_op.reset_br(udba, br, perm);
15753+
15754+ return err;
15755+}
15756+
15757+int au_hnotify_init_br(struct au_branch *br, int perm)
15758+{
15759+ int err;
15760+
15761+ err = 0;
15762+ if (au_hnotify_op.init_br)
15763+ err = au_hnotify_op.init_br(br, perm);
15764+
15765+ return err;
15766+}
15767+
15768+void au_hnotify_fin_br(struct au_branch *br)
15769+{
15770+ if (au_hnotify_op.fin_br)
15771+ au_hnotify_op.fin_br(br);
15772+}
15773+
4a4d8108
AM
15774+static void au_hn_destroy_cache(void)
15775+{
f0c0a007
AM
15776+ struct au_cache *cp;
15777+
15778+ flush_delayed_work(&au_dfree.dwork);
15779+ cp = au_dfree.cache + AuCache_HNOTIFY;
15780+ AuDebugOn(!llist_empty(&cp->llist));
15781+ kmem_cache_destroy(cp->cache);
15782+ cp->cache = NULL;
4a4d8108 15783+}
1308ab2a 15784+
f0c0a007
AM
15785+AU_CACHE_DFREE_FUNC(hnotify, HNOTIFY, hn_lnode);
15786+
4a4d8108 15787+int __init au_hnotify_init(void)
1facf9fc 15788+{
1308ab2a 15789+ int err;
f0c0a007 15790+ struct au_cache *cp;
1308ab2a 15791+
4a4d8108 15792+ err = -ENOMEM;
f0c0a007
AM
15793+ cp = au_dfree.cache + AuCache_HNOTIFY;
15794+ cp->cache = AuCache(au_hnotify);
15795+ if (cp->cache) {
027c5e7a
AM
15796+ err = 0;
15797+ if (au_hnotify_op.init)
15798+ err = au_hnotify_op.init();
4a4d8108
AM
15799+ if (unlikely(err))
15800+ au_hn_destroy_cache();
1308ab2a 15801+ }
1308ab2a 15802+ AuTraceErr(err);
4a4d8108 15803+ return err;
1308ab2a 15804+}
15805+
4a4d8108 15806+void au_hnotify_fin(void)
1308ab2a 15807+{
f0c0a007
AM
15808+ struct au_cache *cp;
15809+
027c5e7a
AM
15810+ if (au_hnotify_op.fin)
15811+ au_hnotify_op.fin();
f0c0a007 15812+
4a4d8108 15813+ /* cf. au_cache_fin() */
f0c0a007
AM
15814+ cp = au_dfree.cache + AuCache_HNOTIFY;
15815+ if (cp->cache)
4a4d8108 15816+ au_hn_destroy_cache();
dece6358 15817+}
7f207e10
AM
15818diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
15819--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
15820+++ linux/fs/aufs/iinfo.c 2016-10-09 16:55:38.889431135 +0200
15821@@ -0,0 +1,285 @@
dece6358 15822+/*
8cdd5066 15823+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
15824+ *
15825+ * This program, aufs is free software; you can redistribute it and/or modify
15826+ * it under the terms of the GNU General Public License as published by
15827+ * the Free Software Foundation; either version 2 of the License, or
15828+ * (at your option) any later version.
15829+ *
15830+ * This program is distributed in the hope that it will be useful,
15831+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15832+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15833+ * GNU General Public License for more details.
15834+ *
15835+ * You should have received a copy of the GNU General Public License
523b37e3 15836+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 15837+ */
1facf9fc 15838+
dece6358 15839+/*
4a4d8108 15840+ * inode private data
dece6358 15841+ */
1facf9fc 15842+
1308ab2a 15843+#include "aufs.h"
1facf9fc 15844+
4a4d8108 15845+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 15846+{
4a4d8108 15847+ struct inode *h_inode;
5afbbe0d 15848+ struct au_hinode *hinode;
1facf9fc 15849+
4a4d8108 15850+ IiMustAnyLock(inode);
1facf9fc 15851+
5afbbe0d
AM
15852+ hinode = au_hinode(au_ii(inode), bindex);
15853+ h_inode = hinode->hi_inode;
4a4d8108
AM
15854+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15855+ return h_inode;
15856+}
1facf9fc 15857+
4a4d8108
AM
15858+/* todo: hard/soft set? */
15859+void au_hiput(struct au_hinode *hinode)
15860+{
15861+ au_hn_free(hinode);
15862+ dput(hinode->hi_whdentry);
15863+ iput(hinode->hi_inode);
15864+}
1facf9fc 15865+
4a4d8108
AM
15866+unsigned int au_hi_flags(struct inode *inode, int isdir)
15867+{
15868+ unsigned int flags;
15869+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 15870+
4a4d8108
AM
15871+ flags = 0;
15872+ if (au_opt_test(mnt_flags, XINO))
15873+ au_fset_hi(flags, XINO);
15874+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
15875+ au_fset_hi(flags, HNOTIFY);
15876+ return flags;
1facf9fc 15877+}
15878+
4a4d8108
AM
15879+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15880+ struct inode *h_inode, unsigned int flags)
1308ab2a 15881+{
4a4d8108
AM
15882+ struct au_hinode *hinode;
15883+ struct inode *hi;
15884+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 15885+
4a4d8108 15886+ IiMustWriteLock(inode);
dece6358 15887+
5afbbe0d 15888+ hinode = au_hinode(iinfo, bindex);
4a4d8108
AM
15889+ hi = hinode->hi_inode;
15890+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15891+
15892+ if (hi)
15893+ au_hiput(hinode);
15894+ hinode->hi_inode = h_inode;
15895+ if (h_inode) {
15896+ int err;
15897+ struct super_block *sb = inode->i_sb;
15898+ struct au_branch *br;
15899+
027c5e7a
AM
15900+ AuDebugOn(inode->i_mode
15901+ && (h_inode->i_mode & S_IFMT)
15902+ != (inode->i_mode & S_IFMT));
5afbbe0d 15903+ if (bindex == iinfo->ii_btop)
4a4d8108
AM
15904+ au_cpup_igen(inode, h_inode);
15905+ br = au_sbr(sb, bindex);
15906+ hinode->hi_id = br->br_id;
15907+ if (au_ftest_hi(flags, XINO)) {
15908+ err = au_xino_write(sb, bindex, h_inode->i_ino,
15909+ inode->i_ino);
15910+ if (unlikely(err))
15911+ AuIOErr1("failed au_xino_write() %d\n", err);
15912+ }
15913+
15914+ if (au_ftest_hi(flags, HNOTIFY)
15915+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 15916+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
15917+ if (unlikely(err))
15918+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 15919+ }
15920+ }
4a4d8108 15921+}
dece6358 15922+
4a4d8108
AM
15923+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15924+ struct dentry *h_wh)
15925+{
15926+ struct au_hinode *hinode;
dece6358 15927+
4a4d8108
AM
15928+ IiMustWriteLock(inode);
15929+
5afbbe0d 15930+ hinode = au_hinode(au_ii(inode), bindex);
4a4d8108
AM
15931+ AuDebugOn(hinode->hi_whdentry);
15932+ hinode->hi_whdentry = h_wh;
1facf9fc 15933+}
15934+
537831f9 15935+void au_update_iigen(struct inode *inode, int half)
1308ab2a 15936+{
537831f9
AM
15937+ struct au_iinfo *iinfo;
15938+ struct au_iigen *iigen;
15939+ unsigned int sigen;
15940+
15941+ sigen = au_sigen(inode->i_sb);
15942+ iinfo = au_ii(inode);
15943+ iigen = &iinfo->ii_generation;
be52b249 15944+ spin_lock(&iigen->ig_spin);
537831f9
AM
15945+ iigen->ig_generation = sigen;
15946+ if (half)
15947+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
15948+ else
15949+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
be52b249 15950+ spin_unlock(&iigen->ig_spin);
4a4d8108 15951+}
1facf9fc 15952+
4a4d8108
AM
15953+/* it may be called at remount time, too */
15954+void au_update_ibrange(struct inode *inode, int do_put_zero)
15955+{
15956+ struct au_iinfo *iinfo;
5afbbe0d 15957+ aufs_bindex_t bindex, bbot;
1facf9fc 15958+
5afbbe0d 15959+ AuDebugOn(au_is_bad_inode(inode));
4a4d8108 15960+ IiMustWriteLock(inode);
1facf9fc 15961+
5afbbe0d
AM
15962+ iinfo = au_ii(inode);
15963+ if (do_put_zero && iinfo->ii_btop >= 0) {
15964+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
4a4d8108
AM
15965+ bindex++) {
15966+ struct inode *h_i;
1facf9fc 15967+
5afbbe0d 15968+ h_i = au_hinode(iinfo, bindex)->hi_inode;
38d290e6
JR
15969+ if (h_i
15970+ && !h_i->i_nlink
15971+ && !(h_i->i_state & I_LINKABLE))
027c5e7a
AM
15972+ au_set_h_iptr(inode, bindex, NULL, 0);
15973+ }
4a4d8108
AM
15974+ }
15975+
5afbbe0d
AM
15976+ iinfo->ii_btop = -1;
15977+ iinfo->ii_bbot = -1;
15978+ bbot = au_sbbot(inode->i_sb);
15979+ for (bindex = 0; bindex <= bbot; bindex++)
15980+ if (au_hinode(iinfo, bindex)->hi_inode) {
15981+ iinfo->ii_btop = bindex;
4a4d8108 15982+ break;
027c5e7a 15983+ }
5afbbe0d
AM
15984+ if (iinfo->ii_btop >= 0)
15985+ for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--)
15986+ if (au_hinode(iinfo, bindex)->hi_inode) {
15987+ iinfo->ii_bbot = bindex;
027c5e7a
AM
15988+ break;
15989+ }
5afbbe0d 15990+ AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot);
1308ab2a 15991+}
1facf9fc 15992+
dece6358 15993+/* ---------------------------------------------------------------------- */
1facf9fc 15994+
4a4d8108 15995+void au_icntnr_init_once(void *_c)
dece6358 15996+{
4a4d8108
AM
15997+ struct au_icntnr *c = _c;
15998+ struct au_iinfo *iinfo = &c->iinfo;
1facf9fc 15999+
be52b249 16000+ spin_lock_init(&iinfo->ii_generation.ig_spin);
4a4d8108
AM
16001+ au_rw_init(&iinfo->ii_rwsem);
16002+ inode_init_once(&c->vfs_inode);
16003+}
1facf9fc 16004+
5afbbe0d
AM
16005+void au_hinode_init(struct au_hinode *hinode)
16006+{
16007+ hinode->hi_inode = NULL;
16008+ hinode->hi_id = -1;
16009+ au_hn_init(hinode);
16010+ hinode->hi_whdentry = NULL;
16011+}
16012+
4a4d8108
AM
16013+int au_iinfo_init(struct inode *inode)
16014+{
16015+ struct au_iinfo *iinfo;
16016+ struct super_block *sb;
5afbbe0d 16017+ struct au_hinode *hi;
4a4d8108 16018+ int nbr, i;
1facf9fc 16019+
4a4d8108
AM
16020+ sb = inode->i_sb;
16021+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
5afbbe0d 16022+ nbr = au_sbbot(sb) + 1;
4a4d8108
AM
16023+ if (unlikely(nbr <= 0))
16024+ nbr = 1;
5afbbe0d
AM
16025+ hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
16026+ if (hi) {
7f207e10 16027+ au_ninodes_inc(sb);
5afbbe0d
AM
16028+
16029+ iinfo->ii_hinode = hi;
16030+ for (i = 0; i < nbr; i++, hi++)
16031+ au_hinode_init(hi);
1facf9fc 16032+
537831f9 16033+ iinfo->ii_generation.ig_generation = au_sigen(sb);
5afbbe0d
AM
16034+ iinfo->ii_btop = -1;
16035+ iinfo->ii_bbot = -1;
4a4d8108
AM
16036+ iinfo->ii_vdir = NULL;
16037+ return 0;
1308ab2a 16038+ }
4a4d8108
AM
16039+ return -ENOMEM;
16040+}
1facf9fc 16041+
e2f27e51 16042+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink)
4a4d8108 16043+{
5afbbe0d 16044+ int err, i;
4a4d8108 16045+ struct au_hinode *hip;
1facf9fc 16046+
4a4d8108
AM
16047+ AuRwMustWriteLock(&iinfo->ii_rwsem);
16048+
16049+ err = -ENOMEM;
e2f27e51
AM
16050+ hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS,
16051+ may_shrink);
4a4d8108
AM
16052+ if (hip) {
16053+ iinfo->ii_hinode = hip;
5afbbe0d
AM
16054+ i = iinfo->ii_bbot + 1;
16055+ hip += i;
16056+ for (; i < nbr; i++, hip++)
16057+ au_hinode_init(hip);
4a4d8108 16058+ err = 0;
1308ab2a 16059+ }
4a4d8108 16060+
1308ab2a 16061+ return err;
1facf9fc 16062+}
16063+
4a4d8108 16064+void au_iinfo_fin(struct inode *inode)
1facf9fc 16065+{
4a4d8108
AM
16066+ struct au_iinfo *iinfo;
16067+ struct au_hinode *hi;
16068+ struct super_block *sb;
5afbbe0d 16069+ aufs_bindex_t bindex, bbot;
b752ccd1 16070+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 16071+
5afbbe0d 16072+ AuDebugOn(au_is_bad_inode(inode));
1308ab2a 16073+
b752ccd1 16074+ sb = inode->i_sb;
7f207e10 16075+ au_ninodes_dec(sb);
b752ccd1
AM
16076+ if (si_pid_test(sb))
16077+ au_xino_delete_inode(inode, unlinked);
16078+ else {
16079+ /*
16080+ * it is safe to hide the dependency between sbinfo and
16081+ * sb->s_umount.
16082+ */
16083+ lockdep_off();
16084+ si_noflush_read_lock(sb);
16085+ au_xino_delete_inode(inode, unlinked);
16086+ si_read_unlock(sb);
16087+ lockdep_on();
16088+ }
16089+
5afbbe0d 16090+ iinfo = au_ii(inode);
4a4d8108 16091+ if (iinfo->ii_vdir)
f0c0a007 16092+ au_vdir_free(iinfo->ii_vdir, /*atonce*/0);
1308ab2a 16093+
5afbbe0d 16094+ bindex = iinfo->ii_btop;
b752ccd1 16095+ if (bindex >= 0) {
5afbbe0d
AM
16096+ hi = au_hinode(iinfo, bindex);
16097+ bbot = iinfo->ii_bbot;
16098+ while (bindex++ <= bbot) {
b752ccd1 16099+ if (hi->hi_inode)
4a4d8108 16100+ au_hiput(hi);
4a4d8108
AM
16101+ hi++;
16102+ }
16103+ }
f0c0a007 16104+ au_delayed_kfree(iinfo->ii_hinode);
4a4d8108 16105+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 16106+}
7f207e10
AM
16107diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
16108--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
16109+++ linux/fs/aufs/inode.c 2016-10-09 16:55:38.889431135 +0200
16110@@ -0,0 +1,519 @@
4a4d8108 16111+/*
8cdd5066 16112+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
16113+ *
16114+ * This program, aufs is free software; you can redistribute it and/or modify
16115+ * it under the terms of the GNU General Public License as published by
16116+ * the Free Software Foundation; either version 2 of the License, or
16117+ * (at your option) any later version.
16118+ *
16119+ * This program is distributed in the hope that it will be useful,
16120+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16121+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16122+ * GNU General Public License for more details.
16123+ *
16124+ * You should have received a copy of the GNU General Public License
523b37e3 16125+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 16126+ */
1facf9fc 16127+
4a4d8108
AM
16128+/*
16129+ * inode functions
16130+ */
1facf9fc 16131+
4a4d8108 16132+#include "aufs.h"
1308ab2a 16133+
4a4d8108
AM
16134+struct inode *au_igrab(struct inode *inode)
16135+{
16136+ if (inode) {
16137+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 16138+ ihold(inode);
1facf9fc 16139+ }
4a4d8108
AM
16140+ return inode;
16141+}
1facf9fc 16142+
4a4d8108
AM
16143+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
16144+{
16145+ au_cpup_attr_all(inode, /*force*/0);
537831f9 16146+ au_update_iigen(inode, /*half*/1);
4a4d8108
AM
16147+ if (do_version)
16148+ inode->i_version++;
dece6358 16149+}
1facf9fc 16150+
027c5e7a 16151+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 16152+{
e2f27e51 16153+ int err, e, nbr;
027c5e7a 16154+ umode_t type;
4a4d8108 16155+ aufs_bindex_t bindex, new_bindex;
1308ab2a 16156+ struct super_block *sb;
4a4d8108 16157+ struct au_iinfo *iinfo;
027c5e7a 16158+ struct au_hinode *p, *q, tmp;
1facf9fc 16159+
5afbbe0d 16160+ AuDebugOn(au_is_bad_inode(inode));
4a4d8108 16161+ IiMustWriteLock(inode);
1facf9fc 16162+
027c5e7a 16163+ *update = 0;
4a4d8108 16164+ sb = inode->i_sb;
e2f27e51 16165+ nbr = au_sbbot(sb) + 1;
027c5e7a 16166+ type = inode->i_mode & S_IFMT;
4a4d8108 16167+ iinfo = au_ii(inode);
e2f27e51 16168+ err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
4a4d8108 16169+ if (unlikely(err))
1308ab2a 16170+ goto out;
1facf9fc 16171+
5afbbe0d
AM
16172+ AuDebugOn(iinfo->ii_btop < 0);
16173+ p = au_hinode(iinfo, iinfo->ii_btop);
16174+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
4a4d8108
AM
16175+ bindex++, p++) {
16176+ if (!p->hi_inode)
16177+ continue;
1facf9fc 16178+
027c5e7a 16179+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
16180+ new_bindex = au_br_index(sb, p->hi_id);
16181+ if (new_bindex == bindex)
16182+ continue;
1facf9fc 16183+
4a4d8108 16184+ if (new_bindex < 0) {
027c5e7a 16185+ *update = 1;
4a4d8108
AM
16186+ au_hiput(p);
16187+ p->hi_inode = NULL;
16188+ continue;
1308ab2a 16189+ }
4a4d8108 16190+
5afbbe0d
AM
16191+ if (new_bindex < iinfo->ii_btop)
16192+ iinfo->ii_btop = new_bindex;
16193+ if (iinfo->ii_bbot < new_bindex)
16194+ iinfo->ii_bbot = new_bindex;
4a4d8108 16195+ /* swap two lower inode, and loop again */
5afbbe0d 16196+ q = au_hinode(iinfo, new_bindex);
4a4d8108
AM
16197+ tmp = *q;
16198+ *q = *p;
16199+ *p = tmp;
16200+ if (tmp.hi_inode) {
16201+ bindex--;
16202+ p--;
1308ab2a 16203+ }
16204+ }
4a4d8108 16205+ au_update_ibrange(inode, /*do_put_zero*/0);
e2f27e51 16206+ au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
4a4d8108
AM
16207+ e = au_dy_irefresh(inode);
16208+ if (unlikely(e && !err))
16209+ err = e;
1facf9fc 16210+
4f0767ce 16211+out:
027c5e7a
AM
16212+ AuTraceErr(err);
16213+ return err;
16214+}
16215+
b95c5147
AM
16216+void au_refresh_iop(struct inode *inode, int force_getattr)
16217+{
16218+ int type;
16219+ struct au_sbinfo *sbi = au_sbi(inode->i_sb);
16220+ const struct inode_operations *iop
16221+ = force_getattr ? aufs_iop : sbi->si_iop_array;
16222+
16223+ if (inode->i_op == iop)
16224+ return;
16225+
16226+ switch (inode->i_mode & S_IFMT) {
16227+ case S_IFDIR:
16228+ type = AuIop_DIR;
16229+ break;
16230+ case S_IFLNK:
16231+ type = AuIop_SYMLINK;
16232+ break;
16233+ default:
16234+ type = AuIop_OTHER;
16235+ break;
16236+ }
16237+
16238+ inode->i_op = iop + type;
16239+ /* unnecessary smp_wmb() */
16240+}
16241+
027c5e7a
AM
16242+int au_refresh_hinode_self(struct inode *inode)
16243+{
16244+ int err, update;
16245+
16246+ err = au_ii_refresh(inode, &update);
16247+ if (!err)
16248+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
16249+
16250+ AuTraceErr(err);
4a4d8108
AM
16251+ return err;
16252+}
1facf9fc 16253+
4a4d8108
AM
16254+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
16255+{
027c5e7a 16256+ int err, e, update;
4a4d8108 16257+ unsigned int flags;
027c5e7a 16258+ umode_t mode;
5afbbe0d 16259+ aufs_bindex_t bindex, bbot;
027c5e7a 16260+ unsigned char isdir;
4a4d8108
AM
16261+ struct au_hinode *p;
16262+ struct au_iinfo *iinfo;
1facf9fc 16263+
027c5e7a 16264+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
16265+ if (unlikely(err))
16266+ goto out;
16267+
16268+ update = 0;
16269+ iinfo = au_ii(inode);
5afbbe0d 16270+ p = au_hinode(iinfo, iinfo->ii_btop);
027c5e7a
AM
16271+ mode = (inode->i_mode & S_IFMT);
16272+ isdir = S_ISDIR(mode);
4a4d8108 16273+ flags = au_hi_flags(inode, isdir);
5afbbe0d
AM
16274+ bbot = au_dbbot(dentry);
16275+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
5527c038 16276+ struct inode *h_i, *h_inode;
4a4d8108
AM
16277+ struct dentry *h_d;
16278+
16279+ h_d = au_h_dptr(dentry, bindex);
5527c038 16280+ if (!h_d || d_is_negative(h_d))
4a4d8108
AM
16281+ continue;
16282+
5527c038
JR
16283+ h_inode = d_inode(h_d);
16284+ AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
5afbbe0d 16285+ if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
4a4d8108
AM
16286+ h_i = au_h_iptr(inode, bindex);
16287+ if (h_i) {
5527c038 16288+ if (h_i == h_inode)
4a4d8108
AM
16289+ continue;
16290+ err = -EIO;
16291+ break;
16292+ }
16293+ }
5afbbe0d
AM
16294+ if (bindex < iinfo->ii_btop)
16295+ iinfo->ii_btop = bindex;
16296+ if (iinfo->ii_bbot < bindex)
16297+ iinfo->ii_bbot = bindex;
5527c038 16298+ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
4a4d8108 16299+ update = 1;
1308ab2a 16300+ }
4a4d8108
AM
16301+ au_update_ibrange(inode, /*do_put_zero*/0);
16302+ e = au_dy_irefresh(inode);
16303+ if (unlikely(e && !err))
16304+ err = e;
027c5e7a
AM
16305+ if (!err)
16306+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 16307+
4f0767ce 16308+out:
4a4d8108 16309+ AuTraceErr(err);
1308ab2a 16310+ return err;
dece6358
AM
16311+}
16312+
4a4d8108 16313+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 16314+{
4a4d8108
AM
16315+ int err;
16316+ unsigned int flags;
16317+ umode_t mode;
5afbbe0d 16318+ aufs_bindex_t bindex, btop, btail;
4a4d8108
AM
16319+ unsigned char isdir;
16320+ struct dentry *h_dentry;
16321+ struct inode *h_inode;
16322+ struct au_iinfo *iinfo;
b95c5147 16323+ struct inode_operations *iop;
dece6358 16324+
4a4d8108 16325+ IiMustWriteLock(inode);
dece6358 16326+
4a4d8108
AM
16327+ err = 0;
16328+ isdir = 0;
b95c5147 16329+ iop = au_sbi(inode->i_sb)->si_iop_array;
5afbbe0d
AM
16330+ btop = au_dbtop(dentry);
16331+ h_dentry = au_h_dptr(dentry, btop);
5527c038 16332+ h_inode = d_inode(h_dentry);
4a4d8108
AM
16333+ mode = h_inode->i_mode;
16334+ switch (mode & S_IFMT) {
16335+ case S_IFREG:
16336+ btail = au_dbtail(dentry);
b95c5147 16337+ inode->i_op = iop + AuIop_OTHER;
4a4d8108 16338+ inode->i_fop = &aufs_file_fop;
5afbbe0d 16339+ err = au_dy_iaop(inode, btop, h_inode);
4a4d8108
AM
16340+ if (unlikely(err))
16341+ goto out;
16342+ break;
16343+ case S_IFDIR:
16344+ isdir = 1;
16345+ btail = au_dbtaildir(dentry);
b95c5147 16346+ inode->i_op = iop + AuIop_DIR;
4a4d8108
AM
16347+ inode->i_fop = &aufs_dir_fop;
16348+ break;
16349+ case S_IFLNK:
16350+ btail = au_dbtail(dentry);
b95c5147 16351+ inode->i_op = iop + AuIop_SYMLINK;
4a4d8108
AM
16352+ break;
16353+ case S_IFBLK:
16354+ case S_IFCHR:
16355+ case S_IFIFO:
16356+ case S_IFSOCK:
16357+ btail = au_dbtail(dentry);
b95c5147 16358+ inode->i_op = iop + AuIop_OTHER;
38d290e6 16359+ init_special_inode(inode, mode, h_inode->i_rdev);
4a4d8108
AM
16360+ break;
16361+ default:
16362+ AuIOErr("Unknown file type 0%o\n", mode);
16363+ err = -EIO;
1308ab2a 16364+ goto out;
4a4d8108 16365+ }
dece6358 16366+
4a4d8108
AM
16367+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
16368+ flags = au_hi_flags(inode, isdir);
16369+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
16370+ && au_ftest_hi(flags, HNOTIFY)
16371+ && dentry->d_name.len > AUFS_WH_PFX_LEN
16372+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
16373+ au_fclr_hi(flags, HNOTIFY);
16374+ iinfo = au_ii(inode);
5afbbe0d
AM
16375+ iinfo->ii_btop = btop;
16376+ iinfo->ii_bbot = btail;
16377+ for (bindex = btop; bindex <= btail; bindex++) {
4a4d8108
AM
16378+ h_dentry = au_h_dptr(dentry, bindex);
16379+ if (h_dentry)
16380+ au_set_h_iptr(inode, bindex,
5527c038 16381+ au_igrab(d_inode(h_dentry)), flags);
4a4d8108
AM
16382+ }
16383+ au_cpup_attr_all(inode, /*force*/1);
c1595e42
JR
16384+ /*
16385+ * to force calling aufs_get_acl() every time,
16386+ * do not call cache_no_acl() for aufs inode.
16387+ */
dece6358 16388+
4f0767ce 16389+out:
4a4d8108
AM
16390+ return err;
16391+}
dece6358 16392+
027c5e7a
AM
16393+/*
16394+ * successful returns with iinfo write_locked
16395+ * minus: errno
16396+ * zero: success, matched
16397+ * plus: no error, but unmatched
16398+ */
16399+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
16400+{
16401+ int err;
cfc41e69 16402+ unsigned int gen, igflags;
5afbbe0d 16403+ aufs_bindex_t bindex, bbot;
4a4d8108 16404+ struct inode *h_inode, *h_dinode;
5527c038 16405+ struct dentry *h_dentry;
dece6358 16406+
4a4d8108
AM
16407+ /*
16408+ * before this function, if aufs got any iinfo lock, it must be only
16409+ * one, the parent dir.
16410+ * it can happen by UDBA and the obsoleted inode number.
16411+ */
16412+ err = -EIO;
16413+ if (unlikely(inode->i_ino == parent_ino(dentry)))
16414+ goto out;
16415+
027c5e7a 16416+ err = 1;
4a4d8108 16417+ ii_write_lock_new_child(inode);
5afbbe0d 16418+ h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
5527c038 16419+ h_dinode = d_inode(h_dentry);
5afbbe0d
AM
16420+ bbot = au_ibbot(inode);
16421+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
4a4d8108 16422+ h_inode = au_h_iptr(inode, bindex);
537831f9
AM
16423+ if (!h_inode || h_inode != h_dinode)
16424+ continue;
16425+
16426+ err = 0;
cfc41e69 16427+ gen = au_iigen(inode, &igflags);
537831f9 16428+ if (gen == au_digen(dentry)
cfc41e69 16429+ && !au_ig_ftest(igflags, HALF_REFRESHED))
4a4d8108 16430+ break;
537831f9
AM
16431+
16432+ /* fully refresh inode using dentry */
16433+ err = au_refresh_hinode(inode, dentry);
16434+ if (!err)
16435+ au_update_iigen(inode, /*half*/0);
16436+ break;
1facf9fc 16437+ }
dece6358 16438+
4a4d8108
AM
16439+ if (unlikely(err))
16440+ ii_write_unlock(inode);
4f0767ce 16441+out:
1facf9fc 16442+ return err;
16443+}
1facf9fc 16444+
4a4d8108
AM
16445+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16446+ unsigned int d_type, ino_t *ino)
1facf9fc 16447+{
4a4d8108
AM
16448+ int err;
16449+ struct mutex *mtx;
1facf9fc 16450+
b752ccd1 16451+ /* prevent hardlinked inode number from race condition */
4a4d8108 16452+ mtx = NULL;
b752ccd1 16453+ if (d_type != DT_DIR) {
4a4d8108
AM
16454+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
16455+ mutex_lock(mtx);
16456+ }
16457+ err = au_xino_read(sb, bindex, h_ino, ino);
16458+ if (unlikely(err))
16459+ goto out;
1308ab2a 16460+
4a4d8108
AM
16461+ if (!*ino) {
16462+ err = -EIO;
16463+ *ino = au_xino_new_ino(sb);
16464+ if (unlikely(!*ino))
1facf9fc 16465+ goto out;
4a4d8108
AM
16466+ err = au_xino_write(sb, bindex, h_ino, *ino);
16467+ if (unlikely(err))
1308ab2a 16468+ goto out;
1308ab2a 16469+ }
1facf9fc 16470+
4f0767ce 16471+out:
b752ccd1 16472+ if (mtx)
4a4d8108 16473+ mutex_unlock(mtx);
1facf9fc 16474+ return err;
16475+}
16476+
4a4d8108
AM
16477+/* successful returns with iinfo write_locked */
16478+/* todo: return with unlocked? */
16479+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 16480+{
5527c038 16481+ struct inode *inode, *h_inode;
4a4d8108
AM
16482+ struct dentry *h_dentry;
16483+ struct super_block *sb;
b752ccd1 16484+ struct mutex *mtx;
4a4d8108 16485+ ino_t h_ino, ino;
1716fcea 16486+ int err;
5afbbe0d 16487+ aufs_bindex_t btop;
1facf9fc 16488+
4a4d8108 16489+ sb = dentry->d_sb;
5afbbe0d
AM
16490+ btop = au_dbtop(dentry);
16491+ h_dentry = au_h_dptr(dentry, btop);
5527c038
JR
16492+ h_inode = d_inode(h_dentry);
16493+ h_ino = h_inode->i_ino;
b752ccd1
AM
16494+
16495+ /*
16496+ * stop 'race'-ing between hardlinks under different
16497+ * parents.
16498+ */
16499+ mtx = NULL;
2000de60 16500+ if (!d_is_dir(h_dentry))
5afbbe0d 16501+ mtx = &au_sbr(sb, btop)->br_xino.xi_nondir_mtx;
b752ccd1 16502+
4f0767ce 16503+new_ino:
b752ccd1
AM
16504+ if (mtx)
16505+ mutex_lock(mtx);
5afbbe0d 16506+ err = au_xino_read(sb, btop, h_ino, &ino);
4a4d8108
AM
16507+ inode = ERR_PTR(err);
16508+ if (unlikely(err))
16509+ goto out;
b752ccd1 16510+
4a4d8108
AM
16511+ if (!ino) {
16512+ ino = au_xino_new_ino(sb);
16513+ if (unlikely(!ino)) {
16514+ inode = ERR_PTR(-EIO);
dece6358
AM
16515+ goto out;
16516+ }
16517+ }
1facf9fc 16518+
4a4d8108
AM
16519+ AuDbg("i%lu\n", (unsigned long)ino);
16520+ inode = au_iget_locked(sb, ino);
16521+ err = PTR_ERR(inode);
16522+ if (IS_ERR(inode))
1facf9fc 16523+ goto out;
1facf9fc 16524+
4a4d8108
AM
16525+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
16526+ if (inode->i_state & I_NEW) {
16527+ ii_write_lock_new_child(inode);
16528+ err = set_inode(inode, dentry);
16529+ if (!err) {
16530+ unlock_new_inode(inode);
16531+ goto out; /* success */
16532+ }
1308ab2a 16533+
027c5e7a
AM
16534+ /*
16535+ * iget_failed() calls iput(), but we need to call
16536+ * ii_write_unlock() after iget_failed(). so dirty hack for
16537+ * i_count.
16538+ */
16539+ atomic_inc(&inode->i_count);
4a4d8108 16540+ iget_failed(inode);
027c5e7a 16541+ ii_write_unlock(inode);
5afbbe0d 16542+ au_xino_write(sb, btop, h_ino, /*ino*/0);
027c5e7a
AM
16543+ /* ignore this error */
16544+ goto out_iput;
16545+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
b752ccd1
AM
16546+ /*
16547+ * horrible race condition between lookup, readdir and copyup
16548+ * (or something).
16549+ */
16550+ if (mtx)
16551+ mutex_unlock(mtx);
027c5e7a
AM
16552+ err = reval_inode(inode, dentry);
16553+ if (unlikely(err < 0)) {
16554+ mtx = NULL;
16555+ goto out_iput;
16556+ }
16557+
b752ccd1
AM
16558+ if (!err) {
16559+ mtx = NULL;
4a4d8108 16560+ goto out; /* success */
b752ccd1
AM
16561+ } else if (mtx)
16562+ mutex_lock(mtx);
4a4d8108
AM
16563+ }
16564+
5527c038 16565+ if (unlikely(au_test_fs_unique_ino(h_inode)))
4a4d8108 16566+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
523b37e3 16567+ " b%d, %s, %pd, hi%lu, i%lu.\n",
5afbbe0d 16568+ btop, au_sbtype(h_dentry->d_sb), dentry,
4a4d8108
AM
16569+ (unsigned long)h_ino, (unsigned long)ino);
16570+ ino = 0;
5afbbe0d 16571+ err = au_xino_write(sb, btop, h_ino, /*ino*/0);
4a4d8108
AM
16572+ if (!err) {
16573+ iput(inode);
b752ccd1
AM
16574+ if (mtx)
16575+ mutex_unlock(mtx);
4a4d8108
AM
16576+ goto new_ino;
16577+ }
1308ab2a 16578+
4f0767ce 16579+out_iput:
4a4d8108 16580+ iput(inode);
4a4d8108 16581+ inode = ERR_PTR(err);
4f0767ce 16582+out:
b752ccd1
AM
16583+ if (mtx)
16584+ mutex_unlock(mtx);
4a4d8108 16585+ return inode;
1facf9fc 16586+}
16587+
4a4d8108 16588+/* ---------------------------------------------------------------------- */
1facf9fc 16589+
4a4d8108
AM
16590+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16591+ struct inode *inode)
16592+{
16593+ int err;
076b876e 16594+ struct inode *hi;
1facf9fc 16595+
4a4d8108 16596+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 16597+
4a4d8108
AM
16598+ /* pseudo-link after flushed may happen out of bounds */
16599+ if (!err
16600+ && inode
5afbbe0d
AM
16601+ && au_ibtop(inode) <= bindex
16602+ && bindex <= au_ibbot(inode)) {
4a4d8108
AM
16603+ /*
16604+ * permission check is unnecessary since vfsub routine
16605+ * will be called later
16606+ */
076b876e 16607+ hi = au_h_iptr(inode, bindex);
4a4d8108
AM
16608+ if (hi)
16609+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 16610+ }
16611+
4a4d8108
AM
16612+ return err;
16613+}
dece6358 16614+
4a4d8108
AM
16615+int au_test_h_perm(struct inode *h_inode, int mask)
16616+{
2dfbb274 16617+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
4a4d8108
AM
16618+ return 0;
16619+ return inode_permission(h_inode, mask);
16620+}
1facf9fc 16621+
4a4d8108
AM
16622+int au_test_h_perm_sio(struct inode *h_inode, int mask)
16623+{
16624+ if (au_test_nfs(h_inode->i_sb)
16625+ && (mask & MAY_WRITE)
16626+ && S_ISDIR(h_inode->i_mode))
16627+ mask |= MAY_READ; /* force permission check */
16628+ return au_test_h_perm(h_inode, mask);
1facf9fc 16629+}
7f207e10
AM
16630diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
16631--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 16632+++ linux/fs/aufs/inode.h 2016-10-09 16:55:38.889431135 +0200
f0c0a007 16633@@ -0,0 +1,700 @@
4a4d8108 16634+/*
8cdd5066 16635+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
16636+ *
16637+ * This program, aufs is free software; you can redistribute it and/or modify
16638+ * it under the terms of the GNU General Public License as published by
16639+ * the Free Software Foundation; either version 2 of the License, or
16640+ * (at your option) any later version.
16641+ *
16642+ * This program is distributed in the hope that it will be useful,
16643+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16644+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16645+ * GNU General Public License for more details.
16646+ *
16647+ * You should have received a copy of the GNU General Public License
523b37e3 16648+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 16649+ */
1facf9fc 16650+
1308ab2a 16651+/*
4a4d8108 16652+ * inode operations
1308ab2a 16653+ */
dece6358 16654+
4a4d8108
AM
16655+#ifndef __AUFS_INODE_H__
16656+#define __AUFS_INODE_H__
dece6358 16657+
4a4d8108 16658+#ifdef __KERNEL__
1308ab2a 16659+
4a4d8108 16660+#include <linux/fsnotify.h>
4a4d8108 16661+#include "rwsem.h"
1308ab2a 16662+
4a4d8108 16663+struct vfsmount;
1facf9fc 16664+
4a4d8108
AM
16665+struct au_hnotify {
16666+#ifdef CONFIG_AUFS_HNOTIFY
16667+#ifdef CONFIG_AUFS_HFSNOTIFY
7f207e10 16668+ /* never use fsnotify_add_vfsmount_mark() */
0c5527e5 16669+ struct fsnotify_mark hn_mark;
4a4d8108 16670+#endif
f0c0a007
AM
16671+ union {
16672+ struct inode *hn_aufs_inode; /* no get/put */
16673+ struct llist_node hn_lnode; /* delayed free */
16674+ };
4a4d8108
AM
16675+#endif
16676+} ____cacheline_aligned_in_smp;
1facf9fc 16677+
4a4d8108
AM
16678+struct au_hinode {
16679+ struct inode *hi_inode;
16680+ aufs_bindex_t hi_id;
16681+#ifdef CONFIG_AUFS_HNOTIFY
16682+ struct au_hnotify *hi_notify;
16683+#endif
dece6358 16684+
4a4d8108
AM
16685+ /* reference to the copied-up whiteout with get/put */
16686+ struct dentry *hi_whdentry;
16687+};
dece6358 16688+
537831f9
AM
16689+/* ig_flags */
16690+#define AuIG_HALF_REFRESHED 1
16691+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
16692+#define au_ig_fset(flags, name) \
16693+ do { (flags) |= AuIG_##name; } while (0)
16694+#define au_ig_fclr(flags, name) \
16695+ do { (flags) &= ~AuIG_##name; } while (0)
16696+
16697+struct au_iigen {
be52b249 16698+ spinlock_t ig_spin;
537831f9
AM
16699+ __u32 ig_generation, ig_flags;
16700+};
16701+
4a4d8108
AM
16702+struct au_vdir;
16703+struct au_iinfo {
7a9e40b8 16704+ struct au_iigen ii_generation;
4a4d8108 16705+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 16706+
4a4d8108 16707+ struct au_rwsem ii_rwsem;
5afbbe0d 16708+ aufs_bindex_t ii_btop, ii_bbot;
4a4d8108
AM
16709+ __u32 ii_higen;
16710+ struct au_hinode *ii_hinode;
16711+ struct au_vdir *ii_vdir;
16712+};
1facf9fc 16713+
4a4d8108
AM
16714+struct au_icntnr {
16715+ struct au_iinfo iinfo;
16716+ struct inode vfs_inode;
f0c0a007
AM
16717+ union {
16718+ struct hlist_node plink;
16719+ struct llist_node lnode; /* delayed free */
16720+ };
4a4d8108 16721+} ____cacheline_aligned_in_smp;
1308ab2a 16722+
4a4d8108
AM
16723+/* au_pin flags */
16724+#define AuPin_DI_LOCKED 1
16725+#define AuPin_MNT_WRITE (1 << 1)
16726+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
7f207e10
AM
16727+#define au_fset_pin(flags, name) \
16728+ do { (flags) |= AuPin_##name; } while (0)
16729+#define au_fclr_pin(flags, name) \
16730+ do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
16731+
16732+struct au_pin {
16733+ /* input */
16734+ struct dentry *dentry;
16735+ unsigned int udba;
16736+ unsigned char lsc_di, lsc_hi, flags;
16737+ aufs_bindex_t bindex;
16738+
16739+ /* output */
16740+ struct dentry *parent;
16741+ struct au_hinode *hdir;
16742+ struct vfsmount *h_mnt;
86dc4139
AM
16743+
16744+ /* temporary unlock/relock for copyup */
16745+ struct dentry *h_dentry, *h_parent;
16746+ struct au_branch *br;
16747+ struct task_struct *task;
4a4d8108 16748+};
1facf9fc 16749+
86dc4139 16750+void au_pin_hdir_unlock(struct au_pin *p);
c1595e42 16751+int au_pin_hdir_lock(struct au_pin *p);
86dc4139 16752+int au_pin_hdir_relock(struct au_pin *p);
86dc4139
AM
16753+void au_pin_hdir_acquire_nest(struct au_pin *p);
16754+void au_pin_hdir_release(struct au_pin *p);
16755+
1308ab2a 16756+/* ---------------------------------------------------------------------- */
16757+
4a4d8108 16758+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 16759+{
5afbbe0d
AM
16760+ BUG_ON(is_bad_inode(inode));
16761+ return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
4a4d8108 16762+}
1facf9fc 16763+
4a4d8108 16764+/* ---------------------------------------------------------------------- */
1facf9fc 16765+
4a4d8108
AM
16766+/* inode.c */
16767+struct inode *au_igrab(struct inode *inode);
b95c5147 16768+void au_refresh_iop(struct inode *inode, int force_getattr);
027c5e7a 16769+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
16770+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
16771+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16772+ unsigned int d_type, ino_t *ino);
16773+struct inode *au_new_inode(struct dentry *dentry, int must_new);
16774+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16775+ struct inode *inode);
16776+int au_test_h_perm(struct inode *h_inode, int mask);
16777+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 16778+
4a4d8108
AM
16779+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
16780+ ino_t h_ino, unsigned int d_type, ino_t *ino)
16781+{
16782+#ifdef CONFIG_AUFS_SHWH
16783+ return au_ino(sb, bindex, h_ino, d_type, ino);
16784+#else
16785+ return 0;
16786+#endif
16787+}
1facf9fc 16788+
4a4d8108 16789+/* i_op.c */
b95c5147
AM
16790+enum {
16791+ AuIop_SYMLINK,
16792+ AuIop_DIR,
16793+ AuIop_OTHER,
16794+ AuIop_Last
16795+};
16796+extern struct inode_operations aufs_iop[AuIop_Last],
16797+ aufs_iop_nogetattr[AuIop_Last];
1308ab2a 16798+
4a4d8108
AM
16799+/* au_wr_dir flags */
16800+#define AuWrDir_ADD_ENTRY 1
7e9cd9fe
AM
16801+#define AuWrDir_ISDIR (1 << 1)
16802+#define AuWrDir_TMPFILE (1 << 2)
4a4d8108 16803+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
7f207e10
AM
16804+#define au_fset_wrdir(flags, name) \
16805+ do { (flags) |= AuWrDir_##name; } while (0)
16806+#define au_fclr_wrdir(flags, name) \
16807+ do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 16808+
4a4d8108
AM
16809+struct au_wr_dir_args {
16810+ aufs_bindex_t force_btgt;
16811+ unsigned char flags;
16812+};
16813+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16814+ struct au_wr_dir_args *args);
dece6358 16815+
4a4d8108
AM
16816+struct dentry *au_pinned_h_parent(struct au_pin *pin);
16817+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
16818+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16819+ unsigned int udba, unsigned char flags);
16820+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16821+ unsigned int udba, unsigned char flags) __must_check;
16822+int au_do_pin(struct au_pin *pin) __must_check;
16823+void au_unpin(struct au_pin *pin);
c1595e42
JR
16824+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
16825+
16826+#define AuIcpup_DID_CPUP 1
16827+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
16828+#define au_fset_icpup(flags, name) \
16829+ do { (flags) |= AuIcpup_##name; } while (0)
16830+#define au_fclr_icpup(flags, name) \
16831+ do { (flags) &= ~AuIcpup_##name; } while (0)
16832+
16833+struct au_icpup_args {
16834+ unsigned char flags;
16835+ unsigned char pin_flags;
16836+ aufs_bindex_t btgt;
16837+ unsigned int udba;
16838+ struct au_pin pin;
16839+ struct path h_path;
16840+ struct inode *h_inode;
16841+};
16842+
16843+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16844+ struct au_icpup_args *a);
16845+
16846+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path);
1facf9fc 16847+
4a4d8108
AM
16848+/* i_op_add.c */
16849+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
16850+ struct dentry *h_parent, int isdir);
7eafdf33
AM
16851+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
16852+ dev_t dev);
4a4d8108 16853+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
7eafdf33 16854+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 16855+ bool want_excl);
b912730e
AM
16856+struct vfsub_aopen_args;
16857+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
16858+ struct vfsub_aopen_args *args);
38d290e6 16859+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
4a4d8108
AM
16860+int aufs_link(struct dentry *src_dentry, struct inode *dir,
16861+ struct dentry *dentry);
7eafdf33 16862+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
1facf9fc 16863+
4a4d8108
AM
16864+/* i_op_del.c */
16865+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
16866+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16867+ struct dentry *h_parent, int isdir);
16868+int aufs_unlink(struct inode *dir, struct dentry *dentry);
16869+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 16870+
4a4d8108
AM
16871+/* i_op_ren.c */
16872+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
16873+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
16874+ struct inode *dir, struct dentry *dentry);
1facf9fc 16875+
4a4d8108
AM
16876+/* iinfo.c */
16877+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
16878+void au_hiput(struct au_hinode *hinode);
16879+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
16880+ struct dentry *h_wh);
16881+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 16882+
4a4d8108
AM
16883+/* hinode flags */
16884+#define AuHi_XINO 1
16885+#define AuHi_HNOTIFY (1 << 1)
16886+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
16887+#define au_fset_hi(flags, name) \
16888+ do { (flags) |= AuHi_##name; } while (0)
16889+#define au_fclr_hi(flags, name) \
16890+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 16891+
4a4d8108
AM
16892+#ifndef CONFIG_AUFS_HNOTIFY
16893+#undef AuHi_HNOTIFY
16894+#define AuHi_HNOTIFY 0
16895+#endif
1facf9fc 16896+
4a4d8108
AM
16897+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
16898+ struct inode *h_inode, unsigned int flags);
1facf9fc 16899+
537831f9 16900+void au_update_iigen(struct inode *inode, int half);
4a4d8108 16901+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 16902+
4a4d8108 16903+void au_icntnr_init_once(void *_c);
5afbbe0d 16904+void au_hinode_init(struct au_hinode *hinode);
4a4d8108
AM
16905+int au_iinfo_init(struct inode *inode);
16906+void au_iinfo_fin(struct inode *inode);
e2f27e51 16907+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink);
1308ab2a 16908+
e49829fe 16909+#ifdef CONFIG_PROC_FS
4a4d8108 16910+/* plink.c */
e49829fe 16911+int au_plink_maint(struct super_block *sb, int flags);
7e9cd9fe 16912+struct au_sbinfo;
e49829fe
JR
16913+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
16914+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
16915+#ifdef CONFIG_AUFS_DEBUG
16916+void au_plink_list(struct super_block *sb);
16917+#else
16918+AuStubVoid(au_plink_list, struct super_block *sb)
16919+#endif
16920+int au_plink_test(struct inode *inode);
16921+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
16922+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
16923+ struct dentry *h_dentry);
e49829fe
JR
16924+void au_plink_put(struct super_block *sb, int verbose);
16925+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 16926+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
16927+#else
16928+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
16929+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
16930+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
16931+AuStubVoid(au_plink_list, struct super_block *sb);
16932+AuStubInt0(au_plink_test, struct inode *inode);
16933+AuStub(struct dentry *, au_plink_lkup, return NULL,
16934+ struct inode *inode, aufs_bindex_t bindex);
16935+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
16936+ struct dentry *h_dentry);
16937+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
16938+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
16939+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
16940+#endif /* CONFIG_PROC_FS */
1facf9fc 16941+
c1595e42
JR
16942+#ifdef CONFIG_AUFS_XATTR
16943+/* xattr.c */
7e9cd9fe
AM
16944+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
16945+ unsigned int verbose);
c1595e42 16946+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
5afbbe0d
AM
16947+ssize_t aufs_getxattr(struct dentry *dentry, struct inode *inode,
16948+ const char *name, void *value, size_t size);
16949+int aufs_setxattr(struct dentry *dentry, struct inode *inode, const char *name,
16950+ const void *value, size_t size, int flags);
c1595e42
JR
16951+int aufs_removexattr(struct dentry *dentry, const char *name);
16952+
16953+/* void au_xattr_init(struct super_block *sb); */
16954+#else
16955+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe 16956+ int ignore_flags, unsigned int verbose);
c1595e42
JR
16957+/* AuStubVoid(au_xattr_init, struct super_block *sb); */
16958+#endif
16959+
16960+#ifdef CONFIG_FS_POSIX_ACL
16961+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
16962+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
16963+#endif
16964+
16965+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
16966+enum {
16967+ AU_XATTR_SET,
16968+ AU_XATTR_REMOVE,
16969+ AU_ACL_SET
16970+};
16971+
16972+struct au_srxattr {
16973+ int type;
16974+ union {
16975+ struct {
16976+ const char *name;
16977+ const void *value;
16978+ size_t size;
16979+ int flags;
16980+ } set;
16981+ struct {
16982+ const char *name;
16983+ } remove;
16984+ struct {
16985+ struct posix_acl *acl;
16986+ int type;
16987+ } acl_set;
16988+ } u;
16989+};
5afbbe0d
AM
16990+ssize_t au_srxattr(struct dentry *dentry, struct inode *inode,
16991+ struct au_srxattr *arg);
c1595e42
JR
16992+#endif
16993+
4a4d8108 16994+/* ---------------------------------------------------------------------- */
1308ab2a 16995+
4a4d8108
AM
16996+/* lock subclass for iinfo */
16997+enum {
16998+ AuLsc_II_CHILD, /* child first */
16999+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
17000+ AuLsc_II_CHILD3, /* copyup dirs */
17001+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
17002+ AuLsc_II_PARENT2,
17003+ AuLsc_II_PARENT3, /* copyup dirs */
17004+ AuLsc_II_NEW_CHILD
17005+};
1308ab2a 17006+
1facf9fc 17007+/*
4a4d8108
AM
17008+ * ii_read_lock_child, ii_write_lock_child,
17009+ * ii_read_lock_child2, ii_write_lock_child2,
17010+ * ii_read_lock_child3, ii_write_lock_child3,
17011+ * ii_read_lock_parent, ii_write_lock_parent,
17012+ * ii_read_lock_parent2, ii_write_lock_parent2,
17013+ * ii_read_lock_parent3, ii_write_lock_parent3,
17014+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 17015+ */
4a4d8108
AM
17016+#define AuReadLockFunc(name, lsc) \
17017+static inline void ii_read_lock_##name(struct inode *i) \
17018+{ \
17019+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
17020+}
17021+
17022+#define AuWriteLockFunc(name, lsc) \
17023+static inline void ii_write_lock_##name(struct inode *i) \
17024+{ \
17025+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
17026+}
17027+
17028+#define AuRWLockFuncs(name, lsc) \
17029+ AuReadLockFunc(name, lsc) \
17030+ AuWriteLockFunc(name, lsc)
17031+
17032+AuRWLockFuncs(child, CHILD);
17033+AuRWLockFuncs(child2, CHILD2);
17034+AuRWLockFuncs(child3, CHILD3);
17035+AuRWLockFuncs(parent, PARENT);
17036+AuRWLockFuncs(parent2, PARENT2);
17037+AuRWLockFuncs(parent3, PARENT3);
17038+AuRWLockFuncs(new_child, NEW_CHILD);
17039+
17040+#undef AuReadLockFunc
17041+#undef AuWriteLockFunc
17042+#undef AuRWLockFuncs
1facf9fc 17043+
17044+/*
4a4d8108 17045+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 17046+ */
4a4d8108 17047+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 17048+
4a4d8108
AM
17049+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
17050+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
17051+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 17052+
4a4d8108 17053+/* ---------------------------------------------------------------------- */
1308ab2a 17054+
027c5e7a
AM
17055+static inline void au_icntnr_init(struct au_icntnr *c)
17056+{
17057+#ifdef CONFIG_AUFS_DEBUG
17058+ c->vfs_inode.i_mode = 0;
17059+#endif
17060+}
17061+
cfc41e69 17062+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
4a4d8108 17063+{
537831f9
AM
17064+ unsigned int gen;
17065+ struct au_iinfo *iinfo;
be52b249 17066+ struct au_iigen *iigen;
537831f9
AM
17067+
17068+ iinfo = au_ii(inode);
be52b249
AM
17069+ iigen = &iinfo->ii_generation;
17070+ spin_lock(&iigen->ig_spin);
cfc41e69
AM
17071+ if (igflags)
17072+ *igflags = iigen->ig_flags;
be52b249
AM
17073+ gen = iigen->ig_generation;
17074+ spin_unlock(&iigen->ig_spin);
537831f9
AM
17075+
17076+ return gen;
4a4d8108 17077+}
1308ab2a 17078+
4a4d8108
AM
17079+/* tiny test for inode number */
17080+/* tmpfs generation is too rough */
17081+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
17082+{
17083+ struct au_iinfo *iinfo;
1308ab2a 17084+
4a4d8108
AM
17085+ iinfo = au_ii(inode);
17086+ AuRwMustAnyLock(&iinfo->ii_rwsem);
17087+ return !(iinfo->ii_hsb1 == h_inode->i_sb
17088+ && iinfo->ii_higen == h_inode->i_generation);
17089+}
1308ab2a 17090+
4a4d8108
AM
17091+static inline void au_iigen_dec(struct inode *inode)
17092+{
537831f9 17093+ struct au_iinfo *iinfo;
be52b249 17094+ struct au_iigen *iigen;
537831f9
AM
17095+
17096+ iinfo = au_ii(inode);
be52b249
AM
17097+ iigen = &iinfo->ii_generation;
17098+ spin_lock(&iigen->ig_spin);
17099+ iigen->ig_generation--;
17100+ spin_unlock(&iigen->ig_spin);
027c5e7a
AM
17101+}
17102+
17103+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
17104+{
17105+ int err;
17106+
17107+ err = 0;
537831f9 17108+ if (unlikely(inode && au_iigen(inode, NULL) != sigen))
027c5e7a
AM
17109+ err = -EIO;
17110+
17111+ return err;
4a4d8108 17112+}
1308ab2a 17113+
4a4d8108 17114+/* ---------------------------------------------------------------------- */
1308ab2a 17115+
5afbbe0d
AM
17116+static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo,
17117+ aufs_bindex_t bindex)
17118+{
17119+ return iinfo->ii_hinode + bindex;
17120+}
17121+
17122+static inline int au_is_bad_inode(struct inode *inode)
17123+{
17124+ return !!(is_bad_inode(inode) || !au_hinode(au_ii(inode), 0));
17125+}
17126+
4a4d8108
AM
17127+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
17128+ aufs_bindex_t bindex)
17129+{
17130+ IiMustAnyLock(inode);
5afbbe0d 17131+ return au_hinode(au_ii(inode), bindex)->hi_id;
4a4d8108 17132+}
1308ab2a 17133+
5afbbe0d 17134+static inline aufs_bindex_t au_ibtop(struct inode *inode)
4a4d8108
AM
17135+{
17136+ IiMustAnyLock(inode);
5afbbe0d 17137+ return au_ii(inode)->ii_btop;
4a4d8108 17138+}
1308ab2a 17139+
5afbbe0d 17140+static inline aufs_bindex_t au_ibbot(struct inode *inode)
4a4d8108
AM
17141+{
17142+ IiMustAnyLock(inode);
5afbbe0d 17143+ return au_ii(inode)->ii_bbot;
4a4d8108 17144+}
1308ab2a 17145+
4a4d8108
AM
17146+static inline struct au_vdir *au_ivdir(struct inode *inode)
17147+{
17148+ IiMustAnyLock(inode);
17149+ return au_ii(inode)->ii_vdir;
17150+}
1308ab2a 17151+
4a4d8108
AM
17152+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
17153+{
17154+ IiMustAnyLock(inode);
5afbbe0d 17155+ return au_hinode(au_ii(inode), bindex)->hi_whdentry;
4a4d8108 17156+}
1308ab2a 17157+
5afbbe0d 17158+static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 17159+{
4a4d8108 17160+ IiMustWriteLock(inode);
5afbbe0d 17161+ au_ii(inode)->ii_btop = bindex;
4a4d8108 17162+}
1308ab2a 17163+
5afbbe0d 17164+static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex)
4a4d8108
AM
17165+{
17166+ IiMustWriteLock(inode);
5afbbe0d 17167+ au_ii(inode)->ii_bbot = bindex;
1308ab2a 17168+}
17169+
4a4d8108
AM
17170+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
17171+{
17172+ IiMustWriteLock(inode);
17173+ au_ii(inode)->ii_vdir = vdir;
17174+}
1facf9fc 17175+
4a4d8108 17176+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 17177+{
4a4d8108 17178+ IiMustAnyLock(inode);
5afbbe0d 17179+ return au_hinode(au_ii(inode), bindex);
4a4d8108 17180+}
dece6358 17181+
4a4d8108 17182+/* ---------------------------------------------------------------------- */
1facf9fc 17183+
4a4d8108
AM
17184+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
17185+{
17186+ if (pin)
17187+ return pin->parent;
17188+ return NULL;
1facf9fc 17189+}
17190+
4a4d8108 17191+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 17192+{
4a4d8108
AM
17193+ if (pin && pin->hdir)
17194+ return pin->hdir->hi_inode;
17195+ return NULL;
1308ab2a 17196+}
1facf9fc 17197+
4a4d8108
AM
17198+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
17199+{
17200+ if (pin)
17201+ return pin->hdir;
17202+ return NULL;
17203+}
1facf9fc 17204+
4a4d8108 17205+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 17206+{
4a4d8108
AM
17207+ if (pin)
17208+ pin->dentry = dentry;
17209+}
1308ab2a 17210+
4a4d8108
AM
17211+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
17212+ unsigned char lflag)
17213+{
17214+ if (pin) {
7f207e10 17215+ if (lflag)
4a4d8108 17216+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 17217+ else
4a4d8108 17218+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 17219+ }
4a4d8108
AM
17220+}
17221+
7e9cd9fe 17222+#if 0 /* reserved */
4a4d8108
AM
17223+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
17224+{
17225+ if (pin) {
17226+ dput(pin->parent);
17227+ pin->parent = dget(parent);
1facf9fc 17228+ }
4a4d8108 17229+}
7e9cd9fe 17230+#endif
1facf9fc 17231+
4a4d8108
AM
17232+/* ---------------------------------------------------------------------- */
17233+
027c5e7a 17234+struct au_branch;
4a4d8108
AM
17235+#ifdef CONFIG_AUFS_HNOTIFY
17236+struct au_hnotify_op {
17237+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a 17238+ int (*alloc)(struct au_hinode *hinode);
7eafdf33
AM
17239+
17240+ /*
17241+ * if it returns true, the the caller should free hinode->hi_notify,
17242+ * otherwise ->free() frees it.
17243+ */
17244+ int (*free)(struct au_hinode *hinode,
17245+ struct au_hnotify *hn) __must_check;
4a4d8108
AM
17246+
17247+ void (*fin)(void);
17248+ int (*init)(void);
027c5e7a
AM
17249+
17250+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
17251+ void (*fin_br)(struct au_branch *br);
17252+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
17253+};
17254+
17255+/* hnotify.c */
027c5e7a 17256+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
17257+void au_hn_free(struct au_hinode *hinode);
17258+void au_hn_ctl(struct au_hinode *hinode, int do_set);
17259+void au_hn_reset(struct inode *inode, unsigned int flags);
17260+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
17261+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
17262+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
17263+int au_hnotify_init_br(struct au_branch *br, int perm);
17264+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
17265+int __init au_hnotify_init(void);
17266+void au_hnotify_fin(void);
17267+
7f207e10 17268+/* hfsnotify.c */
4a4d8108
AM
17269+extern const struct au_hnotify_op au_hnotify_op;
17270+
17271+static inline
17272+void au_hn_init(struct au_hinode *hinode)
17273+{
17274+ hinode->hi_notify = NULL;
1308ab2a 17275+}
17276+
53392da6
AM
17277+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
17278+{
17279+ return hinode->hi_notify;
17280+}
17281+
4a4d8108 17282+#else
c1595e42
JR
17283+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
17284+ struct au_hinode *hinode __maybe_unused,
17285+ struct inode *inode __maybe_unused)
17286+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
4a4d8108
AM
17287+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
17288+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
17289+ int do_set __maybe_unused)
17290+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
17291+ unsigned int flags __maybe_unused)
027c5e7a
AM
17292+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
17293+ struct au_branch *br __maybe_unused,
17294+ int perm __maybe_unused)
17295+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
17296+ int perm __maybe_unused)
17297+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
17298+AuStubInt0(__init au_hnotify_init, void)
17299+AuStubVoid(au_hnotify_fin, void)
17300+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
17301+#endif /* CONFIG_AUFS_HNOTIFY */
17302+
17303+static inline void au_hn_suspend(struct au_hinode *hdir)
17304+{
17305+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 17306+}
17307+
4a4d8108 17308+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 17309+{
4a4d8108
AM
17310+ au_hn_ctl(hdir, /*do_set*/1);
17311+}
1308ab2a 17312+
5afbbe0d 17313+static inline void au_hn_inode_lock(struct au_hinode *hdir)
4a4d8108 17314+{
febd17d6 17315+ inode_lock(hdir->hi_inode);
4a4d8108
AM
17316+ au_hn_suspend(hdir);
17317+}
dece6358 17318+
5afbbe0d 17319+static inline void au_hn_inode_lock_nested(struct au_hinode *hdir,
4a4d8108
AM
17320+ unsigned int sc __maybe_unused)
17321+{
febd17d6 17322+ inode_lock_nested(hdir->hi_inode, sc);
4a4d8108 17323+ au_hn_suspend(hdir);
1facf9fc 17324+}
1facf9fc 17325+
5afbbe0d 17326+static inline void au_hn_inode_unlock(struct au_hinode *hdir)
4a4d8108
AM
17327+{
17328+ au_hn_resume(hdir);
febd17d6 17329+ inode_unlock(hdir->hi_inode);
4a4d8108
AM
17330+}
17331+
17332+#endif /* __KERNEL__ */
17333+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
17334diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
17335--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 17336+++ linux/fs/aufs/ioctl.c 2016-10-09 16:55:36.492701639 +0200
c1595e42 17337@@ -0,0 +1,219 @@
4a4d8108 17338+/*
8cdd5066 17339+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
17340+ *
17341+ * This program, aufs is free software; you can redistribute it and/or modify
17342+ * it under the terms of the GNU General Public License as published by
17343+ * the Free Software Foundation; either version 2 of the License, or
17344+ * (at your option) any later version.
17345+ *
17346+ * This program is distributed in the hope that it will be useful,
17347+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17348+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17349+ * GNU General Public License for more details.
17350+ *
17351+ * You should have received a copy of the GNU General Public License
523b37e3 17352+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17353+ */
17354+
17355+/*
17356+ * ioctl
17357+ * plink-management and readdir in userspace.
17358+ * assist the pathconf(3) wrapper library.
c2b27bf2 17359+ * move-down
076b876e 17360+ * File-based Hierarchical Storage Management.
4a4d8108
AM
17361+ */
17362+
c2b27bf2
AM
17363+#include <linux/compat.h>
17364+#include <linux/file.h>
4a4d8108
AM
17365+#include "aufs.h"
17366+
1e00d052 17367+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
4a4d8108
AM
17368+{
17369+ int err, fd;
5afbbe0d 17370+ aufs_bindex_t wbi, bindex, bbot;
4a4d8108
AM
17371+ struct file *h_file;
17372+ struct super_block *sb;
17373+ struct dentry *root;
1e00d052
AM
17374+ struct au_branch *br;
17375+ struct aufs_wbr_fd wbrfd = {
17376+ .oflags = au_dir_roflags,
17377+ .brid = -1
17378+ };
17379+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
17380+ | O_NOATIME | O_CLOEXEC;
4a4d8108 17381+
1e00d052
AM
17382+ AuDebugOn(wbrfd.oflags & ~valid);
17383+
17384+ if (arg) {
17385+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
17386+ if (unlikely(err)) {
17387+ err = -EFAULT;
17388+ goto out;
17389+ }
17390+
17391+ err = -EINVAL;
17392+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
17393+ wbrfd.oflags |= au_dir_roflags;
17394+ AuDbg("0%o\n", wbrfd.oflags);
17395+ if (unlikely(wbrfd.oflags & ~valid))
17396+ goto out;
17397+ }
17398+
2000de60 17399+ fd = get_unused_fd_flags(0);
1e00d052
AM
17400+ err = fd;
17401+ if (unlikely(fd < 0))
4a4d8108 17402+ goto out;
4a4d8108 17403+
1e00d052 17404+ h_file = ERR_PTR(-EINVAL);
4a4d8108 17405+ wbi = 0;
1e00d052 17406+ br = NULL;
4a4d8108
AM
17407+ sb = path->dentry->d_sb;
17408+ root = sb->s_root;
17409+ aufs_read_lock(root, AuLock_IR);
5afbbe0d 17410+ bbot = au_sbbot(sb);
1e00d052
AM
17411+ if (wbrfd.brid >= 0) {
17412+ wbi = au_br_index(sb, wbrfd.brid);
5afbbe0d 17413+ if (unlikely(wbi < 0 || wbi > bbot))
1e00d052
AM
17414+ goto out_unlock;
17415+ }
17416+
17417+ h_file = ERR_PTR(-ENOENT);
17418+ br = au_sbr(sb, wbi);
17419+ if (!au_br_writable(br->br_perm)) {
17420+ if (arg)
17421+ goto out_unlock;
17422+
17423+ bindex = wbi + 1;
17424+ wbi = -1;
5afbbe0d 17425+ for (; bindex <= bbot; bindex++) {
1e00d052
AM
17426+ br = au_sbr(sb, bindex);
17427+ if (au_br_writable(br->br_perm)) {
4a4d8108 17428+ wbi = bindex;
1e00d052 17429+ br = au_sbr(sb, wbi);
4a4d8108
AM
17430+ break;
17431+ }
17432+ }
4a4d8108
AM
17433+ }
17434+ AuDbg("wbi %d\n", wbi);
1e00d052 17435+ if (wbi >= 0)
392086de
AM
17436+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
17437+ /*force_wr*/0);
1e00d052
AM
17438+
17439+out_unlock:
4a4d8108
AM
17440+ aufs_read_unlock(root, AuLock_IR);
17441+ err = PTR_ERR(h_file);
17442+ if (IS_ERR(h_file))
17443+ goto out_fd;
17444+
5afbbe0d 17445+ au_br_put(br); /* cf. au_h_open() */
4a4d8108
AM
17446+ fd_install(fd, h_file);
17447+ err = fd;
17448+ goto out; /* success */
17449+
4f0767ce 17450+out_fd:
4a4d8108 17451+ put_unused_fd(fd);
4f0767ce 17452+out:
1e00d052 17453+ AuTraceErr(err);
4a4d8108
AM
17454+ return err;
17455+}
17456+
17457+/* ---------------------------------------------------------------------- */
17458+
17459+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
17460+{
17461+ long err;
c1595e42 17462+ struct dentry *dentry;
4a4d8108
AM
17463+
17464+ switch (cmd) {
4a4d8108
AM
17465+ case AUFS_CTL_RDU:
17466+ case AUFS_CTL_RDU_INO:
17467+ err = au_rdu_ioctl(file, cmd, arg);
17468+ break;
17469+
17470+ case AUFS_CTL_WBR_FD:
1e00d052 17471+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17472+ break;
17473+
027c5e7a
AM
17474+ case AUFS_CTL_IBUSY:
17475+ err = au_ibusy_ioctl(file, arg);
17476+ break;
17477+
076b876e
AM
17478+ case AUFS_CTL_BRINFO:
17479+ err = au_brinfo_ioctl(file, arg);
17480+ break;
17481+
17482+ case AUFS_CTL_FHSM_FD:
2000de60 17483+ dentry = file->f_path.dentry;
c1595e42
JR
17484+ if (IS_ROOT(dentry))
17485+ err = au_fhsm_fd(dentry->d_sb, arg);
17486+ else
17487+ err = -ENOTTY;
076b876e
AM
17488+ break;
17489+
4a4d8108
AM
17490+ default:
17491+ /* do not call the lower */
17492+ AuDbg("0x%x\n", cmd);
17493+ err = -ENOTTY;
17494+ }
17495+
17496+ AuTraceErr(err);
17497+ return err;
17498+}
17499+
17500+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
17501+{
17502+ long err;
17503+
17504+ switch (cmd) {
c2b27bf2 17505+ case AUFS_CTL_MVDOWN:
2000de60 17506+ err = au_mvdown(file->f_path.dentry, (void __user *)arg);
c2b27bf2
AM
17507+ break;
17508+
4a4d8108 17509+ case AUFS_CTL_WBR_FD:
1e00d052 17510+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17511+ break;
17512+
17513+ default:
17514+ /* do not call the lower */
17515+ AuDbg("0x%x\n", cmd);
17516+ err = -ENOTTY;
17517+ }
17518+
17519+ AuTraceErr(err);
17520+ return err;
17521+}
b752ccd1
AM
17522+
17523+#ifdef CONFIG_COMPAT
17524+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
17525+ unsigned long arg)
17526+{
17527+ long err;
17528+
17529+ switch (cmd) {
17530+ case AUFS_CTL_RDU:
17531+ case AUFS_CTL_RDU_INO:
17532+ err = au_rdu_compat_ioctl(file, cmd, arg);
17533+ break;
17534+
027c5e7a
AM
17535+ case AUFS_CTL_IBUSY:
17536+ err = au_ibusy_compat_ioctl(file, arg);
17537+ break;
17538+
076b876e
AM
17539+ case AUFS_CTL_BRINFO:
17540+ err = au_brinfo_compat_ioctl(file, arg);
17541+ break;
17542+
b752ccd1
AM
17543+ default:
17544+ err = aufs_ioctl_dir(file, cmd, arg);
17545+ }
17546+
17547+ AuTraceErr(err);
17548+ return err;
17549+}
17550+
b752ccd1
AM
17551+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
17552+ unsigned long arg)
17553+{
17554+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
17555+}
17556+#endif
7f207e10
AM
17557diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
17558--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 17559+++ linux/fs/aufs/i_op_add.c 2016-10-09 16:55:36.492701639 +0200
5afbbe0d 17560@@ -0,0 +1,924 @@
4a4d8108 17561+/*
8cdd5066 17562+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
17563+ *
17564+ * This program, aufs is free software; you can redistribute it and/or modify
17565+ * it under the terms of the GNU General Public License as published by
17566+ * the Free Software Foundation; either version 2 of the License, or
17567+ * (at your option) any later version.
17568+ *
17569+ * This program is distributed in the hope that it will be useful,
17570+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17571+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17572+ * GNU General Public License for more details.
17573+ *
17574+ * You should have received a copy of the GNU General Public License
523b37e3 17575+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17576+ */
17577+
17578+/*
17579+ * inode operations (add entry)
17580+ */
17581+
17582+#include "aufs.h"
17583+
17584+/*
17585+ * final procedure of adding a new entry, except link(2).
17586+ * remove whiteout, instantiate, copyup the parent dir's times and size
17587+ * and update version.
17588+ * if it failed, re-create the removed whiteout.
17589+ */
17590+static int epilog(struct inode *dir, aufs_bindex_t bindex,
17591+ struct dentry *wh_dentry, struct dentry *dentry)
17592+{
17593+ int err, rerr;
17594+ aufs_bindex_t bwh;
17595+ struct path h_path;
076b876e 17596+ struct super_block *sb;
4a4d8108
AM
17597+ struct inode *inode, *h_dir;
17598+ struct dentry *wh;
17599+
17600+ bwh = -1;
076b876e 17601+ sb = dir->i_sb;
4a4d8108 17602+ if (wh_dentry) {
5527c038 17603+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
4a4d8108
AM
17604+ IMustLock(h_dir);
17605+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
17606+ bwh = au_dbwh(dentry);
17607+ h_path.dentry = wh_dentry;
076b876e 17608+ h_path.mnt = au_sbr_mnt(sb, bindex);
4a4d8108
AM
17609+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
17610+ dentry);
17611+ if (unlikely(err))
17612+ goto out;
17613+ }
17614+
17615+ inode = au_new_inode(dentry, /*must_new*/1);
17616+ if (!IS_ERR(inode)) {
17617+ d_instantiate(dentry, inode);
5527c038 17618+ dir = d_inode(dentry->d_parent); /* dir inode is locked */
4a4d8108 17619+ IMustLock(dir);
b912730e 17620+ au_dir_ts(dir, bindex);
4a4d8108 17621+ dir->i_version++;
076b876e 17622+ au_fhsm_wrote(sb, bindex, /*force*/0);
4a4d8108
AM
17623+ return 0; /* success */
17624+ }
17625+
17626+ err = PTR_ERR(inode);
17627+ if (!wh_dentry)
17628+ goto out;
17629+
17630+ /* revert */
17631+ /* dir inode is locked */
17632+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
17633+ rerr = PTR_ERR(wh);
17634+ if (IS_ERR(wh)) {
523b37e3
AM
17635+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
17636+ dentry, err, rerr);
4a4d8108
AM
17637+ err = -EIO;
17638+ } else
17639+ dput(wh);
17640+
4f0767ce 17641+out:
4a4d8108
AM
17642+ return err;
17643+}
17644+
027c5e7a
AM
17645+static int au_d_may_add(struct dentry *dentry)
17646+{
17647+ int err;
17648+
17649+ err = 0;
17650+ if (unlikely(d_unhashed(dentry)))
17651+ err = -ENOENT;
5527c038 17652+ if (unlikely(d_really_is_positive(dentry)))
027c5e7a
AM
17653+ err = -EEXIST;
17654+ return err;
17655+}
17656+
4a4d8108
AM
17657+/*
17658+ * simple tests for the adding inode operations.
17659+ * following the checks in vfs, plus the parent-child relationship.
17660+ */
17661+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
17662+ struct dentry *h_parent, int isdir)
17663+{
17664+ int err;
17665+ umode_t h_mode;
17666+ struct dentry *h_dentry;
17667+ struct inode *h_inode;
17668+
17669+ err = -ENAMETOOLONG;
17670+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17671+ goto out;
17672+
17673+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 17674+ if (d_really_is_negative(dentry)) {
4a4d8108 17675+ err = -EEXIST;
5527c038 17676+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
17677+ goto out;
17678+ } else {
17679+ /* rename(2) case */
17680+ err = -EIO;
5527c038
JR
17681+ if (unlikely(d_is_negative(h_dentry)))
17682+ goto out;
17683+ h_inode = d_inode(h_dentry);
17684+ if (unlikely(!h_inode->i_nlink))
4a4d8108
AM
17685+ goto out;
17686+
17687+ h_mode = h_inode->i_mode;
17688+ if (!isdir) {
17689+ err = -EISDIR;
17690+ if (unlikely(S_ISDIR(h_mode)))
17691+ goto out;
17692+ } else if (unlikely(!S_ISDIR(h_mode))) {
17693+ err = -ENOTDIR;
17694+ goto out;
17695+ }
17696+ }
17697+
17698+ err = 0;
17699+ /* expected parent dir is locked */
17700+ if (unlikely(h_parent != h_dentry->d_parent))
17701+ err = -EIO;
17702+
4f0767ce 17703+out:
4a4d8108
AM
17704+ AuTraceErr(err);
17705+ return err;
17706+}
17707+
17708+/*
17709+ * initial procedure of adding a new entry.
17710+ * prepare writable branch and the parent dir, lock it,
17711+ * and lookup whiteout for the new entry.
17712+ */
17713+static struct dentry*
17714+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
17715+ struct dentry *src_dentry, struct au_pin *pin,
17716+ struct au_wr_dir_args *wr_dir_args)
17717+{
17718+ struct dentry *wh_dentry, *h_parent;
17719+ struct super_block *sb;
17720+ struct au_branch *br;
17721+ int err;
17722+ unsigned int udba;
17723+ aufs_bindex_t bcpup;
17724+
523b37e3 17725+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17726+
17727+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
17728+ bcpup = err;
17729+ wh_dentry = ERR_PTR(err);
17730+ if (unlikely(err < 0))
17731+ goto out;
17732+
17733+ sb = dentry->d_sb;
17734+ udba = au_opt_udba(sb);
17735+ err = au_pin(pin, dentry, bcpup, udba,
17736+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17737+ wh_dentry = ERR_PTR(err);
17738+ if (unlikely(err))
17739+ goto out;
17740+
17741+ h_parent = au_pinned_h_parent(pin);
17742+ if (udba != AuOpt_UDBA_NONE
5afbbe0d 17743+ && au_dbtop(dentry) == bcpup)
4a4d8108
AM
17744+ err = au_may_add(dentry, bcpup, h_parent,
17745+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
17746+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17747+ err = -ENAMETOOLONG;
17748+ wh_dentry = ERR_PTR(err);
17749+ if (unlikely(err))
17750+ goto out_unpin;
17751+
17752+ br = au_sbr(sb, bcpup);
17753+ if (dt) {
17754+ struct path tmp = {
17755+ .dentry = h_parent,
86dc4139 17756+ .mnt = au_br_mnt(br)
4a4d8108
AM
17757+ };
17758+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
17759+ }
17760+
17761+ wh_dentry = NULL;
17762+ if (bcpup != au_dbwh(dentry))
17763+ goto out; /* success */
17764+
2000de60
JR
17765+ /*
17766+ * ENAMETOOLONG here means that if we allowed creating such a name, it
17767+ * could not be removed in the future. So we don't allow such a
17768+ * name here and we don't handle ENAMETOOLONG differently here.
17769+ */
4a4d8108
AM
17770+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
17771+
4f0767ce 17772+out_unpin:
4a4d8108
AM
17773+ if (IS_ERR(wh_dentry))
17774+ au_unpin(pin);
4f0767ce 17775+out:
4a4d8108
AM
17776+ return wh_dentry;
17777+}
17778+
17779+/* ---------------------------------------------------------------------- */
17780+
17781+enum { Mknod, Symlink, Creat };
17782+struct simple_arg {
17783+ int type;
17784+ union {
17785+ struct {
b912730e
AM
17786+ umode_t mode;
17787+ bool want_excl;
17788+ bool try_aopen;
17789+ struct vfsub_aopen_args *aopen;
4a4d8108
AM
17790+ } c;
17791+ struct {
17792+ const char *symname;
17793+ } s;
17794+ struct {
7eafdf33 17795+ umode_t mode;
4a4d8108
AM
17796+ dev_t dev;
17797+ } m;
17798+ } u;
17799+};
17800+
17801+static int add_simple(struct inode *dir, struct dentry *dentry,
17802+ struct simple_arg *arg)
17803+{
076b876e 17804+ int err, rerr;
5afbbe0d 17805+ aufs_bindex_t btop;
4a4d8108 17806+ unsigned char created;
b912730e
AM
17807+ const unsigned char try_aopen
17808+ = (arg->type == Creat && arg->u.c.try_aopen);
4a4d8108
AM
17809+ struct dentry *wh_dentry, *parent;
17810+ struct inode *h_dir;
b912730e
AM
17811+ struct super_block *sb;
17812+ struct au_branch *br;
c2b27bf2
AM
17813+ /* to reduce stack size */
17814+ struct {
17815+ struct au_dtime dt;
17816+ struct au_pin pin;
17817+ struct path h_path;
17818+ struct au_wr_dir_args wr_dir_args;
17819+ } *a;
4a4d8108 17820+
523b37e3 17821+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17822+ IMustLock(dir);
17823+
c2b27bf2
AM
17824+ err = -ENOMEM;
17825+ a = kmalloc(sizeof(*a), GFP_NOFS);
17826+ if (unlikely(!a))
17827+ goto out;
17828+ a->wr_dir_args.force_btgt = -1;
17829+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
17830+
4a4d8108 17831+ parent = dentry->d_parent; /* dir inode is locked */
b912730e
AM
17832+ if (!try_aopen) {
17833+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17834+ if (unlikely(err))
17835+ goto out_free;
17836+ }
027c5e7a
AM
17837+ err = au_d_may_add(dentry);
17838+ if (unlikely(err))
17839+ goto out_unlock;
b912730e
AM
17840+ if (!try_aopen)
17841+ di_write_lock_parent(parent);
c2b27bf2
AM
17842+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
17843+ &a->pin, &a->wr_dir_args);
4a4d8108
AM
17844+ err = PTR_ERR(wh_dentry);
17845+ if (IS_ERR(wh_dentry))
027c5e7a 17846+ goto out_parent;
4a4d8108 17847+
5afbbe0d 17848+ btop = au_dbtop(dentry);
b912730e 17849+ sb = dentry->d_sb;
5afbbe0d
AM
17850+ br = au_sbr(sb, btop);
17851+ a->h_path.dentry = au_h_dptr(dentry, btop);
b912730e 17852+ a->h_path.mnt = au_br_mnt(br);
c2b27bf2 17853+ h_dir = au_pinned_h_dir(&a->pin);
4a4d8108
AM
17854+ switch (arg->type) {
17855+ case Creat:
b912730e
AM
17856+ err = 0;
17857+ if (!try_aopen || !h_dir->i_op->atomic_open)
17858+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
17859+ arg->u.c.want_excl);
17860+ else
17861+ err = vfsub_atomic_open(h_dir, a->h_path.dentry,
17862+ arg->u.c.aopen, br);
4a4d8108
AM
17863+ break;
17864+ case Symlink:
c2b27bf2 17865+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
4a4d8108
AM
17866+ break;
17867+ case Mknod:
c2b27bf2
AM
17868+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
17869+ arg->u.m.dev);
4a4d8108
AM
17870+ break;
17871+ default:
17872+ BUG();
17873+ }
17874+ created = !err;
17875+ if (!err)
5afbbe0d 17876+ err = epilog(dir, btop, wh_dentry, dentry);
4a4d8108
AM
17877+
17878+ /* revert */
5527c038 17879+ if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
523b37e3
AM
17880+ /* no delegation since it is just created */
17881+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
17882+ /*force*/0);
4a4d8108 17883+ if (rerr) {
523b37e3
AM
17884+ AuIOErr("%pd revert failure(%d, %d)\n",
17885+ dentry, err, rerr);
4a4d8108
AM
17886+ err = -EIO;
17887+ }
c2b27bf2 17888+ au_dtime_revert(&a->dt);
4a4d8108
AM
17889+ }
17890+
b912730e
AM
17891+ if (!err && try_aopen && !h_dir->i_op->atomic_open)
17892+ *arg->u.c.aopen->opened |= FILE_CREATED;
17893+
c2b27bf2 17894+ au_unpin(&a->pin);
4a4d8108
AM
17895+ dput(wh_dentry);
17896+
027c5e7a 17897+out_parent:
b912730e
AM
17898+ if (!try_aopen)
17899+ di_write_unlock(parent);
027c5e7a 17900+out_unlock:
4a4d8108 17901+ if (unlikely(err)) {
5afbbe0d 17902+ au_update_dbtop(dentry);
4a4d8108
AM
17903+ d_drop(dentry);
17904+ }
b912730e
AM
17905+ if (!try_aopen)
17906+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2 17907+out_free:
f0c0a007 17908+ au_delayed_kfree(a);
027c5e7a 17909+out:
4a4d8108
AM
17910+ return err;
17911+}
17912+
7eafdf33
AM
17913+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
17914+ dev_t dev)
4a4d8108
AM
17915+{
17916+ struct simple_arg arg = {
17917+ .type = Mknod,
17918+ .u.m = {
17919+ .mode = mode,
17920+ .dev = dev
17921+ }
17922+ };
17923+ return add_simple(dir, dentry, &arg);
17924+}
17925+
17926+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
17927+{
17928+ struct simple_arg arg = {
17929+ .type = Symlink,
17930+ .u.s.symname = symname
17931+ };
17932+ return add_simple(dir, dentry, &arg);
17933+}
17934+
7eafdf33 17935+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 17936+ bool want_excl)
4a4d8108
AM
17937+{
17938+ struct simple_arg arg = {
17939+ .type = Creat,
17940+ .u.c = {
b4510431
AM
17941+ .mode = mode,
17942+ .want_excl = want_excl
4a4d8108
AM
17943+ }
17944+ };
17945+ return add_simple(dir, dentry, &arg);
17946+}
17947+
b912730e
AM
17948+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
17949+ struct vfsub_aopen_args *aopen_args)
17950+{
17951+ struct simple_arg arg = {
17952+ .type = Creat,
17953+ .u.c = {
17954+ .mode = aopen_args->create_mode,
17955+ .want_excl = aopen_args->open_flag & O_EXCL,
17956+ .try_aopen = true,
17957+ .aopen = aopen_args
17958+ }
17959+ };
17960+ return add_simple(dir, dentry, &arg);
17961+}
17962+
38d290e6
JR
17963+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
17964+{
17965+ int err;
17966+ aufs_bindex_t bindex;
17967+ struct super_block *sb;
17968+ struct dentry *parent, *h_parent, *h_dentry;
17969+ struct inode *h_dir, *inode;
17970+ struct vfsmount *h_mnt;
17971+ struct au_wr_dir_args wr_dir_args = {
17972+ .force_btgt = -1,
17973+ .flags = AuWrDir_TMPFILE
17974+ };
17975+
17976+ /* copy-up may happen */
febd17d6 17977+ inode_lock(dir);
38d290e6
JR
17978+
17979+ sb = dir->i_sb;
17980+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
17981+ if (unlikely(err))
17982+ goto out;
17983+
17984+ err = au_di_init(dentry);
17985+ if (unlikely(err))
17986+ goto out_si;
17987+
17988+ err = -EBUSY;
17989+ parent = d_find_any_alias(dir);
17990+ AuDebugOn(!parent);
17991+ di_write_lock_parent(parent);
5527c038 17992+ if (unlikely(d_inode(parent) != dir))
38d290e6
JR
17993+ goto out_parent;
17994+
17995+ err = au_digen_test(parent, au_sigen(sb));
17996+ if (unlikely(err))
17997+ goto out_parent;
17998+
5afbbe0d
AM
17999+ bindex = au_dbtop(parent);
18000+ au_set_dbtop(dentry, bindex);
18001+ au_set_dbbot(dentry, bindex);
38d290e6
JR
18002+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
18003+ bindex = err;
18004+ if (unlikely(err < 0))
18005+ goto out_parent;
18006+
18007+ err = -EOPNOTSUPP;
18008+ h_dir = au_h_iptr(dir, bindex);
18009+ if (unlikely(!h_dir->i_op->tmpfile))
18010+ goto out_parent;
18011+
18012+ h_mnt = au_sbr_mnt(sb, bindex);
18013+ err = vfsub_mnt_want_write(h_mnt);
18014+ if (unlikely(err))
18015+ goto out_parent;
18016+
18017+ h_parent = au_h_dptr(parent, bindex);
5527c038 18018+ err = inode_permission(d_inode(h_parent), MAY_WRITE | MAY_EXEC);
38d290e6
JR
18019+ if (unlikely(err))
18020+ goto out_mnt;
18021+
18022+ err = -ENOMEM;
18023+ h_dentry = d_alloc(h_parent, &dentry->d_name);
18024+ if (unlikely(!h_dentry))
18025+ goto out_mnt;
18026+
18027+ err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode);
18028+ if (unlikely(err))
18029+ goto out_dentry;
18030+
5afbbe0d
AM
18031+ au_set_dbtop(dentry, bindex);
18032+ au_set_dbbot(dentry, bindex);
38d290e6
JR
18033+ au_set_h_dptr(dentry, bindex, dget(h_dentry));
18034+ inode = au_new_inode(dentry, /*must_new*/1);
18035+ if (IS_ERR(inode)) {
18036+ err = PTR_ERR(inode);
18037+ au_set_h_dptr(dentry, bindex, NULL);
5afbbe0d
AM
18038+ au_set_dbtop(dentry, -1);
18039+ au_set_dbbot(dentry, -1);
38d290e6
JR
18040+ } else {
18041+ if (!inode->i_nlink)
18042+ set_nlink(inode, 1);
18043+ d_tmpfile(dentry, inode);
18044+ au_di(dentry)->di_tmpfile = 1;
18045+
18046+ /* update without i_mutex */
5afbbe0d 18047+ if (au_ibtop(dir) == au_dbtop(dentry))
38d290e6
JR
18048+ au_cpup_attr_timesizes(dir);
18049+ }
18050+
18051+out_dentry:
18052+ dput(h_dentry);
18053+out_mnt:
18054+ vfsub_mnt_drop_write(h_mnt);
18055+out_parent:
18056+ di_write_unlock(parent);
18057+ dput(parent);
18058+ di_write_unlock(dentry);
5afbbe0d 18059+ if (unlikely(err)) {
38d290e6
JR
18060+ au_di_fin(dentry);
18061+ dentry->d_fsdata = NULL;
18062+ }
18063+out_si:
18064+ si_read_unlock(sb);
18065+out:
febd17d6 18066+ inode_unlock(dir);
38d290e6
JR
18067+ return err;
18068+}
18069+
4a4d8108
AM
18070+/* ---------------------------------------------------------------------- */
18071+
18072+struct au_link_args {
18073+ aufs_bindex_t bdst, bsrc;
18074+ struct au_pin pin;
18075+ struct path h_path;
18076+ struct dentry *src_parent, *parent;
18077+};
18078+
18079+static int au_cpup_before_link(struct dentry *src_dentry,
18080+ struct au_link_args *a)
18081+{
18082+ int err;
18083+ struct dentry *h_src_dentry;
c2b27bf2
AM
18084+ struct au_cp_generic cpg = {
18085+ .dentry = src_dentry,
18086+ .bdst = a->bdst,
18087+ .bsrc = a->bsrc,
18088+ .len = -1,
18089+ .pin = &a->pin,
18090+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
18091+ };
4a4d8108
AM
18092+
18093+ di_read_lock_parent(a->src_parent, AuLock_IR);
18094+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
18095+ if (unlikely(err))
18096+ goto out;
18097+
18098+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
4a4d8108
AM
18099+ err = au_pin(&a->pin, src_dentry, a->bdst,
18100+ au_opt_udba(src_dentry->d_sb),
18101+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
18102+ if (unlikely(err))
18103+ goto out;
367653fa 18104+
c2b27bf2 18105+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
18106+ au_unpin(&a->pin);
18107+
4f0767ce 18108+out:
4a4d8108
AM
18109+ di_read_unlock(a->src_parent, AuLock_IR);
18110+ return err;
18111+}
18112+
86dc4139
AM
18113+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
18114+ struct au_link_args *a)
4a4d8108
AM
18115+{
18116+ int err;
18117+ unsigned char plink;
5afbbe0d 18118+ aufs_bindex_t bbot;
4a4d8108 18119+ struct dentry *h_src_dentry;
523b37e3 18120+ struct inode *h_inode, *inode, *delegated;
4a4d8108
AM
18121+ struct super_block *sb;
18122+ struct file *h_file;
18123+
18124+ plink = 0;
18125+ h_inode = NULL;
18126+ sb = src_dentry->d_sb;
5527c038 18127+ inode = d_inode(src_dentry);
5afbbe0d 18128+ if (au_ibtop(inode) <= a->bdst)
4a4d8108
AM
18129+ h_inode = au_h_iptr(inode, a->bdst);
18130+ if (!h_inode || !h_inode->i_nlink) {
18131+ /* copyup src_dentry as the name of dentry. */
5afbbe0d
AM
18132+ bbot = au_dbbot(dentry);
18133+ if (bbot < a->bsrc)
18134+ au_set_dbbot(dentry, a->bsrc);
86dc4139
AM
18135+ au_set_h_dptr(dentry, a->bsrc,
18136+ dget(au_h_dptr(src_dentry, a->bsrc)));
18137+ dget(a->h_path.dentry);
18138+ au_set_h_dptr(dentry, a->bdst, NULL);
c1595e42
JR
18139+ AuDbg("temporary d_inode...\n");
18140+ spin_lock(&dentry->d_lock);
5527c038 18141+ dentry->d_inode = d_inode(src_dentry); /* tmp */
c1595e42 18142+ spin_unlock(&dentry->d_lock);
392086de 18143+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
86dc4139 18144+ if (IS_ERR(h_file))
4a4d8108 18145+ err = PTR_ERR(h_file);
86dc4139 18146+ else {
c2b27bf2
AM
18147+ struct au_cp_generic cpg = {
18148+ .dentry = dentry,
18149+ .bdst = a->bdst,
18150+ .bsrc = -1,
18151+ .len = -1,
18152+ .pin = &a->pin,
18153+ .flags = AuCpup_KEEPLINO
18154+ };
18155+ err = au_sio_cpup_simple(&cpg);
86dc4139
AM
18156+ au_h_open_post(dentry, a->bsrc, h_file);
18157+ if (!err) {
18158+ dput(a->h_path.dentry);
18159+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
18160+ } else
18161+ au_set_h_dptr(dentry, a->bdst,
18162+ a->h_path.dentry);
18163+ }
c1595e42 18164+ spin_lock(&dentry->d_lock);
86dc4139 18165+ dentry->d_inode = NULL; /* restore */
c1595e42
JR
18166+ spin_unlock(&dentry->d_lock);
18167+ AuDbg("temporary d_inode...done\n");
86dc4139 18168+ au_set_h_dptr(dentry, a->bsrc, NULL);
5afbbe0d 18169+ au_set_dbbot(dentry, bbot);
4a4d8108
AM
18170+ } else {
18171+ /* the inode of src_dentry already exists on a.bdst branch */
18172+ h_src_dentry = d_find_alias(h_inode);
18173+ if (!h_src_dentry && au_plink_test(inode)) {
18174+ plink = 1;
18175+ h_src_dentry = au_plink_lkup(inode, a->bdst);
18176+ err = PTR_ERR(h_src_dentry);
18177+ if (IS_ERR(h_src_dentry))
18178+ goto out;
18179+
5527c038 18180+ if (unlikely(d_is_negative(h_src_dentry))) {
4a4d8108
AM
18181+ dput(h_src_dentry);
18182+ h_src_dentry = NULL;
18183+ }
18184+
18185+ }
18186+ if (h_src_dentry) {
523b37e3 18187+ delegated = NULL;
4a4d8108 18188+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
18189+ &a->h_path, &delegated);
18190+ if (unlikely(err == -EWOULDBLOCK)) {
18191+ pr_warn("cannot retry for NFSv4 delegation"
18192+ " for an internal link\n");
18193+ iput(delegated);
18194+ }
4a4d8108
AM
18195+ dput(h_src_dentry);
18196+ } else {
18197+ AuIOErr("no dentry found for hi%lu on b%d\n",
18198+ h_inode->i_ino, a->bdst);
18199+ err = -EIO;
18200+ }
18201+ }
18202+
18203+ if (!err && !plink)
18204+ au_plink_append(inode, a->bdst, a->h_path.dentry);
18205+
18206+out:
2cbb1c4b 18207+ AuTraceErr(err);
4a4d8108
AM
18208+ return err;
18209+}
18210+
18211+int aufs_link(struct dentry *src_dentry, struct inode *dir,
18212+ struct dentry *dentry)
18213+{
18214+ int err, rerr;
18215+ struct au_dtime dt;
18216+ struct au_link_args *a;
18217+ struct dentry *wh_dentry, *h_src_dentry;
523b37e3 18218+ struct inode *inode, *delegated;
4a4d8108
AM
18219+ struct super_block *sb;
18220+ struct au_wr_dir_args wr_dir_args = {
18221+ /* .force_btgt = -1, */
18222+ .flags = AuWrDir_ADD_ENTRY
18223+ };
18224+
18225+ IMustLock(dir);
5527c038 18226+ inode = d_inode(src_dentry);
4a4d8108
AM
18227+ IMustLock(inode);
18228+
4a4d8108
AM
18229+ err = -ENOMEM;
18230+ a = kzalloc(sizeof(*a), GFP_NOFS);
18231+ if (unlikely(!a))
18232+ goto out;
18233+
18234+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
18235+ err = aufs_read_and_write_lock2(dentry, src_dentry,
18236+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
18237+ if (unlikely(err))
18238+ goto out_kfree;
38d290e6 18239+ err = au_d_linkable(src_dentry);
027c5e7a
AM
18240+ if (unlikely(err))
18241+ goto out_unlock;
18242+ err = au_d_may_add(dentry);
18243+ if (unlikely(err))
18244+ goto out_unlock;
e49829fe 18245+
4a4d8108 18246+ a->src_parent = dget_parent(src_dentry);
5afbbe0d 18247+ wr_dir_args.force_btgt = au_ibtop(inode);
4a4d8108
AM
18248+
18249+ di_write_lock_parent(a->parent);
18250+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
18251+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
18252+ &wr_dir_args);
18253+ err = PTR_ERR(wh_dentry);
18254+ if (IS_ERR(wh_dentry))
027c5e7a 18255+ goto out_parent;
4a4d8108
AM
18256+
18257+ err = 0;
18258+ sb = dentry->d_sb;
5afbbe0d 18259+ a->bdst = au_dbtop(dentry);
4a4d8108
AM
18260+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
18261+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
5afbbe0d 18262+ a->bsrc = au_ibtop(inode);
2cbb1c4b 18263+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
38d290e6
JR
18264+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
18265+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
2cbb1c4b 18266+ if (!h_src_dentry) {
5afbbe0d 18267+ a->bsrc = au_dbtop(src_dentry);
2cbb1c4b
JR
18268+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
18269+ AuDebugOn(!h_src_dentry);
38d290e6
JR
18270+ } else if (IS_ERR(h_src_dentry)) {
18271+ err = PTR_ERR(h_src_dentry);
2cbb1c4b 18272+ goto out_parent;
38d290e6 18273+ }
2cbb1c4b 18274+
4a4d8108
AM
18275+ if (au_opt_test(au_mntflags(sb), PLINK)) {
18276+ if (a->bdst < a->bsrc
18277+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
86dc4139 18278+ err = au_cpup_or_link(src_dentry, dentry, a);
523b37e3
AM
18279+ else {
18280+ delegated = NULL;
4a4d8108 18281+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
18282+ &a->h_path, &delegated);
18283+ if (unlikely(err == -EWOULDBLOCK)) {
18284+ pr_warn("cannot retry for NFSv4 delegation"
18285+ " for an internal link\n");
18286+ iput(delegated);
18287+ }
18288+ }
2cbb1c4b 18289+ dput(h_src_dentry);
4a4d8108
AM
18290+ } else {
18291+ /*
18292+ * copyup src_dentry to the branch we process,
18293+ * and then link(2) to it.
18294+ */
2cbb1c4b 18295+ dput(h_src_dentry);
4a4d8108
AM
18296+ if (a->bdst < a->bsrc
18297+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
18298+ au_unpin(&a->pin);
18299+ di_write_unlock(a->parent);
18300+ err = au_cpup_before_link(src_dentry, a);
18301+ di_write_lock_parent(a->parent);
18302+ if (!err)
18303+ err = au_pin(&a->pin, dentry, a->bdst,
18304+ au_opt_udba(sb),
18305+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
18306+ if (unlikely(err))
18307+ goto out_wh;
18308+ }
18309+ if (!err) {
18310+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
18311+ err = -ENOENT;
5527c038 18312+ if (h_src_dentry && d_is_positive(h_src_dentry)) {
523b37e3 18313+ delegated = NULL;
4a4d8108
AM
18314+ err = vfsub_link(h_src_dentry,
18315+ au_pinned_h_dir(&a->pin),
523b37e3
AM
18316+ &a->h_path, &delegated);
18317+ if (unlikely(err == -EWOULDBLOCK)) {
18318+ pr_warn("cannot retry"
18319+ " for NFSv4 delegation"
18320+ " for an internal link\n");
18321+ iput(delegated);
18322+ }
18323+ }
4a4d8108
AM
18324+ }
18325+ }
18326+ if (unlikely(err))
18327+ goto out_unpin;
18328+
18329+ if (wh_dentry) {
18330+ a->h_path.dentry = wh_dentry;
18331+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
18332+ dentry);
18333+ if (unlikely(err))
18334+ goto out_revert;
18335+ }
18336+
b912730e 18337+ au_dir_ts(dir, a->bdst);
4a4d8108 18338+ dir->i_version++;
4a4d8108
AM
18339+ inc_nlink(inode);
18340+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
18341+ d_instantiate(dentry, au_igrab(inode));
18342+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
18343+ /* some filesystem calls d_drop() */
18344+ d_drop(dentry);
076b876e
AM
18345+ /* some filesystems consume an inode even hardlink */
18346+ au_fhsm_wrote(sb, a->bdst, /*force*/0);
4a4d8108
AM
18347+ goto out_unpin; /* success */
18348+
4f0767ce 18349+out_revert:
523b37e3
AM
18350+ /* no delegation since it is just created */
18351+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
18352+ /*delegated*/NULL, /*force*/0);
027c5e7a 18353+ if (unlikely(rerr)) {
523b37e3 18354+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
027c5e7a
AM
18355+ err = -EIO;
18356+ }
4a4d8108 18357+ au_dtime_revert(&dt);
4f0767ce 18358+out_unpin:
4a4d8108 18359+ au_unpin(&a->pin);
4f0767ce 18360+out_wh:
4a4d8108 18361+ dput(wh_dentry);
027c5e7a
AM
18362+out_parent:
18363+ di_write_unlock(a->parent);
18364+ dput(a->src_parent);
4f0767ce 18365+out_unlock:
4a4d8108 18366+ if (unlikely(err)) {
5afbbe0d 18367+ au_update_dbtop(dentry);
4a4d8108
AM
18368+ d_drop(dentry);
18369+ }
4a4d8108 18370+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 18371+out_kfree:
f0c0a007 18372+ au_delayed_kfree(a);
4f0767ce 18373+out:
86dc4139 18374+ AuTraceErr(err);
4a4d8108
AM
18375+ return err;
18376+}
18377+
7eafdf33 18378+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4a4d8108
AM
18379+{
18380+ int err, rerr;
18381+ aufs_bindex_t bindex;
18382+ unsigned char diropq;
18383+ struct path h_path;
18384+ struct dentry *wh_dentry, *parent, *opq_dentry;
febd17d6 18385+ struct inode *h_inode;
4a4d8108
AM
18386+ struct super_block *sb;
18387+ struct {
18388+ struct au_pin pin;
18389+ struct au_dtime dt;
18390+ } *a; /* reduce the stack usage */
18391+ struct au_wr_dir_args wr_dir_args = {
18392+ .force_btgt = -1,
18393+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
18394+ };
18395+
18396+ IMustLock(dir);
18397+
18398+ err = -ENOMEM;
18399+ a = kmalloc(sizeof(*a), GFP_NOFS);
18400+ if (unlikely(!a))
18401+ goto out;
18402+
027c5e7a
AM
18403+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
18404+ if (unlikely(err))
18405+ goto out_free;
18406+ err = au_d_may_add(dentry);
18407+ if (unlikely(err))
18408+ goto out_unlock;
18409+
4a4d8108
AM
18410+ parent = dentry->d_parent; /* dir inode is locked */
18411+ di_write_lock_parent(parent);
18412+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
18413+ &a->pin, &wr_dir_args);
18414+ err = PTR_ERR(wh_dentry);
18415+ if (IS_ERR(wh_dentry))
027c5e7a 18416+ goto out_parent;
4a4d8108
AM
18417+
18418+ sb = dentry->d_sb;
5afbbe0d 18419+ bindex = au_dbtop(dentry);
4a4d8108
AM
18420+ h_path.dentry = au_h_dptr(dentry, bindex);
18421+ h_path.mnt = au_sbr_mnt(sb, bindex);
18422+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
18423+ if (unlikely(err))
027c5e7a 18424+ goto out_unpin;
4a4d8108
AM
18425+
18426+ /* make the dir opaque */
18427+ diropq = 0;
febd17d6 18428+ h_inode = d_inode(h_path.dentry);
4a4d8108
AM
18429+ if (wh_dentry
18430+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
febd17d6 18431+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 18432+ opq_dentry = au_diropq_create(dentry, bindex);
febd17d6 18433+ inode_unlock(h_inode);
4a4d8108
AM
18434+ err = PTR_ERR(opq_dentry);
18435+ if (IS_ERR(opq_dentry))
18436+ goto out_dir;
18437+ dput(opq_dentry);
18438+ diropq = 1;
18439+ }
18440+
18441+ err = epilog(dir, bindex, wh_dentry, dentry);
18442+ if (!err) {
18443+ inc_nlink(dir);
027c5e7a 18444+ goto out_unpin; /* success */
4a4d8108
AM
18445+ }
18446+
18447+ /* revert */
18448+ if (diropq) {
18449+ AuLabel(revert opq);
febd17d6 18450+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 18451+ rerr = au_diropq_remove(dentry, bindex);
febd17d6 18452+ inode_unlock(h_inode);
4a4d8108 18453+ if (rerr) {
523b37e3
AM
18454+ AuIOErr("%pd reverting diropq failed(%d, %d)\n",
18455+ dentry, err, rerr);
4a4d8108
AM
18456+ err = -EIO;
18457+ }
18458+ }
18459+
4f0767ce 18460+out_dir:
4a4d8108
AM
18461+ AuLabel(revert dir);
18462+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
18463+ if (rerr) {
523b37e3
AM
18464+ AuIOErr("%pd reverting dir failed(%d, %d)\n",
18465+ dentry, err, rerr);
4a4d8108
AM
18466+ err = -EIO;
18467+ }
4a4d8108 18468+ au_dtime_revert(&a->dt);
027c5e7a 18469+out_unpin:
4a4d8108
AM
18470+ au_unpin(&a->pin);
18471+ dput(wh_dentry);
027c5e7a
AM
18472+out_parent:
18473+ di_write_unlock(parent);
18474+out_unlock:
4a4d8108 18475+ if (unlikely(err)) {
5afbbe0d 18476+ au_update_dbtop(dentry);
4a4d8108
AM
18477+ d_drop(dentry);
18478+ }
4a4d8108 18479+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 18480+out_free:
f0c0a007 18481+ au_delayed_kfree(a);
4f0767ce 18482+out:
4a4d8108
AM
18483+ return err;
18484+}
7f207e10
AM
18485diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
18486--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
18487+++ linux/fs/aufs/i_op.c 2016-10-09 16:55:38.889431135 +0200
18488@@ -0,0 +1,1451 @@
4a4d8108 18489+/*
8cdd5066 18490+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
18491+ *
18492+ * This program, aufs is free software; you can redistribute it and/or modify
18493+ * it under the terms of the GNU General Public License as published by
18494+ * the Free Software Foundation; either version 2 of the License, or
18495+ * (at your option) any later version.
18496+ *
18497+ * This program is distributed in the hope that it will be useful,
18498+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18499+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18500+ * GNU General Public License for more details.
18501+ *
18502+ * You should have received a copy of the GNU General Public License
523b37e3 18503+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 18504+ */
1facf9fc 18505+
1308ab2a 18506+/*
4a4d8108 18507+ * inode operations (except add/del/rename)
1308ab2a 18508+ */
4a4d8108
AM
18509+
18510+#include <linux/device_cgroup.h>
18511+#include <linux/fs_stack.h>
4a4d8108
AM
18512+#include <linux/namei.h>
18513+#include <linux/security.h>
4a4d8108
AM
18514+#include "aufs.h"
18515+
1e00d052 18516+static int h_permission(struct inode *h_inode, int mask,
79b8bda9 18517+ struct path *h_path, int brperm)
1facf9fc 18518+{
1308ab2a 18519+ int err;
4a4d8108 18520+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 18521+
e2f27e51
AM
18522+ err = -EPERM;
18523+ if (write_mask && IS_IMMUTABLE(h_inode))
18524+ goto out;
18525+
4a4d8108 18526+ err = -EACCES;
e2f27e51
AM
18527+ if (((mask & MAY_EXEC)
18528+ && S_ISREG(h_inode->i_mode)
18529+ && (path_noexec(h_path)
18530+ || !(h_inode->i_mode & S_IXUGO))))
4a4d8108
AM
18531+ goto out;
18532+
18533+ /*
18534+ * - skip the lower fs test in the case of write to ro branch.
18535+ * - nfs dir permission write check is optimized, but a policy for
18536+ * link/rename requires a real check.
b912730e
AM
18537+ * - nfs always sets MS_POSIXACL regardless its mount option 'noacl.'
18538+ * in this case, generic_permission() returns -EOPNOTSUPP.
4a4d8108
AM
18539+ */
18540+ if ((write_mask && !au_br_writable(brperm))
18541+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
18542+ && write_mask && !(mask & MAY_READ))
18543+ || !h_inode->i_op->permission) {
18544+ /* AuLabel(generic_permission); */
b912730e 18545+ /* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
1e00d052 18546+ err = generic_permission(h_inode, mask);
b912730e
AM
18547+ if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
18548+ err = h_inode->i_op->permission(h_inode, mask);
18549+ AuTraceErr(err);
1308ab2a 18550+ } else {
4a4d8108 18551+ /* AuLabel(h_inode->permission); */
1e00d052 18552+ err = h_inode->i_op->permission(h_inode, mask);
4a4d8108
AM
18553+ AuTraceErr(err);
18554+ }
1facf9fc 18555+
4a4d8108
AM
18556+ if (!err)
18557+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 18558+ if (!err)
4a4d8108 18559+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
18560+
18561+#if 0
18562+ if (!err) {
18563+ /* todo: do we need to call ima_path_check()? */
18564+ struct path h_path = {
18565+ .dentry =
18566+ .mnt = h_mnt
18567+ };
18568+ err = ima_path_check(&h_path,
18569+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
18570+ IMA_COUNT_LEAVE);
1308ab2a 18571+ }
4a4d8108 18572+#endif
dece6358 18573+
4f0767ce 18574+out:
1308ab2a 18575+ return err;
18576+}
dece6358 18577+
1e00d052 18578+static int aufs_permission(struct inode *inode, int mask)
1308ab2a 18579+{
18580+ int err;
5afbbe0d 18581+ aufs_bindex_t bindex, bbot;
4a4d8108
AM
18582+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
18583+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
18584+ struct inode *h_inode;
18585+ struct super_block *sb;
18586+ struct au_branch *br;
1facf9fc 18587+
027c5e7a 18588+ /* todo: support rcu-walk? */
1e00d052 18589+ if (mask & MAY_NOT_BLOCK)
027c5e7a
AM
18590+ return -ECHILD;
18591+
4a4d8108
AM
18592+ sb = inode->i_sb;
18593+ si_read_lock(sb, AuLock_FLUSH);
18594+ ii_read_lock_child(inode);
027c5e7a
AM
18595+#if 0
18596+ err = au_iigen_test(inode, au_sigen(sb));
18597+ if (unlikely(err))
18598+ goto out;
18599+#endif
dece6358 18600+
076b876e
AM
18601+ if (!isdir
18602+ || write_mask
18603+ || au_opt_test(au_mntflags(sb), DIRPERM1)) {
4a4d8108 18604+ err = au_busy_or_stale();
5afbbe0d 18605+ h_inode = au_h_iptr(inode, au_ibtop(inode));
4a4d8108
AM
18606+ if (unlikely(!h_inode
18607+ || (h_inode->i_mode & S_IFMT)
18608+ != (inode->i_mode & S_IFMT)))
18609+ goto out;
1facf9fc 18610+
4a4d8108 18611+ err = 0;
5afbbe0d 18612+ bindex = au_ibtop(inode);
4a4d8108 18613+ br = au_sbr(sb, bindex);
79b8bda9 18614+ err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
4a4d8108
AM
18615+ if (write_mask
18616+ && !err
18617+ && !special_file(h_inode->i_mode)) {
18618+ /* test whether the upper writable branch exists */
18619+ err = -EROFS;
18620+ for (; bindex >= 0; bindex--)
18621+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
18622+ err = 0;
18623+ break;
18624+ }
18625+ }
18626+ goto out;
18627+ }
dece6358 18628+
4a4d8108 18629+ /* non-write to dir */
1308ab2a 18630+ err = 0;
5afbbe0d
AM
18631+ bbot = au_ibbot(inode);
18632+ for (bindex = au_ibtop(inode); !err && bindex <= bbot; bindex++) {
4a4d8108
AM
18633+ h_inode = au_h_iptr(inode, bindex);
18634+ if (h_inode) {
18635+ err = au_busy_or_stale();
18636+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
18637+ break;
18638+
18639+ br = au_sbr(sb, bindex);
79b8bda9 18640+ err = h_permission(h_inode, mask, &br->br_path,
4a4d8108
AM
18641+ br->br_perm);
18642+ }
18643+ }
1308ab2a 18644+
4f0767ce 18645+out:
4a4d8108
AM
18646+ ii_read_unlock(inode);
18647+ si_read_unlock(sb);
1308ab2a 18648+ return err;
18649+}
18650+
4a4d8108 18651+/* ---------------------------------------------------------------------- */
1facf9fc 18652+
4a4d8108 18653+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
b4510431 18654+ unsigned int flags)
4a4d8108
AM
18655+{
18656+ struct dentry *ret, *parent;
b752ccd1 18657+ struct inode *inode;
4a4d8108 18658+ struct super_block *sb;
1716fcea 18659+ int err, npositive;
dece6358 18660+
4a4d8108 18661+ IMustLock(dir);
1308ab2a 18662+
537831f9
AM
18663+ /* todo: support rcu-walk? */
18664+ ret = ERR_PTR(-ECHILD);
18665+ if (flags & LOOKUP_RCU)
18666+ goto out;
18667+
18668+ ret = ERR_PTR(-ENAMETOOLONG);
18669+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
18670+ goto out;
18671+
4a4d8108 18672+ sb = dir->i_sb;
7f207e10
AM
18673+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18674+ ret = ERR_PTR(err);
18675+ if (unlikely(err))
18676+ goto out;
18677+
4a4d8108
AM
18678+ err = au_di_init(dentry);
18679+ ret = ERR_PTR(err);
18680+ if (unlikely(err))
7f207e10 18681+ goto out_si;
1308ab2a 18682+
9dbd164d 18683+ inode = NULL;
027c5e7a 18684+ npositive = 0; /* suppress a warning */
4a4d8108
AM
18685+ parent = dentry->d_parent; /* dir inode is locked */
18686+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
18687+ err = au_alive_dir(parent);
18688+ if (!err)
18689+ err = au_digen_test(parent, au_sigen(sb));
18690+ if (!err) {
5afbbe0d
AM
18691+ /* regardless LOOKUP_CREATE, always ALLOW_NEG */
18692+ npositive = au_lkup_dentry(dentry, au_dbtop(parent),
18693+ AuLkup_ALLOW_NEG);
027c5e7a
AM
18694+ err = npositive;
18695+ }
4a4d8108 18696+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
18697+ ret = ERR_PTR(err);
18698+ if (unlikely(err < 0))
18699+ goto out_unlock;
1308ab2a 18700+
4a4d8108 18701+ if (npositive) {
b752ccd1 18702+ inode = au_new_inode(dentry, /*must_new*/0);
c1595e42
JR
18703+ if (IS_ERR(inode)) {
18704+ ret = (void *)inode;
18705+ inode = NULL;
18706+ goto out_unlock;
18707+ }
9dbd164d 18708+ }
4a4d8108 18709+
c1595e42
JR
18710+ if (inode)
18711+ atomic_inc(&inode->i_count);
4a4d8108 18712+ ret = d_splice_alias(inode, dentry);
537831f9
AM
18713+#if 0
18714+ if (unlikely(d_need_lookup(dentry))) {
18715+ spin_lock(&dentry->d_lock);
18716+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
18717+ spin_unlock(&dentry->d_lock);
18718+ } else
18719+#endif
c1595e42 18720+ if (inode) {
2000de60 18721+ if (!IS_ERR(ret)) {
c1595e42 18722+ iput(inode);
2000de60
JR
18723+ if (ret && ret != dentry)
18724+ ii_write_unlock(inode);
18725+ } else {
c1595e42
JR
18726+ ii_write_unlock(inode);
18727+ iput(inode);
18728+ inode = NULL;
18729+ }
7f207e10 18730+ }
1facf9fc 18731+
4f0767ce 18732+out_unlock:
4a4d8108 18733+ di_write_unlock(dentry);
7f207e10 18734+out_si:
4a4d8108 18735+ si_read_unlock(sb);
7f207e10 18736+out:
4a4d8108
AM
18737+ return ret;
18738+}
1facf9fc 18739+
4a4d8108 18740+/* ---------------------------------------------------------------------- */
1facf9fc 18741+
b912730e
AM
18742+struct aopen_node {
18743+ struct hlist_node hlist;
18744+ struct file *file, *h_file;
18745+};
18746+
18747+static int au_do_aopen(struct inode *inode, struct file *file)
18748+{
18749+ struct au_sphlhead *aopen;
18750+ struct aopen_node *node;
18751+ struct au_do_open_args args = {
18752+ .no_lock = 1,
18753+ .open = au_do_open_nondir
18754+ };
18755+
18756+ aopen = &au_sbi(inode->i_sb)->si_aopen;
18757+ spin_lock(&aopen->spin);
18758+ hlist_for_each_entry(node, &aopen->head, hlist)
18759+ if (node->file == file) {
18760+ args.h_file = node->h_file;
18761+ break;
18762+ }
18763+ spin_unlock(&aopen->spin);
18764+ /* AuDebugOn(!args.h_file); */
18765+
18766+ return au_do_open(file, &args);
18767+}
18768+
18769+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
18770+ struct file *file, unsigned int open_flag,
18771+ umode_t create_mode, int *opened)
18772+{
18773+ int err, h_opened = *opened;
5afbbe0d 18774+ unsigned int lkup_flags;
f0c0a007 18775+ struct dentry *parent, *d;
b912730e
AM
18776+ struct au_sphlhead *aopen;
18777+ struct vfsub_aopen_args args = {
18778+ .open_flag = open_flag,
18779+ .create_mode = create_mode,
18780+ .opened = &h_opened
18781+ };
18782+ struct aopen_node aopen_node = {
18783+ .file = file
18784+ };
18785+
18786+ IMustLock(dir);
5afbbe0d 18787+ AuDbg("open_flag 0%o\n", open_flag);
b912730e
AM
18788+ AuDbgDentry(dentry);
18789+
18790+ err = 0;
18791+ if (!au_di(dentry)) {
5afbbe0d
AM
18792+ lkup_flags = LOOKUP_OPEN;
18793+ if (open_flag & O_CREAT)
18794+ lkup_flags |= LOOKUP_CREATE;
18795+ d = aufs_lookup(dir, dentry, lkup_flags);
b912730e
AM
18796+ if (IS_ERR(d)) {
18797+ err = PTR_ERR(d);
5afbbe0d 18798+ AuTraceErr(err);
b912730e
AM
18799+ goto out;
18800+ } else if (d) {
18801+ /*
18802+ * obsoleted dentry found.
18803+ * another error will be returned later.
18804+ */
18805+ d_drop(d);
b912730e 18806+ AuDbgDentry(d);
5afbbe0d 18807+ dput(d);
b912730e
AM
18808+ }
18809+ AuDbgDentry(dentry);
18810+ }
18811+
18812+ if (d_is_positive(dentry)
18813+ || d_unhashed(dentry)
18814+ || d_unlinked(dentry)
18815+ || !(open_flag & O_CREAT))
18816+ goto out_no_open;
18817+
18818+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
18819+ if (unlikely(err))
18820+ goto out;
18821+
18822+ parent = dentry->d_parent; /* dir is locked */
18823+ di_write_lock_parent(parent);
5afbbe0d 18824+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
b912730e
AM
18825+ if (unlikely(err))
18826+ goto out_unlock;
18827+
18828+ AuDbgDentry(dentry);
18829+ if (d_is_positive(dentry))
18830+ goto out_unlock;
18831+
18832+ args.file = get_empty_filp();
18833+ err = PTR_ERR(args.file);
18834+ if (IS_ERR(args.file))
18835+ goto out_unlock;
18836+
18837+ args.file->f_flags = file->f_flags;
18838+ err = au_aopen_or_create(dir, dentry, &args);
18839+ AuTraceErr(err);
18840+ AuDbgFile(args.file);
18841+ if (unlikely(err < 0)) {
18842+ if (h_opened & FILE_OPENED)
18843+ fput(args.file);
18844+ else
18845+ put_filp(args.file);
18846+ goto out_unlock;
18847+ }
18848+
18849+ /* some filesystems don't set FILE_CREATED while succeeded? */
18850+ *opened |= FILE_CREATED;
18851+ if (h_opened & FILE_OPENED)
18852+ aopen_node.h_file = args.file;
18853+ else {
18854+ put_filp(args.file);
18855+ args.file = NULL;
18856+ }
18857+ aopen = &au_sbi(dir->i_sb)->si_aopen;
18858+ au_sphl_add(&aopen_node.hlist, aopen);
18859+ err = finish_open(file, dentry, au_do_aopen, opened);
18860+ au_sphl_del(&aopen_node.hlist, aopen);
18861+ AuTraceErr(err);
18862+ AuDbgFile(file);
18863+ if (aopen_node.h_file)
18864+ fput(aopen_node.h_file);
18865+
18866+out_unlock:
18867+ di_write_unlock(parent);
18868+ aufs_read_unlock(dentry, AuLock_DW);
18869+ AuDbgDentry(dentry);
f0c0a007 18870+ if (unlikely(err < 0))
b912730e
AM
18871+ goto out;
18872+out_no_open:
f0c0a007 18873+ if (err >= 0 && !(*opened & FILE_CREATED)) {
b912730e
AM
18874+ AuLabel(out_no_open);
18875+ dget(dentry);
18876+ err = finish_no_open(file, dentry);
18877+ }
18878+out:
18879+ AuDbg("%pd%s%s\n", dentry,
18880+ (*opened & FILE_CREATED) ? " created" : "",
18881+ (*opened & FILE_OPENED) ? " opened" : "");
18882+ AuTraceErr(err);
18883+ return err;
18884+}
18885+
18886+
18887+/* ---------------------------------------------------------------------- */
18888+
4a4d8108
AM
18889+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
18890+ const unsigned char add_entry, aufs_bindex_t bcpup,
5afbbe0d 18891+ aufs_bindex_t btop)
4a4d8108
AM
18892+{
18893+ int err;
18894+ struct dentry *h_parent;
18895+ struct inode *h_dir;
1facf9fc 18896+
027c5e7a 18897+ if (add_entry)
5527c038 18898+ IMustLock(d_inode(parent));
027c5e7a 18899+ else
4a4d8108
AM
18900+ di_write_lock_parent(parent);
18901+
18902+ err = 0;
18903+ if (!au_h_dptr(parent, bcpup)) {
5afbbe0d 18904+ if (btop > bcpup)
c2b27bf2 18905+ err = au_cpup_dirs(dentry, bcpup);
5afbbe0d 18906+ else if (btop < bcpup)
4a4d8108
AM
18907+ err = au_cpdown_dirs(dentry, bcpup);
18908+ else
c2b27bf2 18909+ BUG();
4a4d8108 18910+ }
38d290e6 18911+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
4a4d8108 18912+ h_parent = au_h_dptr(parent, bcpup);
5527c038 18913+ h_dir = d_inode(h_parent);
febd17d6 18914+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
7e9cd9fe 18915+ err = au_lkup_neg(dentry, bcpup, /*wh*/0);
4a4d8108 18916+ /* todo: no unlock here */
febd17d6 18917+ inode_unlock(h_dir);
027c5e7a
AM
18918+
18919+ AuDbg("bcpup %d\n", bcpup);
18920+ if (!err) {
5527c038 18921+ if (d_really_is_negative(dentry))
5afbbe0d 18922+ au_set_h_dptr(dentry, btop, NULL);
4a4d8108
AM
18923+ au_update_dbrange(dentry, /*do_put_zero*/0);
18924+ }
1308ab2a 18925+ }
1facf9fc 18926+
4a4d8108
AM
18927+ if (!add_entry)
18928+ di_write_unlock(parent);
18929+ if (!err)
18930+ err = bcpup; /* success */
1308ab2a 18931+
027c5e7a 18932+ AuTraceErr(err);
4a4d8108
AM
18933+ return err;
18934+}
1facf9fc 18935+
4a4d8108
AM
18936+/*
18937+ * decide the branch and the parent dir where we will create a new entry.
18938+ * returns new bindex or an error.
18939+ * copyup the parent dir if needed.
18940+ */
18941+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
18942+ struct au_wr_dir_args *args)
18943+{
18944+ int err;
392086de 18945+ unsigned int flags;
5afbbe0d 18946+ aufs_bindex_t bcpup, btop, src_btop;
86dc4139
AM
18947+ const unsigned char add_entry
18948+ = au_ftest_wrdir(args->flags, ADD_ENTRY)
38d290e6 18949+ | au_ftest_wrdir(args->flags, TMPFILE);
4a4d8108
AM
18950+ struct super_block *sb;
18951+ struct dentry *parent;
18952+ struct au_sbinfo *sbinfo;
1facf9fc 18953+
4a4d8108
AM
18954+ sb = dentry->d_sb;
18955+ sbinfo = au_sbi(sb);
18956+ parent = dget_parent(dentry);
5afbbe0d
AM
18957+ btop = au_dbtop(dentry);
18958+ bcpup = btop;
4a4d8108
AM
18959+ if (args->force_btgt < 0) {
18960+ if (src_dentry) {
5afbbe0d
AM
18961+ src_btop = au_dbtop(src_dentry);
18962+ if (src_btop < btop)
18963+ bcpup = src_btop;
4a4d8108 18964+ } else if (add_entry) {
392086de
AM
18965+ flags = 0;
18966+ if (au_ftest_wrdir(args->flags, ISDIR))
18967+ au_fset_wbr(flags, DIR);
18968+ err = AuWbrCreate(sbinfo, dentry, flags);
4a4d8108
AM
18969+ bcpup = err;
18970+ }
1facf9fc 18971+
5527c038 18972+ if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
4a4d8108
AM
18973+ if (add_entry)
18974+ err = AuWbrCopyup(sbinfo, dentry);
18975+ else {
18976+ if (!IS_ROOT(dentry)) {
18977+ di_read_lock_parent(parent, !AuLock_IR);
18978+ err = AuWbrCopyup(sbinfo, dentry);
18979+ di_read_unlock(parent, !AuLock_IR);
18980+ } else
18981+ err = AuWbrCopyup(sbinfo, dentry);
18982+ }
18983+ bcpup = err;
18984+ if (unlikely(err < 0))
18985+ goto out;
18986+ }
18987+ } else {
18988+ bcpup = args->force_btgt;
5527c038 18989+ AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
1308ab2a 18990+ }
027c5e7a 18991+
5afbbe0d 18992+ AuDbg("btop %d, bcpup %d\n", btop, bcpup);
4a4d8108 18993+ err = bcpup;
5afbbe0d 18994+ if (bcpup == btop)
4a4d8108 18995+ goto out; /* success */
4a4d8108
AM
18996+
18997+ /* copyup the new parent into the branch we process */
5afbbe0d 18998+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, btop);
027c5e7a 18999+ if (err >= 0) {
5527c038 19000+ if (d_really_is_negative(dentry)) {
5afbbe0d
AM
19001+ au_set_h_dptr(dentry, btop, NULL);
19002+ au_set_dbtop(dentry, bcpup);
19003+ au_set_dbbot(dentry, bcpup);
027c5e7a 19004+ }
38d290e6
JR
19005+ AuDebugOn(add_entry
19006+ && !au_ftest_wrdir(args->flags, TMPFILE)
19007+ && !au_h_dptr(dentry, bcpup));
027c5e7a 19008+ }
86dc4139
AM
19009+
19010+out:
19011+ dput(parent);
19012+ return err;
19013+}
19014+
19015+/* ---------------------------------------------------------------------- */
19016+
19017+void au_pin_hdir_unlock(struct au_pin *p)
19018+{
19019+ if (p->hdir)
5afbbe0d 19020+ au_hn_inode_unlock(p->hdir);
86dc4139
AM
19021+}
19022+
c1595e42 19023+int au_pin_hdir_lock(struct au_pin *p)
86dc4139
AM
19024+{
19025+ int err;
19026+
19027+ err = 0;
19028+ if (!p->hdir)
19029+ goto out;
19030+
19031+ /* even if an error happens later, keep this lock */
5afbbe0d 19032+ au_hn_inode_lock_nested(p->hdir, p->lsc_hi);
86dc4139
AM
19033+
19034+ err = -EBUSY;
5527c038 19035+ if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
86dc4139
AM
19036+ goto out;
19037+
19038+ err = 0;
19039+ if (p->h_dentry)
19040+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
19041+ p->h_parent, p->br);
19042+
19043+out:
19044+ return err;
19045+}
19046+
19047+int au_pin_hdir_relock(struct au_pin *p)
19048+{
19049+ int err, i;
19050+ struct inode *h_i;
19051+ struct dentry *h_d[] = {
19052+ p->h_dentry,
19053+ p->h_parent
19054+ };
19055+
19056+ err = au_pin_hdir_lock(p);
19057+ if (unlikely(err))
19058+ goto out;
19059+
19060+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
19061+ if (!h_d[i])
19062+ continue;
5527c038
JR
19063+ if (d_is_positive(h_d[i])) {
19064+ h_i = d_inode(h_d[i]);
86dc4139 19065+ err = !h_i->i_nlink;
5527c038 19066+ }
86dc4139
AM
19067+ }
19068+
19069+out:
19070+ return err;
19071+}
19072+
/*
 * set the owner of i_rwsem of the pinned lower dir to @task.
 * this is a no-op unless the rwsem implementation is the one which tracks its
 * owner (see the configs tested below).
 */
static void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
{
#if defined(CONFIG_RWSEM_SPIN_ON_OWNER) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
	struct inode *h_dir = p->hdir->hi_inode;

	h_dir->i_rwsem.owner = task;
#endif
}
19079+
19080+void au_pin_hdir_acquire_nest(struct au_pin *p)
19081+{
19082+ if (p->hdir) {
5afbbe0d 19083+ rwsem_acquire_nest(&p->hdir->hi_inode->i_rwsem.dep_map,
86dc4139
AM
19084+ p->lsc_hi, 0, NULL, _RET_IP_);
19085+ au_pin_hdir_set_owner(p, current);
19086+ }
dece6358 19087+}
1facf9fc 19088+
86dc4139
AM
19089+void au_pin_hdir_release(struct au_pin *p)
19090+{
19091+ if (p->hdir) {
19092+ au_pin_hdir_set_owner(p, p->task);
5afbbe0d 19093+ rwsem_release(&p->hdir->hi_inode->i_rwsem.dep_map, 1, _RET_IP_);
86dc4139
AM
19094+ }
19095+}
1308ab2a 19096+
4a4d8108 19097+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 19098+{
4a4d8108
AM
19099+ if (pin && pin->parent)
19100+ return au_h_dptr(pin->parent, pin->bindex);
19101+ return NULL;
dece6358 19102+}
1facf9fc 19103+
4a4d8108 19104+void au_unpin(struct au_pin *p)
dece6358 19105+{
86dc4139
AM
19106+ if (p->hdir)
19107+ au_pin_hdir_unlock(p);
e49829fe 19108+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
b4510431 19109+ vfsub_mnt_drop_write(p->h_mnt);
4a4d8108
AM
19110+ if (!p->hdir)
19111+ return;
1facf9fc 19112+
4a4d8108
AM
19113+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19114+ di_read_unlock(p->parent, AuLock_IR);
19115+ iput(p->hdir->hi_inode);
19116+ dput(p->parent);
19117+ p->parent = NULL;
19118+ p->hdir = NULL;
19119+ p->h_mnt = NULL;
86dc4139 19120+ /* do not clear p->task */
4a4d8108 19121+}
1308ab2a 19122+
4a4d8108
AM
19123+int au_do_pin(struct au_pin *p)
19124+{
19125+ int err;
19126+ struct super_block *sb;
4a4d8108
AM
19127+ struct inode *h_dir;
19128+
19129+ err = 0;
19130+ sb = p->dentry->d_sb;
86dc4139 19131+ p->br = au_sbr(sb, p->bindex);
4a4d8108
AM
19132+ if (IS_ROOT(p->dentry)) {
19133+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 19134+ p->h_mnt = au_br_mnt(p->br);
b4510431 19135+ err = vfsub_mnt_want_write(p->h_mnt);
4a4d8108
AM
19136+ if (unlikely(err)) {
19137+ au_fclr_pin(p->flags, MNT_WRITE);
19138+ goto out_err;
19139+ }
19140+ }
dece6358 19141+ goto out;
1facf9fc 19142+ }
19143+
86dc4139 19144+ p->h_dentry = NULL;
5afbbe0d 19145+ if (p->bindex <= au_dbbot(p->dentry))
86dc4139 19146+ p->h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 19147+
4a4d8108
AM
19148+ p->parent = dget_parent(p->dentry);
19149+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19150+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 19151+
4a4d8108 19152+ h_dir = NULL;
86dc4139 19153+ p->h_parent = au_h_dptr(p->parent, p->bindex);
5527c038 19154+ p->hdir = au_hi(d_inode(p->parent), p->bindex);
4a4d8108
AM
19155+ if (p->hdir)
19156+ h_dir = p->hdir->hi_inode;
dece6358 19157+
b752ccd1
AM
19158+ /*
19159+ * udba case, or
19160+ * if DI_LOCKED is not set, then p->parent may be different
19161+ * and h_parent can be NULL.
19162+ */
86dc4139 19163+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
e49829fe 19164+ err = -EBUSY;
4a4d8108
AM
19165+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19166+ di_read_unlock(p->parent, AuLock_IR);
19167+ dput(p->parent);
19168+ p->parent = NULL;
19169+ goto out_err;
19170+ }
1308ab2a 19171+
4a4d8108 19172+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 19173+ p->h_mnt = au_br_mnt(p->br);
b4510431 19174+ err = vfsub_mnt_want_write(p->h_mnt);
dece6358 19175+ if (unlikely(err)) {
4a4d8108 19176+ au_fclr_pin(p->flags, MNT_WRITE);
86dc4139
AM
19177+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19178+ di_read_unlock(p->parent, AuLock_IR);
19179+ dput(p->parent);
19180+ p->parent = NULL;
19181+ goto out_err;
dece6358
AM
19182+ }
19183+ }
4a4d8108 19184+
86dc4139
AM
19185+ au_igrab(h_dir);
19186+ err = au_pin_hdir_lock(p);
19187+ if (!err)
19188+ goto out; /* success */
19189+
076b876e
AM
19190+ au_unpin(p);
19191+
4f0767ce 19192+out_err:
4a4d8108
AM
19193+ pr_err("err %d\n", err);
19194+ err = au_busy_or_stale();
4f0767ce 19195+out:
1facf9fc 19196+ return err;
19197+}
19198+
4a4d8108
AM
19199+void au_pin_init(struct au_pin *p, struct dentry *dentry,
19200+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
19201+ unsigned int udba, unsigned char flags)
19202+{
19203+ p->dentry = dentry;
19204+ p->udba = udba;
19205+ p->lsc_di = lsc_di;
19206+ p->lsc_hi = lsc_hi;
19207+ p->flags = flags;
19208+ p->bindex = bindex;
19209+
19210+ p->parent = NULL;
19211+ p->hdir = NULL;
19212+ p->h_mnt = NULL;
86dc4139
AM
19213+
19214+ p->h_dentry = NULL;
19215+ p->h_parent = NULL;
19216+ p->br = NULL;
19217+ p->task = current;
4a4d8108
AM
19218+}
19219+
19220+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
19221+ unsigned int udba, unsigned char flags)
19222+{
19223+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
19224+ udba, flags);
19225+ return au_do_pin(pin);
19226+}
19227+
dece6358
AM
19228+/* ---------------------------------------------------------------------- */
19229+
1308ab2a 19230+/*
4a4d8108
AM
19231+ * ->setattr() and ->getattr() are called in various cases.
19232+ * chmod, stat: dentry is revalidated.
19233+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
19234+ * unhashed.
19235+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 19236+ */
027c5e7a 19237+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
c1595e42 19238+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 19239+{
4a4d8108 19240+ int err;
4a4d8108 19241+ struct dentry *parent;
1facf9fc 19242+
1308ab2a 19243+ err = 0;
027c5e7a 19244+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
19245+ parent = dget_parent(dentry);
19246+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 19247+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
19248+ di_read_unlock(parent, AuLock_IR);
19249+ dput(parent);
dece6358 19250+ }
1facf9fc 19251+
4a4d8108 19252+ AuTraceErr(err);
1308ab2a 19253+ return err;
19254+}
dece6358 19255+
c1595e42
JR
19256+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
19257+ struct au_icpup_args *a)
1308ab2a 19258+{
19259+ int err;
4a4d8108 19260+ loff_t sz;
5afbbe0d 19261+ aufs_bindex_t btop, ibtop;
4a4d8108
AM
19262+ struct dentry *hi_wh, *parent;
19263+ struct inode *inode;
4a4d8108
AM
19264+ struct au_wr_dir_args wr_dir_args = {
19265+ .force_btgt = -1,
19266+ .flags = 0
19267+ };
19268+
2000de60 19269+ if (d_is_dir(dentry))
4a4d8108
AM
19270+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
19271+ /* plink or hi_wh() case */
5afbbe0d 19272+ btop = au_dbtop(dentry);
5527c038 19273+ inode = d_inode(dentry);
5afbbe0d
AM
19274+ ibtop = au_ibtop(inode);
19275+ if (btop != ibtop && !au_test_ro(inode->i_sb, ibtop, inode))
19276+ wr_dir_args.force_btgt = ibtop;
4a4d8108
AM
19277+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
19278+ if (unlikely(err < 0))
19279+ goto out;
19280+ a->btgt = err;
5afbbe0d 19281+ if (err != btop)
4a4d8108
AM
19282+ au_fset_icpup(a->flags, DID_CPUP);
19283+
19284+ err = 0;
19285+ a->pin_flags = AuPin_MNT_WRITE;
19286+ parent = NULL;
19287+ if (!IS_ROOT(dentry)) {
19288+ au_fset_pin(a->pin_flags, DI_LOCKED);
19289+ parent = dget_parent(dentry);
19290+ di_write_lock_parent(parent);
19291+ }
19292+
19293+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
19294+ if (unlikely(err))
19295+ goto out_parent;
19296+
4a4d8108 19297+ sz = -1;
5afbbe0d 19298+ a->h_path.dentry = au_h_dptr(dentry, btop);
5527c038 19299+ a->h_inode = d_inode(a->h_path.dentry);
c1595e42 19300+ if (ia && (ia->ia_valid & ATTR_SIZE)) {
febd17d6 19301+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
c1595e42
JR
19302+ if (ia->ia_size < i_size_read(a->h_inode))
19303+ sz = ia->ia_size;
febd17d6 19304+ inode_unlock(a->h_inode);
c1595e42 19305+ }
4a4d8108 19306+
4a4d8108 19307+ hi_wh = NULL;
027c5e7a 19308+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
19309+ hi_wh = au_hi_wh(inode, a->btgt);
19310+ if (!hi_wh) {
c2b27bf2
AM
19311+ struct au_cp_generic cpg = {
19312+ .dentry = dentry,
19313+ .bdst = a->btgt,
19314+ .bsrc = -1,
19315+ .len = sz,
19316+ .pin = &a->pin
19317+ };
19318+ err = au_sio_cpup_wh(&cpg, /*file*/NULL);
4a4d8108
AM
19319+ if (unlikely(err))
19320+ goto out_unlock;
19321+ hi_wh = au_hi_wh(inode, a->btgt);
19322+ /* todo: revalidate hi_wh? */
19323+ }
19324+ }
19325+
19326+ if (parent) {
19327+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
19328+ di_downgrade_lock(parent, AuLock_IR);
19329+ dput(parent);
19330+ parent = NULL;
19331+ }
19332+ if (!au_ftest_icpup(a->flags, DID_CPUP))
19333+ goto out; /* success */
19334+
19335+ if (!d_unhashed(dentry)) {
c2b27bf2
AM
19336+ struct au_cp_generic cpg = {
19337+ .dentry = dentry,
19338+ .bdst = a->btgt,
5afbbe0d 19339+ .bsrc = btop,
c2b27bf2
AM
19340+ .len = sz,
19341+ .pin = &a->pin,
19342+ .flags = AuCpup_DTIME | AuCpup_HOPEN
19343+ };
19344+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
19345+ if (!err)
19346+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19347+ } else if (!hi_wh)
19348+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19349+ else
19350+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 19351+
4f0767ce 19352+out_unlock:
5527c038 19353+ a->h_inode = d_inode(a->h_path.dentry);
86dc4139 19354+ if (!err)
dece6358 19355+ goto out; /* success */
4a4d8108 19356+ au_unpin(&a->pin);
4f0767ce 19357+out_parent:
4a4d8108
AM
19358+ if (parent) {
19359+ di_write_unlock(parent);
19360+ dput(parent);
19361+ }
4f0767ce 19362+out:
86dc4139 19363+ if (!err)
febd17d6 19364+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
1facf9fc 19365+ return err;
19366+}
19367+
4a4d8108 19368+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 19369+{
4a4d8108 19370+ int err;
523b37e3 19371+ struct inode *inode, *delegated;
4a4d8108
AM
19372+ struct super_block *sb;
19373+ struct file *file;
19374+ struct au_icpup_args *a;
1facf9fc 19375+
5527c038 19376+ inode = d_inode(dentry);
4a4d8108 19377+ IMustLock(inode);
dece6358 19378+
4a4d8108
AM
19379+ err = -ENOMEM;
19380+ a = kzalloc(sizeof(*a), GFP_NOFS);
19381+ if (unlikely(!a))
19382+ goto out;
1facf9fc 19383+
4a4d8108
AM
19384+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
19385+ ia->ia_valid &= ~ATTR_MODE;
dece6358 19386+
4a4d8108
AM
19387+ file = NULL;
19388+ sb = dentry->d_sb;
e49829fe
JR
19389+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19390+ if (unlikely(err))
19391+ goto out_kfree;
19392+
4a4d8108
AM
19393+ if (ia->ia_valid & ATTR_FILE) {
19394+ /* currently ftruncate(2) only */
7e9cd9fe 19395+ AuDebugOn(!d_is_reg(dentry));
4a4d8108
AM
19396+ file = ia->ia_file;
19397+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
19398+ if (unlikely(err))
19399+ goto out_si;
19400+ ia->ia_file = au_hf_top(file);
19401+ a->udba = AuOpt_UDBA_NONE;
19402+ } else {
19403+ /* fchmod() doesn't pass ia_file */
19404+ a->udba = au_opt_udba(sb);
027c5e7a
AM
19405+ di_write_lock_child(dentry);
19406+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
19407+ if (d_unhashed(dentry))
19408+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
19409+ if (a->udba != AuOpt_UDBA_NONE) {
19410+ AuDebugOn(IS_ROOT(dentry));
19411+ err = au_reval_for_attr(dentry, au_sigen(sb));
19412+ if (unlikely(err))
19413+ goto out_dentry;
19414+ }
dece6358 19415+ }
dece6358 19416+
4a4d8108
AM
19417+ err = au_pin_and_icpup(dentry, ia, a);
19418+ if (unlikely(err < 0))
19419+ goto out_dentry;
19420+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
19421+ ia->ia_file = NULL;
19422+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 19423+ }
dece6358 19424+
4a4d8108
AM
19425+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
19426+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
19427+ == (ATTR_MODE | ATTR_CTIME)) {
7eafdf33 19428+ err = security_path_chmod(&a->h_path, ia->ia_mode);
4a4d8108
AM
19429+ if (unlikely(err))
19430+ goto out_unlock;
19431+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
19432+ && (ia->ia_valid & ATTR_CTIME)) {
86dc4139 19433+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
4a4d8108
AM
19434+ if (unlikely(err))
19435+ goto out_unlock;
19436+ }
dece6358 19437+
4a4d8108
AM
19438+ if (ia->ia_valid & ATTR_SIZE) {
19439+ struct file *f;
1308ab2a 19440+
953406b4 19441+ if (ia->ia_size < i_size_read(inode))
4a4d8108 19442+ /* unmap only */
953406b4 19443+ truncate_setsize(inode, ia->ia_size);
1308ab2a 19444+
4a4d8108
AM
19445+ f = NULL;
19446+ if (ia->ia_valid & ATTR_FILE)
19447+ f = ia->ia_file;
febd17d6 19448+ inode_unlock(a->h_inode);
4a4d8108 19449+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
febd17d6 19450+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
523b37e3
AM
19451+ } else {
19452+ delegated = NULL;
19453+ while (1) {
19454+ err = vfsub_notify_change(&a->h_path, ia, &delegated);
19455+ if (delegated) {
19456+ err = break_deleg_wait(&delegated);
19457+ if (!err)
19458+ continue;
19459+ }
19460+ break;
19461+ }
19462+ }
8cdd5066
JR
19463+ /*
19464+ * regardless aufs 'acl' option setting.
19465+ * why don't all acl-aware fs call this func from their ->setattr()?
19466+ */
19467+ if (!err && (ia->ia_valid & ATTR_MODE))
19468+ err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
4a4d8108
AM
19469+ if (!err)
19470+ au_cpup_attr_changeable(inode);
1308ab2a 19471+
4f0767ce 19472+out_unlock:
febd17d6 19473+ inode_unlock(a->h_inode);
4a4d8108 19474+ au_unpin(&a->pin);
027c5e7a 19475+ if (unlikely(err))
5afbbe0d 19476+ au_update_dbtop(dentry);
4f0767ce 19477+out_dentry:
4a4d8108
AM
19478+ di_write_unlock(dentry);
19479+ if (file) {
19480+ fi_write_unlock(file);
19481+ ia->ia_file = file;
19482+ ia->ia_valid |= ATTR_FILE;
19483+ }
4f0767ce 19484+out_si:
4a4d8108 19485+ si_read_unlock(sb);
e49829fe 19486+out_kfree:
f0c0a007 19487+ au_delayed_kfree(a);
4f0767ce 19488+out:
4a4d8108
AM
19489+ AuTraceErr(err);
19490+ return err;
1facf9fc 19491+}
19492+
c1595e42
JR
19493+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
19494+static int au_h_path_to_set_attr(struct dentry *dentry,
19495+ struct au_icpup_args *a, struct path *h_path)
19496+{
19497+ int err;
19498+ struct super_block *sb;
19499+
19500+ sb = dentry->d_sb;
19501+ a->udba = au_opt_udba(sb);
19502+ /* no d_unlinked(), to set UDBA_NONE for root */
19503+ if (d_unhashed(dentry))
19504+ a->udba = AuOpt_UDBA_NONE;
19505+ if (a->udba != AuOpt_UDBA_NONE) {
19506+ AuDebugOn(IS_ROOT(dentry));
19507+ err = au_reval_for_attr(dentry, au_sigen(sb));
19508+ if (unlikely(err))
19509+ goto out;
19510+ }
19511+ err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
19512+ if (unlikely(err < 0))
19513+ goto out;
19514+
19515+ h_path->dentry = a->h_path.dentry;
19516+ h_path->mnt = au_sbr_mnt(sb, a->btgt);
19517+
19518+out:
19519+ return err;
19520+}
19521+
5afbbe0d
AM
19522+ssize_t au_srxattr(struct dentry *dentry, struct inode *inode,
19523+ struct au_srxattr *arg)
c1595e42
JR
19524+{
19525+ int err;
19526+ struct path h_path;
19527+ struct super_block *sb;
19528+ struct au_icpup_args *a;
5afbbe0d 19529+ struct inode *h_inode;
c1595e42 19530+
c1595e42
JR
19531+ IMustLock(inode);
19532+
19533+ err = -ENOMEM;
19534+ a = kzalloc(sizeof(*a), GFP_NOFS);
19535+ if (unlikely(!a))
19536+ goto out;
19537+
19538+ sb = dentry->d_sb;
19539+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19540+ if (unlikely(err))
19541+ goto out_kfree;
19542+
19543+ h_path.dentry = NULL; /* silence gcc */
19544+ di_write_lock_child(dentry);
19545+ err = au_h_path_to_set_attr(dentry, a, &h_path);
19546+ if (unlikely(err))
19547+ goto out_di;
19548+
febd17d6 19549+ inode_unlock(a->h_inode);
c1595e42
JR
19550+ switch (arg->type) {
19551+ case AU_XATTR_SET:
5afbbe0d 19552+ AuDebugOn(d_is_negative(h_path.dentry));
c1595e42
JR
19553+ err = vfsub_setxattr(h_path.dentry,
19554+ arg->u.set.name, arg->u.set.value,
19555+ arg->u.set.size, arg->u.set.flags);
19556+ break;
19557+ case AU_XATTR_REMOVE:
19558+ err = vfsub_removexattr(h_path.dentry, arg->u.remove.name);
19559+ break;
19560+ case AU_ACL_SET:
19561+ err = -EOPNOTSUPP;
5527c038 19562+ h_inode = d_inode(h_path.dentry);
c1595e42
JR
19563+ if (h_inode->i_op->set_acl)
19564+ err = h_inode->i_op->set_acl(h_inode,
19565+ arg->u.acl_set.acl,
19566+ arg->u.acl_set.type);
19567+ break;
19568+ }
19569+ if (!err)
19570+ au_cpup_attr_timesizes(inode);
19571+
19572+ au_unpin(&a->pin);
19573+ if (unlikely(err))
5afbbe0d 19574+ au_update_dbtop(dentry);
c1595e42
JR
19575+
19576+out_di:
19577+ di_write_unlock(dentry);
19578+ si_read_unlock(sb);
19579+out_kfree:
f0c0a007 19580+ au_delayed_kfree(a);
c1595e42
JR
19581+out:
19582+ AuTraceErr(err);
19583+ return err;
19584+}
19585+#endif
19586+
4a4d8108
AM
19587+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
19588+ unsigned int nlink)
1facf9fc 19589+{
9dbd164d
AM
19590+ unsigned int n;
19591+
4a4d8108 19592+ inode->i_mode = st->mode;
86dc4139
AM
19593+ /* don't i_[ug]id_write() here */
19594+ inode->i_uid = st->uid;
19595+ inode->i_gid = st->gid;
4a4d8108
AM
19596+ inode->i_atime = st->atime;
19597+ inode->i_mtime = st->mtime;
19598+ inode->i_ctime = st->ctime;
1facf9fc 19599+
4a4d8108
AM
19600+ au_cpup_attr_nlink(inode, /*force*/0);
19601+ if (S_ISDIR(inode->i_mode)) {
9dbd164d
AM
19602+ n = inode->i_nlink;
19603+ n -= nlink;
19604+ n += st->nlink;
f6b6e03d 19605+ smp_mb(); /* for i_nlink */
7eafdf33 19606+ /* 0 can happen */
92d182d2 19607+ set_nlink(inode, n);
4a4d8108 19608+ }
1facf9fc 19609+
4a4d8108
AM
19610+ spin_lock(&inode->i_lock);
19611+ inode->i_blocks = st->blocks;
19612+ i_size_write(inode, st->size);
19613+ spin_unlock(&inode->i_lock);
1facf9fc 19614+}
19615+
c1595e42
JR
19616+/*
19617+ * common routine for aufs_getattr() and aufs_getxattr().
19618+ * returns zero or negative (an error).
19619+ * @dentry will be read-locked in success.
19620+ */
19621+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
1facf9fc 19622+{
4a4d8108 19623+ int err;
076b876e 19624+ unsigned int mnt_flags, sigen;
c1595e42 19625+ unsigned char udba_none;
4a4d8108 19626+ aufs_bindex_t bindex;
4a4d8108
AM
19627+ struct super_block *sb, *h_sb;
19628+ struct inode *inode;
1facf9fc 19629+
c1595e42
JR
19630+ h_path->mnt = NULL;
19631+ h_path->dentry = NULL;
19632+
19633+ err = 0;
4a4d8108 19634+ sb = dentry->d_sb;
4a4d8108
AM
19635+ mnt_flags = au_mntflags(sb);
19636+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 19637+
4a4d8108 19638+ /* support fstat(2) */
027c5e7a 19639+ if (!d_unlinked(dentry) && !udba_none) {
076b876e 19640+ sigen = au_sigen(sb);
027c5e7a
AM
19641+ err = au_digen_test(dentry, sigen);
19642+ if (!err) {
4a4d8108 19643+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a 19644+ err = au_dbrange_test(dentry);
c1595e42
JR
19645+ if (unlikely(err)) {
19646+ di_read_unlock(dentry, AuLock_IR);
19647+ goto out;
19648+ }
027c5e7a 19649+ } else {
4a4d8108
AM
19650+ AuDebugOn(IS_ROOT(dentry));
19651+ di_write_lock_child(dentry);
027c5e7a
AM
19652+ err = au_dbrange_test(dentry);
19653+ if (!err)
19654+ err = au_reval_for_attr(dentry, sigen);
c1595e42
JR
19655+ if (!err)
19656+ di_downgrade_lock(dentry, AuLock_IR);
19657+ else {
19658+ di_write_unlock(dentry);
19659+ goto out;
19660+ }
4a4d8108
AM
19661+ }
19662+ } else
19663+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 19664+
5527c038 19665+ inode = d_inode(dentry);
5afbbe0d 19666+ bindex = au_ibtop(inode);
c1595e42
JR
19667+ h_path->mnt = au_sbr_mnt(sb, bindex);
19668+ h_sb = h_path->mnt->mnt_sb;
19669+ if (!force
19670+ && !au_test_fs_bad_iattr(h_sb)
19671+ && udba_none)
19672+ goto out; /* success */
1facf9fc 19673+
5afbbe0d 19674+ if (au_dbtop(dentry) == bindex)
c1595e42 19675+ h_path->dentry = au_h_dptr(dentry, bindex);
4a4d8108 19676+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
c1595e42
JR
19677+ h_path->dentry = au_plink_lkup(inode, bindex);
19678+ if (IS_ERR(h_path->dentry))
19679+ /* pretending success */
19680+ h_path->dentry = NULL;
19681+ else
19682+ dput(h_path->dentry);
4a4d8108 19683+ }
c1595e42
JR
19684+
19685+out:
19686+ return err;
19687+}
19688+
19689+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
19690+ struct dentry *dentry, struct kstat *st)
19691+{
19692+ int err;
19693+ unsigned char positive;
19694+ struct path h_path;
19695+ struct inode *inode;
19696+ struct super_block *sb;
19697+
5527c038 19698+ inode = d_inode(dentry);
c1595e42
JR
19699+ sb = dentry->d_sb;
19700+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19701+ if (unlikely(err))
19702+ goto out;
19703+ err = au_h_path_getattr(dentry, /*force*/0, &h_path);
19704+ if (unlikely(err))
19705+ goto out_si;
c06a8ce3 19706+ if (unlikely(!h_path.dentry))
c1595e42 19707+ /* illegally overlapped or something */
4a4d8108
AM
19708+ goto out_fill; /* pretending success */
19709+
5527c038 19710+ positive = d_is_positive(h_path.dentry);
4a4d8108 19711+ if (positive)
c06a8ce3 19712+ err = vfs_getattr(&h_path, st);
4a4d8108
AM
19713+ if (!err) {
19714+ if (positive)
c06a8ce3 19715+ au_refresh_iattr(inode, st,
5527c038 19716+ d_inode(h_path.dentry)->i_nlink);
4a4d8108 19717+ goto out_fill; /* success */
1facf9fc 19718+ }
7f207e10 19719+ AuTraceErr(err);
c1595e42 19720+ goto out_di;
4a4d8108 19721+
4f0767ce 19722+out_fill:
4a4d8108 19723+ generic_fillattr(inode, st);
c1595e42 19724+out_di:
4a4d8108 19725+ di_read_unlock(dentry, AuLock_IR);
c1595e42 19726+out_si:
4a4d8108 19727+ si_read_unlock(sb);
7f207e10
AM
19728+out:
19729+ AuTraceErr(err);
4a4d8108 19730+ return err;
1facf9fc 19731+}
19732+
19733+/* ---------------------------------------------------------------------- */
19734+
febd17d6
JR
19735+static const char *aufs_get_link(struct dentry *dentry, struct inode *inode,
19736+ struct delayed_call *done)
4a4d8108 19737+{
c2c0f25c 19738+ const char *ret;
c2c0f25c 19739+ struct dentry *h_dentry;
febd17d6 19740+ struct inode *h_inode;
4a4d8108 19741+ int err;
c2c0f25c 19742+ aufs_bindex_t bindex;
1facf9fc 19743+
79b8bda9 19744+ ret = NULL; /* suppress a warning */
febd17d6
JR
19745+ err = -ECHILD;
19746+ if (!dentry)
19747+ goto out;
19748+
027c5e7a
AM
19749+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
19750+ if (unlikely(err))
c2c0f25c 19751+ goto out;
027c5e7a
AM
19752+
19753+ err = au_d_hashed_positive(dentry);
c2c0f25c
AM
19754+ if (unlikely(err))
19755+ goto out_unlock;
19756+
19757+ err = -EINVAL;
19758+ inode = d_inode(dentry);
5afbbe0d 19759+ bindex = au_ibtop(inode);
c2c0f25c 19760+ h_inode = au_h_iptr(inode, bindex);
febd17d6 19761+ if (unlikely(!h_inode->i_op->get_link))
c2c0f25c
AM
19762+ goto out_unlock;
19763+
19764+ err = -EBUSY;
19765+ h_dentry = NULL;
5afbbe0d 19766+ if (au_dbtop(dentry) <= bindex) {
c2c0f25c
AM
19767+ h_dentry = au_h_dptr(dentry, bindex);
19768+ if (h_dentry)
19769+ dget(h_dentry);
027c5e7a 19770+ }
c2c0f25c
AM
19771+ if (!h_dentry) {
19772+ h_dentry = d_find_any_alias(h_inode);
19773+ if (IS_ERR(h_dentry)) {
19774+ err = PTR_ERR(h_dentry);
febd17d6 19775+ goto out_unlock;
c2c0f25c
AM
19776+ }
19777+ }
19778+ if (unlikely(!h_dentry))
febd17d6 19779+ goto out_unlock;
1facf9fc 19780+
c2c0f25c 19781+ err = 0;
febd17d6 19782+ AuDbg("%pf\n", h_inode->i_op->get_link);
c2c0f25c 19783+ AuDbgDentry(h_dentry);
febd17d6 19784+ ret = h_inode->i_op->get_link(h_dentry, h_inode, done);
c2c0f25c 19785+ dput(h_dentry);
febd17d6
JR
19786+ if (IS_ERR(ret))
19787+ err = PTR_ERR(ret);
c2c0f25c 19788+
c2c0f25c
AM
19789+out_unlock:
19790+ aufs_read_unlock(dentry, AuLock_IR);
4f0767ce 19791+out:
c2c0f25c
AM
19792+ if (unlikely(err))
19793+ ret = ERR_PTR(err);
19794+ AuTraceErrPtr(ret);
19795+ return ret;
4a4d8108 19796+}
1facf9fc 19797+
4a4d8108 19798+/* ---------------------------------------------------------------------- */
1facf9fc 19799+
e2f27e51
AM
19800+static int au_is_special(struct inode *inode)
19801+{
19802+ return (inode->i_mode & (S_IFBLK | S_IFCHR | S_IFIFO | S_IFSOCK));
19803+}
19804+
0c3ec466 19805+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
4a4d8108 19806+{
0c3ec466 19807+ int err;
e2f27e51 19808+ aufs_bindex_t bindex;
0c3ec466
AM
19809+ struct super_block *sb;
19810+ struct inode *h_inode;
e2f27e51 19811+ struct vfsmount *h_mnt;
0c3ec466
AM
19812+
19813+ sb = inode->i_sb;
e2f27e51
AM
19814+ WARN_ONCE((flags & S_ATIME) && !IS_NOATIME(inode),
19815+ "unexpected s_flags 0x%lx", sb->s_flags);
19816+
0c3ec466
AM
19817+ /* mmap_sem might be acquired already, cf. aufs_mmap() */
19818+ lockdep_off();
19819+ si_read_lock(sb, AuLock_FLUSH);
19820+ ii_write_lock_child(inode);
19821+ lockdep_on();
e2f27e51
AM
19822+
19823+ err = 0;
19824+ bindex = au_ibtop(inode);
19825+ h_inode = au_h_iptr(inode, bindex);
19826+ if (!au_test_ro(sb, bindex, inode)) {
19827+ h_mnt = au_sbr_mnt(sb, bindex);
19828+ err = vfsub_mnt_want_write(h_mnt);
19829+ if (!err) {
19830+ err = vfsub_update_time(h_inode, ts, flags);
19831+ vfsub_mnt_drop_write(h_mnt);
19832+ }
19833+ } else if (au_is_special(h_inode)) {
19834+ /*
19835+ * Never copy-up here.
19836+ * These special files may already be opened and used for
19837+ * communicating. If we copied it up, then the communication
19838+ * would be corrupted.
19839+ */
19840+ AuWarn1("timestamps for i%lu are ignored "
19841+ "since it is on readonly branch (hi%lu).\n",
19842+ inode->i_ino, h_inode->i_ino);
19843+ } else if (flags & ~S_ATIME) {
19844+ err = -EIO;
19845+ AuIOErr1("unexpected flags 0x%x\n", flags);
19846+ AuDebugOn(1);
19847+ }
19848+
0c3ec466 19849+ lockdep_off();
38d290e6
JR
19850+ if (!err)
19851+ au_cpup_attr_timesizes(inode);
0c3ec466
AM
19852+ ii_write_unlock(inode);
19853+ si_read_unlock(sb);
19854+ lockdep_on();
38d290e6
JR
19855+
19856+ if (!err && (flags & S_VERSION))
19857+ inode_inc_iversion(inode);
19858+
0c3ec466 19859+ return err;
4a4d8108 19860+}
1facf9fc 19861+
4a4d8108 19862+/* ---------------------------------------------------------------------- */
1308ab2a 19863+
b95c5147
AM
19864+/* no getattr version will be set by module.c:aufs_init() */
19865+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
19866+ aufs_iop[] = {
19867+ [AuIop_SYMLINK] = {
19868+ .permission = aufs_permission,
c1595e42 19869+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19870+ .get_acl = aufs_get_acl,
19871+ .set_acl = aufs_set_acl, /* unsupport for symlink? */
c1595e42
JR
19872+#endif
19873+
b95c5147
AM
19874+ .setattr = aufs_setattr,
19875+ .getattr = aufs_getattr,
0c3ec466 19876+
c1595e42 19877+#ifdef CONFIG_AUFS_XATTR
b95c5147
AM
19878+ .setxattr = aufs_setxattr,
19879+ .getxattr = aufs_getxattr,
19880+ .listxattr = aufs_listxattr,
19881+ .removexattr = aufs_removexattr,
c1595e42
JR
19882+#endif
19883+
b95c5147 19884+ .readlink = generic_readlink,
febd17d6 19885+ .get_link = aufs_get_link,
0c3ec466 19886+
b95c5147
AM
19887+ /* .update_time = aufs_update_time */
19888+ },
19889+ [AuIop_DIR] = {
19890+ .create = aufs_create,
19891+ .lookup = aufs_lookup,
19892+ .link = aufs_link,
19893+ .unlink = aufs_unlink,
19894+ .symlink = aufs_symlink,
19895+ .mkdir = aufs_mkdir,
19896+ .rmdir = aufs_rmdir,
19897+ .mknod = aufs_mknod,
19898+ .rename = aufs_rename,
19899+
19900+ .permission = aufs_permission,
c1595e42 19901+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19902+ .get_acl = aufs_get_acl,
19903+ .set_acl = aufs_set_acl,
c1595e42
JR
19904+#endif
19905+
b95c5147
AM
19906+ .setattr = aufs_setattr,
19907+ .getattr = aufs_getattr,
0c3ec466 19908+
c1595e42 19909+#ifdef CONFIG_AUFS_XATTR
b95c5147
AM
19910+ .setxattr = aufs_setxattr,
19911+ .getxattr = aufs_getxattr,
19912+ .listxattr = aufs_listxattr,
19913+ .removexattr = aufs_removexattr,
c1595e42
JR
19914+#endif
19915+
b95c5147
AM
19916+ .update_time = aufs_update_time,
19917+ .atomic_open = aufs_atomic_open,
19918+ .tmpfile = aufs_tmpfile
19919+ },
19920+ [AuIop_OTHER] = {
19921+ .permission = aufs_permission,
c1595e42 19922+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19923+ .get_acl = aufs_get_acl,
19924+ .set_acl = aufs_set_acl,
c1595e42
JR
19925+#endif
19926+
b95c5147
AM
19927+ .setattr = aufs_setattr,
19928+ .getattr = aufs_getattr,
0c3ec466 19929+
c1595e42 19930+#ifdef CONFIG_AUFS_XATTR
b95c5147
AM
19931+ .setxattr = aufs_setxattr,
19932+ .getxattr = aufs_getxattr,
19933+ .listxattr = aufs_listxattr,
19934+ .removexattr = aufs_removexattr,
c1595e42
JR
19935+#endif
19936+
b95c5147
AM
19937+ .update_time = aufs_update_time
19938+ }
4a4d8108 19939+};
7f207e10
AM
19940diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
19941--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 19942+++ linux/fs/aufs/i_op_del.c 2016-10-09 16:55:36.492701639 +0200
5afbbe0d 19943@@ -0,0 +1,511 @@
1facf9fc 19944+/*
8cdd5066 19945+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 19946+ *
19947+ * This program, aufs is free software; you can redistribute it and/or modify
19948+ * it under the terms of the GNU General Public License as published by
19949+ * the Free Software Foundation; either version 2 of the License, or
19950+ * (at your option) any later version.
dece6358
AM
19951+ *
19952+ * This program is distributed in the hope that it will be useful,
19953+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19954+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19955+ * GNU General Public License for more details.
19956+ *
19957+ * You should have received a copy of the GNU General Public License
523b37e3 19958+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 19959+ */
19960+
19961+/*
4a4d8108 19962+ * inode operations (del entry)
1308ab2a 19963+ */
dece6358 19964+
1308ab2a 19965+#include "aufs.h"
dece6358 19966+
4a4d8108
AM
19967+/*
19968+ * decide if a new whiteout for @dentry is necessary or not.
19969+ * when it is necessary, prepare the parent dir for the upper branch whose
19970+ * branch index is @bcpup for creation. the actual creation of the whiteout will
19971+ * be done by caller.
19972+ * return value:
19973+ * 0: wh is unnecessary
19974+ * plus: wh is necessary
19975+ * minus: error
19976+ */
19977+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 19978+{
4a4d8108 19979+ int need_wh, err;
5afbbe0d 19980+ aufs_bindex_t btop;
4a4d8108 19981+ struct super_block *sb;
dece6358 19982+
4a4d8108 19983+ sb = dentry->d_sb;
5afbbe0d 19984+ btop = au_dbtop(dentry);
4a4d8108 19985+ if (*bcpup < 0) {
5afbbe0d
AM
19986+ *bcpup = btop;
19987+ if (au_test_ro(sb, btop, d_inode(dentry))) {
4a4d8108
AM
19988+ err = AuWbrCopyup(au_sbi(sb), dentry);
19989+ *bcpup = err;
19990+ if (unlikely(err < 0))
19991+ goto out;
19992+ }
19993+ } else
5afbbe0d 19994+ AuDebugOn(btop < *bcpup
5527c038 19995+ || au_test_ro(sb, *bcpup, d_inode(dentry)));
5afbbe0d 19996+ AuDbg("bcpup %d, btop %d\n", *bcpup, btop);
1308ab2a 19997+
5afbbe0d 19998+ if (*bcpup != btop) {
4a4d8108
AM
19999+ err = au_cpup_dirs(dentry, *bcpup);
20000+ if (unlikely(err))
20001+ goto out;
20002+ need_wh = 1;
20003+ } else {
027c5e7a 20004+ struct au_dinfo *dinfo, *tmp;
4a4d8108 20005+
027c5e7a
AM
20006+ need_wh = -ENOMEM;
20007+ dinfo = au_di(dentry);
20008+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
20009+ if (tmp) {
20010+ au_di_cp(tmp, dinfo);
20011+ au_di_swap(tmp, dinfo);
20012+ /* returns the number of positive dentries */
5afbbe0d
AM
20013+ need_wh = au_lkup_dentry(dentry, btop + 1,
20014+ /* AuLkup_IGNORE_PERM */ 0);
027c5e7a
AM
20015+ au_di_swap(tmp, dinfo);
20016+ au_rw_write_unlock(&tmp->di_rwsem);
20017+ au_di_free(tmp);
4a4d8108
AM
20018+ }
20019+ }
20020+ AuDbg("need_wh %d\n", need_wh);
20021+ err = need_wh;
20022+
4f0767ce 20023+out:
4a4d8108 20024+ return err;
1facf9fc 20025+}
20026+
4a4d8108
AM
20027+/*
20028+ * simple tests for the del-entry operations.
20029+ * following the checks in vfs, plus the parent-child relationship.
20030+ */
20031+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
20032+ struct dentry *h_parent, int isdir)
1facf9fc 20033+{
4a4d8108
AM
20034+ int err;
20035+ umode_t h_mode;
20036+ struct dentry *h_dentry, *h_latest;
1308ab2a 20037+ struct inode *h_inode;
1facf9fc 20038+
4a4d8108 20039+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 20040+ if (d_really_is_positive(dentry)) {
4a4d8108 20041+ err = -ENOENT;
5527c038
JR
20042+ if (unlikely(d_is_negative(h_dentry)))
20043+ goto out;
20044+ h_inode = d_inode(h_dentry);
20045+ if (unlikely(!h_inode->i_nlink))
4a4d8108 20046+ goto out;
1facf9fc 20047+
4a4d8108
AM
20048+ h_mode = h_inode->i_mode;
20049+ if (!isdir) {
20050+ err = -EISDIR;
20051+ if (unlikely(S_ISDIR(h_mode)))
20052+ goto out;
20053+ } else if (unlikely(!S_ISDIR(h_mode))) {
20054+ err = -ENOTDIR;
20055+ goto out;
20056+ }
20057+ } else {
20058+ /* rename(2) case */
20059+ err = -EIO;
5527c038 20060+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
20061+ goto out;
20062+ }
1facf9fc 20063+
4a4d8108
AM
20064+ err = -ENOENT;
20065+ /* expected parent dir is locked */
20066+ if (unlikely(h_parent != h_dentry->d_parent))
20067+ goto out;
20068+ err = 0;
20069+
20070+ /*
20071+ * rmdir a dir may break the consistency on some filesystem.
20072+ * let's try heavy test.
20073+ */
20074+ err = -EACCES;
076b876e 20075+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
5527c038 20076+ && au_test_h_perm(d_inode(h_parent),
076b876e 20077+ MAY_EXEC | MAY_WRITE)))
4a4d8108
AM
20078+ goto out;
20079+
076b876e 20080+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
4a4d8108
AM
20081+ err = -EIO;
20082+ if (IS_ERR(h_latest))
20083+ goto out;
20084+ if (h_latest == h_dentry)
20085+ err = 0;
20086+ dput(h_latest);
20087+
4f0767ce 20088+out:
4a4d8108 20089+ return err;
1308ab2a 20090+}
1facf9fc 20091+
4a4d8108
AM
20092+/*
20093+ * decide the branch where we operate for @dentry. the branch index will be set
20094+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
20095+ * dir for reverting.
20096+ * when a new whiteout is necessary, create it.
20097+ */
20098+static struct dentry*
20099+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
20100+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 20101+{
4a4d8108
AM
20102+ struct dentry *wh_dentry;
20103+ struct super_block *sb;
20104+ struct path h_path;
20105+ int err, need_wh;
20106+ unsigned int udba;
20107+ aufs_bindex_t bcpup;
dece6358 20108+
4a4d8108
AM
20109+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
20110+ wh_dentry = ERR_PTR(need_wh);
20111+ if (unlikely(need_wh < 0))
20112+ goto out;
20113+
20114+ sb = dentry->d_sb;
20115+ udba = au_opt_udba(sb);
20116+ bcpup = *rbcpup;
20117+ err = au_pin(pin, dentry, bcpup, udba,
20118+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
20119+ wh_dentry = ERR_PTR(err);
20120+ if (unlikely(err))
20121+ goto out;
20122+
20123+ h_path.dentry = au_pinned_h_parent(pin);
20124+ if (udba != AuOpt_UDBA_NONE
5afbbe0d 20125+ && au_dbtop(dentry) == bcpup) {
4a4d8108
AM
20126+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
20127+ wh_dentry = ERR_PTR(err);
20128+ if (unlikely(err))
20129+ goto out_unpin;
20130+ }
20131+
20132+ h_path.mnt = au_sbr_mnt(sb, bcpup);
20133+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
20134+ wh_dentry = NULL;
20135+ if (!need_wh)
20136+ goto out; /* success, no need to create whiteout */
20137+
20138+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
20139+ if (IS_ERR(wh_dentry))
20140+ goto out_unpin;
20141+
20142+ /* returns with the parent is locked and wh_dentry is dget-ed */
20143+ goto out; /* success */
20144+
4f0767ce 20145+out_unpin:
4a4d8108 20146+ au_unpin(pin);
4f0767ce 20147+out:
4a4d8108 20148+ return wh_dentry;
1facf9fc 20149+}
20150+
4a4d8108
AM
20151+/*
20152+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
20153+ * in order to be revertible and save time for removing many child whiteouts
20154+ * under the dir.
20155+ * returns 1 when there are too many child whiteout and caller should remove
20156+ * them asynchronously. returns 0 when the number of children is enough small to
20157+ * remove now or the branch fs is a remote fs.
20158+ * otherwise return an error.
20159+ */
20160+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
20161+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 20162+{
4a4d8108
AM
20163+ int rmdir_later, err, dirwh;
20164+ struct dentry *h_dentry;
20165+ struct super_block *sb;
5527c038 20166+ struct inode *inode;
4a4d8108
AM
20167+
20168+ sb = dentry->d_sb;
20169+ SiMustAnyLock(sb);
20170+ h_dentry = au_h_dptr(dentry, bindex);
20171+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
20172+ if (unlikely(err))
20173+ goto out;
20174+
20175+ /* stop monitoring */
5527c038
JR
20176+ inode = d_inode(dentry);
20177+ au_hn_free(au_hi(inode, bindex));
4a4d8108
AM
20178+
20179+ if (!au_test_fs_remote(h_dentry->d_sb)) {
20180+ dirwh = au_sbi(sb)->si_dirwh;
20181+ rmdir_later = (dirwh <= 1);
20182+ if (!rmdir_later)
20183+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
20184+ dirwh);
20185+ if (rmdir_later)
20186+ return rmdir_later;
20187+ }
1facf9fc 20188+
4a4d8108
AM
20189+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
20190+ if (unlikely(err)) {
523b37e3
AM
20191+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
20192+ h_dentry, bindex, err);
4a4d8108
AM
20193+ err = 0;
20194+ }
dece6358 20195+
4f0767ce 20196+out:
4a4d8108
AM
20197+ AuTraceErr(err);
20198+ return err;
20199+}
1308ab2a 20200+
4a4d8108
AM
20201+/*
20202+ * final procedure for deleting a entry.
20203+ * maintain dentry and iattr.
20204+ */
20205+static void epilog(struct inode *dir, struct dentry *dentry,
20206+ aufs_bindex_t bindex)
20207+{
20208+ struct inode *inode;
1308ab2a 20209+
5527c038 20210+ inode = d_inode(dentry);
4a4d8108
AM
20211+ d_drop(dentry);
20212+ inode->i_ctime = dir->i_ctime;
1308ab2a 20213+
b912730e 20214+ au_dir_ts(dir, bindex);
4a4d8108 20215+ dir->i_version++;
1facf9fc 20216+}
20217+
4a4d8108
AM
20218+/*
20219+ * when an error happened, remove the created whiteout and revert everything.
20220+ */
7f207e10
AM
20221+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
20222+ aufs_bindex_t bwh, struct dentry *wh_dentry,
20223+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 20224+{
4a4d8108
AM
20225+ int rerr;
20226+ struct path h_path = {
20227+ .dentry = wh_dentry,
7f207e10 20228+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 20229+ };
dece6358 20230+
7f207e10 20231+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
20232+ if (!rerr) {
20233+ au_set_dbwh(dentry, bwh);
20234+ au_dtime_revert(dt);
20235+ return 0;
20236+ }
dece6358 20237+
523b37e3 20238+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
4a4d8108 20239+ return -EIO;
1facf9fc 20240+}
20241+
4a4d8108 20242+/* ---------------------------------------------------------------------- */
1facf9fc 20243+
4a4d8108 20244+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 20245+{
4a4d8108 20246+ int err;
5afbbe0d 20247+ aufs_bindex_t bwh, bindex, btop;
523b37e3 20248+ struct inode *inode, *h_dir, *delegated;
4a4d8108 20249+ struct dentry *parent, *wh_dentry;
c2b27bf2
AM
20250+ /* to reduce stack size */
20251+ struct {
20252+ struct au_dtime dt;
20253+ struct au_pin pin;
20254+ struct path h_path;
20255+ } *a;
1facf9fc 20256+
4a4d8108 20257+ IMustLock(dir);
027c5e7a 20258+
c2b27bf2
AM
20259+ err = -ENOMEM;
20260+ a = kmalloc(sizeof(*a), GFP_NOFS);
20261+ if (unlikely(!a))
20262+ goto out;
20263+
027c5e7a
AM
20264+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
20265+ if (unlikely(err))
c2b27bf2 20266+ goto out_free;
027c5e7a
AM
20267+ err = au_d_hashed_positive(dentry);
20268+ if (unlikely(err))
20269+ goto out_unlock;
5527c038 20270+ inode = d_inode(dentry);
4a4d8108 20271+ IMustLock(inode);
027c5e7a 20272+ err = -EISDIR;
2000de60 20273+ if (unlikely(d_is_dir(dentry)))
027c5e7a 20274+ goto out_unlock; /* possible? */
1facf9fc 20275+
5afbbe0d 20276+ btop = au_dbtop(dentry);
4a4d8108
AM
20277+ bwh = au_dbwh(dentry);
20278+ bindex = -1;
027c5e7a
AM
20279+ parent = dentry->d_parent; /* dir inode is locked */
20280+ di_write_lock_parent(parent);
c2b27bf2
AM
20281+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
20282+ &a->pin);
4a4d8108
AM
20283+ err = PTR_ERR(wh_dentry);
20284+ if (IS_ERR(wh_dentry))
027c5e7a 20285+ goto out_parent;
1facf9fc 20286+
5afbbe0d
AM
20287+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, btop);
20288+ a->h_path.dentry = au_h_dptr(dentry, btop);
c2b27bf2 20289+ dget(a->h_path.dentry);
5afbbe0d 20290+ if (bindex == btop) {
c2b27bf2 20291+ h_dir = au_pinned_h_dir(&a->pin);
523b37e3
AM
20292+ delegated = NULL;
20293+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
20294+ if (unlikely(err == -EWOULDBLOCK)) {
20295+ pr_warn("cannot retry for NFSv4 delegation"
20296+ " for an internal unlink\n");
20297+ iput(delegated);
20298+ }
4a4d8108
AM
20299+ } else {
20300+ /* dir inode is locked */
5527c038 20301+ h_dir = d_inode(wh_dentry->d_parent);
4a4d8108
AM
20302+ IMustLock(h_dir);
20303+ err = 0;
20304+ }
dece6358 20305+
4a4d8108 20306+ if (!err) {
7f207e10 20307+ vfsub_drop_nlink(inode);
4a4d8108
AM
20308+ epilog(dir, dentry, bindex);
20309+
20310+ /* update target timestamps */
5afbbe0d 20311+ if (bindex == btop) {
c2b27bf2
AM
20312+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
20313+ /*ignore*/
5527c038 20314+ inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
4a4d8108
AM
20315+ } else
20316+ /* todo: this timestamp may be reverted later */
20317+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 20318+ goto out_unpin; /* success */
1facf9fc 20319+ }
20320+
4a4d8108
AM
20321+ /* revert */
20322+ if (wh_dentry) {
20323+ int rerr;
20324+
c2b27bf2
AM
20325+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20326+ &a->dt);
4a4d8108
AM
20327+ if (rerr)
20328+ err = rerr;
dece6358 20329+ }
1facf9fc 20330+
027c5e7a 20331+out_unpin:
c2b27bf2 20332+ au_unpin(&a->pin);
4a4d8108 20333+ dput(wh_dentry);
c2b27bf2 20334+ dput(a->h_path.dentry);
027c5e7a 20335+out_parent:
4a4d8108 20336+ di_write_unlock(parent);
027c5e7a 20337+out_unlock:
4a4d8108 20338+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2 20339+out_free:
f0c0a007 20340+ au_delayed_kfree(a);
027c5e7a 20341+out:
4a4d8108 20342+ return err;
dece6358
AM
20343+}
20344+
4a4d8108 20345+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 20346+{
4a4d8108 20347+ int err, rmdir_later;
5afbbe0d 20348+ aufs_bindex_t bwh, bindex, btop;
4a4d8108
AM
20349+ struct inode *inode;
20350+ struct dentry *parent, *wh_dentry, *h_dentry;
20351+ struct au_whtmp_rmdir *args;
c2b27bf2
AM
20352+ /* to reduce stack size */
20353+ struct {
20354+ struct au_dtime dt;
20355+ struct au_pin pin;
20356+ } *a;
1facf9fc 20357+
4a4d8108 20358+ IMustLock(dir);
027c5e7a 20359+
c2b27bf2
AM
20360+ err = -ENOMEM;
20361+ a = kmalloc(sizeof(*a), GFP_NOFS);
20362+ if (unlikely(!a))
20363+ goto out;
20364+
027c5e7a
AM
20365+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
20366+ if (unlikely(err))
c2b27bf2 20367+ goto out_free;
53392da6
AM
20368+ err = au_alive_dir(dentry);
20369+ if (unlikely(err))
027c5e7a 20370+ goto out_unlock;
5527c038 20371+ inode = d_inode(dentry);
4a4d8108 20372+ IMustLock(inode);
027c5e7a 20373+ err = -ENOTDIR;
2000de60 20374+ if (unlikely(!d_is_dir(dentry)))
027c5e7a 20375+ goto out_unlock; /* possible? */
dece6358 20376+
4a4d8108
AM
20377+ err = -ENOMEM;
20378+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
20379+ if (unlikely(!args))
20380+ goto out_unlock;
dece6358 20381+
4a4d8108
AM
20382+ parent = dentry->d_parent; /* dir inode is locked */
20383+ di_write_lock_parent(parent);
20384+ err = au_test_empty(dentry, &args->whlist);
20385+ if (unlikely(err))
027c5e7a 20386+ goto out_parent;
1facf9fc 20387+
5afbbe0d 20388+ btop = au_dbtop(dentry);
4a4d8108
AM
20389+ bwh = au_dbwh(dentry);
20390+ bindex = -1;
c2b27bf2
AM
20391+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
20392+ &a->pin);
4a4d8108
AM
20393+ err = PTR_ERR(wh_dentry);
20394+ if (IS_ERR(wh_dentry))
027c5e7a 20395+ goto out_parent;
1facf9fc 20396+
5afbbe0d 20397+ h_dentry = au_h_dptr(dentry, btop);
4a4d8108
AM
20398+ dget(h_dentry);
20399+ rmdir_later = 0;
5afbbe0d
AM
20400+ if (bindex == btop) {
20401+ err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
4a4d8108
AM
20402+ if (err > 0) {
20403+ rmdir_later = err;
20404+ err = 0;
20405+ }
20406+ } else {
20407+ /* stop monitoring */
5afbbe0d 20408+ au_hn_free(au_hi(inode, btop));
4a4d8108
AM
20409+
20410+ /* dir inode is locked */
5527c038 20411+ IMustLock(d_inode(wh_dentry->d_parent));
1facf9fc 20412+ err = 0;
20413+ }
20414+
4a4d8108 20415+ if (!err) {
027c5e7a 20416+ vfsub_dead_dir(inode);
4a4d8108
AM
20417+ au_set_dbdiropq(dentry, -1);
20418+ epilog(dir, dentry, bindex);
1308ab2a 20419+
4a4d8108 20420+ if (rmdir_later) {
5afbbe0d 20421+ au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
4a4d8108
AM
20422+ args = NULL;
20423+ }
1308ab2a 20424+
4a4d8108 20425+ goto out_unpin; /* success */
1facf9fc 20426+ }
20427+
4a4d8108
AM
20428+ /* revert */
20429+ AuLabel(revert);
20430+ if (wh_dentry) {
20431+ int rerr;
1308ab2a 20432+
c2b27bf2
AM
20433+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20434+ &a->dt);
4a4d8108
AM
20435+ if (rerr)
20436+ err = rerr;
1facf9fc 20437+ }
20438+
4f0767ce 20439+out_unpin:
c2b27bf2 20440+ au_unpin(&a->pin);
4a4d8108
AM
20441+ dput(wh_dentry);
20442+ dput(h_dentry);
027c5e7a 20443+out_parent:
4a4d8108
AM
20444+ di_write_unlock(parent);
20445+ if (args)
20446+ au_whtmp_rmdir_free(args);
4f0767ce 20447+out_unlock:
4a4d8108 20448+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2 20449+out_free:
f0c0a007 20450+ au_delayed_kfree(a);
4f0767ce 20451+out:
4a4d8108
AM
20452+ AuTraceErr(err);
20453+ return err;
dece6358 20454+}
7f207e10
AM
20455diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
20456--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 20457+++ linux/fs/aufs/i_op_ren.c 2016-10-09 16:55:36.492701639 +0200
b95c5147 20458@@ -0,0 +1,1015 @@
1facf9fc 20459+/*
8cdd5066 20460+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 20461+ *
20462+ * This program, aufs is free software; you can redistribute it and/or modify
20463+ * it under the terms of the GNU General Public License as published by
20464+ * the Free Software Foundation; either version 2 of the License, or
20465+ * (at your option) any later version.
dece6358
AM
20466+ *
20467+ * This program is distributed in the hope that it will be useful,
20468+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20469+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20470+ * GNU General Public License for more details.
20471+ *
20472+ * You should have received a copy of the GNU General Public License
523b37e3 20473+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 20474+ */
20475+
20476+/*
4a4d8108
AM
20477+ * inode operation (rename entry)
20478+ * todo: this is a crazy monster
1facf9fc 20479+ */
20480+
20481+#include "aufs.h"
20482+
4a4d8108
AM
20483+enum { AuSRC, AuDST, AuSrcDst };
20484+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 20485+
4a4d8108
AM
20486+#define AuRen_ISDIR 1
20487+#define AuRen_ISSAMEDIR (1 << 1)
20488+#define AuRen_WHSRC (1 << 2)
20489+#define AuRen_WHDST (1 << 3)
20490+#define AuRen_MNT_WRITE (1 << 4)
20491+#define AuRen_DT_DSTDIR (1 << 5)
20492+#define AuRen_DIROPQ (1 << 6)
4a4d8108 20493+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
20494+#define au_fset_ren(flags, name) \
20495+ do { (flags) |= AuRen_##name; } while (0)
20496+#define au_fclr_ren(flags, name) \
20497+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 20498+
4a4d8108
AM
20499+struct au_ren_args {
20500+ struct {
20501+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
20502+ *wh_dentry;
20503+ struct inode *dir, *inode;
20504+ struct au_hinode *hdir;
20505+ struct au_dtime dt[AuParentChild];
5afbbe0d 20506+ aufs_bindex_t btop;
4a4d8108 20507+ } sd[AuSrcDst];
1facf9fc 20508+
4a4d8108
AM
20509+#define src_dentry sd[AuSRC].dentry
20510+#define src_dir sd[AuSRC].dir
20511+#define src_inode sd[AuSRC].inode
20512+#define src_h_dentry sd[AuSRC].h_dentry
20513+#define src_parent sd[AuSRC].parent
20514+#define src_h_parent sd[AuSRC].h_parent
20515+#define src_wh_dentry sd[AuSRC].wh_dentry
20516+#define src_hdir sd[AuSRC].hdir
20517+#define src_h_dir sd[AuSRC].hdir->hi_inode
20518+#define src_dt sd[AuSRC].dt
5afbbe0d 20519+#define src_btop sd[AuSRC].btop
1facf9fc 20520+
4a4d8108
AM
20521+#define dst_dentry sd[AuDST].dentry
20522+#define dst_dir sd[AuDST].dir
20523+#define dst_inode sd[AuDST].inode
20524+#define dst_h_dentry sd[AuDST].h_dentry
20525+#define dst_parent sd[AuDST].parent
20526+#define dst_h_parent sd[AuDST].h_parent
20527+#define dst_wh_dentry sd[AuDST].wh_dentry
20528+#define dst_hdir sd[AuDST].hdir
20529+#define dst_h_dir sd[AuDST].hdir->hi_inode
20530+#define dst_dt sd[AuDST].dt
5afbbe0d 20531+#define dst_btop sd[AuDST].btop
4a4d8108
AM
20532+
20533+ struct dentry *h_trap;
20534+ struct au_branch *br;
20535+ struct au_hinode *src_hinode;
20536+ struct path h_path;
20537+ struct au_nhash whlist;
027c5e7a 20538+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
1facf9fc 20539+
1308ab2a 20540+ unsigned int flags;
1facf9fc 20541+
4a4d8108
AM
20542+ struct au_whtmp_rmdir *thargs;
20543+ struct dentry *h_dst;
20544+};
1308ab2a 20545+
4a4d8108 20546+/* ---------------------------------------------------------------------- */
1308ab2a 20547+
4a4d8108
AM
20548+/*
20549+ * functions for reverting.
20550+ * when an error happened in a single rename systemcall, we should revert
79b8bda9 20551+ * everything as if nothing happened.
4a4d8108
AM
20552+ * we don't need to revert the copied-up/down parent dirs since they are
20553+ * harmless.
20554+ */
1facf9fc 20555+
4a4d8108
AM
20556+#define RevertFailure(fmt, ...) do { \
20557+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
20558+ ##__VA_ARGS__, err, rerr); \
20559+ err = -EIO; \
20560+} while (0)
1facf9fc 20561+
4a4d8108 20562+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
1facf9fc 20563+{
4a4d8108 20564+ int rerr;
1facf9fc 20565+
5afbbe0d 20566+ au_hn_inode_lock_nested(a->src_hinode, AuLsc_I_CHILD);
4a4d8108 20567+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
5afbbe0d 20568+ au_hn_inode_unlock(a->src_hinode);
027c5e7a 20569+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
4a4d8108 20570+ if (rerr)
523b37e3 20571+ RevertFailure("remove diropq %pd", a->src_dentry);
4a4d8108 20572+}
1facf9fc 20573+
4a4d8108
AM
20574+static void au_ren_rev_rename(int err, struct au_ren_args *a)
20575+{
20576+ int rerr;
523b37e3 20577+ struct inode *delegated;
1facf9fc 20578+
b4510431
AM
20579+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
20580+ a->src_h_parent);
4a4d8108
AM
20581+ rerr = PTR_ERR(a->h_path.dentry);
20582+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20583+ RevertFailure("lkup one %pd", a->src_dentry);
4a4d8108 20584+ return;
1facf9fc 20585+ }
20586+
523b37e3 20587+ delegated = NULL;
4a4d8108
AM
20588+ rerr = vfsub_rename(a->dst_h_dir,
20589+ au_h_dptr(a->src_dentry, a->btgt),
523b37e3
AM
20590+ a->src_h_dir, &a->h_path, &delegated);
20591+ if (unlikely(rerr == -EWOULDBLOCK)) {
20592+ pr_warn("cannot retry for NFSv4 delegation"
20593+ " for an internal rename\n");
20594+ iput(delegated);
20595+ }
4a4d8108
AM
20596+ d_drop(a->h_path.dentry);
20597+ dput(a->h_path.dentry);
20598+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
20599+ if (rerr)
523b37e3 20600+ RevertFailure("rename %pd", a->src_dentry);
1facf9fc 20601+}
20602+
4a4d8108 20603+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 20604+{
4a4d8108 20605+ int rerr;
523b37e3 20606+ struct inode *delegated;
dece6358 20607+
b4510431
AM
20608+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
20609+ a->dst_h_parent);
4a4d8108
AM
20610+ rerr = PTR_ERR(a->h_path.dentry);
20611+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20612+ RevertFailure("lkup one %pd", a->dst_dentry);
4a4d8108
AM
20613+ return;
20614+ }
5527c038 20615+ if (d_is_positive(a->h_path.dentry)) {
4a4d8108
AM
20616+ d_drop(a->h_path.dentry);
20617+ dput(a->h_path.dentry);
20618+ return;
dece6358
AM
20619+ }
20620+
523b37e3
AM
20621+ delegated = NULL;
20622+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
20623+ &delegated);
20624+ if (unlikely(rerr == -EWOULDBLOCK)) {
20625+ pr_warn("cannot retry for NFSv4 delegation"
20626+ " for an internal rename\n");
20627+ iput(delegated);
20628+ }
4a4d8108
AM
20629+ d_drop(a->h_path.dentry);
20630+ dput(a->h_path.dentry);
20631+ if (!rerr)
20632+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
20633+ else
523b37e3 20634+ RevertFailure("rename %pd", a->h_dst);
4a4d8108 20635+}
1308ab2a 20636+
4a4d8108
AM
20637+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
20638+{
20639+ int rerr;
1308ab2a 20640+
4a4d8108
AM
20641+ a->h_path.dentry = a->src_wh_dentry;
20642+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 20643+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108 20644+ if (rerr)
523b37e3 20645+ RevertFailure("unlink %pd", a->src_wh_dentry);
4a4d8108 20646+}
4a4d8108 20647+#undef RevertFailure
1facf9fc 20648+
1308ab2a 20649+/* ---------------------------------------------------------------------- */
20650+
4a4d8108
AM
20651+/*
20652+ * when we have to copyup the renaming entry, do it with the rename-target name
20653+ * in order to minimize the cost (the later actual rename is unnecessary).
20654+ * otherwise rename it on the target branch.
20655+ */
20656+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 20657+{
dece6358 20658+ int err;
4a4d8108 20659+ struct dentry *d;
523b37e3 20660+ struct inode *delegated;
1facf9fc 20661+
4a4d8108 20662+ d = a->src_dentry;
5afbbe0d 20663+ if (au_dbtop(d) == a->btgt) {
4a4d8108
AM
20664+ a->h_path.dentry = a->dst_h_dentry;
20665+ if (au_ftest_ren(a->flags, DIROPQ)
20666+ && au_dbdiropq(d) == a->btgt)
20667+ au_fclr_ren(a->flags, DIROPQ);
5afbbe0d 20668+ AuDebugOn(au_dbtop(d) != a->btgt);
523b37e3 20669+ delegated = NULL;
4a4d8108 20670+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
523b37e3
AM
20671+ a->dst_h_dir, &a->h_path, &delegated);
20672+ if (unlikely(err == -EWOULDBLOCK)) {
20673+ pr_warn("cannot retry for NFSv4 delegation"
20674+ " for an internal rename\n");
20675+ iput(delegated);
20676+ }
c2b27bf2 20677+ } else
86dc4139 20678+ BUG();
1308ab2a 20679+
027c5e7a
AM
20680+ if (!err && a->h_dst)
20681+ /* it will be set to dinfo later */
20682+ dget(a->h_dst);
1facf9fc 20683+
dece6358
AM
20684+ return err;
20685+}
1facf9fc 20686+
4a4d8108
AM
20687+/* cf. aufs_rmdir() */
20688+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 20689+{
4a4d8108
AM
20690+ int err;
20691+ struct inode *dir;
1facf9fc 20692+
4a4d8108
AM
20693+ dir = a->dst_dir;
20694+ SiMustAnyLock(dir->i_sb);
20695+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
20696+ au_sbi(dir->i_sb)->si_dirwh)
20697+ || au_test_fs_remote(a->h_dst->d_sb)) {
20698+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
20699+ if (unlikely(err))
523b37e3
AM
20700+ pr_warn("failed removing whtmp dir %pd (%d), "
20701+ "ignored.\n", a->h_dst, err);
4a4d8108
AM
20702+ } else {
20703+ au_nhash_wh_free(&a->thargs->whlist);
20704+ a->thargs->whlist = a->whlist;
20705+ a->whlist.nh_num = 0;
20706+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
20707+ dput(a->h_dst);
20708+ a->thargs = NULL;
20709+ }
20710+
20711+ return 0;
1308ab2a 20712+}
1facf9fc 20713+
4a4d8108
AM
20714+/* make it 'opaque' dir. */
20715+static int au_ren_diropq(struct au_ren_args *a)
20716+{
20717+ int err;
20718+ struct dentry *diropq;
1facf9fc 20719+
4a4d8108 20720+ err = 0;
027c5e7a 20721+ a->src_bdiropq = au_dbdiropq(a->src_dentry);
4a4d8108 20722+ a->src_hinode = au_hi(a->src_inode, a->btgt);
5afbbe0d 20723+ au_hn_inode_lock_nested(a->src_hinode, AuLsc_I_CHILD);
4a4d8108 20724+ diropq = au_diropq_create(a->src_dentry, a->btgt);
5afbbe0d 20725+ au_hn_inode_unlock(a->src_hinode);
4a4d8108
AM
20726+ if (IS_ERR(diropq))
20727+ err = PTR_ERR(diropq);
076b876e
AM
20728+ else
20729+ dput(diropq);
1facf9fc 20730+
4a4d8108
AM
20731+ return err;
20732+}
1facf9fc 20733+
4a4d8108
AM
20734+static int do_rename(struct au_ren_args *a)
20735+{
20736+ int err;
20737+ struct dentry *d, *h_d;
1facf9fc 20738+
4a4d8108
AM
20739+ /* prepare workqueue args for asynchronous rmdir */
20740+ h_d = a->dst_h_dentry;
5527c038 20741+ if (au_ftest_ren(a->flags, ISDIR) && d_is_positive(h_d)) {
4a4d8108
AM
20742+ err = -ENOMEM;
20743+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
20744+ if (unlikely(!a->thargs))
20745+ goto out;
20746+ a->h_dst = dget(h_d);
20747+ }
1facf9fc 20748+
4a4d8108
AM
20749+ /* create whiteout for src_dentry */
20750+ if (au_ftest_ren(a->flags, WHSRC)) {
027c5e7a
AM
20751+ a->src_bwh = au_dbwh(a->src_dentry);
20752+ AuDebugOn(a->src_bwh >= 0);
4a4d8108
AM
20753+ a->src_wh_dentry
20754+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
20755+ err = PTR_ERR(a->src_wh_dentry);
20756+ if (IS_ERR(a->src_wh_dentry))
20757+ goto out_thargs;
20758+ }
1facf9fc 20759+
4a4d8108
AM
20760+ /* lookup whiteout for dentry */
20761+ if (au_ftest_ren(a->flags, WHDST)) {
20762+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
20763+ a->br);
20764+ err = PTR_ERR(h_d);
20765+ if (IS_ERR(h_d))
20766+ goto out_whsrc;
5527c038 20767+ if (d_is_negative(h_d))
4a4d8108
AM
20768+ dput(h_d);
20769+ else
20770+ a->dst_wh_dentry = h_d;
20771+ }
1facf9fc 20772+
4a4d8108
AM
20773+ /* rename dentry to tmpwh */
20774+ if (a->thargs) {
20775+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
20776+ if (unlikely(err))
20777+ goto out_whdst;
dece6358 20778+
4a4d8108
AM
20779+ d = a->dst_dentry;
20780+ au_set_h_dptr(d, a->btgt, NULL);
86dc4139 20781+ err = au_lkup_neg(d, a->btgt, /*wh*/0);
4a4d8108
AM
20782+ if (unlikely(err))
20783+ goto out_whtmp;
20784+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
20785+ }
1facf9fc 20786+
5afbbe0d 20787+ BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_btop != a->btgt);
1facf9fc 20788+
4a4d8108
AM
20789+ /* rename by vfs_rename or cpup */
20790+ d = a->dst_dentry;
20791+ if (au_ftest_ren(a->flags, ISDIR)
20792+ && (a->dst_wh_dentry
20793+ || au_dbdiropq(d) == a->btgt
20794+ /* hide the lower to keep xino */
5afbbe0d 20795+ || a->btgt < au_dbbot(d)
4a4d8108
AM
20796+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
20797+ au_fset_ren(a->flags, DIROPQ);
20798+ err = au_ren_or_cpup(a);
20799+ if (unlikely(err))
20800+ /* leave the copied-up one */
20801+ goto out_whtmp;
1308ab2a 20802+
4a4d8108
AM
20803+ /* make dir opaque */
20804+ if (au_ftest_ren(a->flags, DIROPQ)) {
20805+ err = au_ren_diropq(a);
20806+ if (unlikely(err))
20807+ goto out_rename;
20808+ }
1308ab2a 20809+
4a4d8108 20810+ /* update target timestamps */
5afbbe0d 20811+ AuDebugOn(au_dbtop(a->src_dentry) != a->btgt);
4a4d8108
AM
20812+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
20813+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
5527c038 20814+ a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
1facf9fc 20815+
4a4d8108
AM
20816+ /* remove whiteout for dentry */
20817+ if (a->dst_wh_dentry) {
20818+ a->h_path.dentry = a->dst_wh_dentry;
20819+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
20820+ a->dst_dentry);
20821+ if (unlikely(err))
20822+ goto out_diropq;
20823+ }
1facf9fc 20824+
4a4d8108
AM
20825+ /* remove whtmp */
20826+ if (a->thargs)
20827+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 20828+
076b876e 20829+ au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
4a4d8108
AM
20830+ err = 0;
20831+ goto out_success;
20832+
4f0767ce 20833+out_diropq:
4a4d8108
AM
20834+ if (au_ftest_ren(a->flags, DIROPQ))
20835+ au_ren_rev_diropq(err, a);
4f0767ce 20836+out_rename:
7e9cd9fe 20837+ au_ren_rev_rename(err, a);
027c5e7a 20838+ dput(a->h_dst);
4f0767ce 20839+out_whtmp:
4a4d8108
AM
20840+ if (a->thargs)
20841+ au_ren_rev_whtmp(err, a);
4f0767ce 20842+out_whdst:
4a4d8108
AM
20843+ dput(a->dst_wh_dentry);
20844+ a->dst_wh_dentry = NULL;
4f0767ce 20845+out_whsrc:
4a4d8108
AM
20846+ if (a->src_wh_dentry)
20847+ au_ren_rev_whsrc(err, a);
4f0767ce 20848+out_success:
4a4d8108
AM
20849+ dput(a->src_wh_dentry);
20850+ dput(a->dst_wh_dentry);
4f0767ce 20851+out_thargs:
4a4d8108
AM
20852+ if (a->thargs) {
20853+ dput(a->h_dst);
20854+ au_whtmp_rmdir_free(a->thargs);
20855+ a->thargs = NULL;
20856+ }
4f0767ce 20857+out:
4a4d8108 20858+ return err;
dece6358 20859+}
1facf9fc 20860+
1308ab2a 20861+/* ---------------------------------------------------------------------- */
1facf9fc 20862+
4a4d8108
AM
20863+/*
20864+ * test if @dentry dir can be rename destination or not.
20865+ * success means, it is a logically empty dir.
20866+ */
20867+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
1308ab2a 20868+{
4a4d8108 20869+ return au_test_empty(dentry, whlist);
1308ab2a 20870+}
1facf9fc 20871+
4a4d8108
AM
20872+/*
20873+ * test if @dentry dir can be rename source or not.
20874+ * if it can, return 0.
20875+ * success means,
20876+ * - it is a logically empty dir.
20877+ * - or, it exists on writable branch and has no children including whiteouts
20878+ * on the lower branch.
20879+ */
20880+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
20881+{
20882+ int err;
20883+ unsigned int rdhash;
5afbbe0d 20884+ aufs_bindex_t btop;
1facf9fc 20885+
5afbbe0d
AM
20886+ btop = au_dbtop(dentry);
20887+ if (btop != btgt) {
4a4d8108 20888+ struct au_nhash whlist;
dece6358 20889+
4a4d8108
AM
20890+ SiMustAnyLock(dentry->d_sb);
20891+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
20892+ if (!rdhash)
20893+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
20894+ dentry));
20895+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
20896+ if (unlikely(err))
20897+ goto out;
20898+ err = au_test_empty(dentry, &whlist);
20899+ au_nhash_wh_free(&whlist);
20900+ goto out;
20901+ }
dece6358 20902+
5afbbe0d 20903+ if (btop == au_dbtaildir(dentry))
4a4d8108 20904+ return 0; /* success */
dece6358 20905+
4a4d8108 20906+ err = au_test_empty_lower(dentry);
1facf9fc 20907+
4f0767ce 20908+out:
4a4d8108
AM
20909+ if (err == -ENOTEMPTY) {
20910+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
20911+ " is not supported\n");
20912+ err = -EXDEV;
20913+ }
20914+ return err;
20915+}
1308ab2a 20916+
4a4d8108
AM
20917+/* side effect: sets whlist and h_dentry */
20918+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 20919+{
4a4d8108
AM
20920+ int err;
20921+ unsigned int rdhash;
20922+ struct dentry *d;
1facf9fc 20923+
4a4d8108
AM
20924+ d = a->dst_dentry;
20925+ SiMustAnyLock(d->d_sb);
1facf9fc 20926+
4a4d8108
AM
20927+ err = 0;
20928+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
20929+ rdhash = au_sbi(d->d_sb)->si_rdhash;
20930+ if (!rdhash)
20931+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
20932+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
20933+ if (unlikely(err))
20934+ goto out;
1308ab2a 20935+
5afbbe0d 20936+ au_set_dbtop(d, a->dst_btop);
4a4d8108 20937+ err = may_rename_dstdir(d, &a->whlist);
5afbbe0d 20938+ au_set_dbtop(d, a->btgt);
4a4d8108 20939+ }
5afbbe0d 20940+ a->dst_h_dentry = au_h_dptr(d, au_dbtop(d));
4a4d8108
AM
20941+ if (unlikely(err))
20942+ goto out;
20943+
20944+ d = a->src_dentry;
5afbbe0d 20945+ a->src_h_dentry = au_h_dptr(d, au_dbtop(d));
4a4d8108
AM
20946+ if (au_ftest_ren(a->flags, ISDIR)) {
20947+ err = may_rename_srcdir(d, a->btgt);
20948+ if (unlikely(err)) {
20949+ au_nhash_wh_free(&a->whlist);
20950+ a->whlist.nh_num = 0;
20951+ }
20952+ }
4f0767ce 20953+out:
4a4d8108 20954+ return err;
1facf9fc 20955+}
20956+
4a4d8108 20957+/* ---------------------------------------------------------------------- */
1facf9fc 20958+
4a4d8108
AM
20959+/*
20960+ * simple tests for rename.
20961+ * following the checks in vfs, plus the parent-child relationship.
20962+ */
20963+static int au_may_ren(struct au_ren_args *a)
20964+{
20965+ int err, isdir;
20966+ struct inode *h_inode;
1facf9fc 20967+
5afbbe0d 20968+ if (a->src_btop == a->btgt) {
4a4d8108
AM
20969+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
20970+ au_ftest_ren(a->flags, ISDIR));
20971+ if (unlikely(err))
20972+ goto out;
20973+ err = -EINVAL;
20974+ if (unlikely(a->src_h_dentry == a->h_trap))
20975+ goto out;
20976+ }
1facf9fc 20977+
4a4d8108 20978+ err = 0;
5afbbe0d 20979+ if (a->dst_btop != a->btgt)
4a4d8108 20980+ goto out;
1facf9fc 20981+
027c5e7a
AM
20982+ err = -ENOTEMPTY;
20983+ if (unlikely(a->dst_h_dentry == a->h_trap))
20984+ goto out;
20985+
4a4d8108 20986+ err = -EIO;
4a4d8108 20987+ isdir = !!au_ftest_ren(a->flags, ISDIR);
5527c038
JR
20988+ if (d_really_is_negative(a->dst_dentry)) {
20989+ if (d_is_negative(a->dst_h_dentry))
20990+ err = au_may_add(a->dst_dentry, a->btgt,
20991+ a->dst_h_parent, isdir);
4a4d8108 20992+ } else {
5527c038 20993+ if (unlikely(d_is_negative(a->dst_h_dentry)))
4a4d8108 20994+ goto out;
5527c038
JR
20995+ h_inode = d_inode(a->dst_h_dentry);
20996+ if (h_inode->i_nlink)
20997+ err = au_may_del(a->dst_dentry, a->btgt,
20998+ a->dst_h_parent, isdir);
4a4d8108 20999+ }
1facf9fc 21000+
4f0767ce 21001+out:
4a4d8108
AM
21002+ if (unlikely(err == -ENOENT || err == -EEXIST))
21003+ err = -EIO;
21004+ AuTraceErr(err);
21005+ return err;
21006+}
1facf9fc 21007+
1308ab2a 21008+/* ---------------------------------------------------------------------- */
1facf9fc 21009+
4a4d8108
AM
21010+/*
21011+ * locking order
21012+ * (VFS)
21013+ * - src_dir and dir by lock_rename()
21014+ * - inode if exists
21015+ * (aufs)
21016+ * - lock all
21017+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
21018+ * + si_read_lock
21019+ * + di_write_lock2_child()
21020+ * + di_write_lock_child()
21021+ * + ii_write_lock_child()
21022+ * + di_write_lock_child2()
21023+ * + ii_write_lock_child2()
21024+ * + src_parent and parent
21025+ * + di_write_lock_parent()
21026+ * + ii_write_lock_parent()
21027+ * + di_write_lock_parent2()
21028+ * + ii_write_lock_parent2()
21029+ * + lower src_dir and dir by vfsub_lock_rename()
21030+ * + verify every relationship between child and parent. if any
21031+ * of them failed, unlock all and return -EBUSY.
21032+ */
21033+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 21034+{
4a4d8108
AM
21035+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
21036+ a->dst_h_parent, a->dst_hdir);
86dc4139
AM
21037+ if (au_ftest_ren(a->flags, MNT_WRITE))
21038+ vfsub_mnt_drop_write(au_br_mnt(a->br));
1308ab2a 21039+}
21040+
4a4d8108 21041+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 21042+{
4a4d8108
AM
21043+ int err;
21044+ unsigned int udba;
1308ab2a 21045+
4a4d8108
AM
21046+ err = 0;
21047+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
21048+ a->src_hdir = au_hi(a->src_dir, a->btgt);
21049+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
21050+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
86dc4139
AM
21051+
21052+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
21053+ if (unlikely(err))
21054+ goto out;
21055+ au_fset_ren(a->flags, MNT_WRITE);
4a4d8108
AM
21056+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
21057+ a->dst_h_parent, a->dst_hdir);
21058+ udba = au_opt_udba(a->src_dentry->d_sb);
5527c038
JR
21059+ if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
21060+ || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
4a4d8108 21061+ err = au_busy_or_stale();
5afbbe0d 21062+ if (!err && au_dbtop(a->src_dentry) == a->btgt)
4a4d8108 21063+ err = au_h_verify(a->src_h_dentry, udba,
5527c038 21064+ d_inode(a->src_h_parent), a->src_h_parent,
4a4d8108 21065+ a->br);
5afbbe0d 21066+ if (!err && au_dbtop(a->dst_dentry) == a->btgt)
4a4d8108 21067+ err = au_h_verify(a->dst_h_dentry, udba,
5527c038 21068+ d_inode(a->dst_h_parent), a->dst_h_parent,
4a4d8108 21069+ a->br);
86dc4139 21070+ if (!err)
4a4d8108 21071+ goto out; /* success */
4a4d8108
AM
21072+
21073+ err = au_busy_or_stale();
4a4d8108 21074+ au_ren_unlock(a);
86dc4139 21075+
4f0767ce 21076+out:
4a4d8108 21077+ return err;
1facf9fc 21078+}
21079+
21080+/* ---------------------------------------------------------------------- */
21081+
4a4d8108 21082+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 21083+{
4a4d8108 21084+ struct inode *dir;
dece6358 21085+
4a4d8108
AM
21086+ dir = a->dst_dir;
21087+ dir->i_version++;
21088+ if (au_ftest_ren(a->flags, ISDIR)) {
21089+ /* is this updating defined in POSIX? */
21090+ au_cpup_attr_timesizes(a->src_inode);
21091+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 21092+ }
027c5e7a 21093+
b912730e 21094+ au_dir_ts(dir, a->btgt);
dece6358 21095+
4a4d8108
AM
21096+ if (au_ftest_ren(a->flags, ISSAMEDIR))
21097+ return;
dece6358 21098+
4a4d8108
AM
21099+ dir = a->src_dir;
21100+ dir->i_version++;
21101+ if (au_ftest_ren(a->flags, ISDIR))
21102+ au_cpup_attr_nlink(dir, /*force*/1);
b912730e 21103+ au_dir_ts(dir, a->btgt);
1facf9fc 21104+}
21105+
4a4d8108 21106+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 21107+{
5afbbe0d 21108+ aufs_bindex_t bbot, bindex;
4a4d8108
AM
21109+ struct dentry *d, *h_d;
21110+ struct inode *i, *h_i;
21111+ struct super_block *sb;
dece6358 21112+
027c5e7a
AM
21113+ d = a->dst_dentry;
21114+ d_drop(d);
21115+ if (a->h_dst)
21116+ /* already dget-ed by au_ren_or_cpup() */
21117+ au_set_h_dptr(d, a->btgt, a->h_dst);
21118+
21119+ i = a->dst_inode;
21120+ if (i) {
21121+ if (!au_ftest_ren(a->flags, ISDIR))
21122+ vfsub_drop_nlink(i);
21123+ else {
21124+ vfsub_dead_dir(i);
21125+ au_cpup_attr_timesizes(i);
21126+ }
21127+ au_update_dbrange(d, /*do_put_zero*/1);
21128+ } else {
5afbbe0d
AM
21129+ bbot = a->btgt;
21130+ for (bindex = au_dbtop(d); bindex < bbot; bindex++)
027c5e7a 21131+ au_set_h_dptr(d, bindex, NULL);
5afbbe0d
AM
21132+ bbot = au_dbbot(d);
21133+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++)
027c5e7a
AM
21134+ au_set_h_dptr(d, bindex, NULL);
21135+ au_update_dbrange(d, /*do_put_zero*/0);
21136+ }
21137+
4a4d8108
AM
21138+ d = a->src_dentry;
21139+ au_set_dbwh(d, -1);
5afbbe0d
AM
21140+ bbot = au_dbbot(d);
21141+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
4a4d8108
AM
21142+ h_d = au_h_dptr(d, bindex);
21143+ if (h_d)
21144+ au_set_h_dptr(d, bindex, NULL);
21145+ }
5afbbe0d 21146+ au_set_dbbot(d, a->btgt);
4a4d8108
AM
21147+
21148+ sb = d->d_sb;
21149+ i = a->src_inode;
21150+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
21151+ return; /* success */
21152+
5afbbe0d
AM
21153+ bbot = au_ibbot(i);
21154+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
4a4d8108
AM
21155+ h_i = au_h_iptr(i, bindex);
21156+ if (h_i) {
21157+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
21158+ /* ignore this error */
21159+ au_set_h_iptr(i, bindex, NULL, 0);
21160+ }
21161+ }
5afbbe0d 21162+ au_set_ibbot(i, a->btgt);
1308ab2a 21163+}
dece6358 21164+
4a4d8108
AM
21165+/* ---------------------------------------------------------------------- */
21166+
21167+/* mainly for link(2) and rename(2) */
21168+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 21169+{
4a4d8108
AM
21170+ aufs_bindex_t bdiropq, bwh;
21171+ struct dentry *parent;
21172+ struct au_branch *br;
21173+
21174+ parent = dentry->d_parent;
5527c038 21175+ IMustLock(d_inode(parent)); /* dir is locked */
4a4d8108
AM
21176+
21177+ bdiropq = au_dbdiropq(parent);
21178+ bwh = au_dbwh(dentry);
21179+ br = au_sbr(dentry->d_sb, btgt);
21180+ if (au_br_rdonly(br)
21181+ || (0 <= bdiropq && bdiropq < btgt)
21182+ || (0 <= bwh && bwh < btgt))
21183+ btgt = -1;
21184+
21185+ AuDbg("btgt %d\n", btgt);
21186+ return btgt;
1facf9fc 21187+}
21188+
5afbbe0d 21189+/* sets src_btop, dst_btop and btgt */
4a4d8108 21190+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 21191+{
4a4d8108
AM
21192+ int err;
21193+ struct au_wr_dir_args wr_dir_args = {
21194+ /* .force_btgt = -1, */
21195+ .flags = AuWrDir_ADD_ENTRY
21196+ };
dece6358 21197+
5afbbe0d
AM
21198+ a->src_btop = au_dbtop(a->src_dentry);
21199+ a->dst_btop = au_dbtop(a->dst_dentry);
4a4d8108
AM
21200+ if (au_ftest_ren(a->flags, ISDIR))
21201+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
5afbbe0d
AM
21202+ wr_dir_args.force_btgt = a->src_btop;
21203+ if (a->dst_inode && a->dst_btop < a->src_btop)
21204+ wr_dir_args.force_btgt = a->dst_btop;
4a4d8108
AM
21205+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
21206+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
21207+ a->btgt = err;
dece6358 21208+
4a4d8108 21209+ return err;
1facf9fc 21210+}
21211+
4a4d8108 21212+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 21213+{
4a4d8108
AM
21214+ a->h_path.dentry = a->src_h_parent;
21215+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
21216+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
21217+ a->h_path.dentry = a->dst_h_parent;
21218+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
21219+ }
1facf9fc 21220+
4a4d8108
AM
21221+ au_fclr_ren(a->flags, DT_DSTDIR);
21222+ if (!au_ftest_ren(a->flags, ISDIR))
21223+ return;
dece6358 21224+
4a4d8108
AM
21225+ a->h_path.dentry = a->src_h_dentry;
21226+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
5527c038 21227+ if (d_is_positive(a->dst_h_dentry)) {
4a4d8108
AM
21228+ au_fset_ren(a->flags, DT_DSTDIR);
21229+ a->h_path.dentry = a->dst_h_dentry;
21230+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
21231+ }
1308ab2a 21232+}
dece6358 21233+
4a4d8108 21234+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 21235+{
4a4d8108 21236+ struct dentry *h_d;
febd17d6 21237+ struct inode *h_inode;
4a4d8108
AM
21238+
21239+ au_dtime_revert(a->src_dt + AuPARENT);
21240+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
21241+ au_dtime_revert(a->dst_dt + AuPARENT);
21242+
21243+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
21244+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
febd17d6
JR
21245+ h_inode = d_inode(h_d);
21246+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 21247+ au_dtime_revert(a->src_dt + AuCHILD);
febd17d6 21248+ inode_unlock(h_inode);
4a4d8108
AM
21249+
21250+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
21251+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
febd17d6
JR
21252+ h_inode = d_inode(h_d);
21253+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 21254+ au_dtime_revert(a->dst_dt + AuCHILD);
febd17d6 21255+ inode_unlock(h_inode);
1facf9fc 21256+ }
21257+ }
21258+}
21259+
4a4d8108
AM
21260+/* ---------------------------------------------------------------------- */
21261+
21262+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
21263+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 21264+{
e49829fe 21265+ int err, flags;
4a4d8108
AM
21266+ /* reduce stack space */
21267+ struct au_ren_args *a;
21268+
523b37e3 21269+ AuDbg("%pd, %pd\n", _src_dentry, _dst_dentry);
4a4d8108
AM
21270+ IMustLock(_src_dir);
21271+ IMustLock(_dst_dir);
21272+
21273+ err = -ENOMEM;
21274+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
21275+ a = kzalloc(sizeof(*a), GFP_NOFS);
21276+ if (unlikely(!a))
21277+ goto out;
21278+
21279+ a->src_dir = _src_dir;
21280+ a->src_dentry = _src_dentry;
5527c038
JR
21281+ a->src_inode = NULL;
21282+ if (d_really_is_positive(a->src_dentry))
21283+ a->src_inode = d_inode(a->src_dentry);
4a4d8108
AM
21284+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
21285+ a->dst_dir = _dst_dir;
21286+ a->dst_dentry = _dst_dentry;
5527c038
JR
21287+ a->dst_inode = NULL;
21288+ if (d_really_is_positive(a->dst_dentry))
21289+ a->dst_inode = d_inode(a->dst_dentry);
4a4d8108
AM
21290+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
21291+ if (a->dst_inode) {
21292+ IMustLock(a->dst_inode);
21293+ au_igrab(a->dst_inode);
1facf9fc 21294+ }
1facf9fc 21295+
4a4d8108 21296+ err = -ENOTDIR;
027c5e7a 21297+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
2000de60 21298+ if (d_is_dir(a->src_dentry)) {
4a4d8108 21299+ au_fset_ren(a->flags, ISDIR);
5527c038 21300+ if (unlikely(d_really_is_positive(a->dst_dentry)
2000de60 21301+ && !d_is_dir(a->dst_dentry)))
4a4d8108 21302+ goto out_free;
b95c5147
AM
21303+ flags |= AuLock_DIRS;
21304+ }
21305+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, flags);
e49829fe
JR
21306+ if (unlikely(err))
21307+ goto out_free;
1facf9fc 21308+
027c5e7a
AM
21309+ err = au_d_hashed_positive(a->src_dentry);
21310+ if (unlikely(err))
21311+ goto out_unlock;
21312+ err = -ENOENT;
21313+ if (a->dst_inode) {
21314+ /*
21315+ * If it is a dir, VFS unhash dst_dentry before this
21316+ * function. It means we cannot rely upon d_unhashed().
21317+ */
21318+ if (unlikely(!a->dst_inode->i_nlink))
21319+ goto out_unlock;
21320+ if (!S_ISDIR(a->dst_inode->i_mode)) {
21321+ err = au_d_hashed_positive(a->dst_dentry);
21322+ if (unlikely(err))
21323+ goto out_unlock;
21324+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
21325+ goto out_unlock;
21326+ } else if (unlikely(d_unhashed(a->dst_dentry)))
21327+ goto out_unlock;
21328+
7eafdf33
AM
21329+ /*
21330+ * is it possible?
79b8bda9 21331+ * yes, it happened (in linux-3.3-rcN) but I don't know why.
7eafdf33
AM
21332+ * there may exist a problem somewhere else.
21333+ */
21334+ err = -EINVAL;
5527c038 21335+ if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
7eafdf33
AM
21336+ goto out_unlock;
21337+
4a4d8108
AM
21338+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
21339+ di_write_lock_parent(a->dst_parent);
1facf9fc 21340+
4a4d8108
AM
21341+ /* which branch we process */
21342+ err = au_ren_wbr(a);
21343+ if (unlikely(err < 0))
027c5e7a 21344+ goto out_parent;
4a4d8108 21345+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
86dc4139 21346+ a->h_path.mnt = au_br_mnt(a->br);
1facf9fc 21347+
4a4d8108
AM
21348+ /* are they available to be renamed */
21349+ err = au_ren_may_dir(a);
21350+ if (unlikely(err))
21351+ goto out_children;
1facf9fc 21352+
4a4d8108 21353+ /* prepare the writable parent dir on the same branch */
5afbbe0d 21354+ if (a->dst_btop == a->btgt) {
4a4d8108
AM
21355+ au_fset_ren(a->flags, WHDST);
21356+ } else {
21357+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
21358+ if (unlikely(err))
21359+ goto out_children;
21360+ }
1facf9fc 21361+
4a4d8108
AM
21362+ if (a->src_dir != a->dst_dir) {
21363+ /*
21364+ * this temporary unlock is safe,
21365+ * because both dir->i_mutex are locked.
21366+ */
21367+ di_write_unlock(a->dst_parent);
21368+ di_write_lock_parent(a->src_parent);
21369+ err = au_wr_dir_need_wh(a->src_dentry,
21370+ au_ftest_ren(a->flags, ISDIR),
21371+ &a->btgt);
21372+ di_write_unlock(a->src_parent);
21373+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
21374+ au_fclr_ren(a->flags, ISSAMEDIR);
21375+ } else
21376+ err = au_wr_dir_need_wh(a->src_dentry,
21377+ au_ftest_ren(a->flags, ISDIR),
21378+ &a->btgt);
21379+ if (unlikely(err < 0))
21380+ goto out_children;
21381+ if (err)
21382+ au_fset_ren(a->flags, WHSRC);
1facf9fc 21383+
86dc4139 21384+ /* cpup src */
5afbbe0d 21385+ if (a->src_btop != a->btgt) {
86dc4139
AM
21386+ struct au_pin pin;
21387+
21388+ err = au_pin(&pin, a->src_dentry, a->btgt,
21389+ au_opt_udba(a->src_dentry->d_sb),
21390+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 21391+ if (!err) {
c2b27bf2
AM
21392+ struct au_cp_generic cpg = {
21393+ .dentry = a->src_dentry,
21394+ .bdst = a->btgt,
5afbbe0d 21395+ .bsrc = a->src_btop,
c2b27bf2
AM
21396+ .len = -1,
21397+ .pin = &pin,
21398+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21399+ };
5afbbe0d 21400+ AuDebugOn(au_dbtop(a->src_dentry) != a->src_btop);
c2b27bf2 21401+ err = au_sio_cpup_simple(&cpg);
367653fa 21402+ au_unpin(&pin);
86dc4139 21403+ }
86dc4139
AM
21404+ if (unlikely(err))
21405+ goto out_children;
5afbbe0d 21406+ a->src_btop = a->btgt;
86dc4139
AM
21407+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
21408+ au_fset_ren(a->flags, WHSRC);
21409+ }
21410+
4a4d8108
AM
21411+ /* lock them all */
21412+ err = au_ren_lock(a);
21413+ if (unlikely(err))
86dc4139 21414+ /* leave the copied-up one */
4a4d8108 21415+ goto out_children;
1facf9fc 21416+
4a4d8108
AM
21417+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
21418+ err = au_may_ren(a);
21419+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
21420+ err = -ENAMETOOLONG;
21421+ if (unlikely(err))
21422+ goto out_hdir;
1facf9fc 21423+
4a4d8108
AM
21424+ /* store timestamps to be revertible */
21425+ au_ren_dt(a);
1facf9fc 21426+
4a4d8108
AM
21427+ /* here we go */
21428+ err = do_rename(a);
21429+ if (unlikely(err))
21430+ goto out_dt;
21431+
21432+ /* update dir attributes */
21433+ au_ren_refresh_dir(a);
21434+
21435+ /* dput/iput all lower dentries */
21436+ au_ren_refresh(a);
21437+
21438+ goto out_hdir; /* success */
21439+
4f0767ce 21440+out_dt:
4a4d8108 21441+ au_ren_rev_dt(err, a);
4f0767ce 21442+out_hdir:
4a4d8108 21443+ au_ren_unlock(a);
4f0767ce 21444+out_children:
4a4d8108 21445+ au_nhash_wh_free(&a->whlist);
5afbbe0d
AM
21446+ if (err && a->dst_inode && a->dst_btop != a->btgt) {
21447+ AuDbg("btop %d, btgt %d\n", a->dst_btop, a->btgt);
027c5e7a 21448+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
5afbbe0d 21449+ au_set_dbtop(a->dst_dentry, a->dst_btop);
4a4d8108 21450+ }
027c5e7a 21451+out_parent:
4a4d8108
AM
21452+ if (!err)
21453+ d_move(a->src_dentry, a->dst_dentry);
027c5e7a 21454+ else {
5afbbe0d 21455+ au_update_dbtop(a->dst_dentry);
027c5e7a
AM
21456+ if (!a->dst_inode)
21457+ d_drop(a->dst_dentry);
21458+ }
4a4d8108
AM
21459+ if (au_ftest_ren(a->flags, ISSAMEDIR))
21460+ di_write_unlock(a->dst_parent);
21461+ else
21462+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 21463+out_unlock:
4a4d8108 21464+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 21465+out_free:
4a4d8108
AM
21466+ iput(a->dst_inode);
21467+ if (a->thargs)
21468+ au_whtmp_rmdir_free(a->thargs);
f0c0a007 21469+ au_delayed_kfree(a);
4f0767ce 21470+out:
4a4d8108
AM
21471+ AuTraceErr(err);
21472+ return err;
1308ab2a 21473+}
7f207e10
AM
21474diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
21475--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
e2f27e51 21476+++ linux/fs/aufs/Kconfig 2016-10-09 16:55:36.482701377 +0200
c1595e42 21477@@ -0,0 +1,185 @@
4a4d8108
AM
21478+config AUFS_FS
21479+ tristate "Aufs (Advanced multi layered unification filesystem) support"
4a4d8108
AM
21480+ help
21481+ Aufs is a stackable unification filesystem such as Unionfs,
21482+ which unifies several directories and provides a merged single
21483+ directory.
21484+ In the early days, aufs was an entirely re-designed and
21485+ re-implemented Unionfs Version 1.x series. Introducing many
21486+ original ideas, approaches and improvements, it has become totally
21487+ different from Unionfs while keeping the basic features.
1facf9fc 21488+
4a4d8108
AM
21489+if AUFS_FS
21490+choice
21491+ prompt "Maximum number of branches"
21492+ default AUFS_BRANCH_MAX_127
21493+ help
21494+ Specifies the maximum number of branches (or member directories)
21495+ in a single aufs. The larger value consumes more system
21496+ resources and has a minor impact to performance.
21497+config AUFS_BRANCH_MAX_127
21498+ bool "127"
21499+ help
21500+ Specifies the maximum number of branches (or member directories)
21501+ in a single aufs. The larger value consumes more system
21502+ resources and has a minor impact to performance.
21503+config AUFS_BRANCH_MAX_511
21504+ bool "511"
21505+ help
21506+ Specifies the maximum number of branches (or member directories)
21507+ in a single aufs. The larger value consumes more system
21508+ resources and has a minor impact to performance.
21509+config AUFS_BRANCH_MAX_1023
21510+ bool "1023"
21511+ help
21512+ Specifies the maximum number of branches (or member directories)
21513+ in a single aufs. The larger value consumes more system
21514+ resources and has a minor impact to performance.
21515+config AUFS_BRANCH_MAX_32767
21516+ bool "32767"
21517+ help
21518+ Specifies the maximum number of branches (or member directories)
21519+ in a single aufs. The larger value consumes more system
21520+ resources and has a minor impact to performance.
21521+endchoice
1facf9fc 21522+
e49829fe
JR
21523+config AUFS_SBILIST
21524+ bool
21525+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
21526+ default y
21527+ help
21528+ Automatic configuration for internal use.
21529+ When aufs supports Magic SysRq or /proc, enabled automatically.
21530+
4a4d8108
AM
21531+config AUFS_HNOTIFY
21532+ bool "Detect direct branch access (bypassing aufs)"
21533+ help
21534+ If you want to modify files on branches directly, eg. bypassing aufs,
21535+ and want aufs to detect the changes of them fully, then enable this
21536+ option and use 'udba=notify' mount option.
7f207e10 21537+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
21538+ It will have a negative impact to the performance.
21539+ See detail in aufs.5.
dece6358 21540+
4a4d8108
AM
21541+choice
21542+ prompt "method" if AUFS_HNOTIFY
21543+ default AUFS_HFSNOTIFY
21544+config AUFS_HFSNOTIFY
21545+ bool "fsnotify"
21546+ select FSNOTIFY
4a4d8108 21547+endchoice
1facf9fc 21548+
4a4d8108
AM
21549+config AUFS_EXPORT
21550+ bool "NFS-exportable aufs"
2cbb1c4b 21551+ depends on EXPORTFS
4a4d8108
AM
21552+ help
21553+ If you want to export your mounted aufs via NFS, then enable this
21554+ option. There are several requirements for this configuration.
21555+ See detail in aufs.5.
1facf9fc 21556+
4a4d8108
AM
21557+config AUFS_INO_T_64
21558+ bool
21559+ depends on AUFS_EXPORT
21560+ depends on 64BIT && !(ALPHA || S390)
21561+ default y
21562+ help
21563+ Automatic configuration for internal use.
21564+ /* typedef unsigned long/int __kernel_ino_t */
21565+ /* alpha and s390x are int */
1facf9fc 21566+
c1595e42
JR
21567+config AUFS_XATTR
21568+ bool "support for XATTR/EA (including Security Labels)"
21569+ help
21570+ If your branch fs supports XATTR/EA and you want to make them
21571+ available in aufs too, then enable this option and specify the
21572+ branch attributes for EA.
21573+ See detail in aufs.5.
21574+
076b876e
AM
21575+config AUFS_FHSM
21576+ bool "File-based Hierarchical Storage Management"
21577+ help
21578+ Hierarchical Storage Management (or HSM) is a well-known feature
21579+ in the storage world. Aufs provides this feature as file-based
21580+ with multiple branches.
21581+ These multiple branches are prioritized, ie. the topmost one
21582+ should be the fastest drive and be used heavily.
21583+
4a4d8108
AM
21584+config AUFS_RDU
21585+ bool "Readdir in userspace"
21586+ help
21587+ Aufs has two methods to provide a merged view for a directory,
21588+ by a user-space library and by kernel-space natively. The latter
21589+ is always enabled but sometimes large and slow.
21590+ If you enable this option, install the library in aufs2-util
21591+ package, and set some environment variables for your readdir(3),
21592+ then the work will be handled in user-space which generally
21593+ shows better performance in most cases.
21594+ See detail in aufs.5.
1facf9fc 21595+
4a4d8108
AM
21596+config AUFS_SHWH
21597+ bool "Show whiteouts"
21598+ help
21599+ If you want to make the whiteouts in aufs visible, then enable
21600+ this option and specify 'shwh' mount option. Although it may
21601+ sound like philosophy or something, technically it
21602+ simply shows the names of whiteouts while keeping their behaviour.
1facf9fc 21603+
4a4d8108
AM
21604+config AUFS_BR_RAMFS
21605+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
21606+ help
21607+ If you want to use ramfs as an aufs branch fs, then enable this
21608+ option. Generally tmpfs is recommended.
21609+ Aufs prohibits them from being a branch fs by default, because
21610+ initramfs becomes unusable after switch_root or something
21611+ generally. If you set initramfs as an aufs branch and boot your
21612+ system by switch_root, you will meet a problem easily since the
21613+ files in initramfs may be inaccessible.
21614+ Unless you are going to use ramfs as an aufs branch fs without
21615+ switch_root or something, leave it N.
1facf9fc 21616+
4a4d8108
AM
21617+config AUFS_BR_FUSE
21618+ bool "Fuse fs as an aufs branch"
21619+ depends on FUSE_FS
21620+ select AUFS_POLL
21621+ help
21622+ If you want to use fuse-based userspace filesystem as an aufs
21623+ branch fs, then enable this option.
21624+ It implements the internal poll(2) operation which is
21625+ implemented by fuse only (currently).
1facf9fc 21626+
4a4d8108
AM
21627+config AUFS_POLL
21628+ bool
21629+ help
21630+ Automatic configuration for internal use.
1facf9fc 21631+
4a4d8108
AM
21632+config AUFS_BR_HFSPLUS
21633+ bool "Hfsplus as an aufs branch"
21634+ depends on HFSPLUS_FS
21635+ default y
21636+ help
21637+ If you want to use hfsplus fs as an aufs branch fs, then enable
21638+ this option. This option introduces a small overhead at
21639+ copying-up a file on hfsplus.
1facf9fc 21640+
4a4d8108
AM
21641+config AUFS_BDEV_LOOP
21642+ bool
21643+ depends on BLK_DEV_LOOP
21644+ default y
21645+ help
21646+ Automatic configuration for internal use.
21647+ Convert =[ym] into =y.
1308ab2a 21648+
4a4d8108
AM
21649+config AUFS_DEBUG
21650+ bool "Debug aufs"
21651+ help
21652+ Enable this to compile aufs internal debug code.
21653+ It will have a negative impact to the performance.
21654+
21655+config AUFS_MAGIC_SYSRQ
21656+ bool
21657+ depends on AUFS_DEBUG && MAGIC_SYSRQ
21658+ default y
21659+ help
21660+ Automatic configuration for internal use.
21661+ When aufs supports Magic SysRq, enabled automatically.
21662+endif
7f207e10
AM
21663diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
21664--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
21665+++ linux/fs/aufs/loop.c 2016-10-09 16:55:38.889431135 +0200
21666@@ -0,0 +1,147 @@
1facf9fc 21667+/*
8cdd5066 21668+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21669+ *
21670+ * This program, aufs is free software; you can redistribute it and/or modify
21671+ * it under the terms of the GNU General Public License as published by
21672+ * the Free Software Foundation; either version 2 of the License, or
21673+ * (at your option) any later version.
dece6358
AM
21674+ *
21675+ * This program is distributed in the hope that it will be useful,
21676+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21677+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21678+ * GNU General Public License for more details.
21679+ *
21680+ * You should have received a copy of the GNU General Public License
523b37e3 21681+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21682+ */
21683+
21684+/*
21685+ * support for loopback block device as a branch
21686+ */
21687+
1facf9fc 21688+#include "aufs.h"
21689+
392086de
AM
21690+/* added into drivers/block/loop.c */
21691+static struct file *(*backing_file_func)(struct super_block *sb);
21692+
1facf9fc 21693+/*
21694+ * test if two lower dentries have overlapping branches.
21695+ */
b752ccd1 21696+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 21697+{
b752ccd1 21698+ struct super_block *h_sb;
392086de
AM
21699+ struct file *backing_file;
21700+
21701+ if (unlikely(!backing_file_func)) {
21702+ /* don't load "loop" module here */
21703+ backing_file_func = symbol_get(loop_backing_file);
21704+ if (unlikely(!backing_file_func))
21705+ /* "loop" module is not loaded */
21706+ return 0;
21707+ }
1facf9fc 21708+
b752ccd1 21709+ h_sb = h_adding->d_sb;
392086de
AM
21710+ backing_file = backing_file_func(h_sb);
21711+ if (!backing_file)
1facf9fc 21712+ return 0;
21713+
2000de60 21714+ h_adding = backing_file->f_path.dentry;
b752ccd1
AM
21715+ /*
21716+ * h_adding can be local NFS.
21717+ * in this case aufs cannot detect the loop.
21718+ */
21719+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 21720+ return 1;
b752ccd1 21721+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 21722+}
21723+
21724+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
21725+int au_test_loopback_kthread(void)
21726+{
b752ccd1
AM
21727+ int ret;
21728+ struct task_struct *tsk = current;
a2a7ad62 21729+ char c, comm[sizeof(tsk->comm)];
b752ccd1
AM
21730+
21731+ ret = 0;
21732+ if (tsk->flags & PF_KTHREAD) {
a2a7ad62
AM
21733+ get_task_comm(comm, tsk);
21734+ c = comm[4];
b752ccd1 21735+ ret = ('0' <= c && c <= '9'
a2a7ad62 21736+ && !strncmp(comm, "loop", 4));
b752ccd1 21737+ }
1facf9fc 21738+
b752ccd1 21739+ return ret;
1facf9fc 21740+}
87a755f4
AM
21741+
21742+/* ---------------------------------------------------------------------- */
21743+
21744+#define au_warn_loopback_step 16
21745+static int au_warn_loopback_nelem = au_warn_loopback_step;
21746+static unsigned long *au_warn_loopback_array;
21747+
21748+void au_warn_loopback(struct super_block *h_sb)
21749+{
21750+ int i, new_nelem;
21751+ unsigned long *a, magic;
21752+ static DEFINE_SPINLOCK(spin);
21753+
21754+ magic = h_sb->s_magic;
21755+ spin_lock(&spin);
21756+ a = au_warn_loopback_array;
21757+ for (i = 0; i < au_warn_loopback_nelem && *a; i++)
21758+ if (a[i] == magic) {
21759+ spin_unlock(&spin);
21760+ return;
21761+ }
21762+
21763+ /* h_sb is new to us, print it */
21764+ if (i < au_warn_loopback_nelem) {
21765+ a[i] = magic;
21766+ goto pr;
21767+ }
21768+
21769+ /* expand the array */
21770+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
21771+ a = au_kzrealloc(au_warn_loopback_array,
21772+ au_warn_loopback_nelem * sizeof(unsigned long),
e2f27e51
AM
21773+ new_nelem * sizeof(unsigned long), GFP_ATOMIC,
21774+ /*may_shrink*/0);
87a755f4
AM
21775+ if (a) {
21776+ au_warn_loopback_nelem = new_nelem;
21777+ au_warn_loopback_array = a;
21778+ a[i] = magic;
21779+ goto pr;
21780+ }
21781+
21782+ spin_unlock(&spin);
21783+ AuWarn1("realloc failed, ignored\n");
21784+ return;
21785+
21786+pr:
21787+ spin_unlock(&spin);
0c3ec466
AM
21788+ pr_warn("you may want to try another patch for loopback file "
21789+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
87a755f4
AM
21790+}
21791+
21792+int au_loopback_init(void)
21793+{
21794+ int err;
21795+ struct super_block *sb __maybe_unused;
21796+
79b8bda9 21797+ BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
87a755f4
AM
21798+
21799+ err = 0;
21800+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
21801+ sizeof(unsigned long), GFP_NOFS);
21802+ if (unlikely(!au_warn_loopback_array))
21803+ err = -ENOMEM;
21804+
21805+ return err;
21806+}
21807+
21808+void au_loopback_fin(void)
21809+{
79b8bda9
AM
21810+ if (backing_file_func)
21811+ symbol_put(loop_backing_file);
f0c0a007 21812+ au_delayed_kfree(au_warn_loopback_array);
87a755f4 21813+}
7f207e10
AM
21814diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
21815--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 21816+++ linux/fs/aufs/loop.h 2016-10-09 16:55:36.492701639 +0200
523b37e3 21817@@ -0,0 +1,52 @@
1facf9fc 21818+/*
8cdd5066 21819+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21820+ *
21821+ * This program, aufs is free software; you can redistribute it and/or modify
21822+ * it under the terms of the GNU General Public License as published by
21823+ * the Free Software Foundation; either version 2 of the License, or
21824+ * (at your option) any later version.
dece6358
AM
21825+ *
21826+ * This program is distributed in the hope that it will be useful,
21827+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21828+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21829+ * GNU General Public License for more details.
21830+ *
21831+ * You should have received a copy of the GNU General Public License
523b37e3 21832+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21833+ */
21834+
21835+/*
21836+ * support for loopback mount as a branch
21837+ */
21838+
21839+#ifndef __AUFS_LOOP_H__
21840+#define __AUFS_LOOP_H__
21841+
21842+#ifdef __KERNEL__
21843+
dece6358
AM
21844+struct dentry;
21845+struct super_block;
1facf9fc 21846+
21847+#ifdef CONFIG_AUFS_BDEV_LOOP
392086de
AM
21848+/* drivers/block/loop.c */
21849+struct file *loop_backing_file(struct super_block *sb);
21850+
1facf9fc 21851+/* loop.c */
b752ccd1 21852+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 21853+int au_test_loopback_kthread(void);
87a755f4
AM
21854+void au_warn_loopback(struct super_block *h_sb);
21855+
21856+int au_loopback_init(void);
21857+void au_loopback_fin(void);
1facf9fc 21858+#else
4a4d8108 21859+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 21860+ struct dentry *h_adding)
4a4d8108 21861+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
21862+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
21863+
21864+AuStubInt0(au_loopback_init, void)
21865+AuStubVoid(au_loopback_fin, void)
1facf9fc 21866+#endif /* CONFIG_AUFS_BDEV_LOOP */
21867+
21868+#endif /* __KERNEL__ */
21869+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
21870diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
21871--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
e2f27e51 21872+++ linux/fs/aufs/magic.mk 2016-10-09 16:55:36.492701639 +0200
7e9cd9fe 21873@@ -0,0 +1,30 @@
1facf9fc 21874+
21875+# defined in ${srctree}/fs/fuse/inode.c
21876+# tristate
21877+ifdef CONFIG_FUSE_FS
21878+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
21879+endif
21880+
1facf9fc 21881+# defined in ${srctree}/fs/xfs/xfs_sb.h
21882+# tristate
21883+ifdef CONFIG_XFS_FS
21884+ccflags-y += -DXFS_SB_MAGIC=0x58465342
21885+endif
21886+
21887+# defined in ${srctree}/fs/configfs/mount.c
21888+# tristate
21889+ifdef CONFIG_CONFIGFS_FS
21890+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
21891+endif
21892+
1facf9fc 21893+# defined in ${srctree}/fs/ubifs/ubifs.h
21894+# tristate
21895+ifdef CONFIG_UBIFS_FS
21896+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
21897+endif
4a4d8108
AM
21898+
21899+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
21900+# tristate
21901+ifdef CONFIG_HFSPLUS_FS
21902+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
21903+endif
7f207e10
AM
21904diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
21905--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
e2f27e51 21906+++ linux/fs/aufs/Makefile 2016-10-09 16:55:36.486034798 +0200
c1595e42 21907@@ -0,0 +1,44 @@
4a4d8108
AM
21908+
21909+include ${src}/magic.mk
21910+ifeq (${CONFIG_AUFS_FS},m)
21911+include ${src}/conf.mk
21912+endif
21913+-include ${src}/priv_def.mk
21914+
21915+# cf. include/linux/kernel.h
21916+# enable pr_debug
21917+ccflags-y += -DDEBUG
f6c5ef8b
AM
21918+# sparse requires the full pathname
21919+ifdef M
523b37e3 21920+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
f6c5ef8b 21921+else
523b37e3 21922+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
f6c5ef8b 21923+endif
4a4d8108
AM
21924+
21925+obj-$(CONFIG_AUFS_FS) += aufs.o
21926+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
21927+ wkq.o vfsub.o dcsub.o \
e49829fe 21928+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
21929+ dinfo.o dentry.o \
21930+ dynop.o \
21931+ finfo.o file.o f_op.o \
21932+ dir.o vdir.o \
21933+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
c2b27bf2 21934+ mvdown.o ioctl.o
4a4d8108
AM
21935+
21936+# all are boolean
e49829fe 21937+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
21938+aufs-$(CONFIG_SYSFS) += sysfs.o
21939+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
21940+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
21941+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
21942+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108 21943+aufs-$(CONFIG_AUFS_EXPORT) += export.o
c1595e42
JR
21944+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
21945+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
076b876e 21946+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
4a4d8108
AM
21947+aufs-$(CONFIG_AUFS_POLL) += poll.o
21948+aufs-$(CONFIG_AUFS_RDU) += rdu.o
4a4d8108
AM
21949+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
21950+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
21951+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
21952diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
21953--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
21954+++ linux/fs/aufs/module.c 2016-10-09 16:55:38.889431135 +0200
21955@@ -0,0 +1,333 @@
1facf9fc 21956+/*
8cdd5066 21957+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21958+ *
21959+ * This program, aufs is free software; you can redistribute it and/or modify
21960+ * it under the terms of the GNU General Public License as published by
21961+ * the Free Software Foundation; either version 2 of the License, or
21962+ * (at your option) any later version.
dece6358
AM
21963+ *
21964+ * This program is distributed in the hope that it will be useful,
21965+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21966+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21967+ * GNU General Public License for more details.
21968+ *
21969+ * You should have received a copy of the GNU General Public License
523b37e3 21970+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21971+ */
21972+
21973+/*
21974+ * module global variables and operations
21975+ */
21976+
21977+#include <linux/module.h>
21978+#include <linux/seq_file.h>
21979+#include "aufs.h"
21980+
e2f27e51
AM
21981+/* shrinkable realloc */
21982+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink)
1facf9fc 21983+{
e2f27e51
AM
21984+ size_t sz;
21985+ int diff;
1facf9fc 21986+
e2f27e51
AM
21987+ sz = 0;
21988+ diff = -1;
21989+ if (p) {
21990+#if 0 /* unused */
21991+ if (!new_sz) {
21992+ au_delayed_kfree(p);
21993+ p = NULL;
21994+ goto out;
21995+ }
21996+#else
21997+ AuDebugOn(!new_sz);
21998+#endif
21999+ sz = ksize(p);
22000+ diff = au_kmidx_sub(sz, new_sz);
22001+ }
22002+ if (sz && !diff)
22003+ goto out;
22004+
22005+ if (sz < new_sz)
22006+ /* expand or SLOB */
22007+ p = krealloc(p, new_sz, gfp);
22008+ else if (new_sz < sz && may_shrink) {
22009+ /* shrink */
22010+ void *q;
22011+
22012+ q = kmalloc(new_sz, gfp);
22013+ if (q) {
22014+ if (p) {
22015+ memcpy(q, p, new_sz);
22016+ au_delayed_kfree(p);
22017+ }
22018+ p = q;
22019+ } else
22020+ p = NULL;
22021+ }
22022+
22023+out:
22024+ return p;
22025+}
22026+
22027+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
22028+ int may_shrink)
22029+{
22030+ p = au_krealloc(p, new_sz, gfp, may_shrink);
22031+ if (p && new_sz > nused)
1facf9fc 22032+ memset(p + nused, 0, new_sz - nused);
22033+ return p;
22034+}
22035+
22036+/* ---------------------------------------------------------------------- */
1facf9fc 22037+/*
22038+ * aufs caches
22039+ */
f0c0a007
AM
22040+
22041+struct au_dfree au_dfree;
22042+
22043+/* delayed free */
22044+static void au_do_dfree(struct work_struct *work __maybe_unused)
22045+{
22046+ struct llist_head *head;
22047+ struct llist_node *node, *next;
22048+
22049+#define AU_CACHE_DFREE_DO_BODY(name, idx, lnode) do { \
22050+ head = &au_dfree.cache[AuCache_##idx].llist; \
22051+ node = llist_del_all(head); \
22052+ for (; node; node = next) { \
e2f27e51
AM
22053+ struct au_##name *p \
22054+ = llist_entry(node, struct au_##name, \
22055+ lnode); \
f0c0a007
AM
22056+ next = llist_next(node); \
22057+ au_cache_free_##name(p); \
22058+ } \
22059+ } while (0)
22060+
22061+ AU_CACHE_DFREE_DO_BODY(dinfo, DINFO, di_lnode);
22062+ AU_CACHE_DFREE_DO_BODY(icntnr, ICNTNR, lnode);
22063+ AU_CACHE_DFREE_DO_BODY(finfo, FINFO, fi_lnode);
22064+ AU_CACHE_DFREE_DO_BODY(vdir, VDIR, vd_lnode);
22065+ AU_CACHE_DFREE_DO_BODY(vdir_dehstr, DEHSTR, lnode);
22066+#ifdef CONFIG_AUFS_HNOTIFY
22067+ AU_CACHE_DFREE_DO_BODY(hnotify, HNOTIFY, hn_lnode);
22068+#endif
22069+
22070+#define AU_DFREE_DO_BODY(llist, func) do { \
22071+ node = llist_del_all(llist); \
22072+ for (; node; node = next) { \
22073+ next = llist_next(node); \
22074+ func(node); \
22075+ } \
22076+ } while (0)
22077+
22078+ AU_DFREE_DO_BODY(au_dfree.llist + AU_DFREE_KFREE, kfree);
22079+ AU_DFREE_DO_BODY(au_dfree.llist + AU_DFREE_FREE_PAGE, au_free_page);
22080+
22081+#undef AU_CACHE_DFREE_DO_BODY
22082+#undef AU_DFREE_DO_BODY
22083+}
22084+
22085+AU_CACHE_DFREE_FUNC(dinfo, DINFO, di_lnode);
22086+AU_CACHE_DFREE_FUNC(icntnr, ICNTNR, lnode);
22087+AU_CACHE_DFREE_FUNC(finfo, FINFO, fi_lnode);
22088+AU_CACHE_DFREE_FUNC(vdir, VDIR, vd_lnode);
22089+AU_CACHE_DFREE_FUNC(vdir_dehstr, DEHSTR, lnode);
5afbbe0d
AM
22090+
22091+static void au_cache_fin(void)
22092+{
22093+ int i;
f0c0a007 22094+ struct au_cache *cp;
5afbbe0d
AM
22095+
22096+ /*
22097+ * Make sure all delayed rcu free inodes are flushed before we
22098+ * destroy cache.
22099+ */
22100+ rcu_barrier();
22101+
22102+ /* excluding AuCache_HNOTIFY */
22103+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
f0c0a007 22104+ flush_delayed_work(&au_dfree.dwork);
5afbbe0d 22105+ for (i = 0; i < AuCache_HNOTIFY; i++) {
f0c0a007
AM
22106+ cp = au_dfree.cache + i;
22107+ AuDebugOn(!llist_empty(&cp->llist));
22108+ kmem_cache_destroy(cp->cache);
22109+ cp->cache = NULL;
5afbbe0d
AM
22110+ }
22111+}
22112+
1facf9fc 22113+static int __init au_cache_init(void)
22114+{
f0c0a007
AM
22115+ struct au_cache *cp;
22116+
22117+ cp = au_dfree.cache;
22118+ cp[AuCache_DINFO].cache = AuCacheCtor(au_dinfo, au_di_init_once);
22119+ if (cp[AuCache_DINFO].cache)
027c5e7a 22120+ /* SLAB_DESTROY_BY_RCU */
f0c0a007
AM
22121+ cp[AuCache_ICNTNR].cache = AuCacheCtor(au_icntnr,
22122+ au_icntnr_init_once);
22123+ if (cp[AuCache_ICNTNR].cache)
22124+ cp[AuCache_FINFO].cache = AuCacheCtor(au_finfo,
22125+ au_fi_init_once);
22126+ if (cp[AuCache_FINFO].cache)
22127+ cp[AuCache_VDIR].cache = AuCache(au_vdir);
22128+ if (cp[AuCache_VDIR].cache)
22129+ cp[AuCache_DEHSTR].cache = AuCache(au_vdir_dehstr);
22130+ if (cp[AuCache_DEHSTR].cache)
1facf9fc 22131+ return 0;
22132+
5afbbe0d 22133+ au_cache_fin();
1facf9fc 22134+ return -ENOMEM;
22135+}
22136+
1facf9fc 22137+/* ---------------------------------------------------------------------- */
22138+
22139+int au_dir_roflags;
22140+
e49829fe 22141+#ifdef CONFIG_AUFS_SBILIST
1e00d052
AM
22142+/*
22143+ * iterate_supers_type() doesn't protect us from
22144+ * remounting (branch management)
22145+ */
5afbbe0d 22146+struct au_sphlhead au_sbilist;
e49829fe
JR
22147+#endif
22148+
1facf9fc 22149+/*
22150+ * functions for module interface.
22151+ */
22152+MODULE_LICENSE("GPL");
22153+/* MODULE_LICENSE("GPL v2"); */
dece6358 22154+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 22155+MODULE_DESCRIPTION(AUFS_NAME
22156+ " -- Advanced multi layered unification filesystem");
22157+MODULE_VERSION(AUFS_VERSION);
c06a8ce3 22158+MODULE_ALIAS_FS(AUFS_NAME);
1facf9fc 22159+
1facf9fc 22160+/* this module parameter has no meaning when SYSFS is disabled */
22161+int sysaufs_brs = 1;
22162+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
22163+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
22164+
076b876e 22165+/* this module parameter has no meaning when USER_NS is disabled */
8cdd5066 22166+bool au_userns;
076b876e
AM
22167+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
22168+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
22169+
1facf9fc 22170+/* ---------------------------------------------------------------------- */
22171+
22172+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
22173+
22174+int au_seq_path(struct seq_file *seq, struct path *path)
22175+{
79b8bda9
AM
22176+ int err;
22177+
22178+ err = seq_path(seq, path, au_esc_chars);
22179+ if (err > 0)
22180+ err = 0;
22181+ else if (err < 0)
22182+ err = -ENOMEM;
22183+
22184+ return err;
1facf9fc 22185+}
22186+
22187+/* ---------------------------------------------------------------------- */
22188+
22189+static int __init aufs_init(void)
22190+{
22191+ int err, i;
22192+ char *p;
f0c0a007 22193+ struct au_cache *cp;
1facf9fc 22194+
22195+ p = au_esc_chars;
22196+ for (i = 1; i <= ' '; i++)
22197+ *p++ = i;
22198+ *p++ = '\\';
22199+ *p++ = '\x7f';
22200+ *p = 0;
22201+
22202+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
22203+
b95c5147
AM
22204+ memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
22205+ for (i = 0; i < AuIop_Last; i++)
22206+ aufs_iop_nogetattr[i].getattr = NULL;
22207+
f0c0a007
AM
22208+ /* First, initialize au_dfree */
22209+ for (i = 0; i < AuCache_Last; i++) { /* including hnotify */
22210+ cp = au_dfree.cache + i;
22211+ cp->cache = NULL;
22212+ init_llist_head(&cp->llist);
22213+ }
22214+ for (i = 0; i < AU_DFREE_Last; i++)
22215+ init_llist_head(au_dfree.llist + i);
22216+ INIT_DELAYED_WORK(&au_dfree.dwork, au_do_dfree);
22217+
e49829fe 22218+ au_sbilist_init();
1facf9fc 22219+ sysaufs_brs_init();
22220+ au_debug_init();
4a4d8108 22221+ au_dy_init();
1facf9fc 22222+ err = sysaufs_init();
22223+ if (unlikely(err))
22224+ goto out;
e49829fe 22225+ err = au_procfs_init();
4f0767ce 22226+ if (unlikely(err))
953406b4 22227+ goto out_sysaufs;
e49829fe
JR
22228+ err = au_wkq_init();
22229+ if (unlikely(err))
22230+ goto out_procfs;
87a755f4 22231+ err = au_loopback_init();
1facf9fc 22232+ if (unlikely(err))
22233+ goto out_wkq;
87a755f4
AM
22234+ err = au_hnotify_init();
22235+ if (unlikely(err))
22236+ goto out_loopback;
1facf9fc 22237+ err = au_sysrq_init();
22238+ if (unlikely(err))
22239+ goto out_hin;
22240+ err = au_cache_init();
22241+ if (unlikely(err))
22242+ goto out_sysrq;
076b876e
AM
22243+
22244+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
1facf9fc 22245+ err = register_filesystem(&aufs_fs_type);
22246+ if (unlikely(err))
22247+ goto out_cache;
076b876e 22248+
4a4d8108
AM
22249+ /* since we define pr_fmt, call printk directly */
22250+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 22251+ goto out; /* success */
22252+
4f0767ce 22253+out_cache:
1facf9fc 22254+ au_cache_fin();
4f0767ce 22255+out_sysrq:
1facf9fc 22256+ au_sysrq_fin();
4f0767ce 22257+out_hin:
4a4d8108 22258+ au_hnotify_fin();
87a755f4
AM
22259+out_loopback:
22260+ au_loopback_fin();
4f0767ce 22261+out_wkq:
1facf9fc 22262+ au_wkq_fin();
e49829fe
JR
22263+out_procfs:
22264+ au_procfs_fin();
4f0767ce 22265+out_sysaufs:
1facf9fc 22266+ sysaufs_fin();
4a4d8108 22267+ au_dy_fin();
f0c0a007 22268+ flush_delayed_work(&au_dfree.dwork);
4f0767ce 22269+out:
1facf9fc 22270+ return err;
22271+}
22272+
22273+static void __exit aufs_exit(void)
22274+{
22275+ unregister_filesystem(&aufs_fs_type);
22276+ au_cache_fin();
22277+ au_sysrq_fin();
4a4d8108 22278+ au_hnotify_fin();
87a755f4 22279+ au_loopback_fin();
1facf9fc 22280+ au_wkq_fin();
e49829fe 22281+ au_procfs_fin();
1facf9fc 22282+ sysaufs_fin();
4a4d8108 22283+ au_dy_fin();
f0c0a007 22284+ flush_delayed_work(&au_dfree.dwork);
1facf9fc 22285+}
22286+
22287+module_init(aufs_init);
22288+module_exit(aufs_exit);
7f207e10
AM
22289diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
22290--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
22291+++ linux/fs/aufs/module.h 2016-10-09 16:55:38.889431135 +0200
22292@@ -0,0 +1,156 @@
1facf9fc 22293+/*
8cdd5066 22294+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 22295+ *
22296+ * This program, aufs is free software; you can redistribute it and/or modify
22297+ * it under the terms of the GNU General Public License as published by
22298+ * the Free Software Foundation; either version 2 of the License, or
22299+ * (at your option) any later version.
dece6358
AM
22300+ *
22301+ * This program is distributed in the hope that it will be useful,
22302+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22303+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22304+ * GNU General Public License for more details.
22305+ *
22306+ * You should have received a copy of the GNU General Public License
523b37e3 22307+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 22308+ */
22309+
22310+/*
22311+ * module initialization and module-global
22312+ */
22313+
22314+#ifndef __AUFS_MODULE_H__
22315+#define __AUFS_MODULE_H__
22316+
22317+#ifdef __KERNEL__
22318+
22319+#include <linux/slab.h>
f0c0a007 22320+#include "debug.h"
1facf9fc 22321+
dece6358
AM
22322+struct path;
22323+struct seq_file;
22324+
1facf9fc 22325+/* module parameters */
1facf9fc 22326+extern int sysaufs_brs;
8cdd5066 22327+extern bool au_userns;
1facf9fc 22328+
22329+/* ---------------------------------------------------------------------- */
22330+
22331+extern int au_dir_roflags;
22332+
e2f27e51
AM
22333+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink);
22334+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
22335+ int may_shrink);
22336+
22337+static inline int au_kmidx_sub(size_t sz, size_t new_sz)
22338+{
22339+#ifndef CONFIG_SLOB
22340+ return kmalloc_index(sz) - kmalloc_index(new_sz);
22341+#else
22342+ return -1; /* SLOB is untested */
22343+#endif
22344+}
22345+
1facf9fc 22346+int au_seq_path(struct seq_file *seq, struct path *path);
22347+
e49829fe
JR
22348+#ifdef CONFIG_PROC_FS
22349+/* procfs.c */
22350+int __init au_procfs_init(void);
22351+void au_procfs_fin(void);
22352+#else
22353+AuStubInt0(au_procfs_init, void);
22354+AuStubVoid(au_procfs_fin, void);
22355+#endif
22356+
4f0767ce
JR
22357+/* ---------------------------------------------------------------------- */
22358+
f0c0a007 22359+/* kmem cache and delayed free */
1facf9fc 22360+enum {
22361+ AuCache_DINFO,
22362+ AuCache_ICNTNR,
22363+ AuCache_FINFO,
22364+ AuCache_VDIR,
22365+ AuCache_DEHSTR,
7eafdf33 22366+ AuCache_HNOTIFY, /* must be last */
1facf9fc 22367+ AuCache_Last
22368+};
22369+
f0c0a007
AM
22370+enum {
22371+ AU_DFREE_KFREE,
22372+ AU_DFREE_FREE_PAGE,
22373+ AU_DFREE_Last
22374+};
22375+
22376+struct au_cache {
22377+ struct kmem_cache *cache;
22378+ struct llist_head llist; /* delayed free */
22379+};
22380+
22381+/*
22382+ * in order to reduce the cost of the internal timer, consolidate all the
22383+ * delayed free works into a single delayed_work.
22384+ */
22385+struct au_dfree {
22386+ struct au_cache cache[AuCache_Last];
22387+ struct llist_head llist[AU_DFREE_Last];
22388+ struct delayed_work dwork;
22389+};
22390+
22391+extern struct au_dfree au_dfree;
22392+
4a4d8108
AM
22393+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
22394+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
22395+#define AuCacheCtor(type, ctor) \
22396+ kmem_cache_create(#type, sizeof(struct type), \
22397+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 22398+
f0c0a007
AM
22399+#define AU_DFREE_DELAY msecs_to_jiffies(10)
22400+#define AU_DFREE_BODY(lnode, llist) do { \
22401+ if (llist_add(lnode, llist)) \
22402+ schedule_delayed_work(&au_dfree.dwork, \
22403+ AU_DFREE_DELAY); \
22404+ } while (0)
22405+#define AU_CACHE_DFREE_FUNC(name, idx, lnode) \
22406+ void au_cache_dfree_##name(struct au_##name *p) \
22407+ { \
22408+ struct au_cache *cp = au_dfree.cache + AuCache_##idx; \
22409+ AU_DFREE_BODY(&p->lnode, &cp->llist); \
22410+ }
1facf9fc 22411+
22412+#define AuCacheFuncs(name, index) \
4a4d8108 22413+static inline struct au_##name *au_cache_alloc_##name(void) \
f0c0a007 22414+{ return kmem_cache_alloc(au_dfree.cache[AuCache_##index].cache, GFP_NOFS); } \
4a4d8108 22415+static inline void au_cache_free_##name(struct au_##name *p) \
f0c0a007
AM
22416+{ kmem_cache_free(au_dfree.cache[AuCache_##index].cache, p); } \
22417+void au_cache_dfree_##name(struct au_##name *p)
1facf9fc 22418+
22419+AuCacheFuncs(dinfo, DINFO);
22420+AuCacheFuncs(icntnr, ICNTNR);
22421+AuCacheFuncs(finfo, FINFO);
22422+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
22423+AuCacheFuncs(vdir_dehstr, DEHSTR);
22424+#ifdef CONFIG_AUFS_HNOTIFY
22425+AuCacheFuncs(hnotify, HNOTIFY);
22426+#endif
1facf9fc 22427+
f0c0a007
AM
22428+static inline void au_delayed_kfree(const void *p)
22429+{
22430+ AuDebugOn(!p);
22431+ AuDebugOn(ksize(p) < sizeof(struct llist_node));
22432+
22433+ AU_DFREE_BODY((void *)p, au_dfree.llist + AU_DFREE_KFREE);
22434+}
22435+
22436+/* cast only */
22437+static inline void au_free_page(void *p)
22438+{
22439+ free_page((unsigned long)p);
22440+}
22441+
22442+static inline void au_delayed_free_page(unsigned long addr)
22443+{
22444+ AU_DFREE_BODY((void *)addr, au_dfree.llist + AU_DFREE_FREE_PAGE);
22445+}
22446+
4a4d8108
AM
22447+#endif /* __KERNEL__ */
22448+#endif /* __AUFS_MODULE_H__ */
c2b27bf2
AM
22449diff -urN /usr/share/empty/fs/aufs/mvdown.c linux/fs/aufs/mvdown.c
22450--- /usr/share/empty/fs/aufs/mvdown.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 22451+++ linux/fs/aufs/mvdown.c 2016-10-09 16:55:36.492701639 +0200
5afbbe0d 22452@@ -0,0 +1,704 @@
c2b27bf2 22453+/*
8cdd5066 22454+ * Copyright (C) 2011-2016 Junjiro R. Okajima
c2b27bf2
AM
22455+ *
22456+ * This program, aufs is free software; you can redistribute it and/or modify
22457+ * it under the terms of the GNU General Public License as published by
22458+ * the Free Software Foundation; either version 2 of the License, or
22459+ * (at your option) any later version.
22460+ *
22461+ * This program is distributed in the hope that it will be useful,
22462+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22463+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22464+ * GNU General Public License for more details.
22465+ *
22466+ * You should have received a copy of the GNU General Public License
523b37e3
AM
22467+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22468+ */
22469+
22470+/*
22471+ * move-down, opposite of copy-up
c2b27bf2
AM
22472+ */
22473+
22474+#include "aufs.h"
22475+
c2b27bf2
AM
22476+struct au_mvd_args {
22477+ struct {
c2b27bf2
AM
22478+ struct super_block *h_sb;
22479+ struct dentry *h_parent;
22480+ struct au_hinode *hdir;
392086de 22481+ struct inode *h_dir, *h_inode;
c1595e42 22482+ struct au_pin pin;
c2b27bf2
AM
22483+ } info[AUFS_MVDOWN_NARRAY];
22484+
22485+ struct aufs_mvdown mvdown;
22486+ struct dentry *dentry, *parent;
22487+ struct inode *inode, *dir;
22488+ struct super_block *sb;
22489+ aufs_bindex_t bopq, bwh, bfound;
22490+ unsigned char rename_lock;
c2b27bf2
AM
22491+};
22492+
392086de 22493+#define mvd_errno mvdown.au_errno
076b876e
AM
22494+#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
22495+#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
22496+#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
22497+#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
c2b27bf2 22498+
392086de
AM
22499+#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb
22500+#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
22501+#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
22502+#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
22503+#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
c1595e42 22504+#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
392086de
AM
22505+
22506+#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb
22507+#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
22508+#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
22509+#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
22510+#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
c1595e42 22511+#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
c2b27bf2
AM
22512+
22513+#define AU_MVD_PR(flag, ...) do { \
22514+ if (flag) \
22515+ pr_err(__VA_ARGS__); \
22516+ } while (0)
22517+
076b876e
AM
22518+static int find_lower_writable(struct au_mvd_args *a)
22519+{
22520+ struct super_block *sb;
5afbbe0d 22521+ aufs_bindex_t bindex, bbot;
076b876e
AM
22522+ struct au_branch *br;
22523+
22524+ sb = a->sb;
22525+ bindex = a->mvd_bsrc;
5afbbe0d 22526+ bbot = au_sbbot(sb);
076b876e 22527+ if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
5afbbe0d 22528+ for (bindex++; bindex <= bbot; bindex++) {
076b876e
AM
22529+ br = au_sbr(sb, bindex);
22530+ if (au_br_fhsm(br->br_perm)
22531+ && (!(au_br_sb(br)->s_flags & MS_RDONLY)))
22532+ return bindex;
22533+ }
22534+ else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
5afbbe0d 22535+ for (bindex++; bindex <= bbot; bindex++) {
076b876e
AM
22536+ br = au_sbr(sb, bindex);
22537+ if (!au_br_rdonly(br))
22538+ return bindex;
22539+ }
22540+ else
5afbbe0d 22541+ for (bindex++; bindex <= bbot; bindex++) {
076b876e
AM
22542+ br = au_sbr(sb, bindex);
22543+ if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
22544+ if (au_br_rdonly(br))
22545+ a->mvdown.flags
22546+ |= AUFS_MVDOWN_ROLOWER_R;
22547+ return bindex;
22548+ }
22549+ }
22550+
22551+ return -1;
22552+}
22553+
c2b27bf2 22554+/* make the parent dir on bdst */
392086de 22555+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22556+{
22557+ int err;
22558+
22559+ err = 0;
22560+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
22561+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
22562+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
22563+ a->mvd_h_dst_parent = NULL;
5afbbe0d 22564+ if (au_dbbot(a->parent) >= a->mvd_bdst)
c2b27bf2
AM
22565+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22566+ if (!a->mvd_h_dst_parent) {
22567+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
22568+ if (unlikely(err)) {
392086de 22569+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
c2b27bf2
AM
22570+ goto out;
22571+ }
22572+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22573+ }
22574+
22575+out:
22576+ AuTraceErr(err);
22577+ return err;
22578+}
22579+
22580+/* lock them all */
392086de 22581+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22582+{
22583+ int err;
22584+ struct dentry *h_trap;
22585+
22586+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
22587+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
c1595e42
JR
22588+ err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
22589+ au_opt_udba(a->sb),
22590+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22591+ AuTraceErr(err);
22592+ if (unlikely(err)) {
22593+ AU_MVD_PR(dmsg, "pin_dst failed\n");
22594+ goto out;
22595+ }
22596+
c2b27bf2
AM
22597+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
22598+ a->rename_lock = 0;
c1595e42
JR
22599+ au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22600+ AuLsc_DI_PARENT, AuLsc_I_PARENT3,
22601+ au_opt_udba(a->sb),
22602+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22603+ err = au_do_pin(&a->mvd_pin_src);
22604+ AuTraceErr(err);
5527c038 22605+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22606+ if (unlikely(err)) {
22607+ AU_MVD_PR(dmsg, "pin_src failed\n");
22608+ goto out_dst;
22609+ }
22610+ goto out; /* success */
c2b27bf2
AM
22611+ }
22612+
c2b27bf2 22613+ a->rename_lock = 1;
c1595e42
JR
22614+ au_pin_hdir_unlock(&a->mvd_pin_dst);
22615+ err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22616+ au_opt_udba(a->sb),
22617+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22618+ AuTraceErr(err);
5527c038 22619+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22620+ if (unlikely(err)) {
22621+ AU_MVD_PR(dmsg, "pin_src failed\n");
22622+ au_pin_hdir_lock(&a->mvd_pin_dst);
22623+ goto out_dst;
22624+ }
22625+ au_pin_hdir_unlock(&a->mvd_pin_src);
c2b27bf2
AM
22626+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22627+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22628+ if (h_trap) {
22629+ err = (h_trap != a->mvd_h_src_parent);
22630+ if (err)
22631+ err = (h_trap != a->mvd_h_dst_parent);
22632+ }
22633+ BUG_ON(err); /* it should never happen */
c1595e42
JR
22634+ if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
22635+ err = -EBUSY;
22636+ AuTraceErr(err);
22637+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22638+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22639+ au_pin_hdir_lock(&a->mvd_pin_src);
22640+ au_unpin(&a->mvd_pin_src);
22641+ au_pin_hdir_lock(&a->mvd_pin_dst);
22642+ goto out_dst;
22643+ }
22644+ goto out; /* success */
c2b27bf2 22645+
c1595e42
JR
22646+out_dst:
22647+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22648+out:
22649+ AuTraceErr(err);
22650+ return err;
22651+}
22652+
392086de 22653+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2 22654+{
c1595e42
JR
22655+ if (!a->rename_lock)
22656+ au_unpin(&a->mvd_pin_src);
22657+ else {
c2b27bf2
AM
22658+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22659+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
c1595e42
JR
22660+ au_pin_hdir_lock(&a->mvd_pin_src);
22661+ au_unpin(&a->mvd_pin_src);
22662+ au_pin_hdir_lock(&a->mvd_pin_dst);
22663+ }
22664+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22665+}
22666+
22667+/* copy-down the file */
392086de 22668+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22669+{
22670+ int err;
22671+ struct au_cp_generic cpg = {
22672+ .dentry = a->dentry,
22673+ .bdst = a->mvd_bdst,
22674+ .bsrc = a->mvd_bsrc,
22675+ .len = -1,
c1595e42 22676+ .pin = &a->mvd_pin_dst,
c2b27bf2
AM
22677+ .flags = AuCpup_DTIME | AuCpup_HOPEN
22678+ };
22679+
22680+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
392086de
AM
22681+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22682+ au_fset_cpup(cpg.flags, OVERWRITE);
22683+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
22684+ au_fset_cpup(cpg.flags, RWDST);
c2b27bf2
AM
22685+ err = au_sio_cpdown_simple(&cpg);
22686+ if (unlikely(err))
392086de 22687+ AU_MVD_PR(dmsg, "cpdown failed\n");
c2b27bf2
AM
22688+
22689+ AuTraceErr(err);
22690+ return err;
22691+}
22692+
22693+/*
22694+ * unlink the whiteout on bdst if exist which may be created by UDBA while we
22695+ * were sleeping
22696+ */
392086de 22697+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22698+{
22699+ int err;
22700+ struct path h_path;
22701+ struct au_branch *br;
523b37e3 22702+ struct inode *delegated;
c2b27bf2
AM
22703+
22704+ br = au_sbr(a->sb, a->mvd_bdst);
22705+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
22706+ err = PTR_ERR(h_path.dentry);
22707+ if (IS_ERR(h_path.dentry)) {
392086de 22708+ AU_MVD_PR(dmsg, "wh_lkup failed\n");
c2b27bf2
AM
22709+ goto out;
22710+ }
22711+
22712+ err = 0;
5527c038 22713+ if (d_is_positive(h_path.dentry)) {
c2b27bf2 22714+ h_path.mnt = au_br_mnt(br);
523b37e3 22715+ delegated = NULL;
5527c038 22716+ err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
523b37e3
AM
22717+ &delegated, /*force*/0);
22718+ if (unlikely(err == -EWOULDBLOCK)) {
22719+ pr_warn("cannot retry for NFSv4 delegation"
22720+ " for an internal unlink\n");
22721+ iput(delegated);
22722+ }
c2b27bf2 22723+ if (unlikely(err))
392086de 22724+ AU_MVD_PR(dmsg, "wh_unlink failed\n");
c2b27bf2
AM
22725+ }
22726+ dput(h_path.dentry);
22727+
22728+out:
22729+ AuTraceErr(err);
22730+ return err;
22731+}
22732+
22733+/*
22734+ * unlink the topmost h_dentry
c2b27bf2 22735+ */
392086de 22736+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22737+{
22738+ int err;
22739+ struct path h_path;
523b37e3 22740+ struct inode *delegated;
c2b27bf2
AM
22741+
22742+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
22743+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
523b37e3
AM
22744+ delegated = NULL;
22745+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
22746+ if (unlikely(err == -EWOULDBLOCK)) {
22747+ pr_warn("cannot retry for NFSv4 delegation"
22748+ " for an internal unlink\n");
22749+ iput(delegated);
22750+ }
c2b27bf2 22751+ if (unlikely(err))
392086de 22752+ AU_MVD_PR(dmsg, "unlink failed\n");
c2b27bf2
AM
22753+
22754+ AuTraceErr(err);
22755+ return err;
22756+}
22757+
076b876e
AM
22758+/* Since mvdown succeeded, we ignore an error of this function */
22759+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
22760+{
22761+ int err;
22762+ struct au_branch *br;
22763+
22764+ a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
22765+ br = au_sbr(a->sb, a->mvd_bsrc);
22766+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
22767+ if (!err) {
22768+ br = au_sbr(a->sb, a->mvd_bdst);
22769+ a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
22770+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
22771+ }
22772+ if (!err)
22773+ a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
22774+ else
22775+ AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
22776+}
22777+
c2b27bf2
AM
22778+/*
22779+ * copy-down the file and unlink the bsrc file.
22780+ * - unlink the bdst whout if exist
22781+ * - copy-down the file (with whtmp name and rename)
22782+ * - unlink the bsrc file
22783+ */
392086de 22784+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22785+{
22786+ int err;
22787+
392086de 22788+ err = au_do_mkdir(dmsg, a);
c2b27bf2 22789+ if (!err)
392086de 22790+ err = au_do_lock(dmsg, a);
c2b27bf2
AM
22791+ if (unlikely(err))
22792+ goto out;
22793+
22794+ /*
22795+ * do not revert the activities we made on bdst since they should be
22796+ * harmless in aufs.
22797+ */
22798+
392086de 22799+ err = au_do_cpdown(dmsg, a);
c2b27bf2 22800+ if (!err)
392086de
AM
22801+ err = au_do_unlink_wh(dmsg, a);
22802+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
22803+ err = au_do_unlink(dmsg, a);
c2b27bf2
AM
22804+ if (unlikely(err))
22805+ goto out_unlock;
22806+
c1595e42
JR
22807+ AuDbg("%pd2, 0x%x, %d --> %d\n",
22808+ a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
076b876e
AM
22809+ if (find_lower_writable(a) < 0)
22810+ a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
22811+
22812+ if (a->mvdown.flags & AUFS_MVDOWN_STFS)
22813+ au_do_stfs(dmsg, a);
22814+
c2b27bf2 22815+ /* maintain internal array */
392086de
AM
22816+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
22817+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
5afbbe0d 22818+ au_set_dbtop(a->dentry, a->mvd_bdst);
392086de 22819+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
5afbbe0d 22820+ au_set_ibtop(a->inode, a->mvd_bdst);
79b8bda9
AM
22821+ } else {
22822+ /* hide the lower */
22823+ au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
5afbbe0d 22824+ au_set_dbbot(a->dentry, a->mvd_bsrc);
79b8bda9 22825+ au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
5afbbe0d 22826+ au_set_ibbot(a->inode, a->mvd_bsrc);
392086de 22827+ }
5afbbe0d
AM
22828+ if (au_dbbot(a->dentry) < a->mvd_bdst)
22829+ au_set_dbbot(a->dentry, a->mvd_bdst);
22830+ if (au_ibbot(a->inode) < a->mvd_bdst)
22831+ au_set_ibbot(a->inode, a->mvd_bdst);
c2b27bf2
AM
22832+
22833+out_unlock:
392086de 22834+ au_do_unlock(dmsg, a);
c2b27bf2
AM
22835+out:
22836+ AuTraceErr(err);
22837+ return err;
22838+}
22839+
22840+/* ---------------------------------------------------------------------- */
22841+
c2b27bf2 22842+/* make sure the file is idle */
392086de 22843+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22844+{
22845+ int err, plinked;
c2b27bf2
AM
22846+
22847+ err = 0;
c2b27bf2 22848+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
5afbbe0d 22849+ if (au_dbtop(a->dentry) == a->mvd_bsrc
c1595e42 22850+ && au_dcount(a->dentry) == 1
c2b27bf2 22851+ && atomic_read(&a->inode->i_count) == 1
392086de 22852+ /* && a->mvd_h_src_inode->i_nlink == 1 */
c2b27bf2
AM
22853+ && (!plinked || !au_plink_test(a->inode))
22854+ && a->inode->i_nlink == 1)
22855+ goto out;
22856+
22857+ err = -EBUSY;
392086de 22858+ AU_MVD_PR(dmsg,
c1595e42 22859+ "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
5afbbe0d 22860+ a->mvd_bsrc, au_dbtop(a->dentry), au_dcount(a->dentry),
c2b27bf2 22861+ atomic_read(&a->inode->i_count), a->inode->i_nlink,
392086de 22862+ a->mvd_h_src_inode->i_nlink,
c2b27bf2
AM
22863+ plinked, plinked ? au_plink_test(a->inode) : 0);
22864+
22865+out:
22866+ AuTraceErr(err);
22867+ return err;
22868+}
22869+
22870+/* make sure the parent dir is fine */
392086de 22871+static int au_mvd_args_parent(const unsigned char dmsg,
c2b27bf2
AM
22872+ struct au_mvd_args *a)
22873+{
22874+ int err;
22875+ aufs_bindex_t bindex;
22876+
22877+ err = 0;
22878+ if (unlikely(au_alive_dir(a->parent))) {
22879+ err = -ENOENT;
392086de 22880+ AU_MVD_PR(dmsg, "parent dir is dead\n");
c2b27bf2
AM
22881+ goto out;
22882+ }
22883+
22884+ a->bopq = au_dbdiropq(a->parent);
22885+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
22886+ AuDbg("b%d\n", bindex);
22887+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
22888+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
22889+ err = -EINVAL;
392086de
AM
22890+ a->mvd_errno = EAU_MVDOWN_OPAQUE;
22891+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
c2b27bf2
AM
22892+ a->bopq, a->mvd_bdst);
22893+ }
22894+
22895+out:
22896+ AuTraceErr(err);
22897+ return err;
22898+}
22899+
392086de 22900+static int au_mvd_args_intermediate(const unsigned char dmsg,
c2b27bf2
AM
22901+ struct au_mvd_args *a)
22902+{
22903+ int err;
22904+ struct au_dinfo *dinfo, *tmp;
22905+
22906+ /* lookup the next lower positive entry */
22907+ err = -ENOMEM;
22908+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
22909+ if (unlikely(!tmp))
22910+ goto out;
22911+
22912+ a->bfound = -1;
22913+ a->bwh = -1;
22914+ dinfo = au_di(a->dentry);
22915+ au_di_cp(tmp, dinfo);
22916+ au_di_swap(tmp, dinfo);
22917+
22918+ /* returns the number of positive dentries */
5afbbe0d
AM
22919+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1,
22920+ /* AuLkup_IGNORE_PERM */ 0);
c2b27bf2
AM
22921+ if (!err)
22922+ a->bwh = au_dbwh(a->dentry);
22923+ else if (err > 0)
5afbbe0d 22924+ a->bfound = au_dbtop(a->dentry);
c2b27bf2
AM
22925+
22926+ au_di_swap(tmp, dinfo);
22927+ au_rw_write_unlock(&tmp->di_rwsem);
22928+ au_di_free(tmp);
22929+ if (unlikely(err < 0))
392086de 22930+ AU_MVD_PR(dmsg, "failed look-up lower\n");
c2b27bf2
AM
22931+
22932+ /*
22933+ * here, we have these cases.
22934+ * bfound == -1
22935+ * no positive dentry under bsrc. there are more sub-cases.
22936+ * bwh < 0
22937+ * there no whiteout, we can safely move-down.
22938+ * bwh <= bsrc
22939+ * impossible
22940+ * bsrc < bwh && bwh < bdst
22941+ * there is a whiteout on RO branch. cannot proceed.
22942+ * bwh == bdst
22943+ * there is a whiteout on the RW target branch. it should
22944+ * be removed.
22945+ * bdst < bwh
22946+ * there is a whiteout somewhere unrelated branch.
22947+ * -1 < bfound && bfound <= bsrc
22948+ * impossible.
22949+ * bfound < bdst
22950+ * found, but it is on RO branch between bsrc and bdst. cannot
22951+ * proceed.
22952+ * bfound == bdst
22953+ * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
22954+ * error.
22955+ * bdst < bfound
22956+ * found, after we create the file on bdst, it will be hidden.
22957+ */
22958+
22959+ AuDebugOn(a->bfound == -1
22960+ && a->bwh != -1
22961+ && a->bwh <= a->mvd_bsrc);
22962+ AuDebugOn(-1 < a->bfound
22963+ && a->bfound <= a->mvd_bsrc);
22964+
22965+ err = -EINVAL;
22966+ if (a->bfound == -1
22967+ && a->mvd_bsrc < a->bwh
22968+ && a->bwh != -1
22969+ && a->bwh < a->mvd_bdst) {
392086de
AM
22970+ a->mvd_errno = EAU_MVDOWN_WHITEOUT;
22971+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
c2b27bf2
AM
22972+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
22973+ goto out;
22974+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
392086de
AM
22975+ a->mvd_errno = EAU_MVDOWN_UPPER;
22976+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
c2b27bf2
AM
22977+ a->mvd_bdst, a->bfound);
22978+ goto out;
22979+ }
22980+
22981+ err = 0; /* success */
22982+
22983+out:
22984+ AuTraceErr(err);
22985+ return err;
22986+}
22987+
392086de 22988+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22989+{
22990+ int err;
22991+
392086de
AM
22992+ err = 0;
22993+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22994+ && a->bfound == a->mvd_bdst)
22995+ err = -EEXIST;
c2b27bf2
AM
22996+ AuTraceErr(err);
22997+ return err;
22998+}
22999+
392086de 23000+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
23001+{
23002+ int err;
23003+ struct au_branch *br;
23004+
23005+ err = -EISDIR;
23006+ if (unlikely(S_ISDIR(a->inode->i_mode)))
23007+ goto out;
23008+
23009+ err = -EINVAL;
392086de 23010+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
5afbbe0d 23011+ a->mvd_bsrc = au_ibtop(a->inode);
392086de
AM
23012+ else {
23013+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
23014+ if (unlikely(a->mvd_bsrc < 0
5afbbe0d
AM
23015+ || (a->mvd_bsrc < au_dbtop(a->dentry)
23016+ || au_dbbot(a->dentry) < a->mvd_bsrc
392086de 23017+ || !au_h_dptr(a->dentry, a->mvd_bsrc))
5afbbe0d
AM
23018+ || (a->mvd_bsrc < au_ibtop(a->inode)
23019+ || au_ibbot(a->inode) < a->mvd_bsrc
392086de
AM
23020+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
23021+ a->mvd_errno = EAU_MVDOWN_NOUPPER;
23022+ AU_MVD_PR(dmsg, "no upper\n");
23023+ goto out;
23024+ }
23025+ }
5afbbe0d 23026+ if (unlikely(a->mvd_bsrc == au_sbbot(a->sb))) {
392086de
AM
23027+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
23028+ AU_MVD_PR(dmsg, "on the bottom\n");
c2b27bf2
AM
23029+ goto out;
23030+ }
392086de 23031+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
c2b27bf2
AM
23032+ br = au_sbr(a->sb, a->mvd_bsrc);
23033+ err = au_br_rdonly(br);
392086de
AM
23034+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
23035+ if (unlikely(err))
23036+ goto out;
23037+ } else if (!(vfsub_native_ro(a->mvd_h_src_inode)
23038+ || IS_APPEND(a->mvd_h_src_inode))) {
23039+ if (err)
23040+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
23041+ /* go on */
23042+ } else
c2b27bf2
AM
23043+ goto out;
23044+
23045+ err = -EINVAL;
392086de
AM
23046+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
23047+ a->mvd_bdst = find_lower_writable(a);
23048+ if (unlikely(a->mvd_bdst < 0)) {
23049+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
23050+ AU_MVD_PR(dmsg, "no writable lower branch\n");
23051+ goto out;
23052+ }
23053+ } else {
23054+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
23055+ if (unlikely(a->mvd_bdst < 0
5afbbe0d 23056+ || au_sbbot(a->sb) < a->mvd_bdst)) {
392086de
AM
23057+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
23058+ AU_MVD_PR(dmsg, "no lower brid\n");
23059+ goto out;
23060+ }
c2b27bf2
AM
23061+ }
23062+
392086de 23063+ err = au_mvd_args_busy(dmsg, a);
c2b27bf2 23064+ if (!err)
392086de 23065+ err = au_mvd_args_parent(dmsg, a);
c2b27bf2 23066+ if (!err)
392086de 23067+ err = au_mvd_args_intermediate(dmsg, a);
c2b27bf2 23068+ if (!err)
392086de 23069+ err = au_mvd_args_exist(dmsg, a);
c2b27bf2
AM
23070+ if (!err)
23071+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
23072+
23073+out:
23074+ AuTraceErr(err);
23075+ return err;
23076+}
23077+
23078+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
23079+{
392086de
AM
23080+ int err, e;
23081+ unsigned char dmsg;
23082+ struct au_mvd_args *args;
79b8bda9 23083+ struct inode *inode;
c2b27bf2 23084+
79b8bda9 23085+ inode = d_inode(dentry);
c2b27bf2
AM
23086+ err = -EPERM;
23087+ if (unlikely(!capable(CAP_SYS_ADMIN)))
23088+ goto out;
23089+
392086de
AM
23090+ err = -ENOMEM;
23091+ args = kmalloc(sizeof(*args), GFP_NOFS);
23092+ if (unlikely(!args))
23093+ goto out;
23094+
23095+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
23096+ if (!err)
23097+ err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
c2b27bf2
AM
23098+ if (unlikely(err)) {
23099+ err = -EFAULT;
392086de
AM
23100+ AuTraceErr(err);
23101+ goto out_free;
c2b27bf2 23102+ }
392086de
AM
23103+ AuDbg("flags 0x%x\n", args->mvdown.flags);
23104+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
23105+ args->mvdown.au_errno = 0;
23106+ args->dentry = dentry;
79b8bda9 23107+ args->inode = inode;
392086de 23108+ args->sb = dentry->d_sb;
c2b27bf2 23109+
392086de
AM
23110+ err = -ENOENT;
23111+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
23112+ args->parent = dget_parent(dentry);
5527c038 23113+ args->dir = d_inode(args->parent);
febd17d6 23114+ inode_lock_nested(args->dir, I_MUTEX_PARENT);
392086de
AM
23115+ dput(args->parent);
23116+ if (unlikely(args->parent != dentry->d_parent)) {
23117+ AU_MVD_PR(dmsg, "parent dir is moved\n");
c2b27bf2
AM
23118+ goto out_dir;
23119+ }
23120+
febd17d6 23121+ inode_lock_nested(inode, I_MUTEX_CHILD);
b95c5147 23122+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
c2b27bf2
AM
23123+ if (unlikely(err))
23124+ goto out_inode;
23125+
392086de
AM
23126+ di_write_lock_parent(args->parent);
23127+ err = au_mvd_args(dmsg, args);
c2b27bf2
AM
23128+ if (unlikely(err))
23129+ goto out_parent;
23130+
392086de 23131+ err = au_do_mvdown(dmsg, args);
c2b27bf2
AM
23132+ if (unlikely(err))
23133+ goto out_parent;
c2b27bf2 23134+
392086de 23135+ au_cpup_attr_timesizes(args->dir);
79b8bda9
AM
23136+ au_cpup_attr_timesizes(inode);
23137+ if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
23138+ au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
c2b27bf2
AM
23139+ /* au_digen_dec(dentry); */
23140+
23141+out_parent:
392086de 23142+ di_write_unlock(args->parent);
c2b27bf2
AM
23143+ aufs_read_unlock(dentry, AuLock_DW);
23144+out_inode:
febd17d6 23145+ inode_unlock(inode);
c2b27bf2 23146+out_dir:
febd17d6 23147+ inode_unlock(args->dir);
392086de
AM
23148+out_free:
23149+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
23150+ if (unlikely(e))
23151+ err = -EFAULT;
f0c0a007 23152+ au_delayed_kfree(args);
c2b27bf2
AM
23153+out:
23154+ AuTraceErr(err);
23155+ return err;
23156+}
23157diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
23158--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 23159+++ linux/fs/aufs/opts.c 2016-10-09 16:55:36.496035060 +0200
f0c0a007 23160@@ -0,0 +1,1860 @@
1facf9fc 23161+/*
8cdd5066 23162+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 23163+ *
23164+ * This program, aufs is free software; you can redistribute it and/or modify
23165+ * it under the terms of the GNU General Public License as published by
23166+ * the Free Software Foundation; either version 2 of the License, or
23167+ * (at your option) any later version.
dece6358
AM
23168+ *
23169+ * This program is distributed in the hope that it will be useful,
23170+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23171+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23172+ * GNU General Public License for more details.
23173+ *
23174+ * You should have received a copy of the GNU General Public License
523b37e3 23175+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 23176+ */
23177+
23178+/*
23179+ * mount options/flags
23180+ */
23181+
dece6358 23182+#include <linux/namei.h>
1facf9fc 23183+#include <linux/types.h> /* a distribution requires */
23184+#include <linux/parser.h>
23185+#include "aufs.h"
23186+
23187+/* ---------------------------------------------------------------------- */
23188+
23189+enum {
23190+ Opt_br,
7e9cd9fe
AM
23191+ Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
23192+ Opt_idel, Opt_imod,
23193+ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
dece6358 23194+ Opt_rdblk_def, Opt_rdhash_def,
7e9cd9fe 23195+ Opt_xino, Opt_noxino,
1facf9fc 23196+ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
23197+ Opt_trunc_xino_path, Opt_itrunc_xino,
23198+ Opt_trunc_xib, Opt_notrunc_xib,
dece6358 23199+ Opt_shwh, Opt_noshwh,
1facf9fc 23200+ Opt_plink, Opt_noplink, Opt_list_plink,
23201+ Opt_udba,
4a4d8108 23202+ Opt_dio, Opt_nodio,
1facf9fc 23203+ Opt_diropq_a, Opt_diropq_w,
23204+ Opt_warn_perm, Opt_nowarn_perm,
23205+ Opt_wbr_copyup, Opt_wbr_create,
076b876e 23206+ Opt_fhsm_sec,
1facf9fc 23207+ Opt_verbose, Opt_noverbose,
23208+ Opt_sum, Opt_nosum, Opt_wsum,
076b876e 23209+ Opt_dirperm1, Opt_nodirperm1,
c1595e42 23210+ Opt_acl, Opt_noacl,
1facf9fc 23211+ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
23212+};
23213+
23214+static match_table_t options = {
23215+ {Opt_br, "br=%s"},
23216+ {Opt_br, "br:%s"},
23217+
23218+ {Opt_add, "add=%d:%s"},
23219+ {Opt_add, "add:%d:%s"},
23220+ {Opt_add, "ins=%d:%s"},
23221+ {Opt_add, "ins:%d:%s"},
23222+ {Opt_append, "append=%s"},
23223+ {Opt_append, "append:%s"},
23224+ {Opt_prepend, "prepend=%s"},
23225+ {Opt_prepend, "prepend:%s"},
23226+
23227+ {Opt_del, "del=%s"},
23228+ {Opt_del, "del:%s"},
23229+ /* {Opt_idel, "idel:%d"}, */
23230+ {Opt_mod, "mod=%s"},
23231+ {Opt_mod, "mod:%s"},
23232+ /* {Opt_imod, "imod:%d:%s"}, */
23233+
23234+ {Opt_dirwh, "dirwh=%d"},
23235+
23236+ {Opt_xino, "xino=%s"},
23237+ {Opt_noxino, "noxino"},
23238+ {Opt_trunc_xino, "trunc_xino"},
23239+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
23240+ {Opt_notrunc_xino, "notrunc_xino"},
23241+ {Opt_trunc_xino_path, "trunc_xino=%s"},
23242+ {Opt_itrunc_xino, "itrunc_xino=%d"},
23243+ /* {Opt_zxino, "zxino=%s"}, */
23244+ {Opt_trunc_xib, "trunc_xib"},
23245+ {Opt_notrunc_xib, "notrunc_xib"},
23246+
e49829fe 23247+#ifdef CONFIG_PROC_FS
1facf9fc 23248+ {Opt_plink, "plink"},
e49829fe
JR
23249+#else
23250+ {Opt_ignore_silent, "plink"},
23251+#endif
23252+
1facf9fc 23253+ {Opt_noplink, "noplink"},
e49829fe 23254+
1facf9fc 23255+#ifdef CONFIG_AUFS_DEBUG
23256+ {Opt_list_plink, "list_plink"},
23257+#endif
23258+
23259+ {Opt_udba, "udba=%s"},
23260+
4a4d8108
AM
23261+ {Opt_dio, "dio"},
23262+ {Opt_nodio, "nodio"},
23263+
076b876e
AM
23264+#ifdef CONFIG_AUFS_FHSM
23265+ {Opt_fhsm_sec, "fhsm_sec=%d"},
23266+#else
23267+ {Opt_ignore_silent, "fhsm_sec=%d"},
23268+#endif
23269+
1facf9fc 23270+ {Opt_diropq_a, "diropq=always"},
23271+ {Opt_diropq_a, "diropq=a"},
23272+ {Opt_diropq_w, "diropq=whiteouted"},
23273+ {Opt_diropq_w, "diropq=w"},
23274+
23275+ {Opt_warn_perm, "warn_perm"},
23276+ {Opt_nowarn_perm, "nowarn_perm"},
23277+
23278+ /* keep them temporary */
1facf9fc 23279+ {Opt_ignore_silent, "nodlgt"},
1facf9fc 23280+ {Opt_ignore_silent, "clean_plink"},
23281+
dece6358
AM
23282+#ifdef CONFIG_AUFS_SHWH
23283+ {Opt_shwh, "shwh"},
23284+#endif
23285+ {Opt_noshwh, "noshwh"},
23286+
076b876e
AM
23287+ {Opt_dirperm1, "dirperm1"},
23288+ {Opt_nodirperm1, "nodirperm1"},
23289+
1facf9fc 23290+ {Opt_verbose, "verbose"},
23291+ {Opt_verbose, "v"},
23292+ {Opt_noverbose, "noverbose"},
23293+ {Opt_noverbose, "quiet"},
23294+ {Opt_noverbose, "q"},
23295+ {Opt_noverbose, "silent"},
23296+
23297+ {Opt_sum, "sum"},
23298+ {Opt_nosum, "nosum"},
23299+ {Opt_wsum, "wsum"},
23300+
23301+ {Opt_rdcache, "rdcache=%d"},
23302+ {Opt_rdblk, "rdblk=%d"},
dece6358 23303+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 23304+ {Opt_rdhash, "rdhash=%d"},
dece6358 23305+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 23306+
23307+ {Opt_wbr_create, "create=%s"},
23308+ {Opt_wbr_create, "create_policy=%s"},
23309+ {Opt_wbr_copyup, "cpup=%s"},
23310+ {Opt_wbr_copyup, "copyup=%s"},
23311+ {Opt_wbr_copyup, "copyup_policy=%s"},
23312+
c1595e42
JR
23313+ /* generic VFS flag */
23314+#ifdef CONFIG_FS_POSIX_ACL
23315+ {Opt_acl, "acl"},
23316+ {Opt_noacl, "noacl"},
23317+#else
23318+ {Opt_ignore_silent, "acl"},
23319+ {Opt_ignore_silent, "noacl"},
23320+#endif
23321+
1facf9fc 23322+ /* internal use for the scripts */
23323+ {Opt_ignore_silent, "si=%s"},
23324+
23325+ {Opt_br, "dirs=%s"},
23326+ {Opt_ignore, "debug=%d"},
23327+ {Opt_ignore, "delete=whiteout"},
23328+ {Opt_ignore, "delete=all"},
23329+ {Opt_ignore, "imap=%s"},
23330+
1308ab2a 23331+ /* temporary workaround, due to old mount(8)? */
23332+ {Opt_ignore_silent, "relatime"},
23333+
1facf9fc 23334+ {Opt_err, NULL}
23335+};
23336+
23337+/* ---------------------------------------------------------------------- */
23338+
076b876e 23339+static const char *au_parser_pattern(int val, match_table_t tbl)
1facf9fc 23340+{
076b876e
AM
23341+ struct match_token *p;
23342+
23343+ p = tbl;
23344+ while (p->pattern) {
23345+ if (p->token == val)
23346+ return p->pattern;
23347+ p++;
1facf9fc 23348+ }
23349+ BUG();
23350+ return "??";
23351+}
23352+
076b876e
AM
23353+static const char *au_optstr(int *val, match_table_t tbl)
23354+{
23355+ struct match_token *p;
23356+ int v;
23357+
23358+ v = *val;
2000de60
JR
23359+ if (!v)
23360+ goto out;
076b876e 23361+ p = tbl;
2000de60
JR
23362+ while (p->pattern) {
23363+ if (p->token
23364+ && (v & p->token) == p->token) {
076b876e
AM
23365+ *val &= ~p->token;
23366+ return p->pattern;
23367+ }
23368+ p++;
23369+ }
2000de60
JR
23370+
23371+out:
076b876e
AM
23372+ return NULL;
23373+}
23374+
1facf9fc 23375+/* ---------------------------------------------------------------------- */
23376+
1e00d052 23377+static match_table_t brperm = {
1facf9fc 23378+ {AuBrPerm_RO, AUFS_BRPERM_RO},
23379+ {AuBrPerm_RR, AUFS_BRPERM_RR},
23380+ {AuBrPerm_RW, AUFS_BRPERM_RW},
1e00d052
AM
23381+ {0, NULL}
23382+};
1facf9fc 23383+
86dc4139 23384+static match_table_t brattr = {
076b876e
AM
23385+ /* general */
23386+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
23387+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
c1595e42 23388+ /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
86dc4139 23389+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
2000de60 23390+#ifdef CONFIG_AUFS_FHSM
076b876e 23391+ {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
2000de60
JR
23392+#endif
23393+#ifdef CONFIG_AUFS_XATTR
c1595e42
JR
23394+ {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
23395+ {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
23396+ {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
23397+ {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
23398+ {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
23399+ {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
2000de60 23400+#endif
076b876e
AM
23401+
23402+ /* ro/rr branch */
1e00d052 23403+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
076b876e
AM
23404+
23405+ /* rw branch */
23406+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
1e00d052 23407+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
076b876e 23408+
1e00d052 23409+ {0, NULL}
1facf9fc 23410+};
23411+
1e00d052
AM
23412+static int br_attr_val(char *str, match_table_t table, substring_t args[])
23413+{
23414+ int attr, v;
23415+ char *p;
23416+
23417+ attr = 0;
23418+ do {
23419+ p = strchr(str, '+');
23420+ if (p)
23421+ *p = 0;
23422+ v = match_token(str, table, args);
076b876e
AM
23423+ if (v) {
23424+ if (v & AuBrAttr_CMOO_Mask)
23425+ attr &= ~AuBrAttr_CMOO_Mask;
1e00d052 23426+ attr |= v;
076b876e 23427+ } else {
1e00d052
AM
23428+ if (p)
23429+ *p = '+';
0c3ec466 23430+ pr_warn("ignored branch attribute %s\n", str);
1e00d052
AM
23431+ break;
23432+ }
23433+ if (p)
23434+ str = p + 1;
23435+ } while (p);
23436+
23437+ return attr;
23438+}
23439+
076b876e
AM
23440+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
23441+{
23442+ int sz;
23443+ const char *p;
23444+ char *q;
23445+
076b876e
AM
23446+ q = str->a;
23447+ *q = 0;
23448+ p = au_optstr(&perm, brattr);
23449+ if (p) {
23450+ sz = strlen(p);
23451+ memcpy(q, p, sz + 1);
23452+ q += sz;
23453+ } else
23454+ goto out;
23455+
23456+ do {
23457+ p = au_optstr(&perm, brattr);
23458+ if (p) {
23459+ *q++ = '+';
23460+ sz = strlen(p);
23461+ memcpy(q, p, sz + 1);
23462+ q += sz;
23463+ }
23464+ } while (p);
23465+
23466+out:
c1595e42 23467+ return q - str->a;
076b876e
AM
23468+}
23469+
4a4d8108 23470+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 23471+{
076b876e
AM
23472+ int val, bad, sz;
23473+ char *p;
1facf9fc 23474+ substring_t args[MAX_OPT_ARGS];
076b876e 23475+ au_br_perm_str_t attr;
1facf9fc 23476+
1e00d052
AM
23477+ p = strchr(perm, '+');
23478+ if (p)
23479+ *p = 0;
23480+ val = match_token(perm, brperm, args);
23481+ if (!val) {
23482+ if (p)
23483+ *p = '+';
0c3ec466 23484+ pr_warn("ignored branch permission %s\n", perm);
1e00d052
AM
23485+ val = AuBrPerm_RO;
23486+ goto out;
23487+ }
23488+ if (!p)
23489+ goto out;
23490+
076b876e
AM
23491+ val |= br_attr_val(p + 1, brattr, args);
23492+
23493+ bad = 0;
86dc4139 23494+ switch (val & AuBrPerm_Mask) {
1e00d052
AM
23495+ case AuBrPerm_RO:
23496+ case AuBrPerm_RR:
076b876e
AM
23497+ bad = val & AuBrWAttr_Mask;
23498+ val &= ~AuBrWAttr_Mask;
1e00d052
AM
23499+ break;
23500+ case AuBrPerm_RW:
076b876e
AM
23501+ bad = val & AuBrRAttr_Mask;
23502+ val &= ~AuBrRAttr_Mask;
1e00d052
AM
23503+ break;
23504+ }
c1595e42
JR
23505+
23506+ /*
23507+ * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
23508+ * does not treat it as an error, just warning.
23509+ * this is a tiny guard for the user operation.
23510+ */
23511+ if (val & AuBrAttr_UNPIN) {
23512+ bad |= AuBrAttr_UNPIN;
23513+ val &= ~AuBrAttr_UNPIN;
23514+ }
23515+
076b876e
AM
23516+ if (unlikely(bad)) {
23517+ sz = au_do_optstr_br_attr(&attr, bad);
23518+ AuDebugOn(!sz);
23519+ pr_warn("ignored branch attribute %s\n", attr.a);
23520+ }
1e00d052
AM
23521+
23522+out:
1facf9fc 23523+ return val;
23524+}
23525+
076b876e 23526+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
1facf9fc 23527+{
076b876e
AM
23528+ au_br_perm_str_t attr;
23529+ const char *p;
23530+ char *q;
1e00d052
AM
23531+ int sz;
23532+
076b876e
AM
23533+ q = str->a;
23534+ p = au_optstr(&perm, brperm);
23535+ AuDebugOn(!p || !*p);
23536+ sz = strlen(p);
23537+ memcpy(q, p, sz + 1);
23538+ q += sz;
1e00d052 23539+
076b876e
AM
23540+ sz = au_do_optstr_br_attr(&attr, perm);
23541+ if (sz) {
23542+ *q++ = '+';
23543+ memcpy(q, attr.a, sz + 1);
1e00d052
AM
23544+ }
23545+
076b876e 23546+ AuDebugOn(strlen(str->a) >= sizeof(str->a));
1facf9fc 23547+}
23548+
23549+/* ---------------------------------------------------------------------- */
23550+
23551+static match_table_t udbalevel = {
23552+ {AuOpt_UDBA_REVAL, "reval"},
23553+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
23554+#ifdef CONFIG_AUFS_HNOTIFY
23555+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
23556+#ifdef CONFIG_AUFS_HFSNOTIFY
23557+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 23558+#endif
1facf9fc 23559+#endif
23560+ {-1, NULL}
23561+};
23562+
4a4d8108 23563+static int noinline_for_stack udba_val(char *str)
1facf9fc 23564+{
23565+ substring_t args[MAX_OPT_ARGS];
23566+
7f207e10 23567+ return match_token(str, udbalevel, args);
1facf9fc 23568+}
23569+
23570+const char *au_optstr_udba(int udba)
23571+{
076b876e 23572+ return au_parser_pattern(udba, udbalevel);
1facf9fc 23573+}
23574+
23575+/* ---------------------------------------------------------------------- */
23576+
23577+static match_table_t au_wbr_create_policy = {
23578+ {AuWbrCreate_TDP, "tdp"},
23579+ {AuWbrCreate_TDP, "top-down-parent"},
23580+ {AuWbrCreate_RR, "rr"},
23581+ {AuWbrCreate_RR, "round-robin"},
23582+ {AuWbrCreate_MFS, "mfs"},
23583+ {AuWbrCreate_MFS, "most-free-space"},
23584+ {AuWbrCreate_MFSV, "mfs:%d"},
23585+ {AuWbrCreate_MFSV, "most-free-space:%d"},
23586+
23587+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
23588+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
23589+ {AuWbrCreate_PMFS, "pmfs"},
23590+ {AuWbrCreate_PMFSV, "pmfs:%d"},
392086de
AM
23591+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
23592+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
1facf9fc 23593+
23594+ {-1, NULL}
23595+};
23596+
dece6358
AM
23597+/*
23598+ * cf. linux/lib/parser.c and cmdline.c
23599+ * gave up calling memparse() since it uses simple_strtoull() instead of
9dbd164d 23600+ * kstrto...().
dece6358 23601+ */
4a4d8108
AM
23602+static int noinline_for_stack
23603+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 23604+{
23605+ int err;
23606+ unsigned int len;
23607+ char a[32];
23608+
23609+ err = -ERANGE;
23610+ len = s->to - s->from;
23611+ if (len + 1 <= sizeof(a)) {
23612+ memcpy(a, s->from, len);
23613+ a[len] = '\0';
9dbd164d 23614+ err = kstrtoull(a, 0, result);
1facf9fc 23615+ }
23616+ return err;
23617+}
23618+
23619+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
23620+ struct au_opt_wbr_create *create)
23621+{
23622+ int err;
23623+ unsigned long long ull;
23624+
23625+ err = 0;
23626+ if (!au_match_ull(arg, &ull))
23627+ create->mfsrr_watermark = ull;
23628+ else {
4a4d8108 23629+ pr_err("bad integer in %s\n", str);
1facf9fc 23630+ err = -EINVAL;
23631+ }
23632+
23633+ return err;
23634+}
23635+
23636+static int au_wbr_mfs_sec(substring_t *arg, char *str,
23637+ struct au_opt_wbr_create *create)
23638+{
23639+ int n, err;
23640+
23641+ err = 0;
027c5e7a 23642+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 23643+ create->mfs_second = n;
23644+ else {
4a4d8108 23645+ pr_err("bad integer in %s\n", str);
1facf9fc 23646+ err = -EINVAL;
23647+ }
23648+
23649+ return err;
23650+}
23651+
4a4d8108
AM
23652+static int noinline_for_stack
23653+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 23654+{
23655+ int err, e;
23656+ substring_t args[MAX_OPT_ARGS];
23657+
23658+ err = match_token(str, au_wbr_create_policy, args);
23659+ create->wbr_create = err;
23660+ switch (err) {
23661+ case AuWbrCreate_MFSRRV:
392086de 23662+ case AuWbrCreate_PMFSRRV:
1facf9fc 23663+ e = au_wbr_mfs_wmark(&args[0], str, create);
23664+ if (!e)
23665+ e = au_wbr_mfs_sec(&args[1], str, create);
23666+ if (unlikely(e))
23667+ err = e;
23668+ break;
23669+ case AuWbrCreate_MFSRR:
392086de 23670+ case AuWbrCreate_PMFSRR:
1facf9fc 23671+ e = au_wbr_mfs_wmark(&args[0], str, create);
23672+ if (unlikely(e)) {
23673+ err = e;
23674+ break;
23675+ }
23676+ /*FALLTHROUGH*/
23677+ case AuWbrCreate_MFS:
23678+ case AuWbrCreate_PMFS:
027c5e7a 23679+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 23680+ break;
23681+ case AuWbrCreate_MFSV:
23682+ case AuWbrCreate_PMFSV:
23683+ e = au_wbr_mfs_sec(&args[0], str, create);
23684+ if (unlikely(e))
23685+ err = e;
23686+ break;
23687+ }
23688+
23689+ return err;
23690+}
23691+
23692+const char *au_optstr_wbr_create(int wbr_create)
23693+{
076b876e 23694+ return au_parser_pattern(wbr_create, au_wbr_create_policy);
1facf9fc 23695+}
23696+
23697+static match_table_t au_wbr_copyup_policy = {
23698+ {AuWbrCopyup_TDP, "tdp"},
23699+ {AuWbrCopyup_TDP, "top-down-parent"},
23700+ {AuWbrCopyup_BUP, "bup"},
23701+ {AuWbrCopyup_BUP, "bottom-up-parent"},
23702+ {AuWbrCopyup_BU, "bu"},
23703+ {AuWbrCopyup_BU, "bottom-up"},
23704+ {-1, NULL}
23705+};
23706+
4a4d8108 23707+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 23708+{
23709+ substring_t args[MAX_OPT_ARGS];
23710+
23711+ return match_token(str, au_wbr_copyup_policy, args);
23712+}
23713+
23714+const char *au_optstr_wbr_copyup(int wbr_copyup)
23715+{
076b876e 23716+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
1facf9fc 23717+}
23718+
23719+/* ---------------------------------------------------------------------- */
23720+
23721+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
23722+
23723+static void dump_opts(struct au_opts *opts)
23724+{
23725+#ifdef CONFIG_AUFS_DEBUG
23726+ /* reduce stack space */
23727+ union {
23728+ struct au_opt_add *add;
23729+ struct au_opt_del *del;
23730+ struct au_opt_mod *mod;
23731+ struct au_opt_xino *xino;
23732+ struct au_opt_xino_itrunc *xino_itrunc;
23733+ struct au_opt_wbr_create *create;
23734+ } u;
23735+ struct au_opt *opt;
23736+
23737+ opt = opts->opt;
23738+ while (opt->type != Opt_tail) {
23739+ switch (opt->type) {
23740+ case Opt_add:
23741+ u.add = &opt->add;
23742+ AuDbg("add {b%d, %s, 0x%x, %p}\n",
23743+ u.add->bindex, u.add->pathname, u.add->perm,
23744+ u.add->path.dentry);
23745+ break;
23746+ case Opt_del:
23747+ case Opt_idel:
23748+ u.del = &opt->del;
23749+ AuDbg("del {%s, %p}\n",
23750+ u.del->pathname, u.del->h_path.dentry);
23751+ break;
23752+ case Opt_mod:
23753+ case Opt_imod:
23754+ u.mod = &opt->mod;
23755+ AuDbg("mod {%s, 0x%x, %p}\n",
23756+ u.mod->path, u.mod->perm, u.mod->h_root);
23757+ break;
23758+ case Opt_append:
23759+ u.add = &opt->add;
23760+ AuDbg("append {b%d, %s, 0x%x, %p}\n",
23761+ u.add->bindex, u.add->pathname, u.add->perm,
23762+ u.add->path.dentry);
23763+ break;
23764+ case Opt_prepend:
23765+ u.add = &opt->add;
23766+ AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
23767+ u.add->bindex, u.add->pathname, u.add->perm,
23768+ u.add->path.dentry);
23769+ break;
23770+ case Opt_dirwh:
23771+ AuDbg("dirwh %d\n", opt->dirwh);
23772+ break;
23773+ case Opt_rdcache:
23774+ AuDbg("rdcache %d\n", opt->rdcache);
23775+ break;
23776+ case Opt_rdblk:
23777+ AuDbg("rdblk %u\n", opt->rdblk);
23778+ break;
dece6358
AM
23779+ case Opt_rdblk_def:
23780+ AuDbg("rdblk_def\n");
23781+ break;
1facf9fc 23782+ case Opt_rdhash:
23783+ AuDbg("rdhash %u\n", opt->rdhash);
23784+ break;
dece6358
AM
23785+ case Opt_rdhash_def:
23786+ AuDbg("rdhash_def\n");
23787+ break;
1facf9fc 23788+ case Opt_xino:
23789+ u.xino = &opt->xino;
523b37e3 23790+ AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
1facf9fc 23791+ break;
23792+ case Opt_trunc_xino:
23793+ AuLabel(trunc_xino);
23794+ break;
23795+ case Opt_notrunc_xino:
23796+ AuLabel(notrunc_xino);
23797+ break;
23798+ case Opt_trunc_xino_path:
23799+ case Opt_itrunc_xino:
23800+ u.xino_itrunc = &opt->xino_itrunc;
23801+ AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
23802+ break;
1facf9fc 23803+ case Opt_noxino:
23804+ AuLabel(noxino);
23805+ break;
23806+ case Opt_trunc_xib:
23807+ AuLabel(trunc_xib);
23808+ break;
23809+ case Opt_notrunc_xib:
23810+ AuLabel(notrunc_xib);
23811+ break;
dece6358
AM
23812+ case Opt_shwh:
23813+ AuLabel(shwh);
23814+ break;
23815+ case Opt_noshwh:
23816+ AuLabel(noshwh);
23817+ break;
076b876e
AM
23818+ case Opt_dirperm1:
23819+ AuLabel(dirperm1);
23820+ break;
23821+ case Opt_nodirperm1:
23822+ AuLabel(nodirperm1);
23823+ break;
1facf9fc 23824+ case Opt_plink:
23825+ AuLabel(plink);
23826+ break;
23827+ case Opt_noplink:
23828+ AuLabel(noplink);
23829+ break;
23830+ case Opt_list_plink:
23831+ AuLabel(list_plink);
23832+ break;
23833+ case Opt_udba:
23834+ AuDbg("udba %d, %s\n",
23835+ opt->udba, au_optstr_udba(opt->udba));
23836+ break;
4a4d8108
AM
23837+ case Opt_dio:
23838+ AuLabel(dio);
23839+ break;
23840+ case Opt_nodio:
23841+ AuLabel(nodio);
23842+ break;
1facf9fc 23843+ case Opt_diropq_a:
23844+ AuLabel(diropq_a);
23845+ break;
23846+ case Opt_diropq_w:
23847+ AuLabel(diropq_w);
23848+ break;
23849+ case Opt_warn_perm:
23850+ AuLabel(warn_perm);
23851+ break;
23852+ case Opt_nowarn_perm:
23853+ AuLabel(nowarn_perm);
23854+ break;
1facf9fc 23855+ case Opt_verbose:
23856+ AuLabel(verbose);
23857+ break;
23858+ case Opt_noverbose:
23859+ AuLabel(noverbose);
23860+ break;
23861+ case Opt_sum:
23862+ AuLabel(sum);
23863+ break;
23864+ case Opt_nosum:
23865+ AuLabel(nosum);
23866+ break;
23867+ case Opt_wsum:
23868+ AuLabel(wsum);
23869+ break;
23870+ case Opt_wbr_create:
23871+ u.create = &opt->wbr_create;
23872+ AuDbg("create %d, %s\n", u.create->wbr_create,
23873+ au_optstr_wbr_create(u.create->wbr_create));
23874+ switch (u.create->wbr_create) {
23875+ case AuWbrCreate_MFSV:
23876+ case AuWbrCreate_PMFSV:
23877+ AuDbg("%d sec\n", u.create->mfs_second);
23878+ break;
23879+ case AuWbrCreate_MFSRR:
23880+ AuDbg("%llu watermark\n",
23881+ u.create->mfsrr_watermark);
23882+ break;
23883+ case AuWbrCreate_MFSRRV:
392086de 23884+ case AuWbrCreate_PMFSRRV:
1facf9fc 23885+ AuDbg("%llu watermark, %d sec\n",
23886+ u.create->mfsrr_watermark,
23887+ u.create->mfs_second);
23888+ break;
23889+ }
23890+ break;
23891+ case Opt_wbr_copyup:
23892+ AuDbg("copyup %d, %s\n", opt->wbr_copyup,
23893+ au_optstr_wbr_copyup(opt->wbr_copyup));
23894+ break;
076b876e
AM
23895+ case Opt_fhsm_sec:
23896+ AuDbg("fhsm_sec %u\n", opt->fhsm_second);
23897+ break;
c1595e42
JR
23898+ case Opt_acl:
23899+ AuLabel(acl);
23900+ break;
23901+ case Opt_noacl:
23902+ AuLabel(noacl);
23903+ break;
1facf9fc 23904+ default:
23905+ BUG();
23906+ }
23907+ opt++;
23908+ }
23909+#endif
23910+}
23911+
23912+void au_opts_free(struct au_opts *opts)
23913+{
23914+ struct au_opt *opt;
23915+
23916+ opt = opts->opt;
23917+ while (opt->type != Opt_tail) {
23918+ switch (opt->type) {
23919+ case Opt_add:
23920+ case Opt_append:
23921+ case Opt_prepend:
23922+ path_put(&opt->add.path);
23923+ break;
23924+ case Opt_del:
23925+ case Opt_idel:
23926+ path_put(&opt->del.h_path);
23927+ break;
23928+ case Opt_mod:
23929+ case Opt_imod:
23930+ dput(opt->mod.h_root);
23931+ break;
23932+ case Opt_xino:
23933+ fput(opt->xino.file);
23934+ break;
23935+ }
23936+ opt++;
23937+ }
23938+}
23939+
23940+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
23941+ aufs_bindex_t bindex)
23942+{
23943+ int err;
23944+ struct au_opt_add *add = &opt->add;
23945+ char *p;
23946+
23947+ add->bindex = bindex;
1e00d052 23948+ add->perm = AuBrPerm_RO;
1facf9fc 23949+ add->pathname = opt_str;
23950+ p = strchr(opt_str, '=');
23951+ if (p) {
23952+ *p++ = 0;
23953+ if (*p)
23954+ add->perm = br_perm_val(p);
23955+ }
23956+
23957+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
23958+ if (!err) {
23959+ if (!p) {
23960+ add->perm = AuBrPerm_RO;
23961+ if (au_test_fs_rr(add->path.dentry->d_sb))
23962+ add->perm = AuBrPerm_RR;
23963+ else if (!bindex && !(sb_flags & MS_RDONLY))
23964+ add->perm = AuBrPerm_RW;
23965+ }
23966+ opt->type = Opt_add;
23967+ goto out;
23968+ }
4a4d8108 23969+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 23970+ err = -EINVAL;
23971+
4f0767ce 23972+out:
1facf9fc 23973+ return err;
23974+}
23975+
23976+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
23977+{
23978+ int err;
23979+
23980+ del->pathname = args[0].from;
23981+ AuDbg("del path %s\n", del->pathname);
23982+
23983+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
23984+ if (unlikely(err))
4a4d8108 23985+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 23986+
23987+ return err;
23988+}
23989+
23990+#if 0 /* reserved for future use */
23991+static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
23992+ struct au_opt_del *del, substring_t args[])
23993+{
23994+ int err;
23995+ struct dentry *root;
23996+
23997+ err = -EINVAL;
23998+ root = sb->s_root;
23999+ aufs_read_lock(root, AuLock_FLUSH);
5afbbe0d 24000+ if (bindex < 0 || au_sbbot(sb) < bindex) {
4a4d8108 24001+ pr_err("out of bounds, %d\n", bindex);
1facf9fc 24002+ goto out;
24003+ }
24004+
24005+ err = 0;
24006+ del->h_path.dentry = dget(au_h_dptr(root, bindex));
24007+ del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
24008+
4f0767ce 24009+out:
1facf9fc 24010+ aufs_read_unlock(root, !AuLock_IR);
24011+ return err;
24012+}
24013+#endif
24014+
4a4d8108
AM
24015+static int noinline_for_stack
24016+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 24017+{
24018+ int err;
24019+ struct path path;
24020+ char *p;
24021+
24022+ err = -EINVAL;
24023+ mod->path = args[0].from;
24024+ p = strchr(mod->path, '=');
24025+ if (unlikely(!p)) {
4a4d8108 24026+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 24027+ goto out;
24028+ }
24029+
24030+ *p++ = 0;
24031+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
24032+ if (unlikely(err)) {
4a4d8108 24033+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 24034+ goto out;
24035+ }
24036+
24037+ mod->perm = br_perm_val(p);
24038+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
24039+ mod->h_root = dget(path.dentry);
24040+ path_put(&path);
24041+
4f0767ce 24042+out:
1facf9fc 24043+ return err;
24044+}
24045+
#if 0 /* reserved for future use */
/*
 * Indexed flavour of "mod" (currently compiled out).
 * Converts args[1] into mod->perm and takes a reference on the dentry of
 * the branch at @bindex as mod->h_root.
 * Returns zero on success, -EINVAL when @bindex is not an existing branch.
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbbot(sb)) {
		err = 0;
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
	} else {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
24072+
24073+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
24074+ substring_t args[])
24075+{
24076+ int err;
24077+ struct file *file;
24078+
24079+ file = au_xino_create(sb, args[0].from, /*silent*/0);
24080+ err = PTR_ERR(file);
24081+ if (IS_ERR(file))
24082+ goto out;
24083+
24084+ err = -EINVAL;
2000de60 24085+ if (unlikely(file->f_path.dentry->d_sb == sb)) {
1facf9fc 24086+ fput(file);
4a4d8108 24087+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 24088+ goto out;
24089+ }
24090+
24091+ err = 0;
24092+ xino->file = file;
24093+ xino->path = args[0].from;
24094+
4f0767ce 24095+out:
1facf9fc 24096+ return err;
24097+}
24098+
4a4d8108
AM
24099+static int noinline_for_stack
24100+au_opts_parse_xino_itrunc_path(struct super_block *sb,
24101+ struct au_opt_xino_itrunc *xino_itrunc,
24102+ substring_t args[])
1facf9fc 24103+{
24104+ int err;
5afbbe0d 24105+ aufs_bindex_t bbot, bindex;
1facf9fc 24106+ struct path path;
24107+ struct dentry *root;
24108+
24109+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
24110+ if (unlikely(err)) {
4a4d8108 24111+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 24112+ goto out;
24113+ }
24114+
24115+ xino_itrunc->bindex = -1;
24116+ root = sb->s_root;
24117+ aufs_read_lock(root, AuLock_FLUSH);
5afbbe0d
AM
24118+ bbot = au_sbbot(sb);
24119+ for (bindex = 0; bindex <= bbot; bindex++) {
1facf9fc 24120+ if (au_h_dptr(root, bindex) == path.dentry) {
24121+ xino_itrunc->bindex = bindex;
24122+ break;
24123+ }
24124+ }
24125+ aufs_read_unlock(root, !AuLock_IR);
24126+ path_put(&path);
24127+
24128+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 24129+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 24130+ err = -EINVAL;
24131+ }
24132+
4f0767ce 24133+out:
1facf9fc 24134+ return err;
24135+}
24136+
24137+/* called without aufs lock */
24138+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
24139+{
24140+ int err, n, token;
24141+ aufs_bindex_t bindex;
24142+ unsigned char skipped;
24143+ struct dentry *root;
24144+ struct au_opt *opt, *opt_tail;
24145+ char *opt_str;
24146+ /* reduce the stack space */
24147+ union {
24148+ struct au_opt_xino_itrunc *xino_itrunc;
24149+ struct au_opt_wbr_create *create;
24150+ } u;
24151+ struct {
24152+ substring_t args[MAX_OPT_ARGS];
24153+ } *a;
24154+
24155+ err = -ENOMEM;
24156+ a = kmalloc(sizeof(*a), GFP_NOFS);
24157+ if (unlikely(!a))
24158+ goto out;
24159+
24160+ root = sb->s_root;
24161+ err = 0;
24162+ bindex = 0;
24163+ opt = opts->opt;
24164+ opt_tail = opt + opts->max_opt - 1;
24165+ opt->type = Opt_tail;
24166+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
24167+ err = -EINVAL;
24168+ skipped = 0;
24169+ token = match_token(opt_str, options, a->args);
24170+ switch (token) {
24171+ case Opt_br:
24172+ err = 0;
24173+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
24174+ && *opt_str) {
24175+ err = opt_add(opt, opt_str, opts->sb_flags,
24176+ bindex++);
24177+ if (unlikely(!err && ++opt > opt_tail)) {
24178+ err = -E2BIG;
24179+ break;
24180+ }
24181+ opt->type = Opt_tail;
24182+ skipped = 1;
24183+ }
24184+ break;
24185+ case Opt_add:
24186+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 24187+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24188+ break;
24189+ }
24190+ bindex = n;
24191+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
24192+ bindex);
24193+ if (!err)
24194+ opt->type = token;
24195+ break;
24196+ case Opt_append:
24197+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
24198+ /*dummy bindex*/1);
24199+ if (!err)
24200+ opt->type = token;
24201+ break;
24202+ case Opt_prepend:
24203+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
24204+ /*bindex*/0);
24205+ if (!err)
24206+ opt->type = token;
24207+ break;
24208+ case Opt_del:
24209+ err = au_opts_parse_del(&opt->del, a->args);
24210+ if (!err)
24211+ opt->type = token;
24212+ break;
24213+#if 0 /* reserved for future use */
24214+ case Opt_idel:
24215+ del->pathname = "(indexed)";
24216+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 24217+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24218+ break;
24219+ }
24220+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
24221+ if (!err)
24222+ opt->type = token;
24223+ break;
24224+#endif
24225+ case Opt_mod:
24226+ err = au_opts_parse_mod(&opt->mod, a->args);
24227+ if (!err)
24228+ opt->type = token;
24229+ break;
24230+#ifdef IMOD /* reserved for future use */
24231+ case Opt_imod:
24232+ u.mod->path = "(indexed)";
24233+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 24234+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24235+ break;
24236+ }
24237+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
24238+ if (!err)
24239+ opt->type = token;
24240+ break;
24241+#endif
24242+ case Opt_xino:
24243+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
24244+ if (!err)
24245+ opt->type = token;
24246+ break;
24247+
24248+ case Opt_trunc_xino_path:
24249+ err = au_opts_parse_xino_itrunc_path
24250+ (sb, &opt->xino_itrunc, a->args);
24251+ if (!err)
24252+ opt->type = token;
24253+ break;
24254+
24255+ case Opt_itrunc_xino:
24256+ u.xino_itrunc = &opt->xino_itrunc;
24257+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 24258+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24259+ break;
24260+ }
24261+ u.xino_itrunc->bindex = n;
24262+ aufs_read_lock(root, AuLock_FLUSH);
5afbbe0d 24263+ if (n < 0 || au_sbbot(sb) < n) {
4a4d8108 24264+ pr_err("out of bounds, %d\n", n);
1facf9fc 24265+ aufs_read_unlock(root, !AuLock_IR);
24266+ break;
24267+ }
24268+ aufs_read_unlock(root, !AuLock_IR);
24269+ err = 0;
24270+ opt->type = token;
24271+ break;
24272+
24273+ case Opt_dirwh:
24274+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
24275+ break;
24276+ err = 0;
24277+ opt->type = token;
24278+ break;
24279+
24280+ case Opt_rdcache:
027c5e7a
AM
24281+ if (unlikely(match_int(&a->args[0], &n))) {
24282+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24283+ break;
027c5e7a
AM
24284+ }
24285+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
24286+ pr_err("rdcache must be smaller than %d\n",
24287+ AUFS_RDCACHE_MAX);
24288+ break;
24289+ }
24290+ opt->rdcache = n;
1facf9fc 24291+ err = 0;
24292+ opt->type = token;
24293+ break;
24294+ case Opt_rdblk:
24295+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 24296+ || n < 0
1facf9fc 24297+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 24298+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24299+ break;
24300+ }
1308ab2a 24301+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
24302+ pr_err("rdblk must be larger than %d\n",
24303+ NAME_MAX);
1facf9fc 24304+ break;
24305+ }
24306+ opt->rdblk = n;
24307+ err = 0;
24308+ opt->type = token;
24309+ break;
24310+ case Opt_rdhash:
24311+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 24312+ || n < 0
1facf9fc 24313+ || n * sizeof(struct hlist_head)
24314+ > KMALLOC_MAX_SIZE)) {
4a4d8108 24315+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24316+ break;
24317+ }
24318+ opt->rdhash = n;
24319+ err = 0;
24320+ opt->type = token;
24321+ break;
24322+
24323+ case Opt_trunc_xino:
24324+ case Opt_notrunc_xino:
24325+ case Opt_noxino:
24326+ case Opt_trunc_xib:
24327+ case Opt_notrunc_xib:
dece6358
AM
24328+ case Opt_shwh:
24329+ case Opt_noshwh:
076b876e
AM
24330+ case Opt_dirperm1:
24331+ case Opt_nodirperm1:
1facf9fc 24332+ case Opt_plink:
24333+ case Opt_noplink:
24334+ case Opt_list_plink:
4a4d8108
AM
24335+ case Opt_dio:
24336+ case Opt_nodio:
1facf9fc 24337+ case Opt_diropq_a:
24338+ case Opt_diropq_w:
24339+ case Opt_warn_perm:
24340+ case Opt_nowarn_perm:
1facf9fc 24341+ case Opt_verbose:
24342+ case Opt_noverbose:
24343+ case Opt_sum:
24344+ case Opt_nosum:
24345+ case Opt_wsum:
dece6358
AM
24346+ case Opt_rdblk_def:
24347+ case Opt_rdhash_def:
c1595e42
JR
24348+ case Opt_acl:
24349+ case Opt_noacl:
1facf9fc 24350+ err = 0;
24351+ opt->type = token;
24352+ break;
24353+
24354+ case Opt_udba:
24355+ opt->udba = udba_val(a->args[0].from);
24356+ if (opt->udba >= 0) {
24357+ err = 0;
24358+ opt->type = token;
24359+ } else
4a4d8108 24360+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24361+ break;
24362+
24363+ case Opt_wbr_create:
24364+ u.create = &opt->wbr_create;
24365+ u.create->wbr_create
24366+ = au_wbr_create_val(a->args[0].from, u.create);
24367+ if (u.create->wbr_create >= 0) {
24368+ err = 0;
24369+ opt->type = token;
24370+ } else
4a4d8108 24371+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24372+ break;
24373+ case Opt_wbr_copyup:
24374+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
24375+ if (opt->wbr_copyup >= 0) {
24376+ err = 0;
24377+ opt->type = token;
24378+ } else
4a4d8108 24379+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24380+ break;
24381+
076b876e
AM
24382+ case Opt_fhsm_sec:
24383+ if (unlikely(match_int(&a->args[0], &n)
24384+ || n < 0)) {
24385+ pr_err("bad integer in %s\n", opt_str);
24386+ break;
24387+ }
24388+ if (sysaufs_brs) {
24389+ opt->fhsm_second = n;
24390+ opt->type = token;
24391+ } else
24392+ pr_warn("ignored %s\n", opt_str);
24393+ err = 0;
24394+ break;
24395+
1facf9fc 24396+ case Opt_ignore:
0c3ec466 24397+ pr_warn("ignored %s\n", opt_str);
1facf9fc 24398+ /*FALLTHROUGH*/
24399+ case Opt_ignore_silent:
24400+ skipped = 1;
24401+ err = 0;
24402+ break;
24403+ case Opt_err:
4a4d8108 24404+ pr_err("unknown option %s\n", opt_str);
1facf9fc 24405+ break;
24406+ }
24407+
24408+ if (!err && !skipped) {
24409+ if (unlikely(++opt > opt_tail)) {
24410+ err = -E2BIG;
24411+ opt--;
24412+ opt->type = Opt_tail;
24413+ break;
24414+ }
24415+ opt->type = Opt_tail;
24416+ }
24417+ }
24418+
f0c0a007 24419+ au_delayed_kfree(a);
1facf9fc 24420+ dump_opts(opts);
24421+ if (unlikely(err))
24422+ au_opts_free(opts);
24423+
4f0767ce 24424+out:
1facf9fc 24425+ return err;
24426+}
24427+
24428+static int au_opt_wbr_create(struct super_block *sb,
24429+ struct au_opt_wbr_create *create)
24430+{
24431+ int err;
24432+ struct au_sbinfo *sbinfo;
24433+
dece6358
AM
24434+ SiMustWriteLock(sb);
24435+
1facf9fc 24436+ err = 1; /* handled */
24437+ sbinfo = au_sbi(sb);
24438+ if (sbinfo->si_wbr_create_ops->fin) {
24439+ err = sbinfo->si_wbr_create_ops->fin(sb);
24440+ if (!err)
24441+ err = 1;
24442+ }
24443+
24444+ sbinfo->si_wbr_create = create->wbr_create;
24445+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
24446+ switch (create->wbr_create) {
24447+ case AuWbrCreate_MFSRRV:
24448+ case AuWbrCreate_MFSRR:
392086de
AM
24449+ case AuWbrCreate_PMFSRR:
24450+ case AuWbrCreate_PMFSRRV:
1facf9fc 24451+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
24452+ /*FALLTHROUGH*/
24453+ case AuWbrCreate_MFS:
24454+ case AuWbrCreate_MFSV:
24455+ case AuWbrCreate_PMFS:
24456+ case AuWbrCreate_PMFSV:
e49829fe
JR
24457+ sbinfo->si_wbr_mfs.mfs_expire
24458+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 24459+ break;
24460+ }
24461+
24462+ if (sbinfo->si_wbr_create_ops->init)
24463+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
24464+
24465+ return err;
24466+}
24467+
24468+/*
24469+ * returns,
24470+ * plus: processed without an error
24471+ * zero: unprocessed
24472+ */
24473+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
24474+ struct au_opts *opts)
24475+{
24476+ int err;
24477+ struct au_sbinfo *sbinfo;
24478+
dece6358
AM
24479+ SiMustWriteLock(sb);
24480+
1facf9fc 24481+ err = 1; /* handled */
24482+ sbinfo = au_sbi(sb);
24483+ switch (opt->type) {
24484+ case Opt_udba:
24485+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
24486+ sbinfo->si_mntflags |= opt->udba;
24487+ opts->given_udba |= opt->udba;
24488+ break;
24489+
24490+ case Opt_plink:
24491+ au_opt_set(sbinfo->si_mntflags, PLINK);
24492+ break;
24493+ case Opt_noplink:
24494+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 24495+ au_plink_put(sb, /*verbose*/1);
1facf9fc 24496+ au_opt_clr(sbinfo->si_mntflags, PLINK);
24497+ break;
24498+ case Opt_list_plink:
24499+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
24500+ au_plink_list(sb);
24501+ break;
24502+
4a4d8108
AM
24503+ case Opt_dio:
24504+ au_opt_set(sbinfo->si_mntflags, DIO);
24505+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24506+ break;
24507+ case Opt_nodio:
24508+ au_opt_clr(sbinfo->si_mntflags, DIO);
24509+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24510+ break;
24511+
076b876e
AM
24512+ case Opt_fhsm_sec:
24513+ au_fhsm_set(sbinfo, opt->fhsm_second);
24514+ break;
24515+
1facf9fc 24516+ case Opt_diropq_a:
24517+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24518+ break;
24519+ case Opt_diropq_w:
24520+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24521+ break;
24522+
24523+ case Opt_warn_perm:
24524+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
24525+ break;
24526+ case Opt_nowarn_perm:
24527+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
24528+ break;
24529+
1facf9fc 24530+ case Opt_verbose:
24531+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
24532+ break;
24533+ case Opt_noverbose:
24534+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
24535+ break;
24536+
24537+ case Opt_sum:
24538+ au_opt_set(sbinfo->si_mntflags, SUM);
24539+ break;
24540+ case Opt_wsum:
24541+ au_opt_clr(sbinfo->si_mntflags, SUM);
24542+ au_opt_set(sbinfo->si_mntflags, SUM_W);
24543+ case Opt_nosum:
24544+ au_opt_clr(sbinfo->si_mntflags, SUM);
24545+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
24546+ break;
24547+
24548+ case Opt_wbr_create:
24549+ err = au_opt_wbr_create(sb, &opt->wbr_create);
24550+ break;
24551+ case Opt_wbr_copyup:
24552+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
24553+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
24554+ break;
24555+
24556+ case Opt_dirwh:
24557+ sbinfo->si_dirwh = opt->dirwh;
24558+ break;
24559+
24560+ case Opt_rdcache:
e49829fe
JR
24561+ sbinfo->si_rdcache
24562+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 24563+ break;
24564+ case Opt_rdblk:
24565+ sbinfo->si_rdblk = opt->rdblk;
24566+ break;
dece6358
AM
24567+ case Opt_rdblk_def:
24568+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
24569+ break;
1facf9fc 24570+ case Opt_rdhash:
24571+ sbinfo->si_rdhash = opt->rdhash;
24572+ break;
dece6358
AM
24573+ case Opt_rdhash_def:
24574+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
24575+ break;
24576+
24577+ case Opt_shwh:
24578+ au_opt_set(sbinfo->si_mntflags, SHWH);
24579+ break;
24580+ case Opt_noshwh:
24581+ au_opt_clr(sbinfo->si_mntflags, SHWH);
24582+ break;
1facf9fc 24583+
076b876e
AM
24584+ case Opt_dirperm1:
24585+ au_opt_set(sbinfo->si_mntflags, DIRPERM1);
24586+ break;
24587+ case Opt_nodirperm1:
24588+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
24589+ break;
24590+
1facf9fc 24591+ case Opt_trunc_xino:
24592+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
24593+ break;
24594+ case Opt_notrunc_xino:
24595+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
24596+ break;
24597+
24598+ case Opt_trunc_xino_path:
24599+ case Opt_itrunc_xino:
24600+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
24601+ if (!err)
24602+ err = 1;
24603+ break;
24604+
24605+ case Opt_trunc_xib:
24606+ au_fset_opts(opts->flags, TRUNC_XIB);
24607+ break;
24608+ case Opt_notrunc_xib:
24609+ au_fclr_opts(opts->flags, TRUNC_XIB);
24610+ break;
24611+
c1595e42
JR
24612+ case Opt_acl:
24613+ sb->s_flags |= MS_POSIXACL;
24614+ break;
24615+ case Opt_noacl:
24616+ sb->s_flags &= ~MS_POSIXACL;
24617+ break;
24618+
1facf9fc 24619+ default:
24620+ err = 0;
24621+ break;
24622+ }
24623+
24624+ return err;
24625+}
24626+
24627+/*
24628+ * returns tri-state.
24629+ * plus: processed without an error
24630+ * zero: unprocessed
24631+ * minus: error
24632+ */
24633+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
24634+ struct au_opts *opts)
24635+{
24636+ int err, do_refresh;
24637+
24638+ err = 0;
24639+ switch (opt->type) {
24640+ case Opt_append:
5afbbe0d 24641+ opt->add.bindex = au_sbbot(sb) + 1;
1facf9fc 24642+ if (opt->add.bindex < 0)
24643+ opt->add.bindex = 0;
24644+ goto add;
24645+ case Opt_prepend:
24646+ opt->add.bindex = 0;
f6b6e03d 24647+ add: /* indented label */
1facf9fc 24648+ case Opt_add:
24649+ err = au_br_add(sb, &opt->add,
24650+ au_ftest_opts(opts->flags, REMOUNT));
24651+ if (!err) {
24652+ err = 1;
027c5e7a 24653+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24654+ }
24655+ break;
24656+
24657+ case Opt_del:
24658+ case Opt_idel:
24659+ err = au_br_del(sb, &opt->del,
24660+ au_ftest_opts(opts->flags, REMOUNT));
24661+ if (!err) {
24662+ err = 1;
24663+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 24664+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24665+ }
24666+ break;
24667+
24668+ case Opt_mod:
24669+ case Opt_imod:
24670+ err = au_br_mod(sb, &opt->mod,
24671+ au_ftest_opts(opts->flags, REMOUNT),
24672+ &do_refresh);
24673+ if (!err) {
24674+ err = 1;
027c5e7a
AM
24675+ if (do_refresh)
24676+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24677+ }
24678+ break;
24679+ }
24680+
24681+ return err;
24682+}
24683+
24684+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
24685+ struct au_opt_xino **opt_xino,
24686+ struct au_opts *opts)
24687+{
24688+ int err;
5afbbe0d 24689+ aufs_bindex_t bbot, bindex;
1facf9fc 24690+ struct dentry *root, *parent, *h_root;
24691+
24692+ err = 0;
24693+ switch (opt->type) {
24694+ case Opt_xino:
24695+ err = au_xino_set(sb, &opt->xino,
24696+ !!au_ftest_opts(opts->flags, REMOUNT));
24697+ if (unlikely(err))
24698+ break;
24699+
24700+ *opt_xino = &opt->xino;
24701+ au_xino_brid_set(sb, -1);
24702+
24703+ /* safe d_parent access */
2000de60 24704+ parent = opt->xino.file->f_path.dentry->d_parent;
1facf9fc 24705+ root = sb->s_root;
5afbbe0d
AM
24706+ bbot = au_sbbot(sb);
24707+ for (bindex = 0; bindex <= bbot; bindex++) {
1facf9fc 24708+ h_root = au_h_dptr(root, bindex);
24709+ if (h_root == parent) {
24710+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
24711+ break;
24712+ }
24713+ }
24714+ break;
24715+
24716+ case Opt_noxino:
24717+ au_xino_clr(sb);
24718+ au_xino_brid_set(sb, -1);
24719+ *opt_xino = (void *)-1;
24720+ break;
24721+ }
24722+
24723+ return err;
24724+}
24725+
24726+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24727+ unsigned int pending)
24728+{
076b876e 24729+ int err, fhsm;
5afbbe0d 24730+ aufs_bindex_t bindex, bbot;
79b8bda9 24731+ unsigned char do_plink, skip, do_free, can_no_dreval;
1facf9fc 24732+ struct au_branch *br;
24733+ struct au_wbr *wbr;
79b8bda9 24734+ struct dentry *root, *dentry;
1facf9fc 24735+ struct inode *dir, *h_dir;
24736+ struct au_sbinfo *sbinfo;
24737+ struct au_hinode *hdir;
24738+
dece6358
AM
24739+ SiMustAnyLock(sb);
24740+
1facf9fc 24741+ sbinfo = au_sbi(sb);
24742+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
24743+
dece6358
AM
24744+ if (!(sb_flags & MS_RDONLY)) {
24745+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
0c3ec466 24746+ pr_warn("first branch should be rw\n");
dece6358 24747+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
febd17d6 24748+ pr_warn_once("shwh should be used with ro\n");
dece6358 24749+ }
1facf9fc 24750+
4a4d8108 24751+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 24752+ && !au_opt_test(sbinfo->si_mntflags, XINO))
febd17d6 24753+ pr_warn_once("udba=*notify requires xino\n");
1facf9fc 24754+
076b876e 24755+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
febd17d6
JR
24756+ pr_warn_once("dirperm1 breaks the protection"
24757+ " by the permission bits on the lower branch\n");
076b876e 24758+
1facf9fc 24759+ err = 0;
076b876e 24760+ fhsm = 0;
1facf9fc 24761+ root = sb->s_root;
5527c038 24762+ dir = d_inode(root);
1facf9fc 24763+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
79b8bda9
AM
24764+ can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
24765+ UDBA_NONE);
5afbbe0d
AM
24766+ bbot = au_sbbot(sb);
24767+ for (bindex = 0; !err && bindex <= bbot; bindex++) {
1facf9fc 24768+ skip = 0;
24769+ h_dir = au_h_iptr(dir, bindex);
24770+ br = au_sbr(sb, bindex);
1facf9fc 24771+
c1595e42
JR
24772+ if ((br->br_perm & AuBrAttr_ICEX)
24773+ && !h_dir->i_op->listxattr)
24774+ br->br_perm &= ~AuBrAttr_ICEX;
24775+#if 0
24776+ if ((br->br_perm & AuBrAttr_ICEX_SEC)
24777+ && (au_br_sb(br)->s_flags & MS_NOSEC))
24778+ br->br_perm &= ~AuBrAttr_ICEX_SEC;
24779+#endif
24780+
24781+ do_free = 0;
1facf9fc 24782+ wbr = br->br_wbr;
24783+ if (wbr)
24784+ wbr_wh_read_lock(wbr);
24785+
1e00d052 24786+ if (!au_br_writable(br->br_perm)) {
1facf9fc 24787+ do_free = !!wbr;
24788+ skip = (!wbr
24789+ || (!wbr->wbr_whbase
24790+ && !wbr->wbr_plink
24791+ && !wbr->wbr_orph));
1e00d052 24792+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 24793+ /* skip = (!br->br_whbase && !br->br_orph); */
24794+ skip = (!wbr || !wbr->wbr_whbase);
24795+ if (skip && wbr) {
24796+ if (do_plink)
24797+ skip = !!wbr->wbr_plink;
24798+ else
24799+ skip = !wbr->wbr_plink;
24800+ }
1e00d052 24801+ } else {
1facf9fc 24802+ /* skip = (br->br_whbase && br->br_ohph); */
24803+ skip = (wbr && wbr->wbr_whbase);
24804+ if (skip) {
24805+ if (do_plink)
24806+ skip = !!wbr->wbr_plink;
24807+ else
24808+ skip = !wbr->wbr_plink;
24809+ }
1facf9fc 24810+ }
24811+ if (wbr)
24812+ wbr_wh_read_unlock(wbr);
24813+
79b8bda9
AM
24814+ if (can_no_dreval) {
24815+ dentry = br->br_path.dentry;
24816+ spin_lock(&dentry->d_lock);
24817+ if (dentry->d_flags &
24818+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
24819+ can_no_dreval = 0;
24820+ spin_unlock(&dentry->d_lock);
24821+ }
24822+
076b876e
AM
24823+ if (au_br_fhsm(br->br_perm)) {
24824+ fhsm++;
24825+ AuDebugOn(!br->br_fhsm);
24826+ }
24827+
1facf9fc 24828+ if (skip)
24829+ continue;
24830+
24831+ hdir = au_hi(dir, bindex);
5afbbe0d 24832+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 24833+ if (wbr)
24834+ wbr_wh_write_lock(wbr);
86dc4139 24835+ err = au_wh_init(br, sb);
1facf9fc 24836+ if (wbr)
24837+ wbr_wh_write_unlock(wbr);
5afbbe0d 24838+ au_hn_inode_unlock(hdir);
1facf9fc 24839+
24840+ if (!err && do_free) {
f0c0a007
AM
24841+ if (wbr)
24842+ au_delayed_kfree(wbr);
1facf9fc 24843+ br->br_wbr = NULL;
24844+ }
24845+ }
24846+
79b8bda9
AM
24847+ if (can_no_dreval)
24848+ au_fset_si(sbinfo, NO_DREVAL);
24849+ else
24850+ au_fclr_si(sbinfo, NO_DREVAL);
24851+
c1595e42 24852+ if (fhsm >= 2) {
076b876e 24853+ au_fset_si(sbinfo, FHSM);
5afbbe0d 24854+ for (bindex = bbot; bindex >= 0; bindex--) {
c1595e42
JR
24855+ br = au_sbr(sb, bindex);
24856+ if (au_br_fhsm(br->br_perm)) {
24857+ au_fhsm_set_bottom(sb, bindex);
24858+ break;
24859+ }
24860+ }
24861+ } else {
076b876e 24862+ au_fclr_si(sbinfo, FHSM);
c1595e42
JR
24863+ au_fhsm_set_bottom(sb, -1);
24864+ }
076b876e 24865+
1facf9fc 24866+ return err;
24867+}
24868+
24869+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
24870+{
24871+ int err;
24872+ unsigned int tmp;
5afbbe0d 24873+ aufs_bindex_t bindex, bbot;
1facf9fc 24874+ struct au_opt *opt;
24875+ struct au_opt_xino *opt_xino, xino;
24876+ struct au_sbinfo *sbinfo;
027c5e7a 24877+ struct au_branch *br;
076b876e 24878+ struct inode *dir;
1facf9fc 24879+
dece6358
AM
24880+ SiMustWriteLock(sb);
24881+
1facf9fc 24882+ err = 0;
24883+ opt_xino = NULL;
24884+ opt = opts->opt;
24885+ while (err >= 0 && opt->type != Opt_tail)
24886+ err = au_opt_simple(sb, opt++, opts);
24887+ if (err > 0)
24888+ err = 0;
24889+ else if (unlikely(err < 0))
24890+ goto out;
24891+
24892+ /* disable xino and udba temporary */
24893+ sbinfo = au_sbi(sb);
24894+ tmp = sbinfo->si_mntflags;
24895+ au_opt_clr(sbinfo->si_mntflags, XINO);
24896+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
24897+
24898+ opt = opts->opt;
24899+ while (err >= 0 && opt->type != Opt_tail)
24900+ err = au_opt_br(sb, opt++, opts);
24901+ if (err > 0)
24902+ err = 0;
24903+ else if (unlikely(err < 0))
24904+ goto out;
24905+
5afbbe0d
AM
24906+ bbot = au_sbbot(sb);
24907+ if (unlikely(bbot < 0)) {
1facf9fc 24908+ err = -EINVAL;
4a4d8108 24909+ pr_err("no branches\n");
1facf9fc 24910+ goto out;
24911+ }
24912+
24913+ if (au_opt_test(tmp, XINO))
24914+ au_opt_set(sbinfo->si_mntflags, XINO);
24915+ opt = opts->opt;
24916+ while (!err && opt->type != Opt_tail)
24917+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
24918+ if (unlikely(err))
24919+ goto out;
24920+
24921+ err = au_opts_verify(sb, sb->s_flags, tmp);
24922+ if (unlikely(err))
24923+ goto out;
24924+
24925+ /* restore xino */
24926+ if (au_opt_test(tmp, XINO) && !opt_xino) {
24927+ xino.file = au_xino_def(sb);
24928+ err = PTR_ERR(xino.file);
24929+ if (IS_ERR(xino.file))
24930+ goto out;
24931+
24932+ err = au_xino_set(sb, &xino, /*remount*/0);
24933+ fput(xino.file);
24934+ if (unlikely(err))
24935+ goto out;
24936+ }
24937+
24938+ /* restore udba */
027c5e7a 24939+ tmp &= AuOptMask_UDBA;
1facf9fc 24940+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a 24941+ sbinfo->si_mntflags |= tmp;
5afbbe0d
AM
24942+ bbot = au_sbbot(sb);
24943+ for (bindex = 0; bindex <= bbot; bindex++) {
027c5e7a
AM
24944+ br = au_sbr(sb, bindex);
24945+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
24946+ if (unlikely(err))
24947+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
24948+ bindex, err);
24949+ /* go on even if err */
24950+ }
4a4d8108 24951+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
5527c038 24952+ dir = d_inode(sb->s_root);
4a4d8108 24953+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 24954+ }
24955+
4f0767ce 24956+out:
1facf9fc 24957+ return err;
24958+}
24959+
24960+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
24961+{
24962+ int err, rerr;
79b8bda9 24963+ unsigned char no_dreval;
1facf9fc 24964+ struct inode *dir;
24965+ struct au_opt_xino *opt_xino;
24966+ struct au_opt *opt;
24967+ struct au_sbinfo *sbinfo;
24968+
dece6358
AM
24969+ SiMustWriteLock(sb);
24970+
79b8bda9 24971+ err = 0;
5527c038 24972+ dir = d_inode(sb->s_root);
1facf9fc 24973+ sbinfo = au_sbi(sb);
1facf9fc 24974+ opt_xino = NULL;
24975+ opt = opts->opt;
24976+ while (err >= 0 && opt->type != Opt_tail) {
24977+ err = au_opt_simple(sb, opt, opts);
24978+ if (!err)
24979+ err = au_opt_br(sb, opt, opts);
24980+ if (!err)
24981+ err = au_opt_xino(sb, opt, &opt_xino, opts);
24982+ opt++;
24983+ }
24984+ if (err > 0)
24985+ err = 0;
24986+ AuTraceErr(err);
24987+ /* go on even err */
24988+
79b8bda9 24989+ no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
1facf9fc 24990+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
24991+ if (unlikely(rerr && !err))
24992+ err = rerr;
24993+
79b8bda9 24994+ if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
b95c5147 24995+ au_fset_opts(opts->flags, REFRESH_IDOP);
79b8bda9 24996+
1facf9fc 24997+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
24998+ rerr = au_xib_trunc(sb);
24999+ if (unlikely(rerr && !err))
25000+ err = rerr;
25001+ }
25002+
25003+ /* will be handled by the caller */
027c5e7a 25004+ if (!au_ftest_opts(opts->flags, REFRESH)
79b8bda9
AM
25005+ && (opts->given_udba
25006+ || au_opt_test(sbinfo->si_mntflags, XINO)
b95c5147 25007+ || au_ftest_opts(opts->flags, REFRESH_IDOP)
79b8bda9 25008+ ))
027c5e7a 25009+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 25010+
25011+ AuDbg("status 0x%x\n", opts->flags);
25012+ return err;
25013+}
25014+
25015+/* ---------------------------------------------------------------------- */
25016+
25017+unsigned int au_opt_udba(struct super_block *sb)
25018+{
25019+ return au_mntflags(sb) & AuOptMask_UDBA;
25020+}
7f207e10
AM
25021diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
25022--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 25023+++ linux/fs/aufs/opts.h 2016-10-09 16:55:36.496035060 +0200
79b8bda9 25024@@ -0,0 +1,211 @@
1facf9fc 25025+/*
8cdd5066 25026+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 25027+ *
25028+ * This program, aufs is free software; you can redistribute it and/or modify
25029+ * it under the terms of the GNU General Public License as published by
25030+ * the Free Software Foundation; either version 2 of the License, or
25031+ * (at your option) any later version.
dece6358
AM
25032+ *
25033+ * This program is distributed in the hope that it will be useful,
25034+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25035+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25036+ * GNU General Public License for more details.
25037+ *
25038+ * You should have received a copy of the GNU General Public License
523b37e3 25039+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 25040+ */
25041+
25042+/*
25043+ * mount options/flags
25044+ */
25045+
25046+#ifndef __AUFS_OPTS_H__
25047+#define __AUFS_OPTS_H__
25048+
25049+#ifdef __KERNEL__
25050+
dece6358 25051+#include <linux/path.h>
1facf9fc 25052+
dece6358
AM
25053+struct file;
25054+struct super_block;
25055+
1facf9fc 25056+/* ---------------------------------------------------------------------- */
25057+
/* mount flags */
#define AuOpt_XINO		1		/* external inode number bitmap
						   and translation table */
#define AuOpt_TRUNC_XINO	(1 << 1)	/* truncate xino files */
#define AuOpt_UDBA_NONE		(1 << 2)	/* users direct branch access */
#define AuOpt_UDBA_REVAL	(1 << 3)
#define AuOpt_UDBA_HNOTIFY	(1 << 4)
#define AuOpt_SHWH		(1 << 5)	/* show whiteout */
#define AuOpt_PLINK		(1 << 6)	/* pseudo-link */
#define AuOpt_DIRPERM1		(1 << 7)	/* ignore the lower dir's perm
						   bits */
#define AuOpt_ALWAYS_DIROPQ	(1 << 9)	/* policy to creating diropq */
#define AuOpt_SUM		(1 << 10)	/* summation for statfs(2) */
#define AuOpt_SUM_W		(1 << 11)	/* unimplemented */
#define AuOpt_WARN_PERM		(1 << 12)	/* warn when add-branch */
#define AuOpt_VERBOSE		(1 << 13)	/* busy inode when del-branch */
#define AuOpt_DIO		(1 << 14)	/* direct io */

/* a feature which is configured out gets a zero flag, so it never tests true */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuOpt_UDBA_HNOTIFY
#define AuOpt_UDBA_HNOTIFY	0
#endif
#ifndef CONFIG_AUFS_SHWH
#undef AuOpt_SHWH
#define AuOpt_SHWH		0
#endif

/* the default mount flags */
#define AuOpt_Def	(AuOpt_XINO \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_PLINK \
			 /* | AuOpt_DIRPERM1 */ \
			 | AuOpt_WARN_PERM)
/* all the UDBA levels; they are changed via au_opt_set_udba() only */
#define AuOptMask_UDBA	(AuOpt_UDBA_NONE \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_UDBA_HNOTIFY)

/*
 * test/set/clear a flag given by its name without the AuOpt_ prefix.
 * 'flags' is parenthesized so that an expression such as (a | b) can be
 * passed to au_opt_test() without surprises from operator precedence.
 */
#define au_opt_test(flags, name)	((flags) & AuOpt_##name)
#define au_opt_set(flags, name) do { \
	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
	((flags) |= AuOpt_##name); \
} while (0)
/* replace the current UDBA level by the given one */
#define au_opt_set_udba(flags, name) do { \
	(flags) &= ~AuOptMask_UDBA; \
	((flags) |= AuOpt_##name); \
} while (0)
#define au_opt_clr(flags, name) do { \
	((flags) &= ~AuOpt_##name); \
} while (0)
1facf9fc 25106+
e49829fe
JR
25107+static inline unsigned int au_opts_plink(unsigned int mntflags)
25108+{
25109+#ifdef CONFIG_PROC_FS
25110+ return mntflags;
25111+#else
25112+ return mntflags & ~AuOpt_PLINK;
25113+#endif
25114+}
25115+
1facf9fc 25116+/* ---------------------------------------------------------------------- */
25117+
25118+/* policies to select one among multiple writable branches */
/*
 * Identifiers of the create policies; values are consecutive from 0.
 * AuWbrCreate_Def is an alias of the default policy (TDP).
 */
enum {
	AuWbrCreate_TDP,	/* top down parent */
	AuWbrCreate_RR,		/* round robin */
	AuWbrCreate_MFS,	/* most free space */
	AuWbrCreate_MFSV,	/* mfs with seconds */
	AuWbrCreate_MFSRR,	/* mfs then rr */
	AuWbrCreate_MFSRRV,	/* mfs then rr with seconds */
	AuWbrCreate_PMFS,	/* parent and mfs */
	AuWbrCreate_PMFSV,	/* parent and mfs with seconds */
	AuWbrCreate_PMFSRR,	/* parent, mfs and round-robin */
	AuWbrCreate_PMFSRRV,	/* plus seconds */

	AuWbrCreate_Def = AuWbrCreate_TDP
};
25133+
/*
 * Identifiers of the copy-up policies; values are consecutive from 0.
 * AuWbrCopyup_Def is an alias of the default policy (TDP).
 */
enum {
	AuWbrCopyup_TDP,	/* top down parent */
	AuWbrCopyup_BUP,	/* bottom up parent */
	AuWbrCopyup_BU,		/* bottom up */

	AuWbrCopyup_Def = AuWbrCopyup_TDP
};
25141+
25142+/* ---------------------------------------------------------------------- */
25143+
/*
 * Per-option argument holders. Each of them is a member of the union in
 * struct au_opt.
 * NOTE(review): the option each struct belongs to is inferred from its name
 * only; confirm against the parser in opts.c.
 */

/* presumably the arguments of an "add branch" option */
struct au_opt_add {
	aufs_bindex_t	bindex;
	char		*pathname;
	int		perm;
	struct path	path;
};

/* presumably the arguments of a "delete branch" option */
struct au_opt_del {
	char		*pathname;
	struct path	h_path;
};

/* presumably the arguments of a "modify branch" option */
struct au_opt_mod {
	char		*path;
	int		perm;
	struct dentry	*h_root;
};

/* presumably the arguments of an "xino" option: a path and its opened file */
struct au_opt_xino {
	char		*path;
	struct file	*file;
};

/* presumably the branch index given to an xino truncation option */
struct au_opt_xino_itrunc {
	aufs_bindex_t	bindex;
};

/* a create policy together with its numeric parameters */
struct au_opt_wbr_create {
	int			wbr_create;
	int			mfs_second;
	unsigned long long	mfsrr_watermark;
};
25176+
/*
 * A single option: a type tag plus an anonymous union of the possible
 * arguments.
 * NOTE(review): presumably @type selects which union member is meaningful;
 * confirm against opts.c.
 */
struct au_opt {
	int type;
	union {
		struct au_opt_xino	xino;
		struct au_opt_xino_itrunc xino_itrunc;
		struct au_opt_add	add;
		struct au_opt_del	del;
		struct au_opt_mod	mod;
		int			dirwh;
		int			rdcache;
		unsigned int		rdblk;
		unsigned int		rdhash;
		int			udba;
		struct au_opt_wbr_create wbr_create;
		int			wbr_copyup;
		unsigned int		fhsm_second;
	};
};
25195+
25196+/* opts flags */
/*
 * Bits for a flag word handled by the au_ftest_opts(), au_fset_opts() and
 * au_fclr_opts() helpers below; struct au_opts has such a 'flags' member.
 */
#define AuOpts_REMOUNT		1
#define AuOpts_REFRESH		(1 << 1)
#define AuOpts_TRUNC_XIB	(1 << 2)
#define AuOpts_REFRESH_DYAOP	(1 << 3)
#define AuOpts_REFRESH_IDOP	(1 << 4)
/* evaluates to the masked value (non-zero when AuOpts_<name> is set) */
#define au_ftest_opts(flags, name)	((flags) & AuOpts_##name)
/* turn AuOpts_<name> on in @flags */
#define au_fset_opts(flags, name) \
	do { (flags) |= AuOpts_##name; } while (0)
/* turn AuOpts_<name> off in @flags */
#define au_fclr_opts(flags, name) \
	do { (flags) &= ~AuOpts_##name; } while (0)
1facf9fc 25207+
/*
 * A set of options.
 * NOTE(review): presumably @opt is an array of up to @max_opt entries and
 * @flags holds AuOpts_* bits; confirm against opts.c.
 */
struct au_opts {
	struct au_opt	*opt;
	int		max_opt;

	unsigned int	given_udba;
	unsigned int	flags;
	unsigned long	sb_flags;
};
25216+
25217+/* ---------------------------------------------------------------------- */
25218+
7e9cd9fe 25219+/* opts.c */
076b876e 25220+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
1facf9fc 25221+const char *au_optstr_udba(int udba);
25222+const char *au_optstr_wbr_copyup(int wbr_copyup);
25223+const char *au_optstr_wbr_create(int wbr_create);
25224+
25225+void au_opts_free(struct au_opts *opts);
25226+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
25227+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
25228+ unsigned int pending);
25229+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
25230+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
25231+
25232+unsigned int au_opt_udba(struct super_block *sb);
25233+
1facf9fc 25234+#endif /* __KERNEL__ */
25235+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
25236diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
25237--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 25238+++ linux/fs/aufs/plink.c 2016-10-09 16:55:36.496035060 +0200
f0c0a007 25239@@ -0,0 +1,514 @@
1facf9fc 25240+/*
8cdd5066 25241+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 25242+ *
25243+ * This program, aufs is free software; you can redistribute it and/or modify
25244+ * it under the terms of the GNU General Public License as published by
25245+ * the Free Software Foundation; either version 2 of the License, or
25246+ * (at your option) any later version.
dece6358
AM
25247+ *
25248+ * This program is distributed in the hope that it will be useful,
25249+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25250+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25251+ * GNU General Public License for more details.
25252+ *
25253+ * You should have received a copy of the GNU General Public License
523b37e3 25254+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 25255+ */
25256+
25257+/*
25258+ * pseudo-link
25259+ */
25260+
25261+#include "aufs.h"
25262+
25263+/*
e49829fe 25264+ * the pseudo-link maintenance mode.
1facf9fc 25265+ * while a user process maintains the pseudo-links,
25266+ * prohibit adding a new plink and branch manipulation.
e49829fe
JR
25267+ *
25268+ * Flags
25269+ * NOPLM:
25270+ * For entry functions which will handle plink, and i_mutex is already held
25271+ * in VFS.
25272+ * They cannot wait and should return an error at once.
25273+ * Callers have to check the error.
25274+ * NOPLMW:
25275+ * For entry functions which will handle plink, but i_mutex is not held
25276+ * in VFS.
25277+ * They can wait the plink maintenance mode to finish.
25278+ *
25279+ * They behave like F_SETLK and F_SETLKW.
25280+ * If the caller never handles plink, then both flags are unnecessary.
1facf9fc 25281+ */
e49829fe
JR
25282+
/*
 * Check the pseudo-link maintenance mode on behalf of the current task.
 *
 * @sb: the aufs super_block; SiMustAnyLock() asserts the caller holds its
 *	si lock
 * @flags: AuLock_* bits; NOPLMW, NOPLM and FLUSH are examined here
 *
 * Returns 0 at once when PLINK is not enabled, when nobody is in the
 * maintenance mode, or when the maintainer is the current task or one of
 * its ancestors (found by walking ->real_parent).
 * Otherwise, with NOPLMW the function waits until the maintenance mode ends
 * (dropping and re-taking the si read lock around the wait) and returns 0;
 * with NOPLM it returns -EAGAIN; with neither flag it returns 0.
 */
int au_plink_maint(struct super_block *sb, int flags)
{
	int err;
	pid_t pid, ppid;
	struct task_struct *parent, *prev;
	struct au_sbinfo *sbi;

	SiMustAnyLock(sb);

	err = 0;
	if (!au_opt_test(au_mntflags(sb), PLINK))
		goto out;

	sbi = au_sbi(sb);
	pid = sbi->si_plink_maint_pid;
	if (!pid || pid == current->pid)
		goto out;

	/* todo: it highly depends upon /sbin/mount.aufs */
	prev = NULL;
	parent = current;
	ppid = 0;
	rcu_read_lock();
	/*
	 * climb the ancestors; the walk stops when ->real_parent yields the
	 * same task twice in a row
	 */
	while (1) {
		parent = rcu_dereference(parent->real_parent);
		if (parent == prev)
			break;
		ppid = task_pid_vnr(parent);
		if (pid == ppid) {
			/* an ancestor is the maintainer, let it through */
			rcu_read_unlock();
			goto out;
		}
		prev = parent;
	}
	rcu_read_unlock();

	if (au_ftest_lock(flags, NOPLMW)) {
		/* if there is no i_mutex lock in VFS, we don't need to wait */
		/* AuDebugOn(!lockdep_depth(current)); */
		while (sbi->si_plink_maint_pid) {
			/* release the si lock while sleeping */
			si_read_unlock(sb);
			/* gave up wake_up_bit() */
			wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);

			if (au_ftest_lock(flags, FLUSH))
				au_nwt_flush(&sbi->si_nowait);
			si_noflush_read_lock(sb);
		}
	} else if (au_ftest_lock(flags, NOPLM)) {
		AuDbg("ppid %d, pid %d\n", ppid, pid);
		err = -EAGAIN;
	}

out:
	return err;
}
25339+
/*
 * Leave the pseudo-link maintenance mode: clear the recorded pid under
 * si_plink_maint_lock, then wake up every waiter on si_plink_wq.
 */
void au_plink_maint_leave(struct au_sbinfo *sbinfo)
{
	spin_lock(&sbinfo->si_plink_maint_lock);
	sbinfo->si_plink_maint_pid = 0;
	spin_unlock(&sbinfo->si_plink_maint_lock);
	/* the waiters test si_plink_maint_pid, so wake them after clearing */
	wake_up_all(&sbinfo->si_plink_wq);
}
25347+
e49829fe 25348+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
25349+{
25350+ int err;
4a4d8108
AM
25351+ struct au_sbinfo *sbinfo;
25352+
25353+ err = 0;
4a4d8108
AM
25354+ sbinfo = au_sbi(sb);
25355+ /* make sure i am the only one in this fs */
e49829fe
JR
25356+ si_write_lock(sb, AuLock_FLUSH);
25357+ if (au_opt_test(au_mntflags(sb), PLINK)) {
25358+ spin_lock(&sbinfo->si_plink_maint_lock);
25359+ if (!sbinfo->si_plink_maint_pid)
25360+ sbinfo->si_plink_maint_pid = current->pid;
25361+ else
25362+ err = -EBUSY;
25363+ spin_unlock(&sbinfo->si_plink_maint_lock);
25364+ }
4a4d8108
AM
25365+ si_write_unlock(sb);
25366+
25367+ return err;
1facf9fc 25368+}
25369+
25370+/* ---------------------------------------------------------------------- */
25371+
#ifdef CONFIG_AUFS_DEBUG
/*
 * Debug aid: print (via AuDbg) the inode number of every entry on every
 * pseudo-link hash list. Each list is walked under rcu_read_lock().
 */
void au_plink_list(struct super_block *sb)
{
	int slot;
	struct au_sbinfo *sbinfo;
	struct hlist_head *head;
	struct au_icntnr *pos;

	SiMustAnyLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	for (slot = 0; slot < AuPlink_NHASH; slot++) {
		head = &sbinfo->si_plink[slot].head;
		rcu_read_lock();
		hlist_for_each_entry_rcu(pos, head, plink)
			AuDbg("%lu\n", pos->vfs_inode.i_ino);
		rcu_read_unlock();
	}
}
#endif
25395+
25396+/* is the inode pseudo-linked? */
25397+int au_plink_test(struct inode *inode)
25398+{
86dc4139 25399+ int found, i;
1facf9fc 25400+ struct au_sbinfo *sbinfo;
86dc4139 25401+ struct hlist_head *plink_hlist;
5afbbe0d 25402+ struct au_icntnr *icntnr;
1facf9fc 25403+
25404+ sbinfo = au_sbi(inode->i_sb);
dece6358 25405+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 25406+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 25407+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 25408+
25409+ found = 0;
86dc4139
AM
25410+ i = au_plink_hash(inode->i_ino);
25411+ plink_hlist = &sbinfo->si_plink[i].head;
4a4d8108 25412+ rcu_read_lock();
5afbbe0d
AM
25413+ hlist_for_each_entry_rcu(icntnr, plink_hlist, plink)
25414+ if (&icntnr->vfs_inode == inode) {
1facf9fc 25415+ found = 1;
25416+ break;
25417+ }
4a4d8108 25418+ rcu_read_unlock();
1facf9fc 25419+ return found;
25420+}
25421+
25422+/* ---------------------------------------------------------------------- */
25423+
25424+/*
25425+ * generate a name for plink.
25426+ * the file will be stored under AUFS_WH_PLINKDIR.
25427+ */
25428+/* 20 is max digits length of ulong 64 */
25429+#define PLINK_NAME_LEN ((20 + 1) * 2)
25430+
25431+static int plink_name(char *name, int len, struct inode *inode,
25432+ aufs_bindex_t bindex)
25433+{
25434+ int rlen;
25435+ struct inode *h_inode;
25436+
25437+ h_inode = au_h_iptr(inode, bindex);
25438+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
25439+ return rlen;
25440+}
25441+
7f207e10
AM
/*
 * Argument pack which lets au_do_plink_lkup() run via au_wkq_wait();
 * the result is stored through @errp.
 */
struct au_do_plink_lkup_args {
	struct dentry **errp;
	struct qstr *tgtname;
	struct dentry *h_parent;
	struct au_branch *br;
};
25448+
25449+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
25450+ struct dentry *h_parent,
25451+ struct au_branch *br)
25452+{
25453+ struct dentry *h_dentry;
febd17d6 25454+ struct inode *h_inode;
7f207e10 25455+
febd17d6
JR
25456+ h_inode = d_inode(h_parent);
25457+ inode_lock_nested(h_inode, AuLsc_I_CHILD2);
b4510431 25458+ h_dentry = vfsub_lkup_one(tgtname, h_parent);
febd17d6 25459+ inode_unlock(h_inode);
7f207e10
AM
25460+ return h_dentry;
25461+}
25462+
/*
 * Adapter for au_wkq_wait(): unpack the arguments, call
 * au_do_plink_lkup() and store its result through ->errp.
 */
static void au_call_do_plink_lkup(void *args)
{
	struct au_do_plink_lkup_args *a = args;
	*a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
}
25468+
1facf9fc 25469+/* lookup the plink-ed @inode under the branch at @bindex */
25470+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
25471+{
25472+ struct dentry *h_dentry, *h_parent;
25473+ struct au_branch *br;
7f207e10 25474+ int wkq_err;
1facf9fc 25475+ char a[PLINK_NAME_LEN];
0c3ec466 25476+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25477+
e49829fe
JR
25478+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
25479+
1facf9fc 25480+ br = au_sbr(inode->i_sb, bindex);
25481+ h_parent = br->br_wbr->wbr_plink;
1facf9fc 25482+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25483+
2dfbb274 25484+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
7f207e10
AM
25485+ struct au_do_plink_lkup_args args = {
25486+ .errp = &h_dentry,
25487+ .tgtname = &tgtname,
25488+ .h_parent = h_parent,
25489+ .br = br
25490+ };
25491+
25492+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
25493+ if (unlikely(wkq_err))
25494+ h_dentry = ERR_PTR(wkq_err);
25495+ } else
25496+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
25497+
1facf9fc 25498+ return h_dentry;
25499+}
25500+
25501+/* create a pseudo-link */
/*
 * Make the name @tgt under @h_parent a hard link to @h_dentry.
 *
 * Runs with the inode lock of @h_parent held (subclass AuLsc_I_CHILD2).
 * When @tgt already exists but refers to another inode, it is unlinked and
 * the lookup is repeated. When @tgt does not exist, it is created by
 * vfsub_link(). When @tgt already refers to the inode of @h_dentry, nothing
 * is done.
 * Returns 0 on success, or the error from the lookup, unlink or link.
 */
static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
		      struct dentry *h_dentry, struct au_branch *br)
{
	int err;
	struct path h_path = {
		.mnt = au_br_mnt(br)
	};
	struct inode *h_dir, *delegated;

	h_dir = d_inode(h_parent);
	inode_lock_nested(h_dir, AuLsc_I_CHILD2);
again:
	h_path.dentry = vfsub_lkup_one(tgt, h_parent);
	err = PTR_ERR(h_path.dentry);
	if (IS_ERR(h_path.dentry))
		goto out;

	err = 0;
	/* wh.plink dir is not monitored */
	/* todo: is it really safe? */
	if (d_is_positive(h_path.dentry)
	    && d_inode(h_path.dentry) != d_inode(h_dentry)) {
		/* a stale entry with the same name, remove it and retry */
		delegated = NULL;
		err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
		if (unlikely(err == -EWOULDBLOCK)) {
			pr_warn("cannot retry for NFSv4 delegation"
				" for an internal unlink\n");
			iput(delegated);
		}
		dput(h_path.dentry);
		/* the final dput() below becomes dput(NULL) on error */
		h_path.dentry = NULL;
		if (!err)
			goto again;
	}
	if (!err && d_is_negative(h_path.dentry)) {
		delegated = NULL;
		err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
		if (unlikely(err == -EWOULDBLOCK)) {
			pr_warn("cannot retry for NFSv4 delegation"
				" for an internal link\n");
			iput(delegated);
		}
	}
	dput(h_path.dentry);

out:
	inode_unlock(h_dir);
	return err;
}
25551+
/*
 * Argument pack which lets do_whplink() run via au_wkq_wait();
 * the result is stored through @errp.
 */
struct do_whplink_args {
	int *errp;
	struct qstr *tgt;
	struct dentry *h_parent;
	struct dentry *h_dentry;
	struct au_branch *br;
};
25559+
/*
 * Adapter for au_wkq_wait(): unpack the arguments, call do_whplink()
 * and store its result through ->errp.
 */
static void call_do_whplink(void *args)
{
	struct do_whplink_args *a = args;
	*a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
}
25565+
25566+static int whplink(struct dentry *h_dentry, struct inode *inode,
25567+ aufs_bindex_t bindex, struct au_branch *br)
25568+{
25569+ int err, wkq_err;
25570+ struct au_wbr *wbr;
25571+ struct dentry *h_parent;
1facf9fc 25572+ char a[PLINK_NAME_LEN];
0c3ec466 25573+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25574+
25575+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
25576+ h_parent = wbr->wbr_plink;
1facf9fc 25577+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25578+
25579+ /* always superio. */
2dfbb274 25580+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
1facf9fc 25581+ struct do_whplink_args args = {
25582+ .errp = &err,
25583+ .tgt = &tgtname,
25584+ .h_parent = h_parent,
25585+ .h_dentry = h_dentry,
25586+ .br = br
25587+ };
25588+ wkq_err = au_wkq_wait(call_do_whplink, &args);
25589+ if (unlikely(wkq_err))
25590+ err = wkq_err;
25591+ } else
25592+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 25593+
25594+ return err;
25595+}
25596+
1facf9fc 25597+/*
25598+ * create a new pseudo-link for @h_dentry on @bindex.
25599+ * the linked inode is held in aufs @inode.
25600+ */
/*
 * Register @inode on the pseudo-link list and create its plink entry on
 * the branch @bindex via whplink().
 *
 * Nothing is done when the inode is already listed. A whplink() failure is
 * only warned about: the inode is taken off the list again and the function
 * still returns nothing.
 */
void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
		     struct dentry *h_dentry)
{
	struct super_block *sb;
	struct au_sbinfo *sbinfo;
	struct hlist_head *plink_hlist;
	struct au_icntnr *icntnr;
	struct au_sphlhead *sphl;
	int found, err, cnt, i;

	sb = inode->i_sb;
	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	/* quick check without the spinlock */
	found = au_plink_test(inode);
	if (found)
		return;

	i = au_plink_hash(inode->i_ino);
	sphl = sbinfo->si_plink + i;
	plink_hlist = &sphl->head;
	/* NOTE(review): presumably takes the reference kept by the list */
	au_igrab(inode);

	/* check again under the lock, someone may have added it meanwhile */
	spin_lock(&sphl->spin);
	hlist_for_each_entry(icntnr, plink_hlist, plink) {
		if (&icntnr->vfs_inode == inode) {
			found = 1;
			break;
		}
	}
	if (!found) {
		icntnr = container_of(inode, struct au_icntnr, vfs_inode);
		hlist_add_head_rcu(&icntnr->plink, plink_hlist);
	}
	spin_unlock(&sphl->spin);
	if (!found) {
		cnt = au_sphl_count(sphl);
#define msg "unexpectedly unblanced or too many pseudo-links"
		if (cnt > AUFS_PLINK_WARN)
			AuWarn1(msg ", %d\n", cnt);
#undef msg
		err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
		if (unlikely(err)) {
			/* undo the registration made above */
			pr_warn("err %d, damaged pseudo link.\n", err);
			au_sphl_del_rcu(&icntnr->plink, sphl);
			iput(&icntnr->vfs_inode);
		}
	} else
		/* already listed, drop what au_igrab() took above */
		iput(&icntnr->vfs_inode);
}
25652+
25653+/* free all plinks */
e49829fe 25654+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 25655+{
86dc4139 25656+ int i, warned;
1facf9fc 25657+ struct au_sbinfo *sbinfo;
86dc4139
AM
25658+ struct hlist_head *plink_hlist;
25659+ struct hlist_node *tmp;
5afbbe0d 25660+ struct au_icntnr *icntnr;
1facf9fc 25661+
dece6358
AM
25662+ SiMustWriteLock(sb);
25663+
1facf9fc 25664+ sbinfo = au_sbi(sb);
25665+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25666+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25667+
1facf9fc 25668+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25669+ warned = 0;
25670+ for (i = 0; i < AuPlink_NHASH; i++) {
25671+ plink_hlist = &sbinfo->si_plink[i].head;
25672+ if (!warned && verbose && !hlist_empty(plink_hlist)) {
25673+ pr_warn("pseudo-link is not flushed");
25674+ warned = 1;
25675+ }
5afbbe0d
AM
25676+ hlist_for_each_entry_safe(icntnr, tmp, plink_hlist, plink)
25677+ iput(&icntnr->vfs_inode);
86dc4139
AM
25678+ INIT_HLIST_HEAD(plink_hlist);
25679+ }
1facf9fc 25680+}
25681+
e49829fe
JR
/*
 * Take aufs_write_lock() on the root dentry and, when PLINK is enabled,
 * drop all pseudo-links via au_plink_put().
 */
void au_plink_clean(struct super_block *sb, int verbose)
{
	struct dentry *root;

	root = sb->s_root;
	aufs_write_lock(root);
	if (au_opt_test(au_mntflags(sb), PLINK))
		au_plink_put(sb, verbose);
	aufs_write_unlock(root);
}
25692+
86dc4139
AM
25693+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
25694+{
25695+ int do_put;
5afbbe0d 25696+ aufs_bindex_t btop, bbot, bindex;
86dc4139
AM
25697+
25698+ do_put = 0;
5afbbe0d
AM
25699+ btop = au_ibtop(inode);
25700+ bbot = au_ibbot(inode);
25701+ if (btop >= 0) {
25702+ for (bindex = btop; bindex <= bbot; bindex++) {
86dc4139
AM
25703+ if (!au_h_iptr(inode, bindex)
25704+ || au_ii_br_id(inode, bindex) != br_id)
25705+ continue;
25706+ au_set_h_iptr(inode, bindex, NULL, 0);
25707+ do_put = 1;
25708+ break;
25709+ }
25710+ if (do_put)
5afbbe0d 25711+ for (bindex = btop; bindex <= bbot; bindex++)
86dc4139
AM
25712+ if (au_h_iptr(inode, bindex)) {
25713+ do_put = 0;
25714+ break;
25715+ }
25716+ } else
25717+ do_put = 1;
25718+
25719+ return do_put;
25720+}
25721+
1facf9fc 25722+/* free the plinks on a branch specified by @br_id */
/*
 * Walk every pseudo-link hash list and, for each listed inode, detach its
 * lower inode on the branch @br_id. An inode which ends up without a lower
 * inode is removed from the list.
 * The caller holds the si write lock (SiMustWriteLock), which is why the
 * per-list spinlock is not taken.
 */
void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
{
	struct au_sbinfo *sbinfo;
	struct hlist_head *plink_hlist;
	struct hlist_node *tmp;
	struct au_icntnr *icntnr;
	struct inode *inode;
	int i, do_put;

	SiMustWriteLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	/* no spin_lock since sbinfo is write-locked */
	for (i = 0; i < AuPlink_NHASH; i++) {
		plink_hlist = &sbinfo->si_plink[i].head;
		hlist_for_each_entry_safe(icntnr, tmp, plink_hlist, plink) {
			/* NOTE(review): presumably a temporary reference */
			inode = au_igrab(&icntnr->vfs_inode);
			ii_write_lock_child(inode);
			do_put = au_plink_do_half_refresh(inode, br_id);
			if (do_put) {
				/*
				 * NOTE(review): presumably this iput() drops
				 * the reference owned by the list; confirm
				 */
				hlist_del(&icntnr->plink);
				iput(inode);
			}
			ii_write_unlock(inode);
			/* pairs with au_igrab() above */
			iput(inode);
		}
	}
}
7f207e10
AM
25754diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
25755--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 25756+++ linux/fs/aufs/poll.c 2016-10-09 16:55:36.496035060 +0200
b912730e 25757@@ -0,0 +1,52 @@
dece6358 25758+/*
8cdd5066 25759+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
25760+ *
25761+ * This program, aufs is free software; you can redistribute it and/or modify
25762+ * it under the terms of the GNU General Public License as published by
25763+ * the Free Software Foundation; either version 2 of the License, or
25764+ * (at your option) any later version.
25765+ *
25766+ * This program is distributed in the hope that it will be useful,
25767+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25768+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25769+ * GNU General Public License for more details.
25770+ *
25771+ * You should have received a copy of the GNU General Public License
523b37e3 25772+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358
AM
25773+ */
25774+
1308ab2a 25775+/*
25776+ * poll operation
25777+ * There is only one filesystem which implements ->poll operation, currently.
25778+ */
25779+
25780+#include "aufs.h"
25781+
/*
 * ->poll for aufs: forward the request to the file obtained from
 * au_read_pre().
 *
 * Returns POLLERR when au_read_pre() fails, DEFAULT_POLLMASK when that
 * file has no ->poll operation, otherwise the mask its ->poll returns.
 * The si read lock is held for the duration of the call.
 */
unsigned int aufs_poll(struct file *file, poll_table *wait)
{
	unsigned int mask;
	int err;
	struct file *h_file;
	struct super_block *sb;

	/* We should pretend an error happened. */
	mask = POLLERR /* | POLLIN | POLLOUT */;
	sb = file->f_path.dentry->d_sb;
	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	h_file = au_read_pre(file, /*keep_fi*/0);
	/* NOTE(review): err is assigned but never read afterwards */
	err = PTR_ERR(h_file);
	if (IS_ERR(h_file))
		goto out;

	/* it is not an error if h_file has no operation */
	mask = DEFAULT_POLLMASK;
	if (h_file->f_op->poll)
		mask = h_file->f_op->poll(h_file, wait);
	fput(h_file); /* instead of au_read_post() */

out:
	si_read_unlock(sb);
	AuTraceErr((int)mask);
	return mask;
}
c1595e42
JR
25810diff -urN /usr/share/empty/fs/aufs/posix_acl.c linux/fs/aufs/posix_acl.c
25811--- /usr/share/empty/fs/aufs/posix_acl.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 25812+++ linux/fs/aufs/posix_acl.c 2016-10-09 16:55:36.496035060 +0200
8cdd5066 25813@@ -0,0 +1,98 @@
c1595e42 25814+/*
8cdd5066 25815+ * Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
25816+ *
25817+ * This program, aufs is free software; you can redistribute it and/or modify
25818+ * it under the terms of the GNU General Public License as published by
25819+ * the Free Software Foundation; either version 2 of the License, or
25820+ * (at your option) any later version.
25821+ *
25822+ * This program is distributed in the hope that it will be useful,
25823+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25824+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25825+ * GNU General Public License for more details.
25826+ *
25827+ * You should have received a copy of the GNU General Public License
25828+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25829+ */
25830+
25831+/*
25832+ * posix acl operations
25833+ */
25834+
25835+#include <linux/fs.h>
c1595e42
JR
25836+#include "aufs.h"
25837+
/*
 * Get the ACL of @type for @inode from its topmost lower inode.
 *
 * Returns NULL when the super_block does not have MS_POSIXACL set,
 * ERR_PTR(au_busy_or_stale()) when the topmost lower inode is missing or
 * its file type differs from the one of @inode, otherwise the result of
 * get_acl() on the lower inode.
 * The si read lock and the ii read lock are held during the operation.
 */
struct posix_acl *aufs_get_acl(struct inode *inode, int type)
{
	struct posix_acl *acl;
	int err;
	aufs_bindex_t bindex;
	struct inode *h_inode;
	struct super_block *sb;

	acl = NULL;
	sb = inode->i_sb;
	si_read_lock(sb, AuLock_FLUSH);
	ii_read_lock_child(inode);
	if (!(sb->s_flags & MS_POSIXACL))
		goto out;

	bindex = au_ibtop(inode);
	h_inode = au_h_iptr(inode, bindex);
	/* compare the file type bits only */
	if (unlikely(!h_inode
		     || ((h_inode->i_mode & S_IFMT)
			 != (inode->i_mode & S_IFMT)))) {
		err = au_busy_or_stale();
		acl = ERR_PTR(err);
		goto out;
	}

	/* always topmost only */
	acl = get_acl(h_inode, type);

out:
	ii_read_unlock(inode);
	si_read_unlock(sb);

	AuTraceErrPtr(acl);
	return acl;
}
25873+
25874+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
25875+{
25876+ int err;
25877+ ssize_t ssz;
25878+ struct dentry *dentry;
25879+ struct au_srxattr arg = {
25880+ .type = AU_ACL_SET,
25881+ .u.acl_set = {
25882+ .acl = acl,
25883+ .type = type
25884+ },
25885+ };
25886+
5afbbe0d
AM
25887+ IMustLock(inode);
25888+
c1595e42
JR
25889+ if (inode->i_ino == AUFS_ROOT_INO)
25890+ dentry = dget(inode->i_sb->s_root);
25891+ else {
25892+ dentry = d_find_alias(inode);
25893+ if (!dentry)
25894+ dentry = d_find_any_alias(inode);
25895+ if (!dentry) {
25896+ pr_warn("cannot handle this inode, "
25897+ "please report to aufs-users ML\n");
25898+ err = -ENOENT;
25899+ goto out;
25900+ }
25901+ }
25902+
5afbbe0d 25903+ ssz = au_srxattr(dentry, inode, &arg);
c1595e42
JR
25904+ dput(dentry);
25905+ err = ssz;
25906+ if (ssz >= 0)
25907+ err = 0;
25908+
25909+out:
c1595e42
JR
25910+ return err;
25911+}
7f207e10
AM
25912diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
25913--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 25914+++ linux/fs/aufs/procfs.c 2016-10-09 16:55:36.496035060 +0200
523b37e3 25915@@ -0,0 +1,169 @@
e49829fe 25916+/*
8cdd5066 25917+ * Copyright (C) 2010-2016 Junjiro R. Okajima
e49829fe
JR
25918+ *
25919+ * This program, aufs is free software; you can redistribute it and/or modify
25920+ * it under the terms of the GNU General Public License as published by
25921+ * the Free Software Foundation; either version 2 of the License, or
25922+ * (at your option) any later version.
25923+ *
25924+ * This program is distributed in the hope that it will be useful,
25925+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25926+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25927+ * GNU General Public License for more details.
25928+ *
25929+ * You should have received a copy of the GNU General Public License
523b37e3 25930+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
25931+ */
25932+
25933+/*
25934+ * procfs interfaces
25935+ */
25936+
25937+#include <linux/proc_fs.h>
25938+#include "aufs.h"
25939+
/*
 * ->release of the plink maintenance file.
 * When an sbinfo was attached to the file (private_data), leave the
 * maintenance mode and drop the kobject reference on it. Always returns 0.
 */
static int au_procfs_plm_release(struct inode *inode, struct file *file)
{
	struct au_sbinfo *sbinfo;

	sbinfo = file->private_data;
	if (sbinfo) {
		au_plink_maint_leave(sbinfo);
		kobject_put(&sbinfo->si_kobj);
	}

	return 0;
}
25952+
/*
 * Handle the "clean" command: when an sbinfo is attached to the file,
 * call au_plink_clean() on its super_block, non-verbosely.
 * Without an attached sbinfo this is a no-op.
 */
static void au_procfs_plm_write_clean(struct file *file)
{
	struct au_sbinfo *sbinfo;

	sbinfo = file->private_data;
	if (sbinfo)
		au_plink_clean(sbinfo->si_sb, /*verbose*/0);
}
25961+
/*
 * Handle the "si=<id>" command: find the sbinfo whose sysaufs_si_id()
 * equals @id and enter the plink maintenance mode for it.
 *
 * Returns 0 on success, with the sbinfo attached to file->private_data
 * and its kobject reference kept.
 * Returns -EBUSY when the file already has an sbinfo attached, -EINVAL
 * when no sbinfo matches @id, or the error from au_plink_maint_enter().
 */
static int au_procfs_plm_write_si(struct file *file, unsigned long id)
{
	int err;
	struct super_block *sb;
	struct au_sbinfo *sbinfo;

	err = -EBUSY;
	if (unlikely(file->private_data))
		goto out;

	sb = NULL;
	/* don't use au_sbilist_lock() here */
	spin_lock(&au_sbilist.spin);
	hlist_for_each_entry(sbinfo, &au_sbilist.head, si_list)
		if (id == sysaufs_si_id(sbinfo)) {
			/* pin the sbinfo before dropping the list lock */
			kobject_get(&sbinfo->si_kobj);
			sb = sbinfo->si_sb;
			break;
		}
	spin_unlock(&au_sbilist.spin);

	/* sbinfo is meaningful only when sb was set in the loop above */
	err = -EINVAL;
	if (unlikely(!sb))
		goto out;

	err = au_plink_maint_enter(sb);
	if (!err)
		/* keep kobject_get() */
		file->private_data = sbinfo;
	else
		kobject_put(&sbinfo->si_kobj);
out:
	return err;
}
25996+
25997+/*
25998+ * Accept a valid "si=xxxx" only.
25999+ * Once it is accepted successfully, accept "clean" too.
26000+ */
26001+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
26002+ size_t count, loff_t *ppos)
26003+{
26004+ ssize_t err;
26005+ unsigned long id;
26006+ /* last newline is allowed */
26007+ char buf[3 + sizeof(unsigned long) * 2 + 1];
26008+
26009+ err = -EACCES;
26010+ if (unlikely(!capable(CAP_SYS_ADMIN)))
26011+ goto out;
26012+
26013+ err = -EINVAL;
26014+ if (unlikely(count > sizeof(buf)))
26015+ goto out;
26016+
26017+ err = copy_from_user(buf, ubuf, count);
26018+ if (unlikely(err)) {
26019+ err = -EFAULT;
26020+ goto out;
26021+ }
26022+ buf[count] = 0;
26023+
26024+ err = -EINVAL;
26025+ if (!strcmp("clean", buf)) {
26026+ au_procfs_plm_write_clean(file);
26027+ goto out_success;
26028+ } else if (unlikely(strncmp("si=", buf, 3)))
26029+ goto out;
26030+
9dbd164d 26031+ err = kstrtoul(buf + 3, 16, &id);
e49829fe
JR
26032+ if (unlikely(err))
26033+ goto out;
26034+
26035+ err = au_procfs_plm_write_si(file, id);
26036+ if (unlikely(err))
26037+ goto out;
26038+
26039+out_success:
26040+ err = count; /* success */
26041+out:
26042+ return err;
26043+}
26044+
/* file operations of the plink maintenance file: write and release only */
static const struct file_operations au_procfs_plm_fop = {
	.write		= au_procfs_plm_write,
	.release	= au_procfs_plm_release,
	.owner		= THIS_MODULE
};
26050+
26051+/* ---------------------------------------------------------------------- */
26052+
26053+static struct proc_dir_entry *au_procfs_dir;
26054+
/*
 * Remove the procfs entries: first the maintenance file under
 * au_procfs_dir, then the directory itself.
 */
void au_procfs_fin(void)
{
	remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
	remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
}
26060+
26061+int __init au_procfs_init(void)
26062+{
26063+ int err;
26064+ struct proc_dir_entry *entry;
26065+
26066+ err = -ENOMEM;
26067+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
26068+ if (unlikely(!au_procfs_dir))
26069+ goto out;
26070+
26071+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
26072+ au_procfs_dir, &au_procfs_plm_fop);
26073+ if (unlikely(!entry))
26074+ goto out_dir;
26075+
26076+ err = 0;
26077+ goto out; /* success */
26078+
26079+
26080+out_dir:
26081+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
26082+out:
26083+ return err;
26084+}
7f207e10
AM
26085diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
26086--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 26087+++ linux/fs/aufs/rdu.c 2016-10-09 16:55:36.496035060 +0200
5afbbe0d 26088@@ -0,0 +1,381 @@
1308ab2a 26089+/*
8cdd5066 26090+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1308ab2a 26091+ *
26092+ * This program, aufs is free software; you can redistribute it and/or modify
26093+ * it under the terms of the GNU General Public License as published by
26094+ * the Free Software Foundation; either version 2 of the License, or
26095+ * (at your option) any later version.
26096+ *
26097+ * This program is distributed in the hope that it will be useful,
26098+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26099+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26100+ * GNU General Public License for more details.
26101+ *
26102+ * You should have received a copy of the GNU General Public License
523b37e3 26103+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1308ab2a 26104+ */
26105+
26106+/*
26107+ * readdir in userspace.
26108+ */
26109+
b752ccd1 26110+#include <linux/compat.h>
4a4d8108 26111+#include <linux/fs_stack.h>
1308ab2a 26112+#include <linux/security.h>
1308ab2a 26113+#include "aufs.h"
26114+
26115+/* bits for struct aufs_rdu.flags */
26116+#define AuRdu_CALLED 1
26117+#define AuRdu_CONT (1 << 1)
26118+#define AuRdu_FULL (1 << 2)
26119+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
7f207e10
AM
26120+#define au_fset_rdu(flags, name) \
26121+ do { (flags) |= AuRdu_##name; } while (0)
26122+#define au_fclr_rdu(flags, name) \
26123+ do { (flags) &= ~AuRdu_##name; } while (0)
1308ab2a 26124+
26125+struct au_rdu_arg {
392086de 26126+ struct dir_context ctx;
1308ab2a 26127+ struct aufs_rdu *rdu;
26128+ union au_rdu_ent_ul ent;
26129+ unsigned long end;
26130+
26131+ struct super_block *sb;
26132+ int err;
26133+};
26134+
392086de 26135+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
1308ab2a 26136+ loff_t offset, u64 h_ino, unsigned int d_type)
26137+{
26138+ int err, len;
392086de 26139+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
1308ab2a 26140+ struct aufs_rdu *rdu = arg->rdu;
26141+ struct au_rdu_ent ent;
26142+
26143+ err = 0;
26144+ arg->err = 0;
26145+ au_fset_rdu(rdu->cookie.flags, CALLED);
26146+ len = au_rdu_len(nlen);
26147+ if (arg->ent.ul + len < arg->end) {
26148+ ent.ino = h_ino;
26149+ ent.bindex = rdu->cookie.bindex;
26150+ ent.type = d_type;
26151+ ent.nlen = nlen;
4a4d8108
AM
26152+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
26153+ ent.type = DT_UNKNOWN;
1308ab2a 26154+
9dbd164d 26155+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 26156+ err = -EFAULT;
26157+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
26158+ goto out;
26159+ if (copy_to_user(arg->ent.e->name, name, nlen))
26160+ goto out;
26161+ /* the terminating NULL */
26162+ if (__put_user(0, arg->ent.e->name + nlen))
26163+ goto out;
26164+ err = 0;
26165+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
26166+ arg->ent.ul += len;
26167+ rdu->rent++;
26168+ } else {
26169+ err = -EFAULT;
26170+ au_fset_rdu(rdu->cookie.flags, FULL);
26171+ rdu->full = 1;
26172+ rdu->tail = arg->ent;
26173+ }
26174+
4f0767ce 26175+out:
1308ab2a 26176+ /* AuTraceErr(err); */
26177+ return err;
26178+}
26179+
26180+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
26181+{
26182+ int err;
26183+ loff_t offset;
26184+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
26185+
92d182d2 26186+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
1308ab2a 26187+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
26188+ err = offset;
26189+ if (unlikely(offset != cookie->h_pos))
26190+ goto out;
26191+
26192+ err = 0;
26193+ do {
26194+ arg->err = 0;
26195+ au_fclr_rdu(cookie->flags, CALLED);
26196+ /* smp_mb(); */
392086de 26197+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1308ab2a 26198+ if (err >= 0)
26199+ err = arg->err;
26200+ } while (!err
26201+ && au_ftest_rdu(cookie->flags, CALLED)
26202+ && !au_ftest_rdu(cookie->flags, FULL));
26203+ cookie->h_pos = h_file->f_pos;
26204+
4f0767ce 26205+out:
1308ab2a 26206+ AuTraceErr(err);
26207+ return err;
26208+}
26209+
26210+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
26211+{
26212+ int err;
5afbbe0d 26213+ aufs_bindex_t bbot;
392086de
AM
26214+ struct au_rdu_arg arg = {
26215+ .ctx = {
2000de60 26216+ .actor = au_rdu_fill
392086de
AM
26217+ }
26218+ };
1308ab2a 26219+ struct dentry *dentry;
26220+ struct inode *inode;
26221+ struct file *h_file;
26222+ struct au_rdu_cookie *cookie = &rdu->cookie;
26223+
26224+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
26225+ if (unlikely(err)) {
26226+ err = -EFAULT;
26227+ AuTraceErr(err);
26228+ goto out;
26229+ }
26230+ rdu->rent = 0;
26231+ rdu->tail = rdu->ent;
26232+ rdu->full = 0;
26233+ arg.rdu = rdu;
26234+ arg.ent = rdu->ent;
26235+ arg.end = arg.ent.ul;
26236+ arg.end += rdu->sz;
26237+
26238+ err = -ENOTDIR;
5afbbe0d 26239+ if (unlikely(!file->f_op->iterate && !file->f_op->iterate_shared))
1308ab2a 26240+ goto out;
26241+
26242+ err = security_file_permission(file, MAY_READ);
26243+ AuTraceErr(err);
26244+ if (unlikely(err))
26245+ goto out;
26246+
2000de60 26247+ dentry = file->f_path.dentry;
5527c038 26248+ inode = d_inode(dentry);
5afbbe0d 26249+ inode_lock_shared(inode);
1308ab2a 26250+
26251+ arg.sb = inode->i_sb;
e49829fe
JR
26252+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
26253+ if (unlikely(err))
26254+ goto out_mtx;
027c5e7a
AM
26255+ err = au_alive_dir(dentry);
26256+ if (unlikely(err))
26257+ goto out_si;
e49829fe 26258+ /* todo: reval? */
1308ab2a 26259+ fi_read_lock(file);
26260+
26261+ err = -EAGAIN;
26262+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
26263+ && cookie->generation != au_figen(file)))
26264+ goto out_unlock;
26265+
26266+ err = 0;
26267+ if (!rdu->blk) {
26268+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
26269+ if (!rdu->blk)
26270+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
26271+ }
5afbbe0d
AM
26272+ bbot = au_fbtop(file);
26273+ if (cookie->bindex < bbot)
26274+ cookie->bindex = bbot;
26275+ bbot = au_fbbot_dir(file);
26276+ /* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
26277+ for (; !err && cookie->bindex <= bbot;
1308ab2a 26278+ cookie->bindex++, cookie->h_pos = 0) {
4a4d8108 26279+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 26280+ if (!h_file)
26281+ continue;
26282+
26283+ au_fclr_rdu(cookie->flags, FULL);
26284+ err = au_rdu_do(h_file, &arg);
26285+ AuTraceErr(err);
26286+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
26287+ break;
26288+ }
26289+ AuDbg("rent %llu\n", rdu->rent);
26290+
26291+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
26292+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
26293+ au_fset_rdu(cookie->flags, CONT);
26294+ cookie->generation = au_figen(file);
26295+ }
26296+
26297+ ii_read_lock_child(inode);
5afbbe0d 26298+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
1308ab2a 26299+ ii_read_unlock(inode);
26300+
4f0767ce 26301+out_unlock:
1308ab2a 26302+ fi_read_unlock(file);
027c5e7a 26303+out_si:
1308ab2a 26304+ si_read_unlock(arg.sb);
4f0767ce 26305+out_mtx:
5afbbe0d 26306+ inode_unlock_shared(inode);
4f0767ce 26307+out:
1308ab2a 26308+ AuTraceErr(err);
26309+ return err;
26310+}
26311+
26312+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
26313+{
26314+ int err;
26315+ ino_t ino;
26316+ unsigned long long nent;
26317+ union au_rdu_ent_ul *u;
26318+ struct au_rdu_ent ent;
26319+ struct super_block *sb;
26320+
26321+ err = 0;
26322+ nent = rdu->nent;
26323+ u = &rdu->ent;
2000de60 26324+ sb = file->f_path.dentry->d_sb;
1308ab2a 26325+ si_read_lock(sb, AuLock_FLUSH);
26326+ while (nent-- > 0) {
9dbd164d 26327+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 26328+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
26329+ if (!err)
26330+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 26331+ if (unlikely(err)) {
26332+ err = -EFAULT;
26333+ AuTraceErr(err);
26334+ break;
26335+ }
26336+
26337+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
26338+ if (!ent.wh)
26339+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
26340+ else
26341+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
26342+ &ino);
26343+ if (unlikely(err)) {
26344+ AuTraceErr(err);
26345+ break;
26346+ }
26347+
26348+ err = __put_user(ino, &u->e->ino);
26349+ if (unlikely(err)) {
26350+ err = -EFAULT;
26351+ AuTraceErr(err);
26352+ break;
26353+ }
26354+ u->ul += au_rdu_len(ent.nlen);
26355+ }
26356+ si_read_unlock(sb);
26357+
26358+ return err;
26359+}
26360+
26361+/* ---------------------------------------------------------------------- */
26362+
26363+static int au_rdu_verify(struct aufs_rdu *rdu)
26364+{
b752ccd1 26365+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 26366+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 26367+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 26368+ rdu->blk,
26369+ rdu->rent, rdu->shwh, rdu->full,
26370+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
26371+ rdu->cookie.generation);
dece6358 26372+
b752ccd1 26373+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 26374+ return 0;
dece6358 26375+
b752ccd1
AM
26376+ AuDbg("%u:%u\n",
26377+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 26378+ return -EINVAL;
26379+}
26380+
26381+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 26382+{
1308ab2a 26383+ long err, e;
26384+ struct aufs_rdu rdu;
26385+ void __user *p = (void __user *)arg;
dece6358 26386+
1308ab2a 26387+ err = copy_from_user(&rdu, p, sizeof(rdu));
26388+ if (unlikely(err)) {
26389+ err = -EFAULT;
26390+ AuTraceErr(err);
26391+ goto out;
26392+ }
26393+ err = au_rdu_verify(&rdu);
dece6358
AM
26394+ if (unlikely(err))
26395+ goto out;
26396+
1308ab2a 26397+ switch (cmd) {
26398+ case AUFS_CTL_RDU:
26399+ err = au_rdu(file, &rdu);
26400+ if (unlikely(err))
26401+ break;
dece6358 26402+
1308ab2a 26403+ e = copy_to_user(p, &rdu, sizeof(rdu));
26404+ if (unlikely(e)) {
26405+ err = -EFAULT;
26406+ AuTraceErr(err);
26407+ }
26408+ break;
26409+ case AUFS_CTL_RDU_INO:
26410+ err = au_rdu_ino(file, &rdu);
26411+ break;
26412+
26413+ default:
4a4d8108 26414+ /* err = -ENOTTY; */
1308ab2a 26415+ err = -EINVAL;
26416+ }
dece6358 26417+
4f0767ce 26418+out:
1308ab2a 26419+ AuTraceErr(err);
26420+ return err;
1facf9fc 26421+}
b752ccd1
AM
26422+
26423+#ifdef CONFIG_COMPAT
26424+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
26425+{
26426+ long err, e;
26427+ struct aufs_rdu rdu;
26428+ void __user *p = compat_ptr(arg);
26429+
26430+ /* todo: get_user()? */
26431+ err = copy_from_user(&rdu, p, sizeof(rdu));
26432+ if (unlikely(err)) {
26433+ err = -EFAULT;
26434+ AuTraceErr(err);
26435+ goto out;
26436+ }
26437+ rdu.ent.e = compat_ptr(rdu.ent.ul);
26438+ err = au_rdu_verify(&rdu);
26439+ if (unlikely(err))
26440+ goto out;
26441+
26442+ switch (cmd) {
26443+ case AUFS_CTL_RDU:
26444+ err = au_rdu(file, &rdu);
26445+ if (unlikely(err))
26446+ break;
26447+
26448+ rdu.ent.ul = ptr_to_compat(rdu.ent.e);
26449+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
26450+ e = copy_to_user(p, &rdu, sizeof(rdu));
26451+ if (unlikely(e)) {
26452+ err = -EFAULT;
26453+ AuTraceErr(err);
26454+ }
26455+ break;
26456+ case AUFS_CTL_RDU_INO:
26457+ err = au_rdu_ino(file, &rdu);
26458+ break;
26459+
26460+ default:
26461+ /* err = -ENOTTY; */
26462+ err = -EINVAL;
26463+ }
26464+
4f0767ce 26465+out:
b752ccd1
AM
26466+ AuTraceErr(err);
26467+ return err;
26468+}
26469+#endif
7f207e10
AM
26470diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
26471--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 26472+++ linux/fs/aufs/rwsem.h 2016-10-09 16:55:36.496035060 +0200
5afbbe0d 26473@@ -0,0 +1,198 @@
1facf9fc 26474+/*
8cdd5066 26475+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26476+ *
26477+ * This program, aufs is free software; you can redistribute it and/or modify
26478+ * it under the terms of the GNU General Public License as published by
26479+ * the Free Software Foundation; either version 2 of the License, or
26480+ * (at your option) any later version.
dece6358
AM
26481+ *
26482+ * This program is distributed in the hope that it will be useful,
26483+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26484+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26485+ * GNU General Public License for more details.
26486+ *
26487+ * You should have received a copy of the GNU General Public License
523b37e3 26488+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26489+ */
26490+
26491+/*
26492+ * simple read-write semaphore wrappers
26493+ */
26494+
26495+#ifndef __AUFS_RWSEM_H__
26496+#define __AUFS_RWSEM_H__
26497+
26498+#ifdef __KERNEL__
26499+
4a4d8108 26500+#include "debug.h"
dece6358
AM
26501+
26502+struct au_rwsem {
26503+ struct rw_semaphore rwsem;
26504+#ifdef CONFIG_AUFS_DEBUG
26505+ /* just for debugging, not almighty counter */
26506+ atomic_t rcnt, wcnt;
26507+#endif
26508+};
26509+
5afbbe0d
AM
26510+#ifdef CONFIG_LOCKDEP
26511+#define au_lockdep_set_name(rw) \
26512+ lockdep_set_class_and_name(&(rw)->rwsem, \
26513+ /*original key*/(rw)->rwsem.dep_map.key, \
26514+ /*name*/#rw)
26515+#else
26516+#define au_lockdep_set_name(rw) do {} while (0)
26517+#endif
26518+
dece6358
AM
26519+#ifdef CONFIG_AUFS_DEBUG
26520+#define AuDbgCntInit(rw) do { \
26521+ atomic_set(&(rw)->rcnt, 0); \
26522+ atomic_set(&(rw)->wcnt, 0); \
26523+ smp_mb(); /* atomic set */ \
26524+} while (0)
26525+
5afbbe0d
AM
26526+#define AuDbgCnt(rw, cnt) atomic_read(&(rw)->cnt)
26527+#define AuDbgCntInc(rw, cnt) atomic_inc(&(rw)->cnt)
26528+#define AuDbgCntDec(rw, cnt) WARN_ON(atomic_dec_return(&(rw)->cnt) < 0)
26529+#define AuDbgRcntInc(rw) AuDbgCntInc(rw, rcnt)
26530+#define AuDbgRcntDec(rw) AuDbgCntDec(rw, rcnt)
26531+#define AuDbgWcntInc(rw) AuDbgCntInc(rw, wcnt)
26532+#define AuDbgWcntDec(rw) AuDbgCntDec(rw, wcnt)
dece6358 26533+#else
5afbbe0d 26534+#define AuDbgCnt(rw, cnt) 0
dece6358
AM
26535+#define AuDbgCntInit(rw) do {} while (0)
26536+#define AuDbgRcntInc(rw) do {} while (0)
26537+#define AuDbgRcntDec(rw) do {} while (0)
26538+#define AuDbgWcntInc(rw) do {} while (0)
26539+#define AuDbgWcntDec(rw) do {} while (0)
26540+#endif /* CONFIG_AUFS_DEBUG */
26541+
26542+/* to debug easier, do not make them inlined functions */
5afbbe0d 26543+#define AuRwMustNoWaiters(rw) AuDebugOn(rwsem_is_contended(&(rw)->rwsem))
dece6358 26544+/* rwsem_is_locked() is unusable */
5afbbe0d
AM
26545+#define AuRwMustReadLock(rw) AuDebugOn(AuDbgCnt(rw, rcnt) <= 0)
26546+#define AuRwMustWriteLock(rw) AuDebugOn(AuDbgCnt(rw, wcnt) <= 0)
26547+#define AuRwMustAnyLock(rw) AuDebugOn(AuDbgCnt(rw, rcnt) <= 0 \
26548+ && AuDbgCnt(rw, wcnt) <= 0)
26549+#define AuRwDestroy(rw) AuDebugOn(AuDbgCnt(rw, rcnt) \
26550+ || AuDbgCnt(rw, wcnt))
26551+
26552+#define au_rw_init(rw) do { \
26553+ AuDbgCntInit(rw); \
26554+ init_rwsem(&(rw)->rwsem); \
26555+ au_lockdep_set_name(rw); \
26556+ } while (0)
dece6358 26557+
5afbbe0d
AM
26558+#define au_rw_init_wlock(rw) do { \
26559+ au_rw_init(rw); \
26560+ down_write(&(rw)->rwsem); \
26561+ AuDbgWcntInc(rw); \
26562+ } while (0)
dece6358 26563+
5afbbe0d
AM
26564+#define au_rw_init_wlock_nested(rw, lsc) do { \
26565+ au_rw_init(rw); \
26566+ down_write_nested(&(rw)->rwsem, lsc); \
26567+ AuDbgWcntInc(rw); \
26568+ } while (0)
dece6358
AM
26569+
26570+static inline void au_rw_read_lock(struct au_rwsem *rw)
26571+{
26572+ down_read(&rw->rwsem);
26573+ AuDbgRcntInc(rw);
26574+}
26575+
26576+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
26577+{
26578+ down_read_nested(&rw->rwsem, lsc);
26579+ AuDbgRcntInc(rw);
26580+}
26581+
26582+static inline void au_rw_read_unlock(struct au_rwsem *rw)
26583+{
26584+ AuRwMustReadLock(rw);
26585+ AuDbgRcntDec(rw);
26586+ up_read(&rw->rwsem);
26587+}
26588+
26589+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
26590+{
26591+ AuRwMustWriteLock(rw);
26592+ AuDbgRcntInc(rw);
26593+ AuDbgWcntDec(rw);
26594+ downgrade_write(&rw->rwsem);
26595+}
26596+
26597+static inline void au_rw_write_lock(struct au_rwsem *rw)
26598+{
26599+ down_write(&rw->rwsem);
26600+ AuDbgWcntInc(rw);
26601+}
26602+
26603+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
26604+ unsigned int lsc)
26605+{
26606+ down_write_nested(&rw->rwsem, lsc);
26607+ AuDbgWcntInc(rw);
26608+}
1facf9fc 26609+
dece6358
AM
26610+static inline void au_rw_write_unlock(struct au_rwsem *rw)
26611+{
26612+ AuRwMustWriteLock(rw);
26613+ AuDbgWcntDec(rw);
26614+ up_write(&rw->rwsem);
26615+}
26616+
26617+/* why is not _nested version defined */
26618+static inline int au_rw_read_trylock(struct au_rwsem *rw)
26619+{
076b876e
AM
26620+ int ret;
26621+
26622+ ret = down_read_trylock(&rw->rwsem);
dece6358
AM
26623+ if (ret)
26624+ AuDbgRcntInc(rw);
26625+ return ret;
26626+}
26627+
26628+static inline int au_rw_write_trylock(struct au_rwsem *rw)
26629+{
076b876e
AM
26630+ int ret;
26631+
26632+ ret = down_write_trylock(&rw->rwsem);
dece6358
AM
26633+ if (ret)
26634+ AuDbgWcntInc(rw);
26635+ return ret;
26636+}
26637+
5afbbe0d 26638+#undef AuDbgCntDec
dece6358
AM
26639+#undef AuDbgRcntInc
26640+#undef AuDbgRcntDec
dece6358 26641+#undef AuDbgWcntDec
1facf9fc 26642+
26643+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26644+static inline void prefix##_read_lock(param) \
dece6358 26645+{ au_rw_read_lock(rwsem); } \
1facf9fc 26646+static inline void prefix##_write_lock(param) \
dece6358 26647+{ au_rw_write_lock(rwsem); } \
1facf9fc 26648+static inline int prefix##_read_trylock(param) \
dece6358 26649+{ return au_rw_read_trylock(rwsem); } \
1facf9fc 26650+static inline int prefix##_write_trylock(param) \
dece6358 26651+{ return au_rw_write_trylock(rwsem); }
1facf9fc 26652+/* why is not _nested version defined */
26653+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 26654+{ au_rw_read_trylock_nested(rwsem, lsc)); }
1facf9fc 26655+static inline void prefix##_write_trylock_nestd(param, lsc)
dece6358 26656+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 26657+
26658+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
26659+static inline void prefix##_read_unlock(param) \
dece6358 26660+{ au_rw_read_unlock(rwsem); } \
1facf9fc 26661+static inline void prefix##_write_unlock(param) \
dece6358 26662+{ au_rw_write_unlock(rwsem); } \
1facf9fc 26663+static inline void prefix##_downgrade_lock(param) \
dece6358 26664+{ au_rw_dgrade_lock(rwsem); }
1facf9fc 26665+
26666+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
26667+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26668+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
26669+
26670+#endif /* __KERNEL__ */
26671+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
26672diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
26673--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
26674+++ linux/fs/aufs/sbinfo.c 2016-10-09 16:55:38.889431135 +0200
26675@@ -0,0 +1,355 @@
1facf9fc 26676+/*
8cdd5066 26677+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26678+ *
26679+ * This program, aufs is free software; you can redistribute it and/or modify
26680+ * it under the terms of the GNU General Public License as published by
26681+ * the Free Software Foundation; either version 2 of the License, or
26682+ * (at your option) any later version.
dece6358
AM
26683+ *
26684+ * This program is distributed in the hope that it will be useful,
26685+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26686+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26687+ * GNU General Public License for more details.
26688+ *
26689+ * You should have received a copy of the GNU General Public License
523b37e3 26690+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26691+ */
26692+
26693+/*
26694+ * superblock private data
26695+ */
26696+
26697+#include "aufs.h"
26698+
26699+/*
26700+ * they are necessary regardless sysfs is disabled.
26701+ */
26702+void au_si_free(struct kobject *kobj)
26703+{
86dc4139 26704+ int i;
1facf9fc 26705+ struct au_sbinfo *sbinfo;
b752ccd1 26706+ char *locked __maybe_unused; /* debug only */
1facf9fc 26707+
26708+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
86dc4139
AM
26709+ for (i = 0; i < AuPlink_NHASH; i++)
26710+ AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head));
f0c0a007 26711+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
5afbbe0d
AM
26712+
26713+ AuDebugOn(percpu_counter_sum(&sbinfo->si_ninodes));
26714+ percpu_counter_destroy(&sbinfo->si_ninodes);
26715+ AuDebugOn(percpu_counter_sum(&sbinfo->si_nfiles));
26716+ percpu_counter_destroy(&sbinfo->si_nfiles);
1facf9fc 26717+
e49829fe 26718+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 26719+ au_br_free(sbinfo);
e49829fe 26720+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1 26721+
f0c0a007 26722+ au_delayed_kfree(sbinfo->si_branch);
febd17d6 26723+ for (i = 0; i < AU_NPIDMAP; i++)
f0c0a007
AM
26724+ if (sbinfo->au_si_pid.pid_bitmap[i])
26725+ au_delayed_kfree(sbinfo->au_si_pid.pid_bitmap[i]);
febd17d6 26726+ mutex_destroy(&sbinfo->au_si_pid.pid_mtx);
1facf9fc 26727+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 26728+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 26729+
f0c0a007 26730+ au_delayed_kfree(sbinfo);
1facf9fc 26731+}
26732+
26733+int au_si_alloc(struct super_block *sb)
26734+{
86dc4139 26735+ int err, i;
1facf9fc 26736+ struct au_sbinfo *sbinfo;
26737+
26738+ err = -ENOMEM;
4a4d8108 26739+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 26740+ if (unlikely(!sbinfo))
26741+ goto out;
26742+
26743+ /* will be reallocated separately */
26744+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
26745+ if (unlikely(!sbinfo->si_branch))
febd17d6 26746+ goto out_sbinfo;
1facf9fc 26747+
1facf9fc 26748+ err = sysaufs_si_init(sbinfo);
26749+ if (unlikely(err))
26750+ goto out_br;
26751+
26752+ au_nwt_init(&sbinfo->si_nowait);
dece6358 26753+ au_rw_init_wlock(&sbinfo->si_rwsem);
febd17d6 26754+ mutex_init(&sbinfo->au_si_pid.pid_mtx);
b752ccd1 26755+
5afbbe0d
AM
26756+ percpu_counter_init(&sbinfo->si_ninodes, 0, GFP_NOFS);
26757+ percpu_counter_init(&sbinfo->si_nfiles, 0, GFP_NOFS);
7f207e10 26758+
5afbbe0d 26759+ sbinfo->si_bbot = -1;
392086de 26760+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
1facf9fc 26761+
26762+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
26763+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
26764+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
26765+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 26766+
076b876e
AM
26767+ au_fhsm_init(sbinfo);
26768+
e49829fe 26769+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 26770+
392086de
AM
26771+ sbinfo->si_xino_jiffy = jiffies;
26772+ sbinfo->si_xino_expire
26773+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
1facf9fc 26774+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 26775+ sbinfo->si_xino_brid = -1;
26776+ /* leave si_xib_last_pindex and si_xib_next_bit */
26777+
b912730e
AM
26778+ au_sphl_init(&sbinfo->si_aopen);
26779+
e49829fe 26780+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 26781+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
26782+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
26783+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
26784+
86dc4139
AM
26785+ for (i = 0; i < AuPlink_NHASH; i++)
26786+ au_sphl_init(sbinfo->si_plink + i);
1facf9fc 26787+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 26788+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 26789+
523b37e3
AM
26790+ au_sphl_init(&sbinfo->si_files);
26791+
b95c5147
AM
26792+ /* with getattr by default */
26793+ sbinfo->si_iop_array = aufs_iop;
26794+
1facf9fc 26795+ /* leave other members for sysaufs and si_mnt. */
26796+ sbinfo->si_sb = sb;
26797+ sb->s_fs_info = sbinfo;
b752ccd1 26798+ si_pid_set(sb);
1facf9fc 26799+ return 0; /* success */
26800+
4f0767ce 26801+out_br:
f0c0a007 26802+ au_delayed_kfree(sbinfo->si_branch);
4f0767ce 26803+out_sbinfo:
f0c0a007 26804+ au_delayed_kfree(sbinfo);
4f0767ce 26805+out:
1facf9fc 26806+ return err;
26807+}
26808+
e2f27e51 26809+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink)
1facf9fc 26810+{
26811+ int err, sz;
26812+ struct au_branch **brp;
26813+
dece6358
AM
26814+ AuRwMustWriteLock(&sbinfo->si_rwsem);
26815+
1facf9fc 26816+ err = -ENOMEM;
5afbbe0d 26817+ sz = sizeof(*brp) * (sbinfo->si_bbot + 1);
1facf9fc 26818+ if (unlikely(!sz))
26819+ sz = sizeof(*brp);
e2f27e51
AM
26820+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS,
26821+ may_shrink);
1facf9fc 26822+ if (brp) {
26823+ sbinfo->si_branch = brp;
26824+ err = 0;
26825+ }
26826+
26827+ return err;
26828+}
26829+
26830+/* ---------------------------------------------------------------------- */
26831+
26832+unsigned int au_sigen_inc(struct super_block *sb)
26833+{
26834+ unsigned int gen;
5527c038 26835+ struct inode *inode;
1facf9fc 26836+
dece6358
AM
26837+ SiMustWriteLock(sb);
26838+
1facf9fc 26839+ gen = ++au_sbi(sb)->si_generation;
26840+ au_update_digen(sb->s_root);
5527c038
JR
26841+ inode = d_inode(sb->s_root);
26842+ au_update_iigen(inode, /*half*/0);
26843+ inode->i_version++;
1facf9fc 26844+ return gen;
26845+}
26846+
26847+aufs_bindex_t au_new_br_id(struct super_block *sb)
26848+{
26849+ aufs_bindex_t br_id;
26850+ int i;
26851+ struct au_sbinfo *sbinfo;
26852+
dece6358
AM
26853+ SiMustWriteLock(sb);
26854+
1facf9fc 26855+ sbinfo = au_sbi(sb);
26856+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
26857+ br_id = ++sbinfo->si_last_br_id;
7f207e10 26858+ AuDebugOn(br_id < 0);
1facf9fc 26859+ if (br_id && au_br_index(sb, br_id) < 0)
26860+ return br_id;
26861+ }
26862+
26863+ return -1;
26864+}
26865+
26866+/* ---------------------------------------------------------------------- */
26867+
e49829fe
JR
26868+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
26869+int si_read_lock(struct super_block *sb, int flags)
26870+{
26871+ int err;
26872+
26873+ err = 0;
26874+ if (au_ftest_lock(flags, FLUSH))
26875+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26876+
26877+ si_noflush_read_lock(sb);
26878+ err = au_plink_maint(sb, flags);
26879+ if (unlikely(err))
26880+ si_read_unlock(sb);
26881+
26882+ return err;
26883+}
26884+
26885+int si_write_lock(struct super_block *sb, int flags)
26886+{
26887+ int err;
26888+
26889+ if (au_ftest_lock(flags, FLUSH))
26890+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26891+
26892+ si_noflush_write_lock(sb);
26893+ err = au_plink_maint(sb, flags);
26894+ if (unlikely(err))
26895+ si_write_unlock(sb);
26896+
26897+ return err;
26898+}
26899+
1facf9fc 26900+/* dentry and super_block lock. call at entry point */
e49829fe 26901+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 26902+{
e49829fe 26903+ int err;
027c5e7a 26904+ struct super_block *sb;
e49829fe 26905+
027c5e7a
AM
26906+ sb = dentry->d_sb;
26907+ err = si_read_lock(sb, flags);
26908+ if (unlikely(err))
26909+ goto out;
26910+
26911+ if (au_ftest_lock(flags, DW))
26912+ di_write_lock_child(dentry);
26913+ else
26914+ di_read_lock_child(dentry, flags);
26915+
26916+ if (au_ftest_lock(flags, GEN)) {
26917+ err = au_digen_test(dentry, au_sigen(sb));
79b8bda9
AM
26918+ if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
26919+ AuDebugOn(!err && au_dbrange_test(dentry));
26920+ else if (!err)
26921+ err = au_dbrange_test(dentry);
027c5e7a
AM
26922+ if (unlikely(err))
26923+ aufs_read_unlock(dentry, flags);
e49829fe
JR
26924+ }
26925+
027c5e7a 26926+out:
e49829fe 26927+ return err;
1facf9fc 26928+}
26929+
26930+void aufs_read_unlock(struct dentry *dentry, int flags)
26931+{
26932+ if (au_ftest_lock(flags, DW))
26933+ di_write_unlock(dentry);
26934+ else
26935+ di_read_unlock(dentry, flags);
26936+ si_read_unlock(dentry->d_sb);
26937+}
26938+
26939+void aufs_write_lock(struct dentry *dentry)
26940+{
e49829fe 26941+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 26942+ di_write_lock_child(dentry);
26943+}
26944+
26945+void aufs_write_unlock(struct dentry *dentry)
26946+{
26947+ di_write_unlock(dentry);
26948+ si_write_unlock(dentry->d_sb);
26949+}
26950+
e49829fe 26951+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 26952+{
e49829fe 26953+ int err;
027c5e7a
AM
26954+ unsigned int sigen;
26955+ struct super_block *sb;
e49829fe 26956+
027c5e7a
AM
26957+ sb = d1->d_sb;
26958+ err = si_read_lock(sb, flags);
26959+ if (unlikely(err))
26960+ goto out;
26961+
b95c5147 26962+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
027c5e7a
AM
26963+
26964+ if (au_ftest_lock(flags, GEN)) {
26965+ sigen = au_sigen(sb);
26966+ err = au_digen_test(d1, sigen);
26967+ AuDebugOn(!err && au_dbrange_test(d1));
26968+ if (!err) {
26969+ err = au_digen_test(d2, sigen);
26970+ AuDebugOn(!err && au_dbrange_test(d2));
26971+ }
26972+ if (unlikely(err))
26973+ aufs_read_and_write_unlock2(d1, d2);
26974+ }
26975+
26976+out:
e49829fe 26977+ return err;
1facf9fc 26978+}
26979+
26980+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
26981+{
26982+ di_write_unlock2(d1, d2);
26983+ si_read_unlock(d1->d_sb);
26984+}
b752ccd1
AM
26985+
26986+/* ---------------------------------------------------------------------- */
26987+
febd17d6 26988+static void si_pid_alloc(struct au_si_pid *au_si_pid, int idx)
b752ccd1 26989+{
febd17d6 26990+ unsigned long *p;
b752ccd1 26991+
febd17d6
JR
26992+ BUILD_BUG_ON(sizeof(unsigned long) !=
26993+ sizeof(*au_si_pid->pid_bitmap));
b752ccd1 26994+
febd17d6
JR
26995+ mutex_lock(&au_si_pid->pid_mtx);
26996+ p = au_si_pid->pid_bitmap[idx];
26997+ while (!p) {
26998+ /*
26999+ * bad approach.
27000+ * but keeping 'si_pid_set()' void is more important.
27001+ */
27002+ p = kcalloc(BITS_TO_LONGS(AU_PIDSTEP),
27003+ sizeof(*au_si_pid->pid_bitmap),
27004+ GFP_NOFS);
27005+ if (p)
27006+ break;
27007+ cond_resched();
27008+ }
27009+ au_si_pid->pid_bitmap[idx] = p;
27010+ mutex_unlock(&au_si_pid->pid_mtx);
b752ccd1
AM
27011+}
27012+
febd17d6 27013+void si_pid_set(struct super_block *sb)
b752ccd1 27014+{
febd17d6
JR
27015+ pid_t bit;
27016+ int idx;
27017+ unsigned long *bitmap;
27018+ struct au_si_pid *au_si_pid;
27019+
27020+ si_pid_idx_bit(&idx, &bit);
27021+ au_si_pid = &au_sbi(sb)->au_si_pid;
27022+ bitmap = au_si_pid->pid_bitmap[idx];
27023+ if (!bitmap) {
27024+ si_pid_alloc(au_si_pid, idx);
27025+ bitmap = au_si_pid->pid_bitmap[idx];
27026+ }
27027+ AuDebugOn(test_bit(bit, bitmap));
27028+ set_bit(bit, bitmap);
27029+ /* smp_mb(); */
b752ccd1 27030+}
7f207e10
AM
27031diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
27032--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 27033+++ linux/fs/aufs/spl.h 2016-10-09 16:55:36.496035060 +0200
f0c0a007 27034@@ -0,0 +1,113 @@
1facf9fc 27035+/*
8cdd5066 27036+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 27037+ *
27038+ * This program, aufs is free software; you can redistribute it and/or modify
27039+ * it under the terms of the GNU General Public License as published by
27040+ * the Free Software Foundation; either version 2 of the License, or
27041+ * (at your option) any later version.
dece6358
AM
27042+ *
27043+ * This program is distributed in the hope that it will be useful,
27044+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27045+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27046+ * GNU General Public License for more details.
27047+ *
27048+ * You should have received a copy of the GNU General Public License
523b37e3 27049+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27050+ */
27051+
27052+/*
27053+ * simple list protected by a spinlock
27054+ */
27055+
27056+#ifndef __AUFS_SPL_H__
27057+#define __AUFS_SPL_H__
27058+
27059+#ifdef __KERNEL__
27060+
f0c0a007 27061+#if 0
1facf9fc 27062+struct au_splhead {
27063+ spinlock_t spin;
27064+ struct list_head head;
27065+};
27066+
27067+static inline void au_spl_init(struct au_splhead *spl)
27068+{
27069+ spin_lock_init(&spl->spin);
27070+ INIT_LIST_HEAD(&spl->head);
27071+}
27072+
27073+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
27074+{
27075+ spin_lock(&spl->spin);
27076+ list_add(list, &spl->head);
27077+ spin_unlock(&spl->spin);
27078+}
27079+
27080+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
27081+{
27082+ spin_lock(&spl->spin);
27083+ list_del(list);
27084+ spin_unlock(&spl->spin);
27085+}
27086+
4a4d8108
AM
27087+static inline void au_spl_del_rcu(struct list_head *list,
27088+ struct au_splhead *spl)
27089+{
27090+ spin_lock(&spl->spin);
27091+ list_del_rcu(list);
27092+ spin_unlock(&spl->spin);
27093+}
f0c0a007 27094+#endif
4a4d8108 27095+
86dc4139
AM
27096+/* ---------------------------------------------------------------------- */
27097+
27098+struct au_sphlhead {
27099+ spinlock_t spin;
27100+ struct hlist_head head;
27101+};
27102+
27103+static inline void au_sphl_init(struct au_sphlhead *sphl)
27104+{
27105+ spin_lock_init(&sphl->spin);
27106+ INIT_HLIST_HEAD(&sphl->head);
27107+}
27108+
27109+static inline void au_sphl_add(struct hlist_node *hlist,
27110+ struct au_sphlhead *sphl)
27111+{
27112+ spin_lock(&sphl->spin);
27113+ hlist_add_head(hlist, &sphl->head);
27114+ spin_unlock(&sphl->spin);
27115+}
27116+
27117+static inline void au_sphl_del(struct hlist_node *hlist,
27118+ struct au_sphlhead *sphl)
27119+{
27120+ spin_lock(&sphl->spin);
27121+ hlist_del(hlist);
27122+ spin_unlock(&sphl->spin);
27123+}
27124+
27125+static inline void au_sphl_del_rcu(struct hlist_node *hlist,
27126+ struct au_sphlhead *sphl)
27127+{
27128+ spin_lock(&sphl->spin);
27129+ hlist_del_rcu(hlist);
27130+ spin_unlock(&sphl->spin);
27131+}
27132+
27133+static inline unsigned long au_sphl_count(struct au_sphlhead *sphl)
27134+{
27135+ unsigned long cnt;
27136+ struct hlist_node *pos;
27137+
27138+ cnt = 0;
27139+ spin_lock(&sphl->spin);
27140+ hlist_for_each(pos, &sphl->head)
27141+ cnt++;
27142+ spin_unlock(&sphl->spin);
27143+ return cnt;
27144+}
27145+
1facf9fc 27146+#endif /* __KERNEL__ */
27147+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
27148diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
27149--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 27150+++ linux/fs/aufs/super.c 2016-10-09 16:55:36.496035060 +0200
f0c0a007 27151@@ -0,0 +1,1038 @@
1facf9fc 27152+/*
8cdd5066 27153+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 27154+ *
27155+ * This program, aufs is free software; you can redistribute it and/or modify
27156+ * it under the terms of the GNU General Public License as published by
27157+ * the Free Software Foundation; either version 2 of the License, or
27158+ * (at your option) any later version.
dece6358
AM
27159+ *
27160+ * This program is distributed in the hope that it will be useful,
27161+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27162+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27163+ * GNU General Public License for more details.
27164+ *
27165+ * You should have received a copy of the GNU General Public License
523b37e3 27166+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27167+ */
27168+
27169+/*
27170+ * mount and super_block operations
27171+ */
27172+
f6c5ef8b 27173+#include <linux/mm.h>
1facf9fc 27174+#include <linux/seq_file.h>
27175+#include <linux/statfs.h>
7f207e10 27176+#include <linux/vmalloc.h>
1facf9fc 27177+#include "aufs.h"
27178+
27179+/*
27180+ * super_operations
27181+ */
27182+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
27183+{
27184+ struct au_icntnr *c;
27185+
27186+ c = au_cache_alloc_icntnr();
27187+ if (c) {
027c5e7a 27188+ au_icntnr_init(c);
1facf9fc 27189+ c->vfs_inode.i_version = 1; /* sigen(sb); */
27190+ c->iinfo.ii_hinode = NULL;
27191+ return &c->vfs_inode;
27192+ }
27193+ return NULL;
27194+}
27195+
027c5e7a
AM
27196+static void aufs_destroy_inode_cb(struct rcu_head *head)
27197+{
27198+ struct inode *inode = container_of(head, struct inode, i_rcu);
27199+
f0c0a007 27200+ au_cache_dfree_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
027c5e7a
AM
27201+}
27202+
1facf9fc 27203+static void aufs_destroy_inode(struct inode *inode)
27204+{
5afbbe0d
AM
27205+ if (!au_is_bad_inode(inode))
27206+ au_iinfo_fin(inode);
027c5e7a 27207+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 27208+}
27209+
27210+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
27211+{
27212+ struct inode *inode;
27213+ int err;
27214+
27215+ inode = iget_locked(sb, ino);
27216+ if (unlikely(!inode)) {
27217+ inode = ERR_PTR(-ENOMEM);
27218+ goto out;
27219+ }
27220+ if (!(inode->i_state & I_NEW))
27221+ goto out;
27222+
27223+ err = au_xigen_new(inode);
27224+ if (!err)
27225+ err = au_iinfo_init(inode);
27226+ if (!err)
27227+ inode->i_version++;
27228+ else {
27229+ iget_failed(inode);
27230+ inode = ERR_PTR(err);
27231+ }
27232+
4f0767ce 27233+out:
1facf9fc 27234+ /* never return NULL */
27235+ AuDebugOn(!inode);
27236+ AuTraceErrPtr(inode);
27237+ return inode;
27238+}
27239+
27240+/* lock free root dinfo */
27241+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
27242+{
27243+ int err;
5afbbe0d 27244+ aufs_bindex_t bindex, bbot;
1facf9fc 27245+ struct path path;
4a4d8108 27246+ struct au_hdentry *hdp;
1facf9fc 27247+ struct au_branch *br;
076b876e 27248+ au_br_perm_str_t perm;
1facf9fc 27249+
27250+ err = 0;
5afbbe0d
AM
27251+ bbot = au_sbbot(sb);
27252+ bindex = 0;
27253+ hdp = au_hdentry(au_di(sb->s_root), bindex);
27254+ for (; !err && bindex <= bbot; bindex++, hdp++) {
1facf9fc 27255+ br = au_sbr(sb, bindex);
86dc4139 27256+ path.mnt = au_br_mnt(br);
5afbbe0d 27257+ path.dentry = hdp->hd_dentry;
1facf9fc 27258+ err = au_seq_path(seq, &path);
79b8bda9 27259+ if (!err) {
076b876e 27260+ au_optstr_br_perm(&perm, br->br_perm);
79b8bda9 27261+ seq_printf(seq, "=%s", perm.a);
5afbbe0d 27262+ if (bindex != bbot)
79b8bda9 27263+ seq_putc(seq, ':');
1e00d052 27264+ }
1facf9fc 27265+ }
79b8bda9
AM
27266+ if (unlikely(err || seq_has_overflowed(seq)))
27267+ err = -E2BIG;
1facf9fc 27268+
27269+ return err;
27270+}
27271+
27272+static void au_show_wbr_create(struct seq_file *m, int v,
27273+ struct au_sbinfo *sbinfo)
27274+{
27275+ const char *pat;
27276+
dece6358
AM
27277+ AuRwMustAnyLock(&sbinfo->si_rwsem);
27278+
c2b27bf2 27279+ seq_puts(m, ",create=");
1facf9fc 27280+ pat = au_optstr_wbr_create(v);
27281+ switch (v) {
27282+ case AuWbrCreate_TDP:
27283+ case AuWbrCreate_RR:
27284+ case AuWbrCreate_MFS:
27285+ case AuWbrCreate_PMFS:
c2b27bf2 27286+ seq_puts(m, pat);
1facf9fc 27287+ break;
27288+ case AuWbrCreate_MFSV:
27289+ seq_printf(m, /*pat*/"mfs:%lu",
e49829fe
JR
27290+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
27291+ / MSEC_PER_SEC);
1facf9fc 27292+ break;
27293+ case AuWbrCreate_PMFSV:
27294+ seq_printf(m, /*pat*/"pmfs:%lu",
e49829fe
JR
27295+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
27296+ / MSEC_PER_SEC);
1facf9fc 27297+ break;
27298+ case AuWbrCreate_MFSRR:
27299+ seq_printf(m, /*pat*/"mfsrr:%llu",
27300+ sbinfo->si_wbr_mfs.mfsrr_watermark);
27301+ break;
27302+ case AuWbrCreate_MFSRRV:
27303+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
27304+ sbinfo->si_wbr_mfs.mfsrr_watermark,
e49829fe
JR
27305+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
27306+ / MSEC_PER_SEC);
1facf9fc 27307+ break;
392086de
AM
27308+ case AuWbrCreate_PMFSRR:
27309+ seq_printf(m, /*pat*/"pmfsrr:%llu",
27310+ sbinfo->si_wbr_mfs.mfsrr_watermark);
27311+ break;
27312+ case AuWbrCreate_PMFSRRV:
27313+ seq_printf(m, /*pat*/"pmfsrr:%llu:%lu",
27314+ sbinfo->si_wbr_mfs.mfsrr_watermark,
27315+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
27316+ / MSEC_PER_SEC);
27317+ break;
1facf9fc 27318+ }
27319+}
27320+
7eafdf33 27321+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
1facf9fc 27322+{
27323+#ifdef CONFIG_SYSFS
27324+ return 0;
27325+#else
27326+ int err;
27327+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
27328+ aufs_bindex_t bindex, brid;
1facf9fc 27329+ struct qstr *name;
27330+ struct file *f;
27331+ struct dentry *d, *h_root;
27332+
dece6358
AM
27333+ AuRwMustAnyLock(&sbinfo->si_rwsem);
27334+
1facf9fc 27335+ err = 0;
1facf9fc 27336+ f = au_sbi(sb)->si_xib;
27337+ if (!f)
27338+ goto out;
27339+
27340+ /* stop printing the default xino path on the first writable branch */
27341+ h_root = NULL;
27342+ brid = au_xino_brid(sb);
27343+ if (brid >= 0) {
27344+ bindex = au_br_index(sb, brid);
5afbbe0d 27345+ h_root = au_hdentry(au_di(sb->s_root), bindex)->hd_dentry;
1facf9fc 27346+ }
2000de60 27347+ d = f->f_path.dentry;
1facf9fc 27348+ name = &d->d_name;
27349+ /* safe ->d_parent because the file is unlinked */
27350+ if (d->d_parent == h_root
27351+ && name->len == len
27352+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
27353+ goto out;
27354+
27355+ seq_puts(seq, ",xino=");
27356+ err = au_xino_path(seq, f);
27357+
4f0767ce 27358+out:
1facf9fc 27359+ return err;
27360+#endif
27361+}
27362+
27363+/* seq_file will re-call me in case of too long string */
7eafdf33 27364+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
1facf9fc 27365+{
027c5e7a 27366+ int err;
1facf9fc 27367+ unsigned int mnt_flags, v;
27368+ struct super_block *sb;
27369+ struct au_sbinfo *sbinfo;
27370+
27371+#define AuBool(name, str) do { \
27372+ v = au_opt_test(mnt_flags, name); \
27373+ if (v != au_opt_test(AuOpt_Def, name)) \
27374+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
27375+} while (0)
27376+
27377+#define AuStr(name, str) do { \
27378+ v = mnt_flags & AuOptMask_##name; \
27379+ if (v != (AuOpt_Def & AuOptMask_##name)) \
27380+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
27381+} while (0)
27382+
27383+#define AuUInt(name, str, val) do { \
27384+ if (val != AUFS_##name##_DEF) \
27385+ seq_printf(m, "," #str "=%u", val); \
27386+} while (0)
27387+
7eafdf33 27388+ sb = dentry->d_sb;
c1595e42
JR
27389+ if (sb->s_flags & MS_POSIXACL)
27390+ seq_puts(m, ",acl");
27391+
27392+ /* lock free root dinfo */
1facf9fc 27393+ si_noflush_read_lock(sb);
27394+ sbinfo = au_sbi(sb);
27395+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
27396+
27397+ mnt_flags = au_mntflags(sb);
27398+ if (au_opt_test(mnt_flags, XINO)) {
7eafdf33 27399+ err = au_show_xino(m, sb);
1facf9fc 27400+ if (unlikely(err))
27401+ goto out;
27402+ } else
27403+ seq_puts(m, ",noxino");
27404+
27405+ AuBool(TRUNC_XINO, trunc_xino);
27406+ AuStr(UDBA, udba);
dece6358 27407+ AuBool(SHWH, shwh);
1facf9fc 27408+ AuBool(PLINK, plink);
4a4d8108 27409+ AuBool(DIO, dio);
076b876e 27410+ AuBool(DIRPERM1, dirperm1);
1facf9fc 27411+
27412+ v = sbinfo->si_wbr_create;
27413+ if (v != AuWbrCreate_Def)
27414+ au_show_wbr_create(m, v, sbinfo);
27415+
27416+ v = sbinfo->si_wbr_copyup;
27417+ if (v != AuWbrCopyup_Def)
27418+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
27419+
27420+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
27421+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
27422+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
27423+
27424+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
27425+
027c5e7a
AM
27426+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
27427+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 27428+
27429+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
27430+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
27431+
076b876e
AM
27432+ au_fhsm_show(m, sbinfo);
27433+
1facf9fc 27434+ AuBool(SUM, sum);
27435+ /* AuBool(SUM_W, wsum); */
27436+ AuBool(WARN_PERM, warn_perm);
27437+ AuBool(VERBOSE, verbose);
27438+
4f0767ce 27439+out:
1facf9fc 27440+ /* be sure to print "br:" last */
27441+ if (!sysaufs_brs) {
27442+ seq_puts(m, ",br:");
27443+ au_show_brs(m, sb);
27444+ }
27445+ si_read_unlock(sb);
27446+ return 0;
27447+
1facf9fc 27448+#undef AuBool
27449+#undef AuStr
4a4d8108 27450+#undef AuUInt
1facf9fc 27451+}
27452+
27453+/* ---------------------------------------------------------------------- */
27454+
27455+/* sum mode which returns the summation for statfs(2) */
27456+
27457+static u64 au_add_till_max(u64 a, u64 b)
27458+{
27459+ u64 old;
27460+
27461+ old = a;
27462+ a += b;
92d182d2
AM
27463+ if (old <= a)
27464+ return a;
27465+ return ULLONG_MAX;
27466+}
27467+
27468+static u64 au_mul_till_max(u64 a, long mul)
27469+{
27470+ u64 old;
27471+
27472+ old = a;
27473+ a *= mul;
27474+ if (old <= a)
1facf9fc 27475+ return a;
27476+ return ULLONG_MAX;
27477+}
27478+
27479+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
27480+{
27481+ int err;
92d182d2 27482+ long bsize, factor;
1facf9fc 27483+ u64 blocks, bfree, bavail, files, ffree;
5afbbe0d 27484+ aufs_bindex_t bbot, bindex, i;
1facf9fc 27485+ unsigned char shared;
7f207e10 27486+ struct path h_path;
1facf9fc 27487+ struct super_block *h_sb;
27488+
92d182d2
AM
27489+ err = 0;
27490+ bsize = LONG_MAX;
27491+ files = 0;
27492+ ffree = 0;
1facf9fc 27493+ blocks = 0;
27494+ bfree = 0;
27495+ bavail = 0;
5afbbe0d
AM
27496+ bbot = au_sbbot(sb);
27497+ for (bindex = 0; bindex <= bbot; bindex++) {
7f207e10
AM
27498+ h_path.mnt = au_sbr_mnt(sb, bindex);
27499+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 27500+ shared = 0;
92d182d2 27501+ for (i = 0; !shared && i < bindex; i++)
1facf9fc 27502+ shared = (au_sbr_sb(sb, i) == h_sb);
27503+ if (shared)
27504+ continue;
27505+
27506+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27507+ h_path.dentry = h_path.mnt->mnt_root;
27508+ err = vfs_statfs(&h_path, buf);
1facf9fc 27509+ if (unlikely(err))
27510+ goto out;
27511+
92d182d2
AM
27512+ if (bsize > buf->f_bsize) {
27513+ /*
27514+ * we will reduce bsize, so we have to expand blocks
27515+ * etc. to match them again
27516+ */
27517+ factor = (bsize / buf->f_bsize);
27518+ blocks = au_mul_till_max(blocks, factor);
27519+ bfree = au_mul_till_max(bfree, factor);
27520+ bavail = au_mul_till_max(bavail, factor);
27521+ bsize = buf->f_bsize;
27522+ }
27523+
27524+ factor = (buf->f_bsize / bsize);
27525+ blocks = au_add_till_max(blocks,
27526+ au_mul_till_max(buf->f_blocks, factor));
27527+ bfree = au_add_till_max(bfree,
27528+ au_mul_till_max(buf->f_bfree, factor));
27529+ bavail = au_add_till_max(bavail,
27530+ au_mul_till_max(buf->f_bavail, factor));
1facf9fc 27531+ files = au_add_till_max(files, buf->f_files);
27532+ ffree = au_add_till_max(ffree, buf->f_ffree);
27533+ }
27534+
92d182d2 27535+ buf->f_bsize = bsize;
1facf9fc 27536+ buf->f_blocks = blocks;
27537+ buf->f_bfree = bfree;
27538+ buf->f_bavail = bavail;
27539+ buf->f_files = files;
27540+ buf->f_ffree = ffree;
92d182d2 27541+ buf->f_frsize = 0;
1facf9fc 27542+
4f0767ce 27543+out:
1facf9fc 27544+ return err;
27545+}
27546+
27547+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
27548+{
27549+ int err;
7f207e10 27550+ struct path h_path;
1facf9fc 27551+ struct super_block *sb;
27552+
27553+ /* lock free root dinfo */
27554+ sb = dentry->d_sb;
27555+ si_noflush_read_lock(sb);
7f207e10 27556+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 27557+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27558+ h_path.mnt = au_sbr_mnt(sb, 0);
27559+ h_path.dentry = h_path.mnt->mnt_root;
27560+ err = vfs_statfs(&h_path, buf);
27561+ } else
1facf9fc 27562+ err = au_statfs_sum(sb, buf);
27563+ si_read_unlock(sb);
27564+
27565+ if (!err) {
27566+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 27567+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 27568+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
27569+ }
27570+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
27571+
27572+ return err;
27573+}
27574+
27575+/* ---------------------------------------------------------------------- */
27576+
537831f9
AM
27577+static int aufs_sync_fs(struct super_block *sb, int wait)
27578+{
27579+ int err, e;
5afbbe0d 27580+ aufs_bindex_t bbot, bindex;
537831f9
AM
27581+ struct au_branch *br;
27582+ struct super_block *h_sb;
27583+
27584+ err = 0;
27585+ si_noflush_read_lock(sb);
5afbbe0d
AM
27586+ bbot = au_sbbot(sb);
27587+ for (bindex = 0; bindex <= bbot; bindex++) {
537831f9
AM
27588+ br = au_sbr(sb, bindex);
27589+ if (!au_br_writable(br->br_perm))
27590+ continue;
27591+
27592+ h_sb = au_sbr_sb(sb, bindex);
27593+ if (h_sb->s_op->sync_fs) {
27594+ e = h_sb->s_op->sync_fs(h_sb, wait);
27595+ if (unlikely(e && !err))
27596+ err = e;
27597+ /* go on even if an error happens */
27598+ }
27599+ }
27600+ si_read_unlock(sb);
27601+
27602+ return err;
27603+}
27604+
27605+/* ---------------------------------------------------------------------- */
27606+
1facf9fc 27607+/* final actions when unmounting a file system */
27608+static void aufs_put_super(struct super_block *sb)
27609+{
27610+ struct au_sbinfo *sbinfo;
27611+
27612+ sbinfo = au_sbi(sb);
27613+ if (!sbinfo)
27614+ return;
27615+
1facf9fc 27616+ dbgaufs_si_fin(sbinfo);
27617+ kobject_put(&sbinfo->si_kobj);
27618+}
27619+
27620+/* ---------------------------------------------------------------------- */
27621+
79b8bda9
AM
27622+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
27623+ struct super_block *sb, void *arg)
7f207e10
AM
27624+{
27625+ void *array;
076b876e 27626+ unsigned long long n, sz;
7f207e10
AM
27627+
27628+ array = NULL;
27629+ n = 0;
27630+ if (!*hint)
27631+ goto out;
27632+
27633+ if (*hint > ULLONG_MAX / sizeof(array)) {
27634+ array = ERR_PTR(-EMFILE);
27635+ pr_err("hint %llu\n", *hint);
27636+ goto out;
27637+ }
27638+
076b876e
AM
27639+ sz = sizeof(array) * *hint;
27640+ array = kzalloc(sz, GFP_NOFS);
7f207e10 27641+ if (unlikely(!array))
076b876e 27642+ array = vzalloc(sz);
7f207e10
AM
27643+ if (unlikely(!array)) {
27644+ array = ERR_PTR(-ENOMEM);
27645+ goto out;
27646+ }
27647+
79b8bda9 27648+ n = cb(sb, array, *hint, arg);
7f207e10
AM
27649+ AuDebugOn(n > *hint);
27650+
27651+out:
27652+ *hint = n;
27653+ return array;
27654+}
27655+
79b8bda9 27656+static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
7f207e10
AM
27657+ unsigned long long max __maybe_unused,
27658+ void *arg)
27659+{
27660+ unsigned long long n;
27661+ struct inode **p, *inode;
27662+ struct list_head *head;
27663+
27664+ n = 0;
27665+ p = a;
27666+ head = arg;
79b8bda9 27667+ spin_lock(&sb->s_inode_list_lock);
7f207e10 27668+ list_for_each_entry(inode, head, i_sb_list) {
5afbbe0d
AM
27669+ if (!au_is_bad_inode(inode)
27670+ && au_ii(inode)->ii_btop >= 0) {
2cbb1c4b
JR
27671+ spin_lock(&inode->i_lock);
27672+ if (atomic_read(&inode->i_count)) {
27673+ au_igrab(inode);
27674+ *p++ = inode;
27675+ n++;
27676+ AuDebugOn(n > max);
27677+ }
27678+ spin_unlock(&inode->i_lock);
7f207e10
AM
27679+ }
27680+ }
79b8bda9 27681+ spin_unlock(&sb->s_inode_list_lock);
7f207e10
AM
27682+
27683+ return n;
27684+}
27685+
27686+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
27687+{
5afbbe0d 27688+ *max = au_ninodes(sb);
79b8bda9 27689+ return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
7f207e10
AM
27690+}
27691+
27692+void au_iarray_free(struct inode **a, unsigned long long max)
27693+{
27694+ unsigned long long ull;
27695+
27696+ for (ull = 0; ull < max; ull++)
27697+ iput(a[ull]);
be52b249 27698+ kvfree(a);
7f207e10
AM
27699+}
27700+
27701+/* ---------------------------------------------------------------------- */
27702+
1facf9fc 27703+/*
27704+ * refresh dentry and inode at remount time.
27705+ */
027c5e7a
AM
27706+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
27707+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
27708+ struct dentry *parent)
1facf9fc 27709+{
27710+ int err;
1facf9fc 27711+
27712+ di_write_lock_child(dentry);
1facf9fc 27713+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
27714+ err = au_refresh_dentry(dentry, parent);
27715+ if (!err && dir_flags)
5527c038 27716+ au_hn_reset(d_inode(dentry), dir_flags);
1facf9fc 27717+ di_read_unlock(parent, AuLock_IR);
1facf9fc 27718+ di_write_unlock(dentry);
27719+
27720+ return err;
27721+}
27722+
027c5e7a
AM
27723+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
27724+ struct au_sbinfo *sbinfo,
b95c5147 27725+ const unsigned int dir_flags, unsigned int do_idop)
1facf9fc 27726+{
027c5e7a
AM
27727+ int err;
27728+ struct dentry *parent;
027c5e7a
AM
27729+
27730+ err = 0;
27731+ parent = dget_parent(dentry);
27732+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
5527c038
JR
27733+ if (d_really_is_positive(dentry)) {
27734+ if (!d_is_dir(dentry))
027c5e7a
AM
27735+ err = au_do_refresh(dentry, /*dir_flags*/0,
27736+ parent);
27737+ else {
27738+ err = au_do_refresh(dentry, dir_flags, parent);
27739+ if (unlikely(err))
27740+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
27741+ }
27742+ } else
27743+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
27744+ AuDbgDentry(dentry);
27745+ }
27746+ dput(parent);
27747+
79b8bda9 27748+ if (!err) {
b95c5147 27749+ if (do_idop)
79b8bda9
AM
27750+ au_refresh_dop(dentry, /*force_reval*/0);
27751+ } else
27752+ au_refresh_dop(dentry, /*force_reval*/1);
27753+
027c5e7a
AM
27754+ AuTraceErr(err);
27755+ return err;
1facf9fc 27756+}
27757+
b95c5147 27758+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
1facf9fc 27759+{
27760+ int err, i, j, ndentry, e;
027c5e7a 27761+ unsigned int sigen;
1facf9fc 27762+ struct au_dcsub_pages dpages;
27763+ struct au_dpage *dpage;
027c5e7a
AM
27764+ struct dentry **dentries, *d;
27765+ struct au_sbinfo *sbinfo;
27766+ struct dentry *root = sb->s_root;
5527c038 27767+ const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
1facf9fc 27768+
b95c5147 27769+ if (do_idop)
79b8bda9
AM
27770+ au_refresh_dop(root, /*force_reval*/0);
27771+
027c5e7a
AM
27772+ err = au_dpages_init(&dpages, GFP_NOFS);
27773+ if (unlikely(err))
1facf9fc 27774+ goto out;
027c5e7a
AM
27775+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
27776+ if (unlikely(err))
1facf9fc 27777+ goto out_dpages;
1facf9fc 27778+
027c5e7a
AM
27779+ sigen = au_sigen(sb);
27780+ sbinfo = au_sbi(sb);
27781+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 27782+ dpage = dpages.dpages + i;
27783+ dentries = dpage->dentries;
27784+ ndentry = dpage->ndentry;
027c5e7a 27785+ for (j = 0; j < ndentry; j++) {
1facf9fc 27786+ d = dentries[j];
79b8bda9 27787+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
b95c5147 27788+ do_idop);
027c5e7a
AM
27789+ if (unlikely(e && !err))
27790+ err = e;
27791+ /* go on even err */
1facf9fc 27792+ }
27793+ }
27794+
4f0767ce 27795+out_dpages:
1facf9fc 27796+ au_dpages_free(&dpages);
4f0767ce 27797+out:
1facf9fc 27798+ return err;
27799+}
27800+
b95c5147 27801+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
1facf9fc 27802+{
027c5e7a
AM
27803+ int err, e;
27804+ unsigned int sigen;
27805+ unsigned long long max, ull;
27806+ struct inode *inode, **array;
1facf9fc 27807+
027c5e7a
AM
27808+ array = au_iarray_alloc(sb, &max);
27809+ err = PTR_ERR(array);
27810+ if (IS_ERR(array))
27811+ goto out;
1facf9fc 27812+
27813+ err = 0;
027c5e7a
AM
27814+ sigen = au_sigen(sb);
27815+ for (ull = 0; ull < max; ull++) {
27816+ inode = array[ull];
076b876e
AM
27817+ if (unlikely(!inode))
27818+ break;
b95c5147
AM
27819+
27820+ e = 0;
27821+ ii_write_lock_child(inode);
537831f9 27822+ if (au_iigen(inode, NULL) != sigen) {
027c5e7a 27823+ e = au_refresh_hinode_self(inode);
1facf9fc 27824+ if (unlikely(e)) {
b95c5147 27825+ au_refresh_iop(inode, /*force_getattr*/1);
027c5e7a 27826+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 27827+ if (!err)
27828+ err = e;
27829+ /* go on even if err */
27830+ }
27831+ }
b95c5147
AM
27832+ if (!e && do_idop)
27833+ au_refresh_iop(inode, /*force_getattr*/0);
27834+ ii_write_unlock(inode);
1facf9fc 27835+ }
27836+
027c5e7a 27837+ au_iarray_free(array, max);
1facf9fc 27838+
4f0767ce 27839+out:
1facf9fc 27840+ return err;
27841+}
27842+
b95c5147 27843+static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
1facf9fc 27844+{
027c5e7a
AM
27845+ int err, e;
27846+ unsigned int udba;
5afbbe0d 27847+ aufs_bindex_t bindex, bbot;
1facf9fc 27848+ struct dentry *root;
27849+ struct inode *inode;
027c5e7a 27850+ struct au_branch *br;
79b8bda9 27851+ struct au_sbinfo *sbi;
1facf9fc 27852+
27853+ au_sigen_inc(sb);
79b8bda9
AM
27854+ sbi = au_sbi(sb);
27855+ au_fclr_si(sbi, FAILED_REFRESH_DIR);
1facf9fc 27856+
27857+ root = sb->s_root;
27858+ DiMustNoWaiters(root);
5527c038 27859+ inode = d_inode(root);
1facf9fc 27860+ IiMustNoWaiters(inode);
1facf9fc 27861+
027c5e7a 27862+ udba = au_opt_udba(sb);
5afbbe0d
AM
27863+ bbot = au_sbbot(sb);
27864+ for (bindex = 0; bindex <= bbot; bindex++) {
027c5e7a
AM
27865+ br = au_sbr(sb, bindex);
27866+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 27867+ if (unlikely(err))
027c5e7a
AM
27868+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
27869+ bindex, err);
27870+ /* go on even if err */
1facf9fc 27871+ }
027c5e7a 27872+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 27873+
b95c5147 27874+ if (do_idop) {
79b8bda9
AM
27875+ if (au_ftest_si(sbi, NO_DREVAL)) {
27876+ AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
27877+ sb->s_d_op = &aufs_dop_noreval;
b95c5147
AM
27878+ AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
27879+ sbi->si_iop_array = aufs_iop_nogetattr;
79b8bda9
AM
27880+ } else {
27881+ AuDebugOn(sb->s_d_op == &aufs_dop);
27882+ sb->s_d_op = &aufs_dop;
b95c5147
AM
27883+ AuDebugOn(sbi->si_iop_array == aufs_iop);
27884+ sbi->si_iop_array = aufs_iop;
79b8bda9 27885+ }
b95c5147
AM
27886+ pr_info("reset to %pf and %pf\n",
27887+ sb->s_d_op, sbi->si_iop_array);
79b8bda9
AM
27888+ }
27889+
027c5e7a 27890+ di_write_unlock(root);
b95c5147
AM
27891+ err = au_refresh_d(sb, do_idop);
27892+ e = au_refresh_i(sb, do_idop);
027c5e7a
AM
27893+ if (unlikely(e && !err))
27894+ err = e;
1facf9fc 27895+ /* aufs_write_lock() calls ..._child() */
27896+ di_write_lock_child(root);
027c5e7a
AM
27897+
27898+ au_cpup_attr_all(inode, /*force*/1);
27899+
27900+ if (unlikely(err))
27901+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 27902+}
27903+
27904+/* stop extra interpretation of errno in mount(8), and strange error messages */
27905+static int cvt_err(int err)
27906+{
27907+ AuTraceErr(err);
27908+
27909+ switch (err) {
27910+ case -ENOENT:
27911+ case -ENOTDIR:
27912+ case -EEXIST:
27913+ case -EIO:
27914+ err = -EINVAL;
27915+ }
27916+ return err;
27917+}
27918+
27919+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
27920+{
4a4d8108
AM
27921+ int err, do_dx;
27922+ unsigned int mntflags;
be52b249
AM
27923+ struct au_opts opts = {
27924+ .opt = NULL
27925+ };
1facf9fc 27926+ struct dentry *root;
27927+ struct inode *inode;
27928+ struct au_sbinfo *sbinfo;
27929+
27930+ err = 0;
27931+ root = sb->s_root;
27932+ if (!data || !*data) {
e49829fe
JR
27933+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27934+ if (!err) {
27935+ di_write_lock_child(root);
27936+ err = au_opts_verify(sb, *flags, /*pending*/0);
27937+ aufs_write_unlock(root);
27938+ }
1facf9fc 27939+ goto out;
27940+ }
27941+
27942+ err = -ENOMEM;
1facf9fc 27943+ opts.opt = (void *)__get_free_page(GFP_NOFS);
27944+ if (unlikely(!opts.opt))
27945+ goto out;
27946+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
27947+ opts.flags = AuOpts_REMOUNT;
27948+ opts.sb_flags = *flags;
27949+
27950+ /* parse it before aufs lock */
27951+ err = au_opts_parse(sb, data, &opts);
27952+ if (unlikely(err))
27953+ goto out_opts;
27954+
27955+ sbinfo = au_sbi(sb);
5527c038 27956+ inode = d_inode(root);
febd17d6 27957+ inode_lock(inode);
e49829fe
JR
27958+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27959+ if (unlikely(err))
27960+ goto out_mtx;
27961+ di_write_lock_child(root);
1facf9fc 27962+
27963+ /* au_opts_remount() may return an error */
27964+ err = au_opts_remount(sb, &opts);
27965+ au_opts_free(&opts);
27966+
027c5e7a 27967+ if (au_ftest_opts(opts.flags, REFRESH))
b95c5147 27968+ au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
1facf9fc 27969+
4a4d8108
AM
27970+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
27971+ mntflags = au_mntflags(sb);
27972+ do_dx = !!au_opt_test(mntflags, DIO);
27973+ au_dy_arefresh(do_dx);
27974+ }
27975+
076b876e 27976+ au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
1facf9fc 27977+ aufs_write_unlock(root);
953406b4 27978+
e49829fe 27979+out_mtx:
febd17d6 27980+ inode_unlock(inode);
4f0767ce 27981+out_opts:
f0c0a007 27982+ au_delayed_free_page((unsigned long)opts.opt);
4f0767ce 27983+out:
1facf9fc 27984+ err = cvt_err(err);
27985+ AuTraceErr(err);
27986+ return err;
27987+}
27988+
4a4d8108 27989+static const struct super_operations aufs_sop = {
1facf9fc 27990+ .alloc_inode = aufs_alloc_inode,
27991+ .destroy_inode = aufs_destroy_inode,
b752ccd1 27992+ /* always deleting, no clearing */
1facf9fc 27993+ .drop_inode = generic_delete_inode,
27994+ .show_options = aufs_show_options,
27995+ .statfs = aufs_statfs,
27996+ .put_super = aufs_put_super,
537831f9 27997+ .sync_fs = aufs_sync_fs,
1facf9fc 27998+ .remount_fs = aufs_remount_fs
27999+};
28000+
28001+/* ---------------------------------------------------------------------- */
28002+
28003+static int alloc_root(struct super_block *sb)
28004+{
28005+ int err;
28006+ struct inode *inode;
28007+ struct dentry *root;
28008+
28009+ err = -ENOMEM;
28010+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
28011+ err = PTR_ERR(inode);
28012+ if (IS_ERR(inode))
28013+ goto out;
28014+
b95c5147 28015+ inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
1facf9fc 28016+ inode->i_fop = &aufs_dir_fop;
28017+ inode->i_mode = S_IFDIR;
9dbd164d 28018+ set_nlink(inode, 2);
1facf9fc 28019+ unlock_new_inode(inode);
28020+
92d182d2 28021+ root = d_make_root(inode);
1facf9fc 28022+ if (unlikely(!root))
92d182d2 28023+ goto out;
1facf9fc 28024+ err = PTR_ERR(root);
28025+ if (IS_ERR(root))
92d182d2 28026+ goto out;
1facf9fc 28027+
4a4d8108 28028+ err = au_di_init(root);
1facf9fc 28029+ if (!err) {
28030+ sb->s_root = root;
28031+ return 0; /* success */
28032+ }
28033+ dput(root);
1facf9fc 28034+
4f0767ce 28035+out:
1facf9fc 28036+ return err;
1facf9fc 28037+}
28038+
28039+static int aufs_fill_super(struct super_block *sb, void *raw_data,
28040+ int silent __maybe_unused)
28041+{
28042+ int err;
be52b249
AM
28043+ struct au_opts opts = {
28044+ .opt = NULL
28045+ };
79b8bda9 28046+ struct au_sbinfo *sbinfo;
1facf9fc 28047+ struct dentry *root;
28048+ struct inode *inode;
28049+ char *arg = raw_data;
28050+
28051+ if (unlikely(!arg || !*arg)) {
28052+ err = -EINVAL;
4a4d8108 28053+ pr_err("no arg\n");
1facf9fc 28054+ goto out;
28055+ }
28056+
28057+ err = -ENOMEM;
1facf9fc 28058+ opts.opt = (void *)__get_free_page(GFP_NOFS);
28059+ if (unlikely(!opts.opt))
28060+ goto out;
28061+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
28062+ opts.sb_flags = sb->s_flags;
28063+
28064+ err = au_si_alloc(sb);
28065+ if (unlikely(err))
28066+ goto out_opts;
79b8bda9 28067+ sbinfo = au_sbi(sb);
1facf9fc 28068+
28069+ /* all timestamps always follow the ones on the branch */
28070+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
28071+ sb->s_op = &aufs_sop;
027c5e7a 28072+ sb->s_d_op = &aufs_dop;
1facf9fc 28073+ sb->s_magic = AUFS_SUPER_MAGIC;
28074+ sb->s_maxbytes = 0;
c1595e42 28075+ sb->s_stack_depth = 1;
1facf9fc 28076+ au_export_init(sb);
c1595e42 28077+ /* au_xattr_init(sb); */
1facf9fc 28078+
28079+ err = alloc_root(sb);
28080+ if (unlikely(err)) {
28081+ si_write_unlock(sb);
28082+ goto out_info;
28083+ }
28084+ root = sb->s_root;
5527c038 28085+ inode = d_inode(root);
1facf9fc 28086+
28087+ /*
28088+ * actually we can parse options regardless aufs lock here.
28089+ * but at remount time, parsing must be done before aufs lock.
28090+ * so we follow the same rule.
28091+ */
28092+ ii_write_lock_parent(inode);
28093+ aufs_write_unlock(root);
28094+ err = au_opts_parse(sb, arg, &opts);
28095+ if (unlikely(err))
28096+ goto out_root;
28097+
28098+ /* lock vfs_inode first, then aufs. */
febd17d6 28099+ inode_lock(inode);
1facf9fc 28100+ aufs_write_lock(root);
28101+ err = au_opts_mount(sb, &opts);
28102+ au_opts_free(&opts);
79b8bda9
AM
28103+ if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
28104+ sb->s_d_op = &aufs_dop_noreval;
28105+ pr_info("%pf\n", sb->s_d_op);
28106+ au_refresh_dop(root, /*force_reval*/0);
b95c5147
AM
28107+ sbinfo->si_iop_array = aufs_iop_nogetattr;
28108+ au_refresh_iop(inode, /*force_getattr*/0);
79b8bda9 28109+ }
1facf9fc 28110+ aufs_write_unlock(root);
febd17d6 28111+ inode_unlock(inode);
4a4d8108
AM
28112+ if (!err)
28113+ goto out_opts; /* success */
1facf9fc 28114+
4f0767ce 28115+out_root:
1facf9fc 28116+ dput(root);
28117+ sb->s_root = NULL;
4f0767ce 28118+out_info:
79b8bda9
AM
28119+ dbgaufs_si_fin(sbinfo);
28120+ kobject_put(&sbinfo->si_kobj);
1facf9fc 28121+ sb->s_fs_info = NULL;
4f0767ce 28122+out_opts:
f0c0a007 28123+ au_delayed_free_page((unsigned long)opts.opt);
4f0767ce 28124+out:
1facf9fc 28125+ AuTraceErr(err);
28126+ err = cvt_err(err);
28127+ AuTraceErr(err);
28128+ return err;
28129+}
28130+
28131+/* ---------------------------------------------------------------------- */
28132+
027c5e7a
AM
28133+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
28134+ const char *dev_name __maybe_unused,
28135+ void *raw_data)
1facf9fc 28136+{
027c5e7a 28137+ struct dentry *root;
1facf9fc 28138+ struct super_block *sb;
28139+
28140+ /* all timestamps always follow the ones on the branch */
28141+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
28142+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
28143+ if (IS_ERR(root))
28144+ goto out;
28145+
28146+ sb = root->d_sb;
28147+ si_write_lock(sb, !AuLock_FLUSH);
28148+ sysaufs_brs_add(sb, 0);
28149+ si_write_unlock(sb);
28150+ au_sbilist_add(sb);
28151+
28152+out:
28153+ return root;
1facf9fc 28154+}
28155+
e49829fe
JR
28156+static void aufs_kill_sb(struct super_block *sb)
28157+{
28158+ struct au_sbinfo *sbinfo;
28159+
28160+ sbinfo = au_sbi(sb);
28161+ if (sbinfo) {
28162+ au_sbilist_del(sb);
28163+ aufs_write_lock(sb->s_root);
076b876e 28164+ au_fhsm_fin(sb);
e49829fe
JR
28165+ if (sbinfo->si_wbr_create_ops->fin)
28166+ sbinfo->si_wbr_create_ops->fin(sb);
28167+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
28168+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
b95c5147 28169+ au_remount_refresh(sb, /*do_idop*/0);
e49829fe
JR
28170+ }
28171+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
28172+ au_plink_put(sb, /*verbose*/1);
28173+ au_xino_clr(sb);
1e00d052 28174+ sbinfo->si_sb = NULL;
e49829fe 28175+ aufs_write_unlock(sb->s_root);
e49829fe
JR
28176+ au_nwt_flush(&sbinfo->si_nowait);
28177+ }
98d9a5b1 28178+ kill_anon_super(sb);
e49829fe
JR
28179+}
28180+
1facf9fc 28181+struct file_system_type aufs_fs_type = {
28182+ .name = AUFS_FSTYPE,
c06a8ce3
AM
28183+ /* a race between rename and others */
28184+ .fs_flags = FS_RENAME_DOES_D_MOVE,
027c5e7a 28185+ .mount = aufs_mount,
e49829fe 28186+ .kill_sb = aufs_kill_sb,
1facf9fc 28187+ /* no need to __module_get() and module_put(). */
28188+ .owner = THIS_MODULE,
28189+};
7f207e10
AM
28190diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
28191--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 28192+++ linux/fs/aufs/super.h 2016-10-09 16:55:38.889431135 +0200
5afbbe0d 28193@@ -0,0 +1,638 @@
1facf9fc 28194+/*
8cdd5066 28195+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28196+ *
28197+ * This program, aufs is free software; you can redistribute it and/or modify
28198+ * it under the terms of the GNU General Public License as published by
28199+ * the Free Software Foundation; either version 2 of the License, or
28200+ * (at your option) any later version.
dece6358
AM
28201+ *
28202+ * This program is distributed in the hope that it will be useful,
28203+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28204+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28205+ * GNU General Public License for more details.
28206+ *
28207+ * You should have received a copy of the GNU General Public License
523b37e3 28208+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28209+ */
28210+
28211+/*
28212+ * super_block operations
28213+ */
28214+
28215+#ifndef __AUFS_SUPER_H__
28216+#define __AUFS_SUPER_H__
28217+
28218+#ifdef __KERNEL__
28219+
28220+#include <linux/fs.h>
5527c038 28221+#include <linux/kobject.h>
1facf9fc 28222+#include "rwsem.h"
28223+#include "spl.h"
28224+#include "wkq.h"
28225+
1facf9fc 28226+/* policies to select one among multiple writable branches */
28227+struct au_wbr_copyup_operations {
28228+ int (*copyup)(struct dentry *dentry);
28229+};
28230+
392086de
AM
28231+#define AuWbr_DIR 1 /* target is a dir */
28232+#define AuWbr_PARENT (1 << 1) /* always require a parent */
28233+
28234+#define au_ftest_wbr(flags, name) ((flags) & AuWbr_##name)
28235+#define au_fset_wbr(flags, name) { (flags) |= AuWbr_##name; }
28236+#define au_fclr_wbr(flags, name) { (flags) &= ~AuWbr_##name; }
28237+
1facf9fc 28238+struct au_wbr_create_operations {
392086de 28239+ int (*create)(struct dentry *dentry, unsigned int flags);
1facf9fc 28240+ int (*init)(struct super_block *sb);
28241+ int (*fin)(struct super_block *sb);
28242+};
28243+
28244+struct au_wbr_mfs {
28245+ struct mutex mfs_lock; /* protect this structure */
28246+ unsigned long mfs_jiffy;
28247+ unsigned long mfs_expire;
28248+ aufs_bindex_t mfs_bindex;
28249+
28250+ unsigned long long mfsrr_bytes;
28251+ unsigned long long mfsrr_watermark;
28252+};
28253+
86dc4139
AM
28254+#define AuPlink_NHASH 100
28255+static inline int au_plink_hash(ino_t ino)
28256+{
28257+ return ino % AuPlink_NHASH;
28258+}
28259+
076b876e
AM
28260+/* File-based Hierarchical Storage Management */
28261+struct au_fhsm {
28262+#ifdef CONFIG_AUFS_FHSM
28263+ /* allow only one process who can receive the notification */
28264+ spinlock_t fhsm_spin;
28265+ pid_t fhsm_pid;
28266+ wait_queue_head_t fhsm_wqh;
28267+ atomic_t fhsm_readable;
28268+
c1595e42 28269+ /* these are protected by si_rwsem */
076b876e 28270+ unsigned long fhsm_expire;
c1595e42 28271+ aufs_bindex_t fhsm_bottom;
076b876e
AM
28272+#endif
28273+};
28274+
febd17d6
JR
28275+#define AU_PIDSTEP (int)(BITS_TO_LONGS(PID_MAX_DEFAULT) * BITS_PER_LONG)
28276+#define AU_NPIDMAP (int)DIV_ROUND_UP(PID_MAX_LIMIT, AU_PIDSTEP)
28277+struct au_si_pid {
28278+ unsigned long *pid_bitmap[AU_NPIDMAP];
28279+ struct mutex pid_mtx;
28280+};
28281+
1facf9fc 28282+struct au_branch;
28283+struct au_sbinfo {
28284+ /* nowait tasks in the system-wide workqueue */
28285+ struct au_nowait_tasks si_nowait;
28286+
b752ccd1
AM
28287+ /*
28288+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
28289+ * rwsem for au_sbinfo is necessary.
28290+ */
dece6358 28291+ struct au_rwsem si_rwsem;
1facf9fc 28292+
b752ccd1 28293+ /* prevent recursive locking in deleting inode */
febd17d6 28294+ struct au_si_pid au_si_pid;
b752ccd1 28295+
7f207e10 28296+ /*
523b37e3
AM
28297+ * dirty approach to protect sb->sb_inodes and ->s_files (gone) from
28298+ * remount.
7f207e10 28299+ */
5afbbe0d 28300+ struct percpu_counter si_ninodes, si_nfiles;
7f207e10 28301+
1facf9fc 28302+ /* branch management */
28303+ unsigned int si_generation;
28304+
2000de60 28305+ /* see AuSi_ flags */
1facf9fc 28306+ unsigned char au_si_status;
28307+
5afbbe0d 28308+ aufs_bindex_t si_bbot;
7f207e10
AM
28309+
28310+ /* dirty trick to keep br_id plus */
28311+ unsigned int si_last_br_id :
28312+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 28313+ struct au_branch **si_branch;
28314+
28315+ /* policy to select a writable branch */
28316+ unsigned char si_wbr_copyup;
28317+ unsigned char si_wbr_create;
28318+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
28319+ struct au_wbr_create_operations *si_wbr_create_ops;
28320+
28321+ /* round robin */
28322+ atomic_t si_wbr_rr_next;
28323+
28324+ /* most free space */
28325+ struct au_wbr_mfs si_wbr_mfs;
28326+
076b876e
AM
28327+ /* File-based Hierarchical Storage Management */
28328+ struct au_fhsm si_fhsm;
28329+
1facf9fc 28330+ /* mount flags */
28331+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
28332+ unsigned int si_mntflags;
28333+
28334+ /* external inode number (bitmap and translation table) */
5527c038
JR
28335+ vfs_readf_t si_xread;
28336+ vfs_writef_t si_xwrite;
1facf9fc 28337+ struct file *si_xib;
28338+ struct mutex si_xib_mtx; /* protect xib members */
28339+ unsigned long *si_xib_buf;
28340+ unsigned long si_xib_last_pindex;
28341+ int si_xib_next_bit;
28342+ aufs_bindex_t si_xino_brid;
392086de
AM
28343+ unsigned long si_xino_jiffy;
28344+ unsigned long si_xino_expire;
1facf9fc 28345+ /* reserved for future use */
28346+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
28347+
28348+#ifdef CONFIG_AUFS_EXPORT
28349+ /* i_generation */
28350+ struct file *si_xigen;
28351+ atomic_t si_xigen_next;
28352+#endif
28353+
b912730e
AM
28354+ /* dirty trick to support atomic_open */
28355+ struct au_sphlhead si_aopen;
28356+
1facf9fc 28357+ /* vdir parameters */
e49829fe 28358+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 28359+ unsigned int si_rdblk; /* deblk size */
28360+ unsigned int si_rdhash; /* hash size */
28361+
28362+ /*
28363+ * If the number of whiteouts is larger than si_dirwh, leave all of
28364+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
28365+ * future fsck.aufs or kernel thread will remove them later.
28366+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
28367+ */
28368+ unsigned int si_dirwh;
28369+
1facf9fc 28370+ /* pseudo_link list */
86dc4139 28371+ struct au_sphlhead si_plink[AuPlink_NHASH];
1facf9fc 28372+ wait_queue_head_t si_plink_wq;
4a4d8108 28373+ spinlock_t si_plink_maint_lock;
e49829fe 28374+ pid_t si_plink_maint_pid;
1facf9fc 28375+
523b37e3
AM
28376+ /* file list */
28377+ struct au_sphlhead si_files;
28378+
b95c5147
AM
28379+ /* with/without getattr, brother of sb->s_d_op */
28380+ struct inode_operations *si_iop_array;
28381+
1facf9fc 28382+ /*
28383+ * sysfs and lifetime management.
28384+ * this is not a small structure and it may be a waste of memory in case
28385+ * sysfs is disabled, particularly when many aufs-es are mounted.
28386+ * but using sysfs is majority.
28387+ */
28388+ struct kobject si_kobj;
28389+#ifdef CONFIG_DEBUG_FS
86dc4139
AM
28390+ struct dentry *si_dbgaufs;
28391+ struct dentry *si_dbgaufs_plink;
28392+ struct dentry *si_dbgaufs_xib;
1facf9fc 28393+#ifdef CONFIG_AUFS_EXPORT
28394+ struct dentry *si_dbgaufs_xigen;
28395+#endif
28396+#endif
28397+
e49829fe 28398+#ifdef CONFIG_AUFS_SBILIST
5afbbe0d 28399+ struct hlist_node si_list;
e49829fe
JR
28400+#endif
28401+
1facf9fc 28402+ /* dirty, necessary for unmounting, sysfs and sysrq */
28403+ struct super_block *si_sb;
28404+};
28405+
dece6358
AM
28406+/* sbinfo status flags */
28407+/*
28408+ * set true when refresh_dirs() failed at remount time.
28409+ * then try refreshing dirs at access time again.
28410+ * if it is false, refreshing dirs at access time is unnecessary
28411+ */
027c5e7a 28412+#define AuSi_FAILED_REFRESH_DIR 1
076b876e 28413+#define AuSi_FHSM (1 << 1) /* fhsm is active now */
79b8bda9 28414+#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
076b876e
AM
28415+
28416+#ifndef CONFIG_AUFS_FHSM
28417+#undef AuSi_FHSM
28418+#define AuSi_FHSM 0
28419+#endif
28420+
dece6358
AM
28421+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
28422+ unsigned int flag)
28423+{
28424+ AuRwMustAnyLock(&sbi->si_rwsem);
28425+ return sbi->au_si_status & flag;
28426+}
28427+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
28428+#define au_fset_si(sbinfo, name) do { \
28429+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28430+ (sbinfo)->au_si_status |= AuSi_##name; \
28431+} while (0)
28432+#define au_fclr_si(sbinfo, name) do { \
28433+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28434+ (sbinfo)->au_si_status &= ~AuSi_##name; \
28435+} while (0)
28436+
1facf9fc 28437+/* ---------------------------------------------------------------------- */
28438+
28439+/* policy to select one among writable branches */
4a4d8108
AM
28440+#define AuWbrCopyup(sbinfo, ...) \
28441+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
28442+#define AuWbrCreate(sbinfo, ...) \
28443+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 28444+
28445+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
28446+#define AuLock_DW 1 /* write-lock dentry */
28447+#define AuLock_IR (1 << 1) /* read-lock inode */
28448+#define AuLock_IW (1 << 2) /* write-lock inode */
28449+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
b95c5147 28450+#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
e49829fe
JR
28451+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
28452+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
027c5e7a 28453+#define AuLock_GEN (1 << 7) /* test digen/iigen */
1facf9fc 28454+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
7f207e10
AM
28455+#define au_fset_lock(flags, name) \
28456+ do { (flags) |= AuLock_##name; } while (0)
28457+#define au_fclr_lock(flags, name) \
28458+ do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 28459+
28460+/* ---------------------------------------------------------------------- */
28461+
28462+/* super.c */
28463+extern struct file_system_type aufs_fs_type;
28464+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
79b8bda9
AM
28465+typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
28466+ unsigned long long max, void *arg);
79b8bda9
AM
28467+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
28468+ struct super_block *sb, void *arg);
7f207e10
AM
28469+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
28470+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 28471+
28472+/* sbinfo.c */
28473+void au_si_free(struct kobject *kobj);
28474+int au_si_alloc(struct super_block *sb);
e2f27e51 28475+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink);
1facf9fc 28476+
28477+unsigned int au_sigen_inc(struct super_block *sb);
28478+aufs_bindex_t au_new_br_id(struct super_block *sb);
28479+
e49829fe
JR
28480+int si_read_lock(struct super_block *sb, int flags);
28481+int si_write_lock(struct super_block *sb, int flags);
28482+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 28483+void aufs_read_unlock(struct dentry *dentry, int flags);
28484+void aufs_write_lock(struct dentry *dentry);
28485+void aufs_write_unlock(struct dentry *dentry);
e49829fe 28486+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 28487+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
28488+
28489+/* wbr_policy.c */
28490+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
28491+extern struct au_wbr_create_operations au_wbr_create_ops[];
28492+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
c2b27bf2 28493+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
5afbbe0d 28494+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop);
c2b27bf2
AM
28495+
28496+/* mvdown.c */
28497+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
1facf9fc 28498+
076b876e
AM
28499+#ifdef CONFIG_AUFS_FHSM
28500+/* fhsm.c */
28501+
28502+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
28503+{
28504+ pid_t pid;
28505+
28506+ spin_lock(&fhsm->fhsm_spin);
28507+ pid = fhsm->fhsm_pid;
28508+ spin_unlock(&fhsm->fhsm_spin);
28509+
28510+ return pid;
28511+}
28512+
28513+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
28514+void au_fhsm_wrote_all(struct super_block *sb, int force);
28515+int au_fhsm_fd(struct super_block *sb, int oflags);
28516+int au_fhsm_br_alloc(struct au_branch *br);
c1595e42 28517+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
076b876e
AM
28518+void au_fhsm_fin(struct super_block *sb);
28519+void au_fhsm_init(struct au_sbinfo *sbinfo);
28520+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
28521+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
28522+#else
28523+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
28524+ int force)
28525+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
28526+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
c1595e42
JR
28527+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
28528+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
28529+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
076b876e
AM
28530+AuStubVoid(au_fhsm_fin, struct super_block *sb)
28531+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
28532+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
28533+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
28534+#endif
28535+
1facf9fc 28536+/* ---------------------------------------------------------------------- */
28537+
28538+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
28539+{
28540+ return sb->s_fs_info;
28541+}
28542+
28543+/* ---------------------------------------------------------------------- */
28544+
28545+#ifdef CONFIG_AUFS_EXPORT
a2a7ad62 28546+int au_test_nfsd(void);
1facf9fc 28547+void au_export_init(struct super_block *sb);
b752ccd1 28548+void au_xigen_inc(struct inode *inode);
1facf9fc 28549+int au_xigen_new(struct inode *inode);
28550+int au_xigen_set(struct super_block *sb, struct file *base);
28551+void au_xigen_clr(struct super_block *sb);
28552+
28553+static inline int au_busy_or_stale(void)
28554+{
b752ccd1 28555+ if (!au_test_nfsd())
1facf9fc 28556+ return -EBUSY;
28557+ return -ESTALE;
28558+}
28559+#else
b752ccd1 28560+AuStubInt0(au_test_nfsd, void)
a2a7ad62 28561+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1 28562+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
28563+AuStubInt0(au_xigen_new, struct inode *inode)
28564+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
28565+AuStubVoid(au_xigen_clr, struct super_block *sb)
c1595e42 28566+AuStub(int, au_busy_or_stale, return -EBUSY, void)
1facf9fc 28567+#endif /* CONFIG_AUFS_EXPORT */
28568+
28569+/* ---------------------------------------------------------------------- */
28570+
e49829fe
JR
28571+#ifdef CONFIG_AUFS_SBILIST
28572+/* module.c */
5afbbe0d 28573+extern struct au_sphlhead au_sbilist;
e49829fe
JR
28574+
28575+static inline void au_sbilist_init(void)
28576+{
5afbbe0d 28577+ au_sphl_init(&au_sbilist);
e49829fe
JR
28578+}
28579+
28580+static inline void au_sbilist_add(struct super_block *sb)
28581+{
5afbbe0d 28582+ au_sphl_add(&au_sbi(sb)->si_list, &au_sbilist);
e49829fe
JR
28583+}
28584+
28585+static inline void au_sbilist_del(struct super_block *sb)
28586+{
5afbbe0d 28587+ au_sphl_del(&au_sbi(sb)->si_list, &au_sbilist);
e49829fe 28588+}
53392da6
AM
28589+
28590+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
28591+static inline void au_sbilist_lock(void)
28592+{
28593+ spin_lock(&au_sbilist.spin);
28594+}
28595+
28596+static inline void au_sbilist_unlock(void)
28597+{
28598+ spin_unlock(&au_sbilist.spin);
28599+}
28600+#define AuGFP_SBILIST GFP_ATOMIC
28601+#else
28602+AuStubVoid(au_sbilist_lock, void)
28603+AuStubVoid(au_sbilist_unlock, void)
28604+#define AuGFP_SBILIST GFP_NOFS
28605+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
28606+#else
28607+AuStubVoid(au_sbilist_init, void)
c1595e42
JR
28608+AuStubVoid(au_sbilist_add, struct super_block *sb)
28609+AuStubVoid(au_sbilist_del, struct super_block *sb)
53392da6
AM
28610+AuStubVoid(au_sbilist_lock, void)
28611+AuStubVoid(au_sbilist_unlock, void)
28612+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
28613+#endif
28614+
28615+/* ---------------------------------------------------------------------- */
28616+
1facf9fc 28617+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
28618+{
dece6358 28619+ /*
c1595e42 28620+ * This function is a dynamic '__init' function actually,
dece6358
AM
28621+ * so the tiny check for si_rwsem is unnecessary.
28622+ */
28623+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 28624+#ifdef CONFIG_DEBUG_FS
28625+ sbinfo->si_dbgaufs = NULL;
86dc4139 28626+ sbinfo->si_dbgaufs_plink = NULL;
1facf9fc 28627+ sbinfo->si_dbgaufs_xib = NULL;
28628+#ifdef CONFIG_AUFS_EXPORT
28629+ sbinfo->si_dbgaufs_xigen = NULL;
28630+#endif
28631+#endif
28632+}
28633+
28634+/* ---------------------------------------------------------------------- */
28635+
febd17d6 28636+static inline void si_pid_idx_bit(int *idx, pid_t *bit)
b752ccd1
AM
28637+{
28638+ /* the origin of pid is 1, but the bitmap's is 0 */
febd17d6
JR
28639+ *bit = current->pid - 1;
28640+ *idx = *bit / AU_PIDSTEP;
28641+ *bit %= AU_PIDSTEP;
b752ccd1
AM
28642+}
28643+
28644+static inline int si_pid_test(struct super_block *sb)
28645+{
076b876e 28646+ pid_t bit;
febd17d6
JR
28647+ int idx;
28648+ unsigned long *bitmap;
076b876e 28649+
febd17d6
JR
28650+ si_pid_idx_bit(&idx, &bit);
28651+ bitmap = au_sbi(sb)->au_si_pid.pid_bitmap[idx];
28652+ if (bitmap)
28653+ return test_bit(bit, bitmap);
28654+ return 0;
b752ccd1
AM
28655+}
28656+
28657+static inline void si_pid_clr(struct super_block *sb)
28658+{
076b876e 28659+ pid_t bit;
febd17d6
JR
28660+ int idx;
28661+ unsigned long *bitmap;
076b876e 28662+
febd17d6
JR
28663+ si_pid_idx_bit(&idx, &bit);
28664+ bitmap = au_sbi(sb)->au_si_pid.pid_bitmap[idx];
28665+ BUG_ON(!bitmap);
28666+ AuDebugOn(!test_bit(bit, bitmap));
28667+ clear_bit(bit, bitmap);
28668+ /* smp_mb(); */
b752ccd1
AM
28669+}
28670+
febd17d6
JR
28671+void si_pid_set(struct super_block *sb);
28672+
b752ccd1
AM
28673+/* ---------------------------------------------------------------------- */
28674+
1facf9fc 28675+/* lock superblock. mainly for entry point functions */
28676+/*
b752ccd1
AM
28677+ * __si_read_lock, __si_write_lock,
28678+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 28679+ */
b752ccd1 28680+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 28681+
dece6358
AM
28682+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
28683+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
28684+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
28685+
b752ccd1
AM
28686+static inline void si_noflush_read_lock(struct super_block *sb)
28687+{
28688+ __si_read_lock(sb);
28689+ si_pid_set(sb);
28690+}
28691+
28692+static inline int si_noflush_read_trylock(struct super_block *sb)
28693+{
076b876e
AM
28694+ int locked;
28695+
28696+ locked = __si_read_trylock(sb);
b752ccd1
AM
28697+ if (locked)
28698+ si_pid_set(sb);
28699+ return locked;
28700+}
28701+
28702+static inline void si_noflush_write_lock(struct super_block *sb)
28703+{
28704+ __si_write_lock(sb);
28705+ si_pid_set(sb);
28706+}
28707+
28708+static inline int si_noflush_write_trylock(struct super_block *sb)
28709+{
076b876e
AM
28710+ int locked;
28711+
28712+ locked = __si_write_trylock(sb);
b752ccd1
AM
28713+ if (locked)
28714+ si_pid_set(sb);
28715+ return locked;
28716+}
28717+
7e9cd9fe 28718+#if 0 /* reserved */
1facf9fc 28719+static inline int si_read_trylock(struct super_block *sb, int flags)
28720+{
28721+ if (au_ftest_lock(flags, FLUSH))
28722+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28723+ return si_noflush_read_trylock(sb);
28724+}
e49829fe 28725+#endif
1facf9fc 28726+
b752ccd1
AM
28727+static inline void si_read_unlock(struct super_block *sb)
28728+{
28729+ si_pid_clr(sb);
28730+ __si_read_unlock(sb);
28731+}
28732+
7e9cd9fe 28733+#if 0 /* reserved */
1facf9fc 28734+static inline int si_write_trylock(struct super_block *sb, int flags)
28735+{
28736+ if (au_ftest_lock(flags, FLUSH))
28737+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28738+ return si_noflush_write_trylock(sb);
28739+}
b752ccd1
AM
28740+#endif
28741+
28742+static inline void si_write_unlock(struct super_block *sb)
28743+{
28744+ si_pid_clr(sb);
28745+ __si_write_unlock(sb);
28746+}
28747+
7e9cd9fe 28748+#if 0 /* reserved */
b752ccd1
AM
28749+static inline void si_downgrade_lock(struct super_block *sb)
28750+{
28751+ __si_downgrade_lock(sb);
28752+}
28753+#endif
1facf9fc 28754+
28755+/* ---------------------------------------------------------------------- */
28756+
5afbbe0d 28757+static inline aufs_bindex_t au_sbbot(struct super_block *sb)
1facf9fc 28758+{
dece6358 28759+ SiMustAnyLock(sb);
5afbbe0d 28760+ return au_sbi(sb)->si_bbot;
1facf9fc 28761+}
28762+
28763+static inline unsigned int au_mntflags(struct super_block *sb)
28764+{
dece6358 28765+ SiMustAnyLock(sb);
1facf9fc 28766+ return au_sbi(sb)->si_mntflags;
28767+}
28768+
28769+static inline unsigned int au_sigen(struct super_block *sb)
28770+{
dece6358 28771+ SiMustAnyLock(sb);
1facf9fc 28772+ return au_sbi(sb)->si_generation;
28773+}
28774+
5afbbe0d
AM
28775+static inline unsigned long long au_ninodes(struct super_block *sb)
28776+{
28777+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_ninodes);
28778+
28779+ BUG_ON(n < 0);
28780+ return n;
28781+}
28782+
7f207e10
AM
28783+static inline void au_ninodes_inc(struct super_block *sb)
28784+{
5afbbe0d 28785+ percpu_counter_inc(&au_sbi(sb)->si_ninodes);
7f207e10
AM
28786+}
28787+
28788+static inline void au_ninodes_dec(struct super_block *sb)
28789+{
5afbbe0d
AM
28790+ percpu_counter_dec(&au_sbi(sb)->si_ninodes);
28791+}
28792+
28793+static inline unsigned long long au_nfiles(struct super_block *sb)
28794+{
28795+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_nfiles);
28796+
28797+ BUG_ON(n < 0);
28798+ return n;
7f207e10
AM
28799+}
28800+
28801+static inline void au_nfiles_inc(struct super_block *sb)
28802+{
5afbbe0d 28803+ percpu_counter_inc(&au_sbi(sb)->si_nfiles);
7f207e10
AM
28804+}
28805+
28806+static inline void au_nfiles_dec(struct super_block *sb)
28807+{
5afbbe0d 28808+ percpu_counter_dec(&au_sbi(sb)->si_nfiles);
7f207e10
AM
28809+}
28810+
1facf9fc 28811+static inline struct au_branch *au_sbr(struct super_block *sb,
28812+ aufs_bindex_t bindex)
28813+{
dece6358 28814+ SiMustAnyLock(sb);
1facf9fc 28815+ return au_sbi(sb)->si_branch[0 + bindex];
28816+}
28817+
28818+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
28819+{
dece6358 28820+ SiMustWriteLock(sb);
1facf9fc 28821+ au_sbi(sb)->si_xino_brid = brid;
28822+}
28823+
28824+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
28825+{
dece6358 28826+ SiMustAnyLock(sb);
1facf9fc 28827+ return au_sbi(sb)->si_xino_brid;
28828+}
28829+
28830+#endif /* __KERNEL__ */
28831+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
28832diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
28833--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 28834+++ linux/fs/aufs/sysaufs.c 2016-10-09 16:55:36.496035060 +0200
523b37e3 28835@@ -0,0 +1,104 @@
1facf9fc 28836+/*
8cdd5066 28837+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28838+ *
28839+ * This program, aufs is free software; you can redistribute it and/or modify
28840+ * it under the terms of the GNU General Public License as published by
28841+ * the Free Software Foundation; either version 2 of the License, or
28842+ * (at your option) any later version.
dece6358
AM
28843+ *
28844+ * This program is distributed in the hope that it will be useful,
28845+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28846+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28847+ * GNU General Public License for more details.
28848+ *
28849+ * You should have received a copy of the GNU General Public License
523b37e3 28850+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28851+ */
28852+
28853+/*
28854+ * sysfs interface and lifetime management
28855+ * they are necessary regardless sysfs is disabled.
28856+ */
28857+
1facf9fc 28858+#include <linux/random.h>
1facf9fc 28859+#include "aufs.h"
28860+
28861+unsigned long sysaufs_si_mask;
e49829fe 28862+struct kset *sysaufs_kset;
1facf9fc 28863+
28864+#define AuSiAttr(_name) { \
28865+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
28866+ .show = sysaufs_si_##_name, \
28867+}
28868+
28869+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
28870+struct attribute *sysaufs_si_attrs[] = {
28871+ &sysaufs_si_attr_xi_path.attr,
28872+ NULL,
28873+};
28874+
4a4d8108 28875+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 28876+ .show = sysaufs_si_show
28877+};
28878+
28879+static struct kobj_type au_sbi_ktype = {
28880+ .release = au_si_free,
28881+ .sysfs_ops = &au_sbi_ops,
28882+ .default_attrs = sysaufs_si_attrs
28883+};
28884+
28885+/* ---------------------------------------------------------------------- */
28886+
28887+int sysaufs_si_init(struct au_sbinfo *sbinfo)
28888+{
28889+ int err;
28890+
e49829fe 28891+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 28892+ /* cf. sysaufs_name() */
28893+ err = kobject_init_and_add
e49829fe 28894+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 28895+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
28896+
28897+ dbgaufs_si_null(sbinfo);
28898+ if (!err) {
28899+ err = dbgaufs_si_init(sbinfo);
28900+ if (unlikely(err))
28901+ kobject_put(&sbinfo->si_kobj);
28902+ }
28903+ return err;
28904+}
28905+
28906+void sysaufs_fin(void)
28907+{
28908+ dbgaufs_fin();
e49829fe
JR
28909+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
28910+ kset_unregister(sysaufs_kset);
1facf9fc 28911+}
28912+
28913+int __init sysaufs_init(void)
28914+{
28915+ int err;
28916+
28917+ do {
28918+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
28919+ } while (!sysaufs_si_mask);
28920+
4a4d8108 28921+ err = -EINVAL;
e49829fe
JR
28922+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
28923+ if (unlikely(!sysaufs_kset))
4a4d8108 28924+ goto out;
e49829fe
JR
28925+ err = PTR_ERR(sysaufs_kset);
28926+ if (IS_ERR(sysaufs_kset))
1facf9fc 28927+ goto out;
e49829fe 28928+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 28929+ if (unlikely(err)) {
e49829fe 28930+ kset_unregister(sysaufs_kset);
1facf9fc 28931+ goto out;
28932+ }
28933+
28934+ err = dbgaufs_init();
28935+ if (unlikely(err))
28936+ sysaufs_fin();
4f0767ce 28937+out:
1facf9fc 28938+ return err;
28939+}
7f207e10
AM
28940diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
28941--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 28942+++ linux/fs/aufs/sysaufs.h 2016-10-09 16:55:36.496035060 +0200
c1595e42 28943@@ -0,0 +1,101 @@
1facf9fc 28944+/*
8cdd5066 28945+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28946+ *
28947+ * This program, aufs is free software; you can redistribute it and/or modify
28948+ * it under the terms of the GNU General Public License as published by
28949+ * the Free Software Foundation; either version 2 of the License, or
28950+ * (at your option) any later version.
dece6358
AM
28951+ *
28952+ * This program is distributed in the hope that it will be useful,
28953+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28954+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28955+ * GNU General Public License for more details.
28956+ *
28957+ * You should have received a copy of the GNU General Public License
523b37e3 28958+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28959+ */
28960+
28961+/*
28962+ * sysfs interface and mount lifetime management
28963+ */
28964+
28965+#ifndef __SYSAUFS_H__
28966+#define __SYSAUFS_H__
28967+
28968+#ifdef __KERNEL__
28969+
1facf9fc 28970+#include <linux/sysfs.h>
1facf9fc 28971+#include "module.h"
28972+
dece6358
AM
28973+struct super_block;
28974+struct au_sbinfo;
28975+
1facf9fc 28976+struct sysaufs_si_attr {
28977+ struct attribute attr;
28978+ int (*show)(struct seq_file *seq, struct super_block *sb);
28979+};
28980+
28981+/* ---------------------------------------------------------------------- */
28982+
28983+/* sysaufs.c */
28984+extern unsigned long sysaufs_si_mask;
e49829fe 28985+extern struct kset *sysaufs_kset;
1facf9fc 28986+extern struct attribute *sysaufs_si_attrs[];
28987+int sysaufs_si_init(struct au_sbinfo *sbinfo);
28988+int __init sysaufs_init(void);
28989+void sysaufs_fin(void);
28990+
28991+/* ---------------------------------------------------------------------- */
28992+
28993+/* some people don't like to show a pointer in kernel */
28994+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
28995+{
28996+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
28997+}
28998+
28999+#define SysaufsSiNamePrefix "si_"
29000+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
29001+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
29002+{
29003+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
29004+ sysaufs_si_id(sbinfo));
29005+}
29006+
29007+struct au_branch;
29008+#ifdef CONFIG_SYSFS
29009+/* sysfs.c */
29010+extern struct attribute_group *sysaufs_attr_group;
29011+
29012+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
29013+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
29014+ char *buf);
076b876e
AM
29015+long au_brinfo_ioctl(struct file *file, unsigned long arg);
29016+#ifdef CONFIG_COMPAT
29017+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
29018+#endif
1facf9fc 29019+
29020+void sysaufs_br_init(struct au_branch *br);
29021+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
29022+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
29023+
29024+#define sysaufs_brs_init() do {} while (0)
29025+
29026+#else
29027+#define sysaufs_attr_group NULL
29028+
4a4d8108 29029+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
c1595e42
JR
29030+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
29031+ struct attribute *attr, char *buf)
4a4d8108
AM
29032+AuStubVoid(sysaufs_br_init, struct au_branch *br)
29033+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
29034+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 29035+
29036+static inline void sysaufs_brs_init(void)
29037+{
29038+ sysaufs_brs = 0;
29039+}
29040+
29041+#endif /* CONFIG_SYSFS */
29042+
29043+#endif /* __KERNEL__ */
29044+#endif /* __SYSAUFS_H__ */
7f207e10
AM
29045diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
29046--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 29047+++ linux/fs/aufs/sysfs.c 2016-10-09 16:55:36.496035060 +0200
79b8bda9 29048@@ -0,0 +1,376 @@
1facf9fc 29049+/*
8cdd5066 29050+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29051+ *
29052+ * This program, aufs is free software; you can redistribute it and/or modify
29053+ * it under the terms of the GNU General Public License as published by
29054+ * the Free Software Foundation; either version 2 of the License, or
29055+ * (at your option) any later version.
dece6358
AM
29056+ *
29057+ * This program is distributed in the hope that it will be useful,
29058+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29059+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29060+ * GNU General Public License for more details.
29061+ *
29062+ * You should have received a copy of the GNU General Public License
523b37e3 29063+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29064+ */
29065+
29066+/*
29067+ * sysfs interface
29068+ */
29069+
076b876e 29070+#include <linux/compat.h>
1facf9fc 29071+#include <linux/seq_file.h>
1facf9fc 29072+#include "aufs.h"
29073+
4a4d8108
AM
29074+#ifdef CONFIG_AUFS_FS_MODULE
29075+/* this entry violates the "one line per file" policy of sysfs */
29076+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
29077+ char *buf)
29078+{
29079+ ssize_t err;
29080+ static char *conf =
29081+/* this file is generated at compile time */
29082+#include "conf.str"
29083+ ;
29084+
29085+ err = snprintf(buf, PAGE_SIZE, "%s", conf); /* conf is data, never a format */
29086+ if (unlikely(err >= PAGE_SIZE)) /* truncated: would not fit in one page */
29087+ err = -EFBIG;
29088+ return err;
29089+}
29090+
29091+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
29092+#endif
29093+
1facf9fc 29094+static struct attribute *au_attr[] = {
4a4d8108
AM
29095+#ifdef CONFIG_AUFS_FS_MODULE
29096+ &au_config_attr.attr,
29097+#endif
1facf9fc 29098+ NULL, /* need to NULL terminate the list of attributes */
29099+};
29100+
29101+static struct attribute_group sysaufs_attr_group_body = {
29102+ .attrs = au_attr
29103+};
29104+
29105+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
29106+
29107+/* ---------------------------------------------------------------------- */
29108+
29109+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
29110+{
29111+ int err;
29112+
dece6358
AM
29113+ SiMustAnyLock(sb);
29114+
1facf9fc 29115+ err = 0;
29116+ if (au_opt_test(au_mntflags(sb), XINO)) {
29117+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
29118+ seq_putc(seq, '\n');
29119+ }
29120+ return err;
29121+}
29122+
29123+/*
29124+ * the lifetime of branch is independent from the entry under sysfs.
29125+ * sysfs handles the lifetime of the entry, and never call ->show() after it is
29126+ * unlinked.
29127+ */
29128+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
392086de 29129+ aufs_bindex_t bindex, int idx)
1facf9fc 29130+{
1e00d052 29131+ int err;
1facf9fc 29132+ struct path path;
29133+ struct dentry *root;
29134+ struct au_branch *br;
076b876e 29135+ au_br_perm_str_t perm;
1facf9fc 29136+
29137+ AuDbg("b%d\n", bindex);
29138+
1e00d052 29139+ err = 0;
1facf9fc 29140+ root = sb->s_root;
29141+ di_read_lock_parent(root, !AuLock_IR);
29142+ br = au_sbr(sb, bindex);
392086de
AM
29143+
29144+ switch (idx) {
29145+ case AuBrSysfs_BR:
29146+ path.mnt = au_br_mnt(br);
29147+ path.dentry = au_h_dptr(root, bindex);
79b8bda9
AM
29148+ err = au_seq_path(seq, &path);
29149+ if (!err) {
29150+ au_optstr_br_perm(&perm, br->br_perm);
29151+ seq_printf(seq, "=%s\n", perm.a);
29152+ }
392086de
AM
29153+ break;
29154+ case AuBrSysfs_BRID:
79b8bda9 29155+ seq_printf(seq, "%d\n", br->br_id);
392086de
AM
29156+ break;
29157+ }
076b876e 29158+ di_read_unlock(root, !AuLock_IR);
79b8bda9 29159+ if (unlikely(err || seq_has_overflowed(seq)))
076b876e 29160+ err = -E2BIG;
392086de 29161+
1e00d052 29162+ return err;
1facf9fc 29163+}
29164+
29165+/* ---------------------------------------------------------------------- */
29166+
29167+static struct seq_file *au_seq(char *p, ssize_t len)
29168+{
29169+ struct seq_file *seq;
29170+
29171+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
29172+ if (seq) {
29173+ /* mutex_init(&seq.lock); */
29174+ seq->buf = p;
29175+ seq->size = len;
29176+ return seq; /* success */
29177+ }
29178+
29179+ seq = ERR_PTR(-ENOMEM);
29180+ return seq;
29181+}
29182+
392086de
AM
29183+#define SysaufsBr_PREFIX "br"
29184+#define SysaufsBrid_PREFIX "brid"
1facf9fc 29185+
29186+/* todo: file size may exceed PAGE_SIZE */
29187+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 29188+ char *buf)
1facf9fc 29189+{
29190+ ssize_t err;
392086de 29191+ int idx;
1facf9fc 29192+ long l;
5afbbe0d 29193+ aufs_bindex_t bbot;
1facf9fc 29194+ struct au_sbinfo *sbinfo;
29195+ struct super_block *sb;
29196+ struct seq_file *seq;
29197+ char *name;
29198+ struct attribute **cattr;
29199+
29200+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
29201+ sb = sbinfo->si_sb;
1308ab2a 29202+
29203+ /*
29204+ * prevent a race condition between sysfs and aufs.
29205+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
29206+ * prohibits maintaining the sysfs entries.
29207+ * here we acquire read lock after sysfs_get_active_two().
29208+ * on the other hand, the remount process may maintain the sysfs/aufs
29209+ * entries after acquiring write lock.
29210+ * it can cause a deadlock.
29211+ * so we simply give up processing the read here.
29212+ */
29213+ err = -EBUSY;
29214+ if (unlikely(!si_noflush_read_trylock(sb)))
29215+ goto out;
1facf9fc 29216+
29217+ seq = au_seq(buf, PAGE_SIZE);
29218+ err = PTR_ERR(seq);
29219+ if (IS_ERR(seq))
1308ab2a 29220+ goto out_unlock;
1facf9fc 29221+
29222+ name = (void *)attr->name;
29223+ cattr = sysaufs_si_attrs;
29224+ while (*cattr) {
29225+ if (!strcmp(name, (*cattr)->name)) {
29226+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
29227+ ->show(seq, sb);
29228+ goto out_seq;
29229+ }
29230+ cattr++;
29231+ }
29232+
392086de
AM
29233+ if (!strncmp(name, SysaufsBrid_PREFIX,
29234+ sizeof(SysaufsBrid_PREFIX) - 1)) {
29235+ idx = AuBrSysfs_BRID;
29236+ name += sizeof(SysaufsBrid_PREFIX) - 1;
29237+ } else if (!strncmp(name, SysaufsBr_PREFIX,
29238+ sizeof(SysaufsBr_PREFIX) - 1)) {
29239+ idx = AuBrSysfs_BR;
1facf9fc 29240+ name += sizeof(SysaufsBr_PREFIX) - 1;
392086de
AM
29241+ } else
29242+ BUG();
29243+
29244+ err = kstrtol(name, 10, &l);
29245+ if (!err) {
5afbbe0d
AM
29246+ bbot = au_sbbot(sb);
29247+ if (l <= bbot)
392086de
AM
29248+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
29249+ else
29250+ err = -ENOENT;
1facf9fc 29251+ }
1facf9fc 29252+
4f0767ce 29253+out_seq:
1facf9fc 29254+ if (!err) {
29255+ err = seq->count;
29256+ /* sysfs limit */
29257+ if (unlikely(err == PAGE_SIZE))
29258+ err = -EFBIG;
29259+ }
f0c0a007 29260+ au_delayed_kfree(seq);
4f0767ce 29261+out_unlock:
1facf9fc 29262+ si_read_unlock(sb);
4f0767ce 29263+out:
1facf9fc 29264+ return err;
29265+}
29266+
29267+/* ---------------------------------------------------------------------- */
29268+
076b876e
AM
29269+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
29270+{
29271+ int err;
29272+ int16_t brid;
5afbbe0d 29273+ aufs_bindex_t bindex, bbot;
076b876e
AM
29274+ size_t sz;
29275+ char *buf;
29276+ struct seq_file *seq;
29277+ struct au_branch *br;
29278+
29279+ si_read_lock(sb, AuLock_FLUSH);
5afbbe0d
AM
29280+ bbot = au_sbbot(sb);
29281+ err = bbot + 1; /* NULL arg: only report the number of branches */
076b876e
AM
29282+ if (!arg)
29283+ goto out;
29284+
29285+ err = -ENOMEM;
29286+ buf = (void *)__get_free_page(GFP_NOFS);
29287+ if (unlikely(!buf))
29288+ goto out;
29289+
29290+ seq = au_seq(buf, PAGE_SIZE);
29291+ err = PTR_ERR(seq);
29292+ if (IS_ERR(seq))
29293+ goto out_buf;
29294+
29295+ sz = sizeof(*arg) - offsetof(union aufs_brinfo, path); /* room for path[] in one entry */
5afbbe0d 29296+ for (bindex = 0; bindex <= bbot; bindex++, arg++) {
076b876e
AM
29297+ err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
29298+ if (unlikely(err))
29299+ break;
29300+
29301+ br = au_sbr(sb, bindex);
29302+ brid = br->br_id;
29303+ BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
29304+ err = __put_user(brid, &arg->id);
29305+ if (unlikely(err))
29306+ break;
29307+
29308+ BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
29309+ err = __put_user(br->br_perm, &arg->perm);
29310+ if (unlikely(err))
29311+ break;
29312+
79b8bda9
AM
29313+ err = au_seq_path(seq, &br->br_path);
29314+ if (unlikely(err))
29315+ break;
29316+ seq_putc(seq, '\0');
29317+ if (!seq_has_overflowed(seq) && seq->count <= sz) {
076b876e
AM
29318+ err = copy_to_user(arg->path, seq->buf, seq->count); /* stays inside this entry */
29319+ seq->count = 0;
29320+ if (unlikely(err))
29321+ break;
29322+ } else {
29323+ err = -E2BIG;
29324+ goto out_seq;
29325+ }
29326+ }
29327+ if (unlikely(err))
29328+ err = -EFAULT;
29329+
29330+out_seq:
f0c0a007 29331+ au_delayed_kfree(seq);
076b876e 29332+out_buf:
f0c0a007 29333+ au_delayed_free_page((unsigned long)buf);
076b876e
AM
29334+out:
29335+ si_read_unlock(sb);
29336+ return err;
29337+}
29338+
29339+long au_brinfo_ioctl(struct file *file, unsigned long arg)
29340+{
2000de60 29341+ return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
076b876e
AM
29342+}
29343+
29344+#ifdef CONFIG_COMPAT
29345+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
29346+{
2000de60 29347+ return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
076b876e
AM
29348+}
29349+#endif
29350+
29351+/* ---------------------------------------------------------------------- */
29352+
1facf9fc 29353+void sysaufs_br_init(struct au_branch *br)
29354+{
392086de
AM
29355+ int i;
29356+ struct au_brsysfs *br_sysfs;
29357+ struct attribute *attr;
4a4d8108 29358+
392086de
AM
29359+ br_sysfs = br->br_sysfs;
29360+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29361+ attr = &br_sysfs->attr;
29362+ sysfs_attr_init(attr);
29363+ attr->name = br_sysfs->name;
29364+ attr->mode = S_IRUGO;
29365+ br_sysfs++;
29366+ }
1facf9fc 29367+}
29368+
29369+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
29370+{
29371+ struct au_branch *br;
29372+ struct kobject *kobj;
392086de
AM
29373+ struct au_brsysfs *br_sysfs;
29374+ int i;
5afbbe0d 29375+ aufs_bindex_t bbot;
1facf9fc 29376+
29377+ dbgaufs_brs_del(sb, bindex);
29378+
29379+ if (!sysaufs_brs)
29380+ return;
29381+
29382+ kobj = &au_sbi(sb)->si_kobj;
5afbbe0d
AM
29383+ bbot = au_sbbot(sb);
29384+ for (; bindex <= bbot; bindex++) {
1facf9fc 29385+ br = au_sbr(sb, bindex);
392086de
AM
29386+ br_sysfs = br->br_sysfs;
29387+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29388+ sysfs_remove_file(kobj, &br_sysfs->attr);
29389+ br_sysfs++;
29390+ }
1facf9fc 29391+ }
29392+}
29393+
29394+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
29395+{
392086de 29396+ int err, i;
5afbbe0d 29397+ aufs_bindex_t bbot;
1facf9fc 29398+ struct kobject *kobj;
29399+ struct au_branch *br;
392086de 29400+ struct au_brsysfs *br_sysfs;
1facf9fc 29401+
29402+ dbgaufs_brs_add(sb, bindex);
29403+
29404+ if (!sysaufs_brs)
29405+ return;
29406+
29407+ kobj = &au_sbi(sb)->si_kobj;
5afbbe0d
AM
29408+ bbot = au_sbbot(sb);
29409+ for (; bindex <= bbot; bindex++) {
1facf9fc 29410+ br = au_sbr(sb, bindex);
392086de
AM
29411+ br_sysfs = br->br_sysfs;
29412+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
29413+ SysaufsBr_PREFIX "%d", bindex);
29414+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
29415+ SysaufsBrid_PREFIX "%d", bindex);
29416+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29417+ err = sysfs_create_file(kobj, &br_sysfs->attr);
29418+ if (unlikely(err))
29419+ pr_warn("failed %s under sysfs(%d)\n",
29420+ br_sysfs->name, err);
29421+ br_sysfs++;
29422+ }
1facf9fc 29423+ }
29424+}
7f207e10
AM
29425diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
29426--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 29427+++ linux/fs/aufs/sysrq.c 2016-10-09 16:55:36.496035060 +0200
076b876e 29428@@ -0,0 +1,157 @@
1facf9fc 29429+/*
8cdd5066 29430+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29431+ *
29432+ * This program, aufs is free software; you can redistribute it and/or modify
29433+ * it under the terms of the GNU General Public License as published by
29434+ * the Free Software Foundation; either version 2 of the License, or
29435+ * (at your option) any later version.
dece6358
AM
29436+ *
29437+ * This program is distributed in the hope that it will be useful,
29438+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29439+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29440+ * GNU General Public License for more details.
29441+ *
29442+ * You should have received a copy of the GNU General Public License
523b37e3 29443+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29444+ */
29445+
29446+/*
29447+ * magic sysrq handler
29448+ */
29449+
1facf9fc 29450+/* #include <linux/sysrq.h> */
027c5e7a 29451+#include <linux/writeback.h>
1facf9fc 29452+#include "aufs.h"
29453+
29454+/* ---------------------------------------------------------------------- */
29455+
29456+static void sysrq_sb(struct super_block *sb)
29457+{
29458+ char *plevel;
29459+ struct au_sbinfo *sbinfo;
29460+ struct file *file;
523b37e3
AM
29461+ struct au_sphlhead *files;
29462+ struct au_finfo *finfo;
1facf9fc 29463+
29464+ plevel = au_plevel;
29465+ au_plevel = KERN_WARNING;
1facf9fc 29466+
4a4d8108 29467+ /* since we define pr_fmt, call printk directly */
c06a8ce3
AM
29468+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
29469+
29470+ sbinfo = au_sbi(sb);
4a4d8108 29471+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
c06a8ce3 29472+ pr("superblock\n");
1facf9fc 29473+ au_dpri_sb(sb);
027c5e7a
AM
29474+
29475+#if 0
c06a8ce3 29476+ pr("root dentry\n");
1facf9fc 29477+ au_dpri_dentry(sb->s_root);
c06a8ce3 29478+ pr("root inode\n");
5527c038 29479+ au_dpri_inode(d_inode(sb->s_root));
027c5e7a
AM
29480+#endif
29481+
1facf9fc 29482+#if 0
027c5e7a
AM
29483+ do {
29484+ int err, i, j, ndentry;
29485+ struct au_dcsub_pages dpages;
29486+ struct au_dpage *dpage;
29487+
29488+ err = au_dpages_init(&dpages, GFP_ATOMIC);
29489+ if (unlikely(err))
29490+ break;
29491+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
29492+ if (!err)
29493+ for (i = 0; i < dpages.ndpage; i++) {
29494+ dpage = dpages.dpages + i;
29495+ ndentry = dpage->ndentry;
29496+ for (j = 0; j < ndentry; j++)
29497+ au_dpri_dentry(dpage->dentries[j]);
29498+ }
29499+ au_dpages_free(&dpages);
29500+ } while (0);
29501+#endif
29502+
29503+#if 1
29504+ {
29505+ struct inode *i;
076b876e 29506+
c06a8ce3 29507+ pr("isolated inode\n");
79b8bda9 29508+ spin_lock(&sb->s_inode_list_lock);
2cbb1c4b
JR
29509+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
29510+ spin_lock(&i->i_lock);
b4510431 29511+ if (1 || hlist_empty(&i->i_dentry))
027c5e7a 29512+ au_dpri_inode(i);
2cbb1c4b
JR
29513+ spin_unlock(&i->i_lock);
29514+ }
79b8bda9 29515+ spin_unlock(&sb->s_inode_list_lock);
027c5e7a 29516+ }
1facf9fc 29517+#endif
c06a8ce3 29518+ pr("files\n");
523b37e3
AM
29519+ files = &au_sbi(sb)->si_files;
29520+ spin_lock(&files->spin);
29521+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
4a4d8108 29522+ umode_t mode;
076b876e 29523+
523b37e3 29524+ file = finfo->fi_file;
c06a8ce3 29525+ mode = file_inode(file)->i_mode;
38d290e6 29526+ if (!special_file(mode))
1facf9fc 29527+ au_dpri_file(file);
523b37e3
AM
29528+ }
29529+ spin_unlock(&files->spin);
c06a8ce3 29530+ pr("done\n");
1facf9fc 29531+
c06a8ce3 29532+#undef pr
1facf9fc 29533+ au_plevel = plevel;
1facf9fc 29534+}
29535+
29536+/* ---------------------------------------------------------------------- */
29537+
29538+/* module parameter */
29539+static char *aufs_sysrq_key = "a";
29540+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
29541+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
29542+
0c5527e5 29543+static void au_sysrq(int key __maybe_unused)
1facf9fc 29544+{
1facf9fc 29545+ struct au_sbinfo *sbinfo;
29546+
027c5e7a 29547+ lockdep_off();
53392da6 29548+ au_sbilist_lock();
5afbbe0d 29549+ hlist_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 29550+ sysrq_sb(sbinfo->si_sb);
53392da6 29551+ au_sbilist_unlock();
027c5e7a 29552+ lockdep_on();
1facf9fc 29553+}
29554+
29555+static struct sysrq_key_op au_sysrq_op = {
29556+ .handler = au_sysrq,
29557+ .help_msg = "Aufs",
29558+ .action_msg = "Aufs",
29559+ .enable_mask = SYSRQ_ENABLE_DUMP
29560+};
29561+
29562+/* ---------------------------------------------------------------------- */
29563+
29564+int __init au_sysrq_init(void)
29565+{
29566+ int err;
29567+ char key;
29568+
29569+ err = -1;
29570+ key = *aufs_sysrq_key;
29571+ if ('a' <= key && key <= 'z')
29572+ err = register_sysrq_key(key, &au_sysrq_op);
29573+ if (unlikely(err))
4a4d8108 29574+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 29575+ return err;
29576+}
29577+
29578+void au_sysrq_fin(void)
29579+{
29580+ int err;
076b876e 29581+
1facf9fc 29582+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
29583+ if (unlikely(err))
4a4d8108 29584+ pr_err("err %d (ignored)\n", err);
1facf9fc 29585+}
7f207e10
AM
29586diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
29587--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
29588+++ linux/fs/aufs/vdir.c 2016-10-09 16:55:38.889431135 +0200
29589@@ -0,0 +1,900 @@
1facf9fc 29590+/*
8cdd5066 29591+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29592+ *
29593+ * This program, aufs is free software; you can redistribute it and/or modify
29594+ * it under the terms of the GNU General Public License as published by
29595+ * the Free Software Foundation; either version 2 of the License, or
29596+ * (at your option) any later version.
dece6358
AM
29597+ *
29598+ * This program is distributed in the hope that it will be useful,
29599+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29600+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29601+ * GNU General Public License for more details.
29602+ *
29603+ * You should have received a copy of the GNU General Public License
523b37e3 29604+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29605+ */
29606+
29607+/*
29608+ * virtual or vertical directory
29609+ */
29610+
29611+#include "aufs.h"
29612+
dece6358 29613+static unsigned int calc_size(int nlen)
1facf9fc 29614+{
dece6358 29615+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 29616+}
29617+
29618+static int set_deblk_end(union au_vdir_deblk_p *p,
29619+ union au_vdir_deblk_p *deblk_end)
29620+{
29621+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
29622+ p->de->de_str.len = 0;
29623+ /* smp_mb(); */
29624+ return 0;
29625+ }
29626+ return -1; /* error */
29627+}
29628+
29629+/* returns true or false */
29630+static int is_deblk_end(union au_vdir_deblk_p *p,
29631+ union au_vdir_deblk_p *deblk_end)
29632+{
29633+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
29634+ return !p->de->de_str.len;
29635+ return 1;
29636+}
29637+
29638+static unsigned char *last_deblk(struct au_vdir *vdir)
29639+{
29640+ return vdir->vd_deblk[vdir->vd_nblk - 1];
29641+}
29642+
29643+/* ---------------------------------------------------------------------- */
29644+
79b8bda9 29645+/* estimate the appropriate size for name hash table */
1308ab2a 29646+unsigned int au_rdhash_est(loff_t sz)
29647+{
29648+ unsigned int n;
29649+
29650+ n = UINT_MAX;
29651+ sz >>= 10;
29652+ if (sz < n)
29653+ n = sz;
29654+ if (sz < AUFS_RDHASH_DEF)
29655+ n = AUFS_RDHASH_DEF;
4a4d8108 29656+ /* pr_info("n %u\n", n); */
1308ab2a 29657+ return n;
29658+}
29659+
1facf9fc 29660+/*
29661+ * the allocated memory has to be freed by
dece6358 29662+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 29663+ */
dece6358 29664+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 29665+{
1facf9fc 29666+ struct hlist_head *head;
dece6358 29667+ unsigned int u;
076b876e 29668+ size_t sz;
1facf9fc 29669+
076b876e
AM
29670+ sz = sizeof(*nhash->nh_head) * num_hash;
29671+ head = kmalloc(sz, gfp);
dece6358
AM
29672+ if (head) {
29673+ nhash->nh_num = num_hash;
29674+ nhash->nh_head = head;
29675+ for (u = 0; u < num_hash; u++)
1facf9fc 29676+ INIT_HLIST_HEAD(head++);
dece6358 29677+ return 0; /* success */
1facf9fc 29678+ }
1facf9fc 29679+
dece6358 29680+ return -ENOMEM;
1facf9fc 29681+}
29682+
dece6358
AM
29683+static void nhash_count(struct hlist_head *head)
29684+{
29685+#if 0
29686+ unsigned long n;
29687+ struct hlist_node *pos;
29688+
29689+ n = 0;
29690+ hlist_for_each(pos, head)
29691+ n++;
4a4d8108 29692+ pr_info("%lu\n", n);
dece6358
AM
29693+#endif
29694+}
29695+
29696+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 29697+{
c06a8ce3
AM
29698+ struct au_vdir_wh *pos;
29699+ struct hlist_node *node;
1facf9fc 29700+
c06a8ce3 29701+ hlist_for_each_entry_safe(pos, node, head, wh_hash)
f0c0a007 29702+ au_delayed_kfree(pos);
1facf9fc 29703+}
29704+
dece6358 29705+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 29706+{
c06a8ce3
AM
29707+ struct au_vdir_dehstr *pos;
29708+ struct hlist_node *node;
1facf9fc 29709+
c06a8ce3 29710+ hlist_for_each_entry_safe(pos, node, head, hash)
f0c0a007 29711+ au_cache_dfree_vdir_dehstr(pos);
1facf9fc 29712+}
29713+
dece6358
AM
29714+static void au_nhash_do_free(struct au_nhash *nhash,
29715+ void (*free)(struct hlist_head *head))
1facf9fc 29716+{
1308ab2a 29717+ unsigned int n;
1facf9fc 29718+ struct hlist_head *head;
1facf9fc 29719+
dece6358 29720+ n = nhash->nh_num;
1308ab2a 29721+ if (!n)
29722+ return;
29723+
dece6358 29724+ head = nhash->nh_head;
1308ab2a 29725+ while (n-- > 0) {
dece6358
AM
29726+ nhash_count(head);
29727+ free(head++);
1facf9fc 29728+ }
f0c0a007 29729+ au_delayed_kfree(nhash->nh_head);
1facf9fc 29730+}
29731+
dece6358 29732+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 29733+{
dece6358
AM
29734+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
29735+}
1facf9fc 29736+
dece6358
AM
29737+static void au_nhash_de_free(struct au_nhash *delist)
29738+{
29739+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 29740+}
29741+
29742+/* ---------------------------------------------------------------------- */
29743+
29744+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
29745+ int limit)
29746+{
29747+ int num;
29748+ unsigned int u, n;
29749+ struct hlist_head *head;
c06a8ce3 29750+ struct au_vdir_wh *pos;
1facf9fc 29751+
29752+ num = 0;
29753+ n = whlist->nh_num;
29754+ head = whlist->nh_head;
1308ab2a 29755+ for (u = 0; u < n; u++, head++)
c06a8ce3
AM
29756+ hlist_for_each_entry(pos, head, wh_hash)
29757+ if (pos->wh_bindex == btgt && ++num > limit)
1facf9fc 29758+ return 1;
1facf9fc 29759+ return 0;
29760+}
29761+
29762+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 29763+ unsigned char *name,
1facf9fc 29764+ unsigned int len)
29765+{
dece6358
AM
29766+ unsigned int v;
29767+ /* const unsigned int magic_bit = 12; */
29768+
1308ab2a 29769+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
29770+
dece6358 29771+ v = 0;
f0c0a007
AM
29772+ if (len > 8)
29773+ len = 8;
dece6358
AM
29774+ while (len--)
29775+ v += *name++;
29776+ /* v = hash_long(v, magic_bit); */
29777+ v %= nhash->nh_num;
29778+ return nhash->nh_head + v;
29779+}
29780+
29781+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
29782+ int nlen)
29783+{
29784+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 29785+}
29786+
29787+/* returns found or not */
dece6358 29788+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 29789+{
29790+ struct hlist_head *head;
c06a8ce3 29791+ struct au_vdir_wh *pos;
1facf9fc 29792+ struct au_vdir_destr *str;
29793+
dece6358 29794+ head = au_name_hash(whlist, name, nlen);
c06a8ce3
AM
29795+ hlist_for_each_entry(pos, head, wh_hash) {
29796+ str = &pos->wh_str;
1facf9fc 29797+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
29798+ if (au_nhash_test_name(str, name, nlen))
29799+ return 1;
29800+ }
29801+ return 0;
29802+}
29803+
29804+/* returns found(true) or not */
29805+static int test_known(struct au_nhash *delist, char *name, int nlen)
29806+{
29807+ struct hlist_head *head;
c06a8ce3 29808+ struct au_vdir_dehstr *pos;
dece6358
AM
29809+ struct au_vdir_destr *str;
29810+
29811+ head = au_name_hash(delist, name, nlen);
c06a8ce3
AM
29812+ hlist_for_each_entry(pos, head, hash) {
29813+ str = pos->str;
dece6358
AM
29814+ AuDbg("%.*s\n", str->len, str->name);
29815+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 29816+ return 1;
29817+ }
29818+ return 0;
29819+}
29820+
dece6358
AM
29821+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
29822+ unsigned char d_type)
29823+{
29824+#ifdef CONFIG_AUFS_SHWH
29825+ wh->wh_ino = ino;
29826+ wh->wh_type = d_type;
29827+#endif
29828+}
29829+
29830+/* ---------------------------------------------------------------------- */
29831+
29832+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
29833+ unsigned int d_type, aufs_bindex_t bindex,
29834+ unsigned char shwh)
1facf9fc 29835+{
29836+ int err;
29837+ struct au_vdir_destr *str;
29838+ struct au_vdir_wh *wh;
29839+
dece6358 29840+ AuDbg("%.*s\n", nlen, name);
1308ab2a 29841+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
29842+
1facf9fc 29843+ err = -ENOMEM;
dece6358 29844+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 29845+ if (unlikely(!wh))
29846+ goto out;
29847+
29848+ err = 0;
29849+ wh->wh_bindex = bindex;
dece6358
AM
29850+ if (shwh)
29851+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 29852+ str = &wh->wh_str;
dece6358
AM
29853+ str->len = nlen;
29854+ memcpy(str->name, name, nlen);
29855+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 29856+ /* smp_mb(); */
29857+
4f0767ce 29858+out:
1facf9fc 29859+ return err;
29860+}
29861+
1facf9fc 29862+static int append_deblk(struct au_vdir *vdir)
29863+{
29864+ int err;
dece6358 29865+ unsigned long ul;
1facf9fc 29866+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29867+ union au_vdir_deblk_p p, deblk_end;
29868+ unsigned char **o;
29869+
29870+ err = -ENOMEM;
e2f27e51
AM
29871+ o = au_krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
29872+ GFP_NOFS, /*may_shrink*/0);
1facf9fc 29873+ if (unlikely(!o))
29874+ goto out;
29875+
29876+ vdir->vd_deblk = o;
29877+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
29878+ if (p.deblk) {
29879+ ul = vdir->vd_nblk++;
29880+ vdir->vd_deblk[ul] = p.deblk;
29881+ vdir->vd_last.ul = ul;
29882+ vdir->vd_last.p.deblk = p.deblk;
29883+ deblk_end.deblk = p.deblk + deblk_sz;
29884+ err = set_deblk_end(&p, &deblk_end);
29885+ }
29886+
4f0767ce 29887+out:
1facf9fc 29888+ return err;
29889+}
29890+
dece6358
AM
29891+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
29892+ unsigned int d_type, struct au_nhash *delist)
29893+{
29894+ int err;
29895+ unsigned int sz;
29896+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29897+ union au_vdir_deblk_p p, *room, deblk_end;
29898+ struct au_vdir_dehstr *dehstr;
29899+
29900+ p.deblk = last_deblk(vdir);
29901+ deblk_end.deblk = p.deblk + deblk_sz;
29902+ room = &vdir->vd_last.p;
29903+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
29904+ || !is_deblk_end(room, &deblk_end));
29905+
29906+ sz = calc_size(nlen);
29907+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
29908+ err = append_deblk(vdir);
29909+ if (unlikely(err))
29910+ goto out;
29911+
29912+ p.deblk = last_deblk(vdir);
29913+ deblk_end.deblk = p.deblk + deblk_sz;
29914+ /* smp_mb(); */
29915+ AuDebugOn(room->deblk != p.deblk);
29916+ }
29917+
29918+ err = -ENOMEM;
4a4d8108 29919+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
29920+ if (unlikely(!dehstr))
29921+ goto out;
29922+
29923+ dehstr->str = &room->de->de_str;
29924+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
29925+ room->de->de_ino = ino;
29926+ room->de->de_type = d_type;
29927+ room->de->de_str.len = nlen;
29928+ memcpy(room->de->de_str.name, name, nlen);
29929+
29930+ err = 0;
29931+ room->deblk += sz;
29932+ if (unlikely(set_deblk_end(room, &deblk_end)))
29933+ err = append_deblk(vdir);
29934+ /* smp_mb(); */
29935+
4f0767ce 29936+out:
dece6358
AM
29937+ return err;
29938+}
29939+
29940+/* ---------------------------------------------------------------------- */
29941+
f0c0a007 29942+void au_vdir_free(struct au_vdir *vdir, int atonce)
dece6358
AM
29943+{
29944+ unsigned char **deblk;
29945+
29946+ deblk = vdir->vd_deblk;
f0c0a007
AM
29947+ if (!atonce) {
29948+ while (vdir->vd_nblk--)
29949+ au_delayed_kfree(*deblk++);
29950+ au_delayed_kfree(vdir->vd_deblk);
29951+ au_cache_dfree_vdir(vdir);
29952+ } else {
29953+ /* not delayed */
29954+ while (vdir->vd_nblk--)
29955+ kfree(*deblk++);
29956+ kfree(vdir->vd_deblk);
29957+ au_cache_free_vdir(vdir);
29958+ }
dece6358
AM
29959+}
29960+
1308ab2a 29961+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 29962+{
29963+ struct au_vdir *vdir;
1308ab2a 29964+ struct super_block *sb;
1facf9fc 29965+ int err;
29966+
2000de60 29967+ sb = file->f_path.dentry->d_sb;
dece6358
AM
29968+ SiMustAnyLock(sb);
29969+
1facf9fc 29970+ err = -ENOMEM;
29971+ vdir = au_cache_alloc_vdir();
29972+ if (unlikely(!vdir))
29973+ goto out;
29974+
29975+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
29976+ if (unlikely(!vdir->vd_deblk))
29977+ goto out_free;
29978+
29979+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 29980+ if (!vdir->vd_deblk_sz) {
79b8bda9 29981+ /* estimate the appropriate size for deblk */
1308ab2a 29982+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 29983+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 29984+ }
1facf9fc 29985+ vdir->vd_nblk = 0;
29986+ vdir->vd_version = 0;
29987+ vdir->vd_jiffy = 0;
29988+ err = append_deblk(vdir);
29989+ if (!err)
29990+ return vdir; /* success */
29991+
f0c0a007 29992+ au_delayed_kfree(vdir->vd_deblk);
1facf9fc 29993+
4f0767ce 29994+out_free:
f0c0a007 29995+ au_cache_dfree_vdir(vdir);
4f0767ce 29996+out:
1facf9fc 29997+ vdir = ERR_PTR(err);
29998+ return vdir;
29999+}
30000+
30001+static int reinit_vdir(struct au_vdir *vdir)
30002+{
30003+ int err;
30004+ union au_vdir_deblk_p p, deblk_end;
30005+
30006+ while (vdir->vd_nblk > 1) {
f0c0a007 30007+ au_delayed_kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
1facf9fc 30008+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
30009+ vdir->vd_nblk--;
30010+ }
30011+ p.deblk = vdir->vd_deblk[0];
30012+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
30013+ err = set_deblk_end(&p, &deblk_end);
30014+ /* keep vd_dblk_sz */
30015+ vdir->vd_last.ul = 0;
30016+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
30017+ vdir->vd_version = 0;
30018+ vdir->vd_jiffy = 0;
30019+ /* smp_mb(); */
30020+ return err;
30021+}
30022+
30023+/* ---------------------------------------------------------------------- */
30024+
1facf9fc 30025+#define AuFillVdir_CALLED 1
30026+#define AuFillVdir_WHABLE (1 << 1)
dece6358 30027+#define AuFillVdir_SHWH (1 << 2)
1facf9fc 30028+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
7f207e10
AM
30029+#define au_fset_fillvdir(flags, name) \
30030+ do { (flags) |= AuFillVdir_##name; } while (0)
30031+#define au_fclr_fillvdir(flags, name) \
30032+ do { (flags) &= ~AuFillVdir_##name; } while (0)
1facf9fc 30033+
dece6358
AM
30034+#ifndef CONFIG_AUFS_SHWH
30035+#undef AuFillVdir_SHWH
30036+#define AuFillVdir_SHWH 0
30037+#endif
30038+
1facf9fc 30039+struct fillvdir_arg {
392086de 30040+ struct dir_context ctx;
1facf9fc 30041+ struct file *file;
30042+ struct au_vdir *vdir;
dece6358
AM
30043+ struct au_nhash delist;
30044+ struct au_nhash whlist;
1facf9fc 30045+ aufs_bindex_t bindex;
30046+ unsigned int flags;
30047+ int err;
30048+};
30049+
392086de 30050+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
1facf9fc 30051+ loff_t offset __maybe_unused, u64 h_ino,
30052+ unsigned int d_type)
30053+{
392086de 30054+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
1facf9fc 30055+ char *name = (void *)__name;
30056+ struct super_block *sb;
1facf9fc 30057+ ino_t ino;
dece6358 30058+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 30059+
1facf9fc 30060+ arg->err = 0;
2000de60 30061+ sb = arg->file->f_path.dentry->d_sb;
1facf9fc 30062+ au_fset_fillvdir(arg->flags, CALLED);
30063+ /* smp_mb(); */
dece6358 30064+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 30065+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
30066+ if (test_known(&arg->delist, name, nlen)
30067+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
30068+ goto out; /* already exists or whiteouted */
1facf9fc 30069+
dece6358 30070+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
30071+ if (!arg->err) {
30072+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
30073+ d_type = DT_UNKNOWN;
dece6358
AM
30074+ arg->err = append_de(arg->vdir, name, nlen, ino,
30075+ d_type, &arg->delist);
4a4d8108 30076+ }
1facf9fc 30077+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
30078+ name += AUFS_WH_PFX_LEN;
dece6358
AM
30079+ nlen -= AUFS_WH_PFX_LEN;
30080+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
30081+ goto out; /* already whiteouted */
1facf9fc 30082+
dece6358
AM
30083+ if (shwh)
30084+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
30085+ &ino);
4a4d8108
AM
30086+ if (!arg->err) {
30087+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
30088+ d_type = DT_UNKNOWN;
1facf9fc 30089+ arg->err = au_nhash_append_wh
dece6358
AM
30090+ (&arg->whlist, name, nlen, ino, d_type,
30091+ arg->bindex, shwh);
4a4d8108 30092+ }
1facf9fc 30093+ }
30094+
4f0767ce 30095+out:
1facf9fc 30096+ if (!arg->err)
30097+ arg->vdir->vd_jiffy = jiffies;
30098+ /* smp_mb(); */
30099+ AuTraceErr(arg->err);
30100+ return arg->err;
30101+}
30102+
dece6358
AM
30103+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
30104+ struct au_nhash *whlist, struct au_nhash *delist)
30105+{
30106+#ifdef CONFIG_AUFS_SHWH
30107+ int err;
30108+ unsigned int nh, u;
30109+ struct hlist_head *head;
c06a8ce3
AM
30110+ struct au_vdir_wh *pos;
30111+ struct hlist_node *n;
dece6358
AM
30112+ char *p, *o;
30113+ struct au_vdir_destr *destr;
30114+
30115+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
30116+
30117+ err = -ENOMEM;
537831f9 30118+ o = p = (void *)__get_free_page(GFP_NOFS);
dece6358
AM
30119+ if (unlikely(!p))
30120+ goto out;
30121+
30122+ err = 0;
30123+ nh = whlist->nh_num;
30124+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
30125+ p += AUFS_WH_PFX_LEN;
30126+ for (u = 0; u < nh; u++) {
30127+ head = whlist->nh_head + u;
c06a8ce3
AM
30128+ hlist_for_each_entry_safe(pos, n, head, wh_hash) {
30129+ destr = &pos->wh_str;
dece6358
AM
30130+ memcpy(p, destr->name, destr->len);
30131+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
c06a8ce3 30132+ pos->wh_ino, pos->wh_type, delist);
dece6358
AM
30133+ if (unlikely(err))
30134+ break;
30135+ }
30136+ }
30137+
f0c0a007 30138+ au_delayed_free_page((unsigned long)o);
dece6358 30139+
4f0767ce 30140+out:
dece6358
AM
30141+ AuTraceErr(err);
30142+ return err;
30143+#else
30144+ return 0;
30145+#endif
30146+}
30147+
1facf9fc 30148+static int au_do_read_vdir(struct fillvdir_arg *arg)
30149+{
30150+ int err;
dece6358 30151+ unsigned int rdhash;
1facf9fc 30152+ loff_t offset;
5afbbe0d 30153+ aufs_bindex_t bbot, bindex, btop;
dece6358 30154+ unsigned char shwh;
1facf9fc 30155+ struct file *hf, *file;
30156+ struct super_block *sb;
30157+
1facf9fc 30158+ file = arg->file;
2000de60 30159+ sb = file->f_path.dentry->d_sb;
dece6358
AM
30160+ SiMustAnyLock(sb);
30161+
30162+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 30163+ if (!rdhash)
30164+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
30165+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
30166+ if (unlikely(err))
1facf9fc 30167+ goto out;
dece6358
AM
30168+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
30169+ if (unlikely(err))
1facf9fc 30170+ goto out_delist;
30171+
30172+ err = 0;
30173+ arg->flags = 0;
dece6358
AM
30174+ shwh = 0;
30175+ if (au_opt_test(au_mntflags(sb), SHWH)) {
30176+ shwh = 1;
30177+ au_fset_fillvdir(arg->flags, SHWH);
30178+ }
5afbbe0d
AM
30179+ btop = au_fbtop(file);
30180+ bbot = au_fbbot_dir(file);
30181+ for (bindex = btop; !err && bindex <= bbot; bindex++) {
4a4d8108 30182+ hf = au_hf_dir(file, bindex);
1facf9fc 30183+ if (!hf)
30184+ continue;
30185+
30186+ offset = vfsub_llseek(hf, 0, SEEK_SET);
30187+ err = offset;
30188+ if (unlikely(offset))
30189+ break;
30190+
30191+ arg->bindex = bindex;
30192+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358 30193+ if (shwh
5afbbe0d 30194+ || (bindex != bbot
dece6358 30195+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 30196+ au_fset_fillvdir(arg->flags, WHABLE);
30197+ do {
30198+ arg->err = 0;
30199+ au_fclr_fillvdir(arg->flags, CALLED);
30200+ /* smp_mb(); */
392086de 30201+ err = vfsub_iterate_dir(hf, &arg->ctx);
1facf9fc 30202+ if (err >= 0)
30203+ err = arg->err;
30204+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
392086de
AM
30205+
30206+ /*
30207+ * dir_relax() may be good for concurrency, but aufs should not
30208+ * use it since it will cause a lockdep problem.
30209+ */
1facf9fc 30210+ }
dece6358
AM
30211+
30212+ if (!err && shwh)
30213+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
30214+
30215+ au_nhash_wh_free(&arg->whlist);
1facf9fc 30216+
4f0767ce 30217+out_delist:
dece6358 30218+ au_nhash_de_free(&arg->delist);
4f0767ce 30219+out:
1facf9fc 30220+ return err;
30221+}
30222+
30223+static int read_vdir(struct file *file, int may_read)
30224+{
30225+ int err;
30226+ unsigned long expire;
30227+ unsigned char do_read;
392086de
AM
30228+ struct fillvdir_arg arg = {
30229+ .ctx = {
2000de60 30230+ .actor = fillvdir
392086de
AM
30231+ }
30232+ };
1facf9fc 30233+ struct inode *inode;
30234+ struct au_vdir *vdir, *allocated;
30235+
30236+ err = 0;
c06a8ce3 30237+ inode = file_inode(file);
1facf9fc 30238+ IMustLock(inode);
5afbbe0d 30239+ IiMustWriteLock(inode);
dece6358
AM
30240+ SiMustAnyLock(inode->i_sb);
30241+
1facf9fc 30242+ allocated = NULL;
30243+ do_read = 0;
30244+ expire = au_sbi(inode->i_sb)->si_rdcache;
30245+ vdir = au_ivdir(inode);
30246+ if (!vdir) {
30247+ do_read = 1;
1308ab2a 30248+ vdir = alloc_vdir(file);
1facf9fc 30249+ err = PTR_ERR(vdir);
30250+ if (IS_ERR(vdir))
30251+ goto out;
30252+ err = 0;
30253+ allocated = vdir;
30254+ } else if (may_read
30255+ && (inode->i_version != vdir->vd_version
30256+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
30257+ do_read = 1;
30258+ err = reinit_vdir(vdir);
30259+ if (unlikely(err))
30260+ goto out;
30261+ }
30262+
30263+ if (!do_read)
30264+ return 0; /* success */
30265+
30266+ arg.file = file;
30267+ arg.vdir = vdir;
30268+ err = au_do_read_vdir(&arg);
30269+ if (!err) {
392086de 30270+ /* file->f_pos = 0; */ /* todo: ctx->pos? */
1facf9fc 30271+ vdir->vd_version = inode->i_version;
30272+ vdir->vd_last.ul = 0;
30273+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
30274+ if (allocated)
30275+ au_set_ivdir(inode, allocated);
30276+ } else if (allocated)
f0c0a007 30277+ au_vdir_free(allocated, /*atonce*/0);
1facf9fc 30278+
4f0767ce 30279+out:
1facf9fc 30280+ return err;
30281+}
30282+
30283+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
30284+{
30285+ int err, rerr;
30286+ unsigned long ul, n;
30287+ const unsigned int deblk_sz = src->vd_deblk_sz;
30288+
30289+ AuDebugOn(tgt->vd_nblk != 1);
30290+
30291+ err = -ENOMEM;
30292+ if (tgt->vd_nblk < src->vd_nblk) {
30293+ unsigned char **p;
30294+
e2f27e51
AM
30295+ p = au_krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
30296+ GFP_NOFS, /*may_shrink*/0);
1facf9fc 30297+ if (unlikely(!p))
30298+ goto out;
30299+ tgt->vd_deblk = p;
30300+ }
30301+
1308ab2a 30302+ if (tgt->vd_deblk_sz != deblk_sz) {
30303+ unsigned char *p;
30304+
30305+ tgt->vd_deblk_sz = deblk_sz;
e2f27e51
AM
30306+ p = au_krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS,
30307+ /*may_shrink*/1);
1308ab2a 30308+ if (unlikely(!p))
30309+ goto out;
30310+ tgt->vd_deblk[0] = p;
30311+ }
1facf9fc 30312+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 30313+ tgt->vd_version = src->vd_version;
30314+ tgt->vd_jiffy = src->vd_jiffy;
30315+
30316+ n = src->vd_nblk;
30317+ for (ul = 1; ul < n; ul++) {
dece6358
AM
30318+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
30319+ GFP_NOFS);
30320+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 30321+ goto out;
1308ab2a 30322+ tgt->vd_nblk++;
1facf9fc 30323+ }
1308ab2a 30324+ tgt->vd_nblk = n;
30325+ tgt->vd_last.ul = tgt->vd_last.ul;
30326+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
30327+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
30328+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 30329+ /* smp_mb(); */
30330+ return 0; /* success */
30331+
4f0767ce 30332+out:
1facf9fc 30333+ rerr = reinit_vdir(tgt);
30334+ BUG_ON(rerr);
30335+ return err;
30336+}
30337+
30338+int au_vdir_init(struct file *file)
30339+{
30340+ int err;
30341+ struct inode *inode;
30342+ struct au_vdir *vdir_cache, *allocated;
30343+
392086de 30344+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 30345+ err = read_vdir(file, !file->f_pos);
30346+ if (unlikely(err))
30347+ goto out;
30348+
30349+ allocated = NULL;
30350+ vdir_cache = au_fvdir_cache(file);
30351+ if (!vdir_cache) {
1308ab2a 30352+ vdir_cache = alloc_vdir(file);
1facf9fc 30353+ err = PTR_ERR(vdir_cache);
30354+ if (IS_ERR(vdir_cache))
30355+ goto out;
30356+ allocated = vdir_cache;
30357+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
392086de 30358+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 30359+ err = reinit_vdir(vdir_cache);
30360+ if (unlikely(err))
30361+ goto out;
30362+ } else
30363+ return 0; /* success */
30364+
c06a8ce3 30365+ inode = file_inode(file);
1facf9fc 30366+ err = copy_vdir(vdir_cache, au_ivdir(inode));
30367+ if (!err) {
30368+ file->f_version = inode->i_version;
30369+ if (allocated)
30370+ au_set_fvdir_cache(file, allocated);
30371+ } else if (allocated)
f0c0a007 30372+ au_vdir_free(allocated, /*atonce*/0);
1facf9fc 30373+
4f0767ce 30374+out:
1facf9fc 30375+ return err;
30376+}
30377+
30378+static loff_t calc_offset(struct au_vdir *vdir)
30379+{
30380+ loff_t offset;
30381+ union au_vdir_deblk_p p;
30382+
30383+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
30384+ offset = vdir->vd_last.p.deblk - p.deblk;
30385+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
30386+ return offset;
30387+}
30388+
30389+/* returns true or false */
392086de 30390+static int seek_vdir(struct file *file, struct dir_context *ctx)
1facf9fc 30391+{
30392+ int valid;
30393+ unsigned int deblk_sz;
30394+ unsigned long ul, n;
30395+ loff_t offset;
30396+ union au_vdir_deblk_p p, deblk_end;
30397+ struct au_vdir *vdir_cache;
30398+
30399+ valid = 1;
30400+ vdir_cache = au_fvdir_cache(file);
30401+ offset = calc_offset(vdir_cache);
30402+ AuDbg("offset %lld\n", offset);
392086de 30403+ if (ctx->pos == offset)
1facf9fc 30404+ goto out;
30405+
30406+ vdir_cache->vd_last.ul = 0;
30407+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
392086de 30408+ if (!ctx->pos)
1facf9fc 30409+ goto out;
30410+
30411+ valid = 0;
30412+ deblk_sz = vdir_cache->vd_deblk_sz;
392086de 30413+ ul = div64_u64(ctx->pos, deblk_sz);
1facf9fc 30414+ AuDbg("ul %lu\n", ul);
30415+ if (ul >= vdir_cache->vd_nblk)
30416+ goto out;
30417+
30418+ n = vdir_cache->vd_nblk;
30419+ for (; ul < n; ul++) {
30420+ p.deblk = vdir_cache->vd_deblk[ul];
30421+ deblk_end.deblk = p.deblk + deblk_sz;
30422+ offset = ul;
30423+ offset *= deblk_sz;
392086de 30424+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
1facf9fc 30425+ unsigned int l;
30426+
30427+ l = calc_size(p.de->de_str.len);
30428+ offset += l;
30429+ p.deblk += l;
30430+ }
30431+ if (!is_deblk_end(&p, &deblk_end)) {
30432+ valid = 1;
30433+ vdir_cache->vd_last.ul = ul;
30434+ vdir_cache->vd_last.p = p;
30435+ break;
30436+ }
30437+ }
30438+
4f0767ce 30439+out:
1facf9fc 30440+ /* smp_mb(); */
30441+ AuTraceErr(!valid);
30442+ return valid;
30443+}
30444+
392086de 30445+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
1facf9fc 30446+{
1facf9fc 30447+ unsigned int l, deblk_sz;
30448+ union au_vdir_deblk_p deblk_end;
30449+ struct au_vdir *vdir_cache;
30450+ struct au_vdir_de *de;
30451+
30452+ vdir_cache = au_fvdir_cache(file);
392086de 30453+ if (!seek_vdir(file, ctx))
1facf9fc 30454+ return 0;
30455+
30456+ deblk_sz = vdir_cache->vd_deblk_sz;
30457+ while (1) {
30458+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
30459+ deblk_end.deblk += deblk_sz;
30460+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
30461+ de = vdir_cache->vd_last.p.de;
30462+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
392086de 30463+ de->de_str.len, de->de_str.name, ctx->pos,
1facf9fc 30464+ (unsigned long)de->de_ino, de->de_type);
392086de
AM
30465+ if (unlikely(!dir_emit(ctx, de->de_str.name,
30466+ de->de_str.len, de->de_ino,
30467+ de->de_type))) {
1facf9fc 30468+ /* todo: ignore the error caused by udba? */
30469+ /* return err; */
30470+ return 0;
30471+ }
30472+
30473+ l = calc_size(de->de_str.len);
30474+ vdir_cache->vd_last.p.deblk += l;
392086de 30475+ ctx->pos += l;
1facf9fc 30476+ }
30477+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
30478+ vdir_cache->vd_last.ul++;
30479+ vdir_cache->vd_last.p.deblk
30480+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
392086de 30481+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
1facf9fc 30482+ continue;
30483+ }
30484+ break;
30485+ }
30486+
30487+ /* smp_mb(); */
30488+ return 0;
30489+}
7f207e10
AM
30490diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
30491--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 30492+++ linux/fs/aufs/vfsub.c 2016-10-09 16:55:36.496035060 +0200
febd17d6 30493@@ -0,0 +1,884 @@
1facf9fc 30494+/*
8cdd5066 30495+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 30496+ *
30497+ * This program, aufs is free software; you can redistribute it and/or modify
30498+ * it under the terms of the GNU General Public License as published by
30499+ * the Free Software Foundation; either version 2 of the License, or
30500+ * (at your option) any later version.
dece6358
AM
30501+ *
30502+ * This program is distributed in the hope that it will be useful,
30503+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30504+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30505+ * GNU General Public License for more details.
30506+ *
30507+ * You should have received a copy of the GNU General Public License
523b37e3 30508+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30509+ */
30510+
30511+/*
30512+ * sub-routines for VFS
30513+ */
30514+
dece6358 30515+#include <linux/namei.h>
8cdd5066 30516+#include <linux/nsproxy.h>
dece6358
AM
30517+#include <linux/security.h>
30518+#include <linux/splice.h>
8cdd5066 30519+#include "../fs/mount.h"
1facf9fc 30520+#include "aufs.h"
30521+
8cdd5066
JR
30522+#ifdef CONFIG_AUFS_BR_FUSE
30523+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
30524+{
30525+ struct nsproxy *ns;
30526+
30527+ if (!au_test_fuse(h_sb) || !au_userns)
30528+ return 0;
30529+
30530+ ns = current->nsproxy;
30531+ /* no {get,put}_nsproxy(ns) */
30532+ return real_mount(mnt)->mnt_ns == ns->mnt_ns ? 0 : -EACCES;
30533+}
30534+#endif
30535+
30536+/* ---------------------------------------------------------------------- */
30537+
1facf9fc 30538+int vfsub_update_h_iattr(struct path *h_path, int *did)
30539+{
30540+ int err;
30541+ struct kstat st;
30542+ struct super_block *h_sb;
30543+
30544+ /* for remote fs, leave work for its getattr or d_revalidate */
30545+ /* for bad i_attr fs, handle them in aufs_getattr() */
30546+ /* still some fs may acquire i_mutex. we need to skip them */
30547+ err = 0;
30548+ if (!did)
30549+ did = &err;
30550+ h_sb = h_path->dentry->d_sb;
30551+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
30552+ if (*did)
c06a8ce3 30553+ err = vfs_getattr(h_path, &st);
1facf9fc 30554+
30555+ return err;
30556+}
30557+
30558+/* ---------------------------------------------------------------------- */
30559+
4a4d8108 30560+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 30561+{
30562+ struct file *file;
30563+
b4510431 30564+ file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
7f207e10 30565+ current_cred());
2cbb1c4b
JR
30566+ if (!IS_ERR_OR_NULL(file)
30567+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
5527c038 30568+ i_readcount_inc(d_inode(path->dentry));
4a4d8108 30569+
1308ab2a 30570+ return file;
30571+}
30572+
1facf9fc 30573+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
30574+{
30575+ struct file *file;
30576+
2cbb1c4b 30577+ lockdep_off();
7f207e10 30578+ file = filp_open(path,
2cbb1c4b 30579+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 30580+ mode);
2cbb1c4b 30581+ lockdep_on();
1facf9fc 30582+ if (IS_ERR(file))
30583+ goto out;
30584+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30585+
4f0767ce 30586+out:
1facf9fc 30587+ return file;
30588+}
30589+
b912730e
AM
30590+/*
30591+ * Ideally this function should call VFS:do_last() in order to keep all its
30592+ * checkings. But it is very hard for aufs to regenerate several VFS internal
30593+ * structure such as nameidata. This is a second (or third) best approach.
30594+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
30595+ */
30596+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
30597+ struct vfsub_aopen_args *args, struct au_branch *br)
30598+{
30599+ int err;
30600+ struct file *file = args->file;
30601+ /* copied from linux/fs/namei.c:atomic_open() */
30602+ struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
30603+
30604+ IMustLock(dir);
30605+ AuDebugOn(!dir->i_op->atomic_open);
30606+
30607+ err = au_br_test_oflag(args->open_flag, br);
30608+ if (unlikely(err))
30609+ goto out;
30610+
30611+ args->file->f_path.dentry = DENTRY_NOT_SET;
30612+ args->file->f_path.mnt = au_br_mnt(br);
30613+ err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
30614+ args->create_mode, args->opened);
30615+ if (err >= 0) {
30616+ /* some filesystems don't set FILE_CREATED while succeeded? */
30617+ if (*args->opened & FILE_CREATED)
30618+ fsnotify_create(dir, dentry);
30619+ } else
30620+ goto out;
30621+
30622+
30623+ if (!err) {
30624+ /* todo: call VFS:may_open() here */
30625+ err = open_check_o_direct(file);
30626+ /* todo: ima_file_check() too? */
30627+ if (!err && (args->open_flag & __FMODE_EXEC))
30628+ err = deny_write_access(file);
30629+ if (unlikely(err))
30630+ /* note that the file is created and still opened */
30631+ goto out;
30632+ }
30633+
5afbbe0d 30634+ au_br_get(br);
b912730e
AM
30635+ fsnotify_open(file);
30636+
30637+out:
30638+ return err;
30639+}
30640+
1facf9fc 30641+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
30642+{
30643+ int err;
30644+
1facf9fc 30645+ err = kern_path(name, flags, path);
5527c038 30646+ if (!err && d_is_positive(path->dentry))
1facf9fc 30647+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30648+ return err;
30649+}
30650+
febd17d6
JR
30651+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
30652+ struct dentry *parent, int len)
30653+{
30654+ struct path path = {
30655+ .mnt = NULL
30656+ };
30657+
30658+ path.dentry = lookup_one_len_unlocked(name, parent, len);
30659+ if (IS_ERR(path.dentry))
30660+ goto out;
30661+ if (d_is_positive(path.dentry))
30662+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30663+
30664+out:
30665+ AuTraceErrPtr(path.dentry);
30666+ return path.dentry;
30667+}
30668+
1facf9fc 30669+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
30670+ int len)
30671+{
30672+ struct path path = {
30673+ .mnt = NULL
30674+ };
30675+
1308ab2a 30676+ /* VFS checks it too, but by WARN_ON_ONCE() */
5527c038 30677+ IMustLock(d_inode(parent));
1facf9fc 30678+
30679+ path.dentry = lookup_one_len(name, parent, len);
30680+ if (IS_ERR(path.dentry))
30681+ goto out;
5527c038 30682+ if (d_is_positive(path.dentry))
1facf9fc 30683+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30684+
4f0767ce 30685+out:
4a4d8108 30686+ AuTraceErrPtr(path.dentry);
1facf9fc 30687+ return path.dentry;
30688+}
30689+
b4510431 30690+void vfsub_call_lkup_one(void *args)
2cbb1c4b 30691+{
b4510431
AM
30692+ struct vfsub_lkup_one_args *a = args;
30693+ *a->errp = vfsub_lkup_one(a->name, a->parent);
2cbb1c4b
JR
30694+}
30695+
1facf9fc 30696+/* ---------------------------------------------------------------------- */
30697+
30698+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
30699+ struct dentry *d2, struct au_hinode *hdir2)
30700+{
30701+ struct dentry *d;
30702+
2cbb1c4b 30703+ lockdep_off();
1facf9fc 30704+ d = lock_rename(d1, d2);
2cbb1c4b 30705+ lockdep_on();
4a4d8108 30706+ au_hn_suspend(hdir1);
1facf9fc 30707+ if (hdir1 != hdir2)
4a4d8108 30708+ au_hn_suspend(hdir2);
1facf9fc 30709+
30710+ return d;
30711+}
30712+
30713+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
30714+ struct dentry *d2, struct au_hinode *hdir2)
30715+{
4a4d8108 30716+ au_hn_resume(hdir1);
1facf9fc 30717+ if (hdir1 != hdir2)
4a4d8108 30718+ au_hn_resume(hdir2);
2cbb1c4b 30719+ lockdep_off();
1facf9fc 30720+ unlock_rename(d1, d2);
2cbb1c4b 30721+ lockdep_on();
1facf9fc 30722+}
30723+
30724+/* ---------------------------------------------------------------------- */
30725+
b4510431 30726+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
1facf9fc 30727+{
30728+ int err;
30729+ struct dentry *d;
30730+
30731+ IMustLock(dir);
30732+
30733+ d = path->dentry;
30734+ path->dentry = d->d_parent;
b752ccd1 30735+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 30736+ path->dentry = d;
30737+ if (unlikely(err))
30738+ goto out;
30739+
c1595e42 30740+ lockdep_off();
b4510431 30741+ err = vfs_create(dir, path->dentry, mode, want_excl);
c1595e42 30742+ lockdep_on();
1facf9fc 30743+ if (!err) {
30744+ struct path tmp = *path;
30745+ int did;
30746+
30747+ vfsub_update_h_iattr(&tmp, &did);
30748+ if (did) {
30749+ tmp.dentry = path->dentry->d_parent;
30750+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30751+ }
30752+ /*ignore*/
30753+ }
30754+
4f0767ce 30755+out:
1facf9fc 30756+ return err;
30757+}
30758+
30759+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
30760+{
30761+ int err;
30762+ struct dentry *d;
30763+
30764+ IMustLock(dir);
30765+
30766+ d = path->dentry;
30767+ path->dentry = d->d_parent;
b752ccd1 30768+ err = security_path_symlink(path, d, symname);
1facf9fc 30769+ path->dentry = d;
30770+ if (unlikely(err))
30771+ goto out;
30772+
c1595e42 30773+ lockdep_off();
1facf9fc 30774+ err = vfs_symlink(dir, path->dentry, symname);
c1595e42 30775+ lockdep_on();
1facf9fc 30776+ if (!err) {
30777+ struct path tmp = *path;
30778+ int did;
30779+
30780+ vfsub_update_h_iattr(&tmp, &did);
30781+ if (did) {
30782+ tmp.dentry = path->dentry->d_parent;
30783+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30784+ }
30785+ /*ignore*/
30786+ }
30787+
4f0767ce 30788+out:
1facf9fc 30789+ return err;
30790+}
30791+
30792+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
30793+{
30794+ int err;
30795+ struct dentry *d;
30796+
30797+ IMustLock(dir);
30798+
30799+ d = path->dentry;
30800+ path->dentry = d->d_parent;
027c5e7a 30801+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 30802+ path->dentry = d;
30803+ if (unlikely(err))
30804+ goto out;
30805+
c1595e42 30806+ lockdep_off();
1facf9fc 30807+ err = vfs_mknod(dir, path->dentry, mode, dev);
c1595e42 30808+ lockdep_on();
1facf9fc 30809+ if (!err) {
30810+ struct path tmp = *path;
30811+ int did;
30812+
30813+ vfsub_update_h_iattr(&tmp, &did);
30814+ if (did) {
30815+ tmp.dentry = path->dentry->d_parent;
30816+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30817+ }
30818+ /*ignore*/
30819+ }
30820+
4f0767ce 30821+out:
1facf9fc 30822+ return err;
30823+}
30824+
30825+static int au_test_nlink(struct inode *inode)
30826+{
30827+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
30828+
30829+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
30830+ || inode->i_nlink < link_max)
30831+ return 0;
30832+ return -EMLINK;
30833+}
30834+
523b37e3
AM
30835+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
30836+ struct inode **delegated_inode)
1facf9fc 30837+{
30838+ int err;
30839+ struct dentry *d;
30840+
30841+ IMustLock(dir);
30842+
5527c038 30843+ err = au_test_nlink(d_inode(src_dentry));
1facf9fc 30844+ if (unlikely(err))
30845+ return err;
30846+
b4510431 30847+ /* we don't call may_linkat() */
1facf9fc 30848+ d = path->dentry;
30849+ path->dentry = d->d_parent;
b752ccd1 30850+ err = security_path_link(src_dentry, path, d);
1facf9fc 30851+ path->dentry = d;
30852+ if (unlikely(err))
30853+ goto out;
30854+
2cbb1c4b 30855+ lockdep_off();
523b37e3 30856+ err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
2cbb1c4b 30857+ lockdep_on();
1facf9fc 30858+ if (!err) {
30859+ struct path tmp = *path;
30860+ int did;
30861+
30862+ /* fuse has different memory inode for the same inumber */
30863+ vfsub_update_h_iattr(&tmp, &did);
30864+ if (did) {
30865+ tmp.dentry = path->dentry->d_parent;
30866+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30867+ tmp.dentry = src_dentry;
30868+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30869+ }
30870+ /*ignore*/
30871+ }
30872+
4f0767ce 30873+out:
1facf9fc 30874+ return err;
30875+}
30876+
30877+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
523b37e3
AM
30878+ struct inode *dir, struct path *path,
30879+ struct inode **delegated_inode)
1facf9fc 30880+{
30881+ int err;
30882+ struct path tmp = {
30883+ .mnt = path->mnt
30884+ };
30885+ struct dentry *d;
30886+
30887+ IMustLock(dir);
30888+ IMustLock(src_dir);
30889+
30890+ d = path->dentry;
30891+ path->dentry = d->d_parent;
30892+ tmp.dentry = src_dentry->d_parent;
38d290e6 30893+ err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
1facf9fc 30894+ path->dentry = d;
30895+ if (unlikely(err))
30896+ goto out;
30897+
2cbb1c4b 30898+ lockdep_off();
523b37e3 30899+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
38d290e6 30900+ delegated_inode, /*flags*/0);
2cbb1c4b 30901+ lockdep_on();
1facf9fc 30902+ if (!err) {
30903+ int did;
30904+
30905+ tmp.dentry = d->d_parent;
30906+ vfsub_update_h_iattr(&tmp, &did);
30907+ if (did) {
30908+ tmp.dentry = src_dentry;
30909+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30910+ tmp.dentry = src_dentry->d_parent;
30911+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30912+ }
30913+ /*ignore*/
30914+ }
30915+
4f0767ce 30916+out:
1facf9fc 30917+ return err;
30918+}
30919+
30920+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
30921+{
30922+ int err;
30923+ struct dentry *d;
30924+
30925+ IMustLock(dir);
30926+
30927+ d = path->dentry;
30928+ path->dentry = d->d_parent;
b752ccd1 30929+ err = security_path_mkdir(path, d, mode);
1facf9fc 30930+ path->dentry = d;
30931+ if (unlikely(err))
30932+ goto out;
30933+
c1595e42 30934+ lockdep_off();
1facf9fc 30935+ err = vfs_mkdir(dir, path->dentry, mode);
c1595e42 30936+ lockdep_on();
1facf9fc 30937+ if (!err) {
30938+ struct path tmp = *path;
30939+ int did;
30940+
30941+ vfsub_update_h_iattr(&tmp, &did);
30942+ if (did) {
30943+ tmp.dentry = path->dentry->d_parent;
30944+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30945+ }
30946+ /*ignore*/
30947+ }
30948+
4f0767ce 30949+out:
1facf9fc 30950+ return err;
30951+}
30952+
30953+int vfsub_rmdir(struct inode *dir, struct path *path)
30954+{
30955+ int err;
30956+ struct dentry *d;
30957+
30958+ IMustLock(dir);
30959+
30960+ d = path->dentry;
30961+ path->dentry = d->d_parent;
b752ccd1 30962+ err = security_path_rmdir(path, d);
1facf9fc 30963+ path->dentry = d;
30964+ if (unlikely(err))
30965+ goto out;
30966+
2cbb1c4b 30967+ lockdep_off();
1facf9fc 30968+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 30969+ lockdep_on();
1facf9fc 30970+ if (!err) {
30971+ struct path tmp = {
30972+ .dentry = path->dentry->d_parent,
30973+ .mnt = path->mnt
30974+ };
30975+
30976+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
30977+ }
30978+
4f0767ce 30979+out:
1facf9fc 30980+ return err;
30981+}
30982+
30983+/* ---------------------------------------------------------------------- */
30984+
9dbd164d 30985+/* todo: support mmap_sem? */
1facf9fc 30986+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
30987+ loff_t *ppos)
30988+{
30989+ ssize_t err;
30990+
2cbb1c4b 30991+ lockdep_off();
1facf9fc 30992+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 30993+ lockdep_on();
1facf9fc 30994+ if (err >= 0)
30995+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30996+ return err;
30997+}
30998+
30999+/* todo: kernel_read()? */
31000+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31001+ loff_t *ppos)
31002+{
31003+ ssize_t err;
31004+ mm_segment_t oldfs;
b752ccd1
AM
31005+ union {
31006+ void *k;
31007+ char __user *u;
31008+ } buf;
1facf9fc 31009+
b752ccd1 31010+ buf.k = kbuf;
1facf9fc 31011+ oldfs = get_fs();
31012+ set_fs(KERNEL_DS);
b752ccd1 31013+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 31014+ set_fs(oldfs);
31015+ return err;
31016+}
31017+
31018+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31019+ loff_t *ppos)
31020+{
31021+ ssize_t err;
31022+
2cbb1c4b 31023+ lockdep_off();
1facf9fc 31024+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 31025+ lockdep_on();
1facf9fc 31026+ if (err >= 0)
31027+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
31028+ return err;
31029+}
31030+
31031+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
31032+{
31033+ ssize_t err;
31034+ mm_segment_t oldfs;
b752ccd1
AM
31035+ union {
31036+ void *k;
31037+ const char __user *u;
31038+ } buf;
1facf9fc 31039+
b752ccd1 31040+ buf.k = kbuf;
1facf9fc 31041+ oldfs = get_fs();
31042+ set_fs(KERNEL_DS);
b752ccd1 31043+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 31044+ set_fs(oldfs);
31045+ return err;
31046+}
31047+
4a4d8108
AM
31048+int vfsub_flush(struct file *file, fl_owner_t id)
31049+{
31050+ int err;
31051+
31052+ err = 0;
523b37e3 31053+ if (file->f_op->flush) {
2000de60 31054+ if (!au_test_nfs(file->f_path.dentry->d_sb))
2cbb1c4b
JR
31055+ err = file->f_op->flush(file, id);
31056+ else {
31057+ lockdep_off();
31058+ err = file->f_op->flush(file, id);
31059+ lockdep_on();
31060+ }
4a4d8108
AM
31061+ if (!err)
31062+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
31063+ /*ignore*/
31064+ }
31065+ return err;
31066+}
31067+
392086de 31068+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
1facf9fc 31069+{
31070+ int err;
31071+
523b37e3 31072+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 31073+
2cbb1c4b 31074+ lockdep_off();
392086de 31075+ err = iterate_dir(file, ctx);
2cbb1c4b 31076+ lockdep_on();
1facf9fc 31077+ if (err >= 0)
31078+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
31079+ return err;
31080+}
31081+
31082+long vfsub_splice_to(struct file *in, loff_t *ppos,
31083+ struct pipe_inode_info *pipe, size_t len,
31084+ unsigned int flags)
31085+{
31086+ long err;
31087+
2cbb1c4b 31088+ lockdep_off();
0fc653ad 31089+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 31090+ lockdep_on();
4a4d8108 31091+ file_accessed(in);
1facf9fc 31092+ if (err >= 0)
31093+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
31094+ return err;
31095+}
31096+
31097+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31098+ loff_t *ppos, size_t len, unsigned int flags)
31099+{
31100+ long err;
31101+
2cbb1c4b 31102+ lockdep_off();
0fc653ad 31103+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 31104+ lockdep_on();
1facf9fc 31105+ if (err >= 0)
31106+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
31107+ return err;
31108+}
31109+
53392da6
AM
31110+int vfsub_fsync(struct file *file, struct path *path, int datasync)
31111+{
31112+ int err;
31113+
31114+ /* file can be NULL */
31115+ lockdep_off();
31116+ err = vfs_fsync(file, datasync);
31117+ lockdep_on();
31118+ if (!err) {
31119+ if (!path) {
31120+ AuDebugOn(!file);
31121+ path = &file->f_path;
31122+ }
31123+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
31124+ }
31125+ return err;
31126+}
31127+
1facf9fc 31128+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
31129+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31130+ struct file *h_file)
31131+{
31132+ int err;
31133+ struct inode *h_inode;
c06a8ce3 31134+ struct super_block *h_sb;
1facf9fc 31135+
1facf9fc 31136+ if (!h_file) {
c06a8ce3
AM
31137+ err = vfsub_truncate(h_path, length);
31138+ goto out;
1facf9fc 31139+ }
31140+
5527c038 31141+ h_inode = d_inode(h_path->dentry);
c06a8ce3
AM
31142+ h_sb = h_inode->i_sb;
31143+ lockdep_off();
31144+ sb_start_write(h_sb);
31145+ lockdep_on();
1facf9fc 31146+ err = locks_verify_truncate(h_inode, h_file, length);
31147+ if (!err)
953406b4 31148+ err = security_path_truncate(h_path);
2cbb1c4b
JR
31149+ if (!err) {
31150+ lockdep_off();
1facf9fc 31151+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
31152+ lockdep_on();
31153+ }
c06a8ce3
AM
31154+ lockdep_off();
31155+ sb_end_write(h_sb);
31156+ lockdep_on();
1facf9fc 31157+
4f0767ce 31158+out:
1facf9fc 31159+ return err;
31160+}
31161+
31162+/* ---------------------------------------------------------------------- */
31163+
31164+struct au_vfsub_mkdir_args {
31165+ int *errp;
31166+ struct inode *dir;
31167+ struct path *path;
31168+ int mode;
31169+};
31170+
31171+static void au_call_vfsub_mkdir(void *args)
31172+{
31173+ struct au_vfsub_mkdir_args *a = args;
31174+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
31175+}
31176+
31177+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
31178+{
31179+ int err, do_sio, wkq_err;
31180+
31181+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
31182+ if (!do_sio) {
31183+ lockdep_off();
1facf9fc 31184+ err = vfsub_mkdir(dir, path, mode);
c1595e42
JR
31185+ lockdep_on();
31186+ } else {
1facf9fc 31187+ struct au_vfsub_mkdir_args args = {
31188+ .errp = &err,
31189+ .dir = dir,
31190+ .path = path,
31191+ .mode = mode
31192+ };
31193+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
31194+ if (unlikely(wkq_err))
31195+ err = wkq_err;
31196+ }
31197+
31198+ return err;
31199+}
31200+
31201+struct au_vfsub_rmdir_args {
31202+ int *errp;
31203+ struct inode *dir;
31204+ struct path *path;
31205+};
31206+
31207+static void au_call_vfsub_rmdir(void *args)
31208+{
31209+ struct au_vfsub_rmdir_args *a = args;
31210+ *a->errp = vfsub_rmdir(a->dir, a->path);
31211+}
31212+
31213+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
31214+{
31215+ int err, do_sio, wkq_err;
31216+
31217+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
31218+ if (!do_sio) {
31219+ lockdep_off();
1facf9fc 31220+ err = vfsub_rmdir(dir, path);
c1595e42
JR
31221+ lockdep_on();
31222+ } else {
1facf9fc 31223+ struct au_vfsub_rmdir_args args = {
31224+ .errp = &err,
31225+ .dir = dir,
31226+ .path = path
31227+ };
31228+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
31229+ if (unlikely(wkq_err))
31230+ err = wkq_err;
31231+ }
31232+
31233+ return err;
31234+}
31235+
31236+/* ---------------------------------------------------------------------- */
31237+
31238+struct notify_change_args {
31239+ int *errp;
31240+ struct path *path;
31241+ struct iattr *ia;
523b37e3 31242+ struct inode **delegated_inode;
1facf9fc 31243+};
31244+
31245+static void call_notify_change(void *args)
31246+{
31247+ struct notify_change_args *a = args;
31248+ struct inode *h_inode;
31249+
5527c038 31250+ h_inode = d_inode(a->path->dentry);
1facf9fc 31251+ IMustLock(h_inode);
31252+
31253+ *a->errp = -EPERM;
31254+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
c1595e42 31255+ lockdep_off();
523b37e3
AM
31256+ *a->errp = notify_change(a->path->dentry, a->ia,
31257+ a->delegated_inode);
c1595e42 31258+ lockdep_on();
1facf9fc 31259+ if (!*a->errp)
31260+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
31261+ }
31262+ AuTraceErr(*a->errp);
31263+}
31264+
523b37e3
AM
31265+int vfsub_notify_change(struct path *path, struct iattr *ia,
31266+ struct inode **delegated_inode)
1facf9fc 31267+{
31268+ int err;
31269+ struct notify_change_args args = {
523b37e3
AM
31270+ .errp = &err,
31271+ .path = path,
31272+ .ia = ia,
31273+ .delegated_inode = delegated_inode
1facf9fc 31274+ };
31275+
31276+ call_notify_change(&args);
31277+
31278+ return err;
31279+}
31280+
523b37e3
AM
31281+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31282+ struct inode **delegated_inode)
1facf9fc 31283+{
31284+ int err, wkq_err;
31285+ struct notify_change_args args = {
523b37e3
AM
31286+ .errp = &err,
31287+ .path = path,
31288+ .ia = ia,
31289+ .delegated_inode = delegated_inode
1facf9fc 31290+ };
31291+
31292+ wkq_err = au_wkq_wait(call_notify_change, &args);
31293+ if (unlikely(wkq_err))
31294+ err = wkq_err;
31295+
31296+ return err;
31297+}
31298+
31299+/* ---------------------------------------------------------------------- */
31300+
31301+struct unlink_args {
31302+ int *errp;
31303+ struct inode *dir;
31304+ struct path *path;
523b37e3 31305+ struct inode **delegated_inode;
1facf9fc 31306+};
31307+
31308+static void call_unlink(void *args)
31309+{
31310+ struct unlink_args *a = args;
31311+ struct dentry *d = a->path->dentry;
31312+ struct inode *h_inode;
31313+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
c1595e42 31314+ && au_dcount(d) == 1);
1facf9fc 31315+
31316+ IMustLock(a->dir);
31317+
31318+ a->path->dentry = d->d_parent;
31319+ *a->errp = security_path_unlink(a->path, d);
31320+ a->path->dentry = d;
31321+ if (unlikely(*a->errp))
31322+ return;
31323+
31324+ if (!stop_sillyrename)
31325+ dget(d);
5527c038
JR
31326+ h_inode = NULL;
31327+ if (d_is_positive(d)) {
31328+ h_inode = d_inode(d);
027c5e7a 31329+ ihold(h_inode);
5527c038 31330+ }
1facf9fc 31331+
2cbb1c4b 31332+ lockdep_off();
523b37e3 31333+ *a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
2cbb1c4b 31334+ lockdep_on();
1facf9fc 31335+ if (!*a->errp) {
31336+ struct path tmp = {
31337+ .dentry = d->d_parent,
31338+ .mnt = a->path->mnt
31339+ };
31340+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
31341+ }
31342+
31343+ if (!stop_sillyrename)
31344+ dput(d);
31345+ if (h_inode)
31346+ iput(h_inode);
31347+
31348+ AuTraceErr(*a->errp);
31349+}
31350+
31351+/*
31352+ * @dir: must be locked.
31353+ * @path: lower path; its dentry is the target to unlink.
31354+ */
523b37e3
AM
31355+int vfsub_unlink(struct inode *dir, struct path *path,
31356+ struct inode **delegated_inode, int force)
1facf9fc 31357+{
31358+ int err;
31359+ struct unlink_args args = {
523b37e3
AM
31360+ .errp = &err,
31361+ .dir = dir,
31362+ .path = path,
31363+ .delegated_inode = delegated_inode
1facf9fc 31364+ };
31365+
31366+ if (!force)
31367+ call_unlink(&args);
31368+ else {
31369+ int wkq_err;
31370+
31371+ wkq_err = au_wkq_wait(call_unlink, &args);
31372+ if (unlikely(wkq_err))
31373+ err = wkq_err;
31374+ }
31375+
31376+ return err;
31377+}
7f207e10
AM
31378diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
31379--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 31380+++ linux/fs/aufs/vfsub.h 2016-10-09 16:55:36.496035060 +0200
f0c0a007 31381@@ -0,0 +1,316 @@
1facf9fc 31382+/*
8cdd5066 31383+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 31384+ *
31385+ * This program, aufs is free software; you can redistribute it and/or modify
31386+ * it under the terms of the GNU General Public License as published by
31387+ * the Free Software Foundation; either version 2 of the License, or
31388+ * (at your option) any later version.
dece6358
AM
31389+ *
31390+ * This program is distributed in the hope that it will be useful,
31391+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31392+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31393+ * GNU General Public License for more details.
31394+ *
31395+ * You should have received a copy of the GNU General Public License
523b37e3 31396+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31397+ */
31398+
31399+/*
31400+ * sub-routines for VFS
31401+ */
31402+
31403+#ifndef __AUFS_VFSUB_H__
31404+#define __AUFS_VFSUB_H__
31405+
31406+#ifdef __KERNEL__
31407+
31408+#include <linux/fs.h>
b4510431 31409+#include <linux/mount.h>
8cdd5066 31410+#include <linux/posix_acl.h>
c1595e42 31411+#include <linux/xattr.h>
7f207e10 31412+#include "debug.h"
1facf9fc 31413+
7f207e10 31414+/* copied from linux/fs/internal.h */
2cbb1c4b 31415+/* todo: BAD approach!! */
c06a8ce3 31416+extern void __mnt_drop_write(struct vfsmount *);
b912730e 31417+extern int open_check_o_direct(struct file *f);
7f207e10
AM
31418+
31419+/* ---------------------------------------------------------------------- */
1facf9fc 31420+
31421+/* lock subclass for lower inode */
31422+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
31423+/* reduce? gave up. */
31424+enum {
c1595e42 31425+ AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
1facf9fc 31426+ AuLsc_I_PARENT, /* lower inode, parent first */
31427+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 31428+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 31429+ AuLsc_I_CHILD,
31430+ AuLsc_I_CHILD2,
31431+ AuLsc_I_End
31432+};
31433+
31434+/* to ease debugging, keep these as macros, not inline functions */
31435+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
febd17d6 31436+#define IMustLock(i) AuDebugOn(!inode_is_locked(i))
1facf9fc 31437+
31438+/* ---------------------------------------------------------------------- */
31439+
7f207e10
AM
31440+static inline void vfsub_drop_nlink(struct inode *inode)
31441+{
31442+ AuDebugOn(!inode->i_nlink);
31443+ drop_nlink(inode);
31444+}
31445+
027c5e7a
AM
31446+static inline void vfsub_dead_dir(struct inode *inode)
31447+{
31448+ AuDebugOn(!S_ISDIR(inode->i_mode));
31449+ inode->i_flags |= S_DEAD;
31450+ clear_nlink(inode);
31451+}
31452+
392086de
AM
31453+static inline int vfsub_native_ro(struct inode *inode)
31454+{
31455+ return (inode->i_sb->s_flags & MS_RDONLY)
31456+ || IS_RDONLY(inode)
31457+ /* || IS_APPEND(inode) */
31458+ || IS_IMMUTABLE(inode);
31459+}
31460+
8cdd5066
JR
31461+#ifdef CONFIG_AUFS_BR_FUSE
31462+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
31463+#else
31464+AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
31465+#endif
31466+
7f207e10
AM
31467+/* ---------------------------------------------------------------------- */
31468+
31469+int vfsub_update_h_iattr(struct path *h_path, int *did);
31470+struct file *vfsub_dentry_open(struct path *path, int flags);
31471+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
b912730e
AM
31472+struct vfsub_aopen_args {
31473+ struct file *file;
31474+ unsigned int open_flag;
31475+ umode_t create_mode;
31476+ int *opened;
31477+};
31478+struct au_branch;
31479+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
31480+ struct vfsub_aopen_args *args, struct au_branch *br);
1facf9fc 31481+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
b4510431 31482+
febd17d6
JR
31483+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
31484+ struct dentry *parent, int len);
1facf9fc 31485+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
31486+ int len);
b4510431
AM
31487+
31488+struct vfsub_lkup_one_args {
31489+ struct dentry **errp;
31490+ struct qstr *name;
31491+ struct dentry *parent;
31492+};
31493+
31494+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
31495+ struct dentry *parent)
31496+{
31497+ return vfsub_lookup_one_len(name->name, parent, name->len);
31498+}
31499+
31500+void vfsub_call_lkup_one(void *args);
31501+
31502+/* ---------------------------------------------------------------------- */
31503+
31504+static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
31505+{
31506+ int err;
076b876e 31507+
b4510431
AM
31508+ lockdep_off();
31509+ err = mnt_want_write(mnt);
31510+ lockdep_on();
31511+ return err;
31512+}
31513+
31514+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
31515+{
31516+ lockdep_off();
31517+ mnt_drop_write(mnt);
31518+ lockdep_on();
31519+}
1facf9fc 31520+
7e9cd9fe 31521+#if 0 /* reserved */
c06a8ce3
AM
31522+static inline void vfsub_mnt_drop_write_file(struct file *file)
31523+{
31524+ lockdep_off();
31525+ mnt_drop_write_file(file);
31526+ lockdep_on();
31527+}
7e9cd9fe 31528+#endif
c06a8ce3 31529+
1facf9fc 31530+/* ---------------------------------------------------------------------- */
31531+
31532+struct au_hinode;
31533+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
31534+ struct dentry *d2, struct au_hinode *hdir2);
31535+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
31536+ struct dentry *d2, struct au_hinode *hdir2);
31537+
537831f9
AM
31538+int vfsub_create(struct inode *dir, struct path *path, int mode,
31539+ bool want_excl);
1facf9fc 31540+int vfsub_symlink(struct inode *dir, struct path *path,
31541+ const char *symname);
31542+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
31543+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
523b37e3 31544+ struct path *path, struct inode **delegated_inode);
1facf9fc 31545+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
523b37e3
AM
31546+ struct inode *hdir, struct path *path,
31547+ struct inode **delegated_inode);
1facf9fc 31548+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
31549+int vfsub_rmdir(struct inode *dir, struct path *path);
31550+
31551+/* ---------------------------------------------------------------------- */
31552+
31553+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
31554+ loff_t *ppos);
31555+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31556+ loff_t *ppos);
31557+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31558+ loff_t *ppos);
31559+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
31560+ loff_t *ppos);
4a4d8108 31561+int vfsub_flush(struct file *file, fl_owner_t id);
392086de
AM
31562+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
31563+
c06a8ce3
AM
31564+static inline loff_t vfsub_f_size_read(struct file *file)
31565+{
31566+ return i_size_read(file_inode(file));
31567+}
31568+
4a4d8108
AM
31569+static inline unsigned int vfsub_file_flags(struct file *file)
31570+{
31571+ unsigned int flags;
31572+
31573+ spin_lock(&file->f_lock);
31574+ flags = file->f_flags;
31575+ spin_unlock(&file->f_lock);
31576+
31577+ return flags;
31578+}
1308ab2a 31579+
f0c0a007
AM
31580+static inline int vfsub_file_execed(struct file *file)
31581+{
31582+ /* todo: direct access f_flags */
31583+ return !!(vfsub_file_flags(file) & __FMODE_EXEC);
31584+}
31585+
7e9cd9fe 31586+#if 0 /* reserved */
1facf9fc 31587+static inline void vfsub_file_accessed(struct file *h_file)
31588+{
31589+ file_accessed(h_file);
31590+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
31591+}
7e9cd9fe 31592+#endif
1facf9fc 31593+
79b8bda9 31594+#if 0 /* reserved */
1facf9fc 31595+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
31596+ struct dentry *h_dentry)
31597+{
31598+ struct path h_path = {
31599+ .dentry = h_dentry,
31600+ .mnt = h_mnt
31601+ };
92d182d2 31602+ touch_atime(&h_path);
1facf9fc 31603+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
31604+}
79b8bda9 31605+#endif
1facf9fc 31606+
0c3ec466
AM
31607+static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
31608+ int flags)
31609+{
5afbbe0d 31610+ return update_time(h_inode, ts, flags);
0c3ec466
AM
31611+ /* no vfsub_update_h_iattr() since we don't have struct path */
31612+}
31613+
8cdd5066
JR
31614+#ifdef CONFIG_FS_POSIX_ACL
31615+static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
31616+{
31617+ int err;
31618+
31619+ err = posix_acl_chmod(h_inode, h_mode);
31620+ if (err == -EOPNOTSUPP)
31621+ err = 0;
31622+ return err;
31623+}
31624+#else
31625+AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
31626+#endif
31627+
4a4d8108
AM
31628+long vfsub_splice_to(struct file *in, loff_t *ppos,
31629+ struct pipe_inode_info *pipe, size_t len,
31630+ unsigned int flags);
31631+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31632+ loff_t *ppos, size_t len, unsigned int flags);
c06a8ce3
AM
31633+
31634+static inline long vfsub_truncate(struct path *path, loff_t length)
31635+{
31636+ long err;
076b876e 31637+
c06a8ce3
AM
31638+ lockdep_off();
31639+ err = vfs_truncate(path, length);
31640+ lockdep_on();
31641+ return err;
31642+}
31643+
4a4d8108
AM
31644+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31645+ struct file *h_file);
53392da6 31646+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 31647+
1facf9fc 31648+/* ---------------------------------------------------------------------- */
31649+
31650+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
31651+{
31652+ loff_t err;
31653+
2cbb1c4b 31654+ lockdep_off();
1facf9fc 31655+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 31656+ lockdep_on();
1facf9fc 31657+ return err;
31658+}
31659+
31660+/* ---------------------------------------------------------------------- */
31661+
4a4d8108
AM
31662+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
31663+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
523b37e3
AM
31664+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31665+ struct inode **delegated_inode);
31666+int vfsub_notify_change(struct path *path, struct iattr *ia,
31667+ struct inode **delegated_inode);
31668+int vfsub_unlink(struct inode *dir, struct path *path,
31669+ struct inode **delegated_inode, int force);
4a4d8108 31670+
c1595e42
JR
31671+/* ---------------------------------------------------------------------- */
31672+
31673+static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
31674+ const void *value, size_t size, int flags)
31675+{
31676+ int err;
31677+
31678+ lockdep_off();
31679+ err = vfs_setxattr(dentry, name, value, size, flags);
31680+ lockdep_on();
31681+
31682+ return err;
31683+}
31684+
31685+static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
31686+{
31687+ int err;
31688+
31689+ lockdep_off();
31690+ err = vfs_removexattr(dentry, name);
31691+ lockdep_on();
31692+
31693+ return err;
31694+}
31695+
1facf9fc 31696+#endif /* __KERNEL__ */
31697+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
31698diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
31699--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 31700+++ linux/fs/aufs/wbr_policy.c 2016-10-09 16:55:36.496035060 +0200
076b876e 31701@@ -0,0 +1,765 @@
1facf9fc 31702+/*
8cdd5066 31703+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 31704+ *
31705+ * This program, aufs is free software; you can redistribute it and/or modify
31706+ * it under the terms of the GNU General Public License as published by
31707+ * the Free Software Foundation; either version 2 of the License, or
31708+ * (at your option) any later version.
dece6358
AM
31709+ *
31710+ * This program is distributed in the hope that it will be useful,
31711+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31712+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31713+ * GNU General Public License for more details.
31714+ *
31715+ * You should have received a copy of the GNU General Public License
523b37e3 31716+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31717+ */
31718+
31719+/*
31720+ * policies for selecting one among multiple writable branches
31721+ */
31722+
31723+#include <linux/statfs.h>
31724+#include "aufs.h"
31725+
31726+/* subset of cpup_attr() */
31727+static noinline_for_stack
31728+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
31729+{
31730+ int err, sbits;
31731+ struct iattr ia;
31732+ struct inode *h_isrc;
31733+
5527c038 31734+ h_isrc = d_inode(h_src);
1facf9fc 31735+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
31736+ ia.ia_mode = h_isrc->i_mode;
31737+ ia.ia_uid = h_isrc->i_uid;
31738+ ia.ia_gid = h_isrc->i_gid;
31739+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
5527c038 31740+ au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
523b37e3
AM
31741+ /* no delegation since it is just created */
31742+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31743+
31744+ /* is this nfs only? */
31745+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
31746+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
31747+ ia.ia_mode = h_isrc->i_mode;
523b37e3 31748+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31749+ }
31750+
31751+ return err;
31752+}
31753+
31754+#define AuCpdown_PARENT_OPQ 1
31755+#define AuCpdown_WHED (1 << 1)
31756+#define AuCpdown_MADE_DIR (1 << 2)
31757+#define AuCpdown_DIROPQ (1 << 3)
31758+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
7f207e10
AM
31759+#define au_fset_cpdown(flags, name) \
31760+ do { (flags) |= AuCpdown_##name; } while (0)
31761+#define au_fclr_cpdown(flags, name) \
31762+ do { (flags) &= ~AuCpdown_##name; } while (0)
1facf9fc 31763+
1facf9fc 31764+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
c2b27bf2 31765+ unsigned int *flags)
1facf9fc 31766+{
31767+ int err;
31768+ struct dentry *opq_dentry;
31769+
31770+ opq_dentry = au_diropq_create(dentry, bdst);
31771+ err = PTR_ERR(opq_dentry);
31772+ if (IS_ERR(opq_dentry))
31773+ goto out;
31774+ dput(opq_dentry);
c2b27bf2 31775+ au_fset_cpdown(*flags, DIROPQ);
1facf9fc 31776+
4f0767ce 31777+out:
1facf9fc 31778+ return err;
31779+}
31780+
31781+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
31782+ struct inode *dir, aufs_bindex_t bdst)
31783+{
31784+ int err;
31785+ struct path h_path;
31786+ struct au_branch *br;
31787+
31788+ br = au_sbr(dentry->d_sb, bdst);
31789+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
31790+ err = PTR_ERR(h_path.dentry);
31791+ if (IS_ERR(h_path.dentry))
31792+ goto out;
31793+
31794+ err = 0;
5527c038 31795+ if (d_is_positive(h_path.dentry)) {
86dc4139 31796+ h_path.mnt = au_br_mnt(br);
1facf9fc 31797+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
31798+ dentry);
31799+ }
31800+ dput(h_path.dentry);
31801+
4f0767ce 31802+out:
1facf9fc 31803+ return err;
31804+}
31805+
31806+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 31807+ struct au_pin *pin,
1facf9fc 31808+ struct dentry *h_parent, void *arg)
31809+{
31810+ int err, rerr;
5afbbe0d 31811+ aufs_bindex_t bopq, btop;
1facf9fc 31812+ struct path h_path;
31813+ struct dentry *parent;
31814+ struct inode *h_dir, *h_inode, *inode, *dir;
c2b27bf2 31815+ unsigned int *flags = arg;
1facf9fc 31816+
5afbbe0d 31817+ btop = au_dbtop(dentry);
1facf9fc 31818+ /* dentry is di-locked */
31819+ parent = dget_parent(dentry);
5527c038
JR
31820+ dir = d_inode(parent);
31821+ h_dir = d_inode(h_parent);
1facf9fc 31822+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
31823+ IMustLock(h_dir);
31824+
86dc4139 31825+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
1facf9fc 31826+ if (unlikely(err < 0))
31827+ goto out;
31828+ h_path.dentry = au_h_dptr(dentry, bdst);
31829+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
31830+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
31831+ S_IRWXU | S_IRUGO | S_IXUGO);
31832+ if (unlikely(err))
31833+ goto out_put;
c2b27bf2 31834+ au_fset_cpdown(*flags, MADE_DIR);
1facf9fc 31835+
1facf9fc 31836+ bopq = au_dbdiropq(dentry);
c2b27bf2
AM
31837+ au_fclr_cpdown(*flags, WHED);
31838+ au_fclr_cpdown(*flags, DIROPQ);
1facf9fc 31839+ if (au_dbwh(dentry) == bdst)
c2b27bf2
AM
31840+ au_fset_cpdown(*flags, WHED);
31841+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
31842+ au_fset_cpdown(*flags, PARENT_OPQ);
5527c038 31843+ h_inode = d_inode(h_path.dentry);
febd17d6 31844+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
c2b27bf2
AM
31845+ if (au_ftest_cpdown(*flags, WHED)) {
31846+ err = au_cpdown_dir_opq(dentry, bdst, flags);
1facf9fc 31847+ if (unlikely(err)) {
febd17d6 31848+ inode_unlock(h_inode);
1facf9fc 31849+ goto out_dir;
31850+ }
31851+ }
31852+
5afbbe0d 31853+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop));
febd17d6 31854+ inode_unlock(h_inode);
1facf9fc 31855+ if (unlikely(err))
31856+ goto out_opq;
31857+
c2b27bf2 31858+ if (au_ftest_cpdown(*flags, WHED)) {
1facf9fc 31859+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
31860+ if (unlikely(err))
31861+ goto out_opq;
31862+ }
31863+
5527c038 31864+ inode = d_inode(dentry);
5afbbe0d
AM
31865+ if (au_ibbot(inode) < bdst)
31866+ au_set_ibbot(inode, bdst);
1facf9fc 31867+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
31868+ au_hi_flags(inode, /*isdir*/1));
076b876e 31869+ au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
1facf9fc 31870+ goto out; /* success */
31871+
31872+ /* revert */
4f0767ce 31873+out_opq:
c2b27bf2 31874+ if (au_ftest_cpdown(*flags, DIROPQ)) {
febd17d6 31875+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
1facf9fc 31876+ rerr = au_diropq_remove(dentry, bdst);
febd17d6 31877+ inode_unlock(h_inode);
1facf9fc 31878+ if (unlikely(rerr)) {
523b37e3
AM
31879+ AuIOErr("failed removing diropq for %pd b%d (%d)\n",
31880+ dentry, bdst, rerr);
1facf9fc 31881+ err = -EIO;
31882+ goto out;
31883+ }
31884+ }
4f0767ce 31885+out_dir:
c2b27bf2 31886+ if (au_ftest_cpdown(*flags, MADE_DIR)) {
1facf9fc 31887+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
31888+ if (unlikely(rerr)) {
523b37e3
AM
31889+ AuIOErr("failed removing %pd b%d (%d)\n",
31890+ dentry, bdst, rerr);
1facf9fc 31891+ err = -EIO;
31892+ }
31893+ }
4f0767ce 31894+out_put:
1facf9fc 31895+ au_set_h_dptr(dentry, bdst, NULL);
5afbbe0d
AM
31896+ if (au_dbbot(dentry) == bdst)
31897+ au_update_dbbot(dentry);
4f0767ce 31898+out:
1facf9fc 31899+ dput(parent);
31900+ return err;
31901+}
31902+
31903+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
31904+{
31905+ int err;
c2b27bf2 31906+ unsigned int flags;
1facf9fc 31907+
c2b27bf2
AM
31908+ flags = 0;
31909+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
1facf9fc 31910+
31911+ return err;
31912+}
31913+
31914+/* ---------------------------------------------------------------------- */
31915+
31916+/* policies for create */
31917+
c2b27bf2 31918+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
4a4d8108
AM
31919+{
31920+ int err, i, j, ndentry;
31921+ aufs_bindex_t bopq;
31922+ struct au_dcsub_pages dpages;
31923+ struct au_dpage *dpage;
31924+ struct dentry **dentries, *parent, *d;
31925+
31926+ err = au_dpages_init(&dpages, GFP_NOFS);
31927+ if (unlikely(err))
31928+ goto out;
31929+ parent = dget_parent(dentry);
027c5e7a 31930+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
31931+ if (unlikely(err))
31932+ goto out_free;
31933+
31934+ err = bindex;
31935+ for (i = 0; i < dpages.ndpage; i++) {
31936+ dpage = dpages.dpages + i;
31937+ dentries = dpage->dentries;
31938+ ndentry = dpage->ndentry;
31939+ for (j = 0; j < ndentry; j++) {
31940+ d = dentries[j];
31941+ di_read_lock_parent2(d, !AuLock_IR);
31942+ bopq = au_dbdiropq(d);
31943+ di_read_unlock(d, !AuLock_IR);
31944+ if (bopq >= 0 && bopq < err)
31945+ err = bopq;
31946+ }
31947+ }
31948+
31949+out_free:
31950+ dput(parent);
31951+ au_dpages_free(&dpages);
31952+out:
31953+ return err;
31954+}
31955+
1facf9fc 31956+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
31957+{
31958+ for (; bindex >= 0; bindex--)
31959+ if (!au_br_rdonly(au_sbr(sb, bindex)))
31960+ return bindex;
31961+ return -EROFS;
31962+}
31963+
31964+/* top down parent */
392086de
AM
31965+static int au_wbr_create_tdp(struct dentry *dentry,
31966+ unsigned int flags __maybe_unused)
1facf9fc 31967+{
31968+ int err;
5afbbe0d 31969+ aufs_bindex_t btop, bindex;
1facf9fc 31970+ struct super_block *sb;
31971+ struct dentry *parent, *h_parent;
31972+
31973+ sb = dentry->d_sb;
5afbbe0d
AM
31974+ btop = au_dbtop(dentry);
31975+ err = btop;
31976+ if (!au_br_rdonly(au_sbr(sb, btop)))
1facf9fc 31977+ goto out;
31978+
31979+ err = -EROFS;
31980+ parent = dget_parent(dentry);
5afbbe0d 31981+ for (bindex = au_dbtop(parent); bindex < btop; bindex++) {
1facf9fc 31982+ h_parent = au_h_dptr(parent, bindex);
5527c038 31983+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31984+ continue;
31985+
31986+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
31987+ err = bindex;
31988+ break;
31989+ }
31990+ }
31991+ dput(parent);
31992+
31993+ /* bottom up here */
4a4d8108 31994+ if (unlikely(err < 0)) {
5afbbe0d 31995+ err = au_wbr_bu(sb, btop - 1);
4a4d8108
AM
31996+ if (err >= 0)
31997+ err = au_wbr_nonopq(dentry, err);
31998+ }
1facf9fc 31999+
4f0767ce 32000+out:
1facf9fc 32001+ AuDbg("b%d\n", err);
32002+ return err;
32003+}
32004+
32005+/* ---------------------------------------------------------------------- */
32006+
32007+/* an exception for the policy other than tdp */
32008+static int au_wbr_create_exp(struct dentry *dentry)
32009+{
32010+ int err;
32011+ aufs_bindex_t bwh, bdiropq;
32012+ struct dentry *parent;
32013+
32014+ err = -1;
32015+ bwh = au_dbwh(dentry);
32016+ parent = dget_parent(dentry);
32017+ bdiropq = au_dbdiropq(parent);
32018+ if (bwh >= 0) {
32019+ if (bdiropq >= 0)
32020+ err = min(bdiropq, bwh);
32021+ else
32022+ err = bwh;
32023+ AuDbg("%d\n", err);
32024+ } else if (bdiropq >= 0) {
32025+ err = bdiropq;
32026+ AuDbg("%d\n", err);
32027+ }
32028+ dput(parent);
32029+
4a4d8108
AM
32030+ if (err >= 0)
32031+ err = au_wbr_nonopq(dentry, err);
32032+
1facf9fc 32033+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
32034+ err = -1;
32035+
32036+ AuDbg("%d\n", err);
32037+ return err;
32038+}
32039+
32040+/* ---------------------------------------------------------------------- */
32041+
32042+/* round robin */
32043+static int au_wbr_create_init_rr(struct super_block *sb)
32044+{
32045+ int err;
32046+
5afbbe0d 32047+ err = au_wbr_bu(sb, au_sbbot(sb));
1facf9fc 32048+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 32049+ /* smp_mb(); */
1facf9fc 32050+
32051+ AuDbg("b%d\n", err);
32052+ return err;
32053+}
32054+
392086de 32055+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
1facf9fc 32056+{
32057+ int err, nbr;
32058+ unsigned int u;
5afbbe0d 32059+ aufs_bindex_t bindex, bbot;
1facf9fc 32060+ struct super_block *sb;
32061+ atomic_t *next;
32062+
32063+ err = au_wbr_create_exp(dentry);
32064+ if (err >= 0)
32065+ goto out;
32066+
32067+ sb = dentry->d_sb;
32068+ next = &au_sbi(sb)->si_wbr_rr_next;
5afbbe0d
AM
32069+ bbot = au_sbbot(sb);
32070+ nbr = bbot + 1;
32071+ for (bindex = 0; bindex <= bbot; bindex++) {
392086de 32072+ if (!au_ftest_wbr(flags, DIR)) {
1facf9fc 32073+ err = atomic_dec_return(next) + 1;
32074+ /* modulo for 0 is meaningless */
32075+ if (unlikely(!err))
32076+ err = atomic_dec_return(next) + 1;
32077+ } else
32078+ err = atomic_read(next);
32079+ AuDbg("%d\n", err);
32080+ u = err;
32081+ err = u % nbr;
32082+ AuDbg("%d\n", err);
32083+ if (!au_br_rdonly(au_sbr(sb, err)))
32084+ break;
32085+ err = -EROFS;
32086+ }
32087+
4a4d8108
AM
32088+ if (err >= 0)
32089+ err = au_wbr_nonopq(dentry, err);
32090+
4f0767ce 32091+out:
1facf9fc 32092+ AuDbg("%d\n", err);
32093+ return err;
32094+}
32095+
32096+/* ---------------------------------------------------------------------- */
32097+
32098+/* most free space */
392086de 32099+static void au_mfs(struct dentry *dentry, struct dentry *parent)
1facf9fc 32100+{
32101+ struct super_block *sb;
32102+ struct au_branch *br;
32103+ struct au_wbr_mfs *mfs;
392086de 32104+ struct dentry *h_parent;
5afbbe0d 32105+ aufs_bindex_t bindex, bbot;
1facf9fc 32106+ int err;
32107+ unsigned long long b, bavail;
7f207e10 32108+ struct path h_path;
1facf9fc 32109+ /* reduce the stack usage */
32110+ struct kstatfs *st;
32111+
32112+ st = kmalloc(sizeof(*st), GFP_NOFS);
32113+ if (unlikely(!st)) {
32114+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
32115+ return;
32116+ }
32117+
32118+ bavail = 0;
32119+ sb = dentry->d_sb;
32120+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 32121+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 32122+ mfs->mfs_bindex = -EROFS;
32123+ mfs->mfsrr_bytes = 0;
392086de
AM
32124+ if (!parent) {
32125+ bindex = 0;
5afbbe0d 32126+ bbot = au_sbbot(sb);
392086de 32127+ } else {
5afbbe0d
AM
32128+ bindex = au_dbtop(parent);
32129+ bbot = au_dbtaildir(parent);
392086de
AM
32130+ }
32131+
5afbbe0d 32132+ for (; bindex <= bbot; bindex++) {
392086de
AM
32133+ if (parent) {
32134+ h_parent = au_h_dptr(parent, bindex);
5527c038 32135+ if (!h_parent || d_is_negative(h_parent))
392086de
AM
32136+ continue;
32137+ }
1facf9fc 32138+ br = au_sbr(sb, bindex);
32139+ if (au_br_rdonly(br))
32140+ continue;
32141+
32142+ /* sb->s_root for NFS is unreliable */
86dc4139 32143+ h_path.mnt = au_br_mnt(br);
7f207e10
AM
32144+ h_path.dentry = h_path.mnt->mnt_root;
32145+ err = vfs_statfs(&h_path, st);
1facf9fc 32146+ if (unlikely(err)) {
32147+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
32148+ continue;
32149+ }
32150+
32151+ /* when the available size is equal, select the lower one */
32152+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
32153+ || sizeof(b) < sizeof(st->f_bsize));
32154+ b = st->f_bavail * st->f_bsize;
32155+ br->br_wbr->wbr_bytes = b;
32156+ if (b >= bavail) {
32157+ bavail = b;
32158+ mfs->mfs_bindex = bindex;
32159+ mfs->mfs_jiffy = jiffies;
32160+ }
32161+ }
32162+
32163+ mfs->mfsrr_bytes = bavail;
32164+ AuDbg("b%d\n", mfs->mfs_bindex);
f0c0a007 32165+ au_delayed_kfree(st);
1facf9fc 32166+}
32167+
392086de 32168+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
1facf9fc 32169+{
32170+ int err;
392086de 32171+ struct dentry *parent;
1facf9fc 32172+ struct super_block *sb;
32173+ struct au_wbr_mfs *mfs;
32174+
32175+ err = au_wbr_create_exp(dentry);
32176+ if (err >= 0)
32177+ goto out;
32178+
32179+ sb = dentry->d_sb;
392086de
AM
32180+ parent = NULL;
32181+ if (au_ftest_wbr(flags, PARENT))
32182+ parent = dget_parent(dentry);
1facf9fc 32183+ mfs = &au_sbi(sb)->si_wbr_mfs;
32184+ mutex_lock(&mfs->mfs_lock);
32185+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
32186+ || mfs->mfs_bindex < 0
32187+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
392086de 32188+ au_mfs(dentry, parent);
1facf9fc 32189+ mutex_unlock(&mfs->mfs_lock);
32190+ err = mfs->mfs_bindex;
392086de 32191+ dput(parent);
1facf9fc 32192+
4a4d8108
AM
32193+ if (err >= 0)
32194+ err = au_wbr_nonopq(dentry, err);
32195+
4f0767ce 32196+out:
1facf9fc 32197+ AuDbg("b%d\n", err);
32198+ return err;
32199+}
32200+
32201+static int au_wbr_create_init_mfs(struct super_block *sb)
32202+{
32203+ struct au_wbr_mfs *mfs;
32204+
32205+ mfs = &au_sbi(sb)->si_wbr_mfs;
32206+ mutex_init(&mfs->mfs_lock);
32207+ mfs->mfs_jiffy = 0;
32208+ mfs->mfs_bindex = -EROFS;
32209+
32210+ return 0;
32211+}
32212+
32213+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
32214+{
32215+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
32216+ return 0;
32217+}
32218+
32219+/* ---------------------------------------------------------------------- */
32220+
32221+/* most free space and then round robin */
392086de 32222+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
1facf9fc 32223+{
32224+ int err;
32225+ struct au_wbr_mfs *mfs;
32226+
392086de 32227+ err = au_wbr_create_mfs(dentry, flags);
1facf9fc 32228+ if (err >= 0) {
32229+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 32230+ mutex_lock(&mfs->mfs_lock);
1facf9fc 32231+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
392086de 32232+ err = au_wbr_create_rr(dentry, flags);
dece6358 32233+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 32234+ }
32235+
32236+ AuDbg("b%d\n", err);
32237+ return err;
32238+}
32239+
32240+static int au_wbr_create_init_mfsrr(struct super_block *sb)
32241+{
32242+ int err;
32243+
32244+ au_wbr_create_init_mfs(sb); /* ignore */
32245+ err = au_wbr_create_init_rr(sb);
32246+
32247+ return err;
32248+}
32249+
32250+/* ---------------------------------------------------------------------- */
32251+
32252+/* top down parent and most free space */
392086de 32253+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
1facf9fc 32254+{
32255+ int err, e2;
32256+ unsigned long long b;
5afbbe0d 32257+ aufs_bindex_t bindex, btop, bbot;
1facf9fc 32258+ struct super_block *sb;
32259+ struct dentry *parent, *h_parent;
32260+ struct au_branch *br;
32261+
392086de 32262+ err = au_wbr_create_tdp(dentry, flags);
1facf9fc 32263+ if (unlikely(err < 0))
32264+ goto out;
32265+ parent = dget_parent(dentry);
5afbbe0d
AM
32266+ btop = au_dbtop(parent);
32267+ bbot = au_dbtaildir(parent);
32268+ if (btop == bbot)
1facf9fc 32269+ goto out_parent; /* success */
32270+
392086de 32271+ e2 = au_wbr_create_mfs(dentry, flags);
1facf9fc 32272+ if (e2 < 0)
32273+ goto out_parent; /* success */
32274+
32275+ /* when the available size is equal, select upper one */
32276+ sb = dentry->d_sb;
32277+ br = au_sbr(sb, err);
32278+ b = br->br_wbr->wbr_bytes;
32279+ AuDbg("b%d, %llu\n", err, b);
32280+
5afbbe0d 32281+ for (bindex = btop; bindex <= bbot; bindex++) {
1facf9fc 32282+ h_parent = au_h_dptr(parent, bindex);
5527c038 32283+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 32284+ continue;
32285+
32286+ br = au_sbr(sb, bindex);
32287+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
32288+ b = br->br_wbr->wbr_bytes;
32289+ err = bindex;
32290+ AuDbg("b%d, %llu\n", err, b);
32291+ }
32292+ }
32293+
4a4d8108
AM
32294+ if (err >= 0)
32295+ err = au_wbr_nonopq(dentry, err);
32296+
4f0767ce 32297+out_parent:
1facf9fc 32298+ dput(parent);
4f0767ce 32299+out:
1facf9fc 32300+ AuDbg("b%d\n", err);
32301+ return err;
32302+}
32303+
32304+/* ---------------------------------------------------------------------- */
32305+
392086de
AM
32306+/*
32307+ * - top down parent
32308+ * - most free space with parent
32309+ * - most free space round-robin regardless of the parent
32310+ */
32311+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
32312+{
32313+ int err;
32314+ unsigned long long watermark;
32315+ struct super_block *sb;
32316+ struct au_branch *br;
32317+ struct au_wbr_mfs *mfs;
32318+
32319+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
32320+ if (unlikely(err < 0))
32321+ goto out;
32322+
32323+ sb = dentry->d_sb;
32324+ br = au_sbr(sb, err);
32325+ mfs = &au_sbi(sb)->si_wbr_mfs;
32326+ mutex_lock(&mfs->mfs_lock);
32327+ watermark = mfs->mfsrr_watermark;
32328+ mutex_unlock(&mfs->mfs_lock);
32329+ if (br->br_wbr->wbr_bytes < watermark)
32330+ /* regardless of the parent dir */
32331+ err = au_wbr_create_mfsrr(dentry, flags);
32332+
32333+out:
32334+ AuDbg("b%d\n", err);
32335+ return err;
32336+}
32337+
32338+/* ---------------------------------------------------------------------- */
32339+
1facf9fc 32340+/* policies for copyup */
32341+
32342+/* top down parent */
32343+static int au_wbr_copyup_tdp(struct dentry *dentry)
32344+{
392086de 32345+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
1facf9fc 32346+}
32347+
32348+/* bottom up parent */
32349+static int au_wbr_copyup_bup(struct dentry *dentry)
32350+{
32351+ int err;
5afbbe0d 32352+ aufs_bindex_t bindex, btop;
1facf9fc 32353+ struct dentry *parent, *h_parent;
32354+ struct super_block *sb;
32355+
32356+ err = -EROFS;
32357+ sb = dentry->d_sb;
32358+ parent = dget_parent(dentry);
5afbbe0d
AM
32359+ btop = au_dbtop(parent);
32360+ for (bindex = au_dbtop(dentry); bindex >= btop; bindex--) {
1facf9fc 32361+ h_parent = au_h_dptr(parent, bindex);
5527c038 32362+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 32363+ continue;
32364+
32365+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
32366+ err = bindex;
32367+ break;
32368+ }
32369+ }
32370+ dput(parent);
32371+
32372+ /* bottom up here */
32373+ if (unlikely(err < 0))
5afbbe0d 32374+ err = au_wbr_bu(sb, btop - 1);
1facf9fc 32375+
32376+ AuDbg("b%d\n", err);
32377+ return err;
32378+}
32379+
32380+/* bottom up */
5afbbe0d 32381+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop)
1facf9fc 32382+{
32383+ int err;
32384+
5afbbe0d 32385+ err = au_wbr_bu(dentry->d_sb, btop);
4a4d8108 32386+ AuDbg("b%d\n", err);
5afbbe0d 32387+ if (err > btop)
4a4d8108 32388+ err = au_wbr_nonopq(dentry, err);
1facf9fc 32389+
32390+ AuDbg("b%d\n", err);
32391+ return err;
32392+}
32393+
076b876e
AM
32394+static int au_wbr_copyup_bu(struct dentry *dentry)
32395+{
32396+ int err;
5afbbe0d 32397+ aufs_bindex_t btop;
076b876e 32398+
5afbbe0d
AM
32399+ btop = au_dbtop(dentry);
32400+ err = au_wbr_do_copyup_bu(dentry, btop);
076b876e
AM
32401+ return err;
32402+}
32403+
1facf9fc 32404+/* ---------------------------------------------------------------------- */
32405+
32406+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
32407+ [AuWbrCopyup_TDP] = {
32408+ .copyup = au_wbr_copyup_tdp
32409+ },
32410+ [AuWbrCopyup_BUP] = {
32411+ .copyup = au_wbr_copyup_bup
32412+ },
32413+ [AuWbrCopyup_BU] = {
32414+ .copyup = au_wbr_copyup_bu
32415+ }
32416+};
32417+
32418+struct au_wbr_create_operations au_wbr_create_ops[] = {
32419+ [AuWbrCreate_TDP] = {
32420+ .create = au_wbr_create_tdp
32421+ },
32422+ [AuWbrCreate_RR] = {
32423+ .create = au_wbr_create_rr,
32424+ .init = au_wbr_create_init_rr
32425+ },
32426+ [AuWbrCreate_MFS] = {
32427+ .create = au_wbr_create_mfs,
32428+ .init = au_wbr_create_init_mfs,
32429+ .fin = au_wbr_create_fin_mfs
32430+ },
32431+ [AuWbrCreate_MFSV] = {
32432+ .create = au_wbr_create_mfs,
32433+ .init = au_wbr_create_init_mfs,
32434+ .fin = au_wbr_create_fin_mfs
32435+ },
32436+ [AuWbrCreate_MFSRR] = {
32437+ .create = au_wbr_create_mfsrr,
32438+ .init = au_wbr_create_init_mfsrr,
32439+ .fin = au_wbr_create_fin_mfs
32440+ },
32441+ [AuWbrCreate_MFSRRV] = {
32442+ .create = au_wbr_create_mfsrr,
32443+ .init = au_wbr_create_init_mfsrr,
32444+ .fin = au_wbr_create_fin_mfs
32445+ },
32446+ [AuWbrCreate_PMFS] = {
32447+ .create = au_wbr_create_pmfs,
32448+ .init = au_wbr_create_init_mfs,
32449+ .fin = au_wbr_create_fin_mfs
32450+ },
32451+ [AuWbrCreate_PMFSV] = {
32452+ .create = au_wbr_create_pmfs,
32453+ .init = au_wbr_create_init_mfs,
32454+ .fin = au_wbr_create_fin_mfs
392086de
AM
32455+ },
32456+ [AuWbrCreate_PMFSRR] = {
32457+ .create = au_wbr_create_pmfsrr,
32458+ .init = au_wbr_create_init_mfsrr,
32459+ .fin = au_wbr_create_fin_mfs
32460+ },
32461+ [AuWbrCreate_PMFSRRV] = {
32462+ .create = au_wbr_create_pmfsrr,
32463+ .init = au_wbr_create_init_mfsrr,
32464+ .fin = au_wbr_create_fin_mfs
1facf9fc 32465+ }
32466+};
7f207e10
AM
32467diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
32468--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 32469+++ linux/fs/aufs/whout.c 2016-10-09 16:55:36.496035060 +0200
be52b249 32470@@ -0,0 +1,1060 @@
1facf9fc 32471+/*
8cdd5066 32472+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 32473+ *
32474+ * This program, aufs is free software; you can redistribute it and/or modify
32475+ * it under the terms of the GNU General Public License as published by
32476+ * the Free Software Foundation; either version 2 of the License, or
32477+ * (at your option) any later version.
dece6358
AM
32478+ *
32479+ * This program is distributed in the hope that it will be useful,
32480+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32481+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32482+ * GNU General Public License for more details.
32483+ *
32484+ * You should have received a copy of the GNU General Public License
523b37e3 32485+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 32486+ */
32487+
32488+/*
32489+ * whiteout for logical deletion and opaque directory
32490+ */
32491+
1facf9fc 32492+#include "aufs.h"
32493+
32494+#define WH_MASK S_IRUGO
32495+
32496+/*
32497+ * If a directory contains this file, then it is opaque. We start with the
32498+ * .wh. flag so that it is blocked by lookup.
32499+ */
0c3ec466
AM
32500+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
32501+ sizeof(AUFS_WH_DIROPQ) - 1);
1facf9fc 32502+
32503+/*
32504+ * generate whiteout name, which is NOT NUL-terminated.
32505+ * @name: original d_name; name->name is the name to be prefixed
32506+ * and name->len is its length
32507+ * @wh: whiteout qstr
32508+ * returns zero when succeeds, otherwise error.
32509+ * succeeded value as wh->name should be freed by kfree().
32510+ */
32511+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
32512+{
32513+ char *p;
32514+
32515+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
32516+ return -ENAMETOOLONG;
32517+
32518+ wh->len = name->len + AUFS_WH_PFX_LEN;
32519+ p = kmalloc(wh->len, GFP_NOFS);
32520+ wh->name = p;
32521+ if (p) {
32522+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32523+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
32524+ /* smp_mb(); */
32525+ return 0;
32526+ }
32527+ return -ENOMEM;
32528+}
32529+
32530+/* ---------------------------------------------------------------------- */
32531+
32532+/*
32533+ * test if the @wh_name exists under @h_parent.
32534+ * @try_sio specifies the necessity of super-io.
32535+ */
076b876e 32536+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
1facf9fc 32537+{
32538+ int err;
32539+ struct dentry *wh_dentry;
1facf9fc 32540+
1facf9fc 32541+ if (!try_sio)
b4510431 32542+ wh_dentry = vfsub_lkup_one(wh_name, h_parent);
1facf9fc 32543+ else
076b876e 32544+ wh_dentry = au_sio_lkup_one(wh_name, h_parent);
1facf9fc 32545+ err = PTR_ERR(wh_dentry);
2000de60
JR
32546+ if (IS_ERR(wh_dentry)) {
32547+ if (err == -ENAMETOOLONG)
32548+ err = 0;
1facf9fc 32549+ goto out;
2000de60 32550+ }
1facf9fc 32551+
32552+ err = 0;
5527c038 32553+ if (d_is_negative(wh_dentry))
1facf9fc 32554+ goto out_wh; /* success */
32555+
32556+ err = 1;
7e9cd9fe 32557+ if (d_is_reg(wh_dentry))
1facf9fc 32558+ goto out_wh; /* success */
32559+
32560+ err = -EIO;
523b37e3 32561+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
5527c038 32562+ wh_dentry, d_inode(wh_dentry)->i_mode);
1facf9fc 32563+
4f0767ce 32564+out_wh:
1facf9fc 32565+ dput(wh_dentry);
4f0767ce 32566+out:
1facf9fc 32567+ return err;
32568+}
32569+
32570+/*
32571+ * test if the @h_dentry sets opaque or not.
32572+ */
076b876e 32573+int au_diropq_test(struct dentry *h_dentry)
1facf9fc 32574+{
32575+ int err;
32576+ struct inode *h_dir;
32577+
5527c038 32578+ h_dir = d_inode(h_dentry);
076b876e 32579+ err = au_wh_test(h_dentry, &diropq_name,
1facf9fc 32580+ au_test_h_perm_sio(h_dir, MAY_EXEC));
32581+ return err;
32582+}
32583+
32584+/*
32585+ * returns a negative dentry whose name is unique and temporary.
32586+ */
32587+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
32588+ struct qstr *prefix)
32589+{
1facf9fc 32590+ struct dentry *dentry;
32591+ int i;
027c5e7a 32592+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 32593+ *name, *p;
027c5e7a 32594+ /* strict atomic_t is unnecessary here */
1facf9fc 32595+ static unsigned short cnt;
32596+ struct qstr qs;
32597+
4a4d8108
AM
32598+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
32599+
1facf9fc 32600+ name = defname;
027c5e7a
AM
32601+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
32602+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 32603+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 32604+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 32605+ goto out;
32606+ dentry = ERR_PTR(-ENOMEM);
32607+ name = kmalloc(qs.len + 1, GFP_NOFS);
32608+ if (unlikely(!name))
32609+ goto out;
32610+ }
32611+
32612+ /* doubly whiteout-ed */
32613+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
32614+ p = name + AUFS_WH_PFX_LEN * 2;
32615+ memcpy(p, prefix->name, prefix->len);
32616+ p += prefix->len;
32617+ *p++ = '.';
4a4d8108 32618+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 32619+
32620+ qs.name = name;
32621+ for (i = 0; i < 3; i++) {
b752ccd1 32622+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
076b876e 32623+ dentry = au_sio_lkup_one(&qs, h_parent);
5527c038 32624+ if (IS_ERR(dentry) || d_is_negative(dentry))
1facf9fc 32625+ goto out_name;
32626+ dput(dentry);
32627+ }
0c3ec466 32628+ /* pr_warn("could not get random name\n"); */
1facf9fc 32629+ dentry = ERR_PTR(-EEXIST);
32630+ AuDbg("%.*s\n", AuLNPair(&qs));
32631+ BUG();
32632+
4f0767ce 32633+out_name:
1facf9fc 32634+ if (name != defname)
f0c0a007 32635+ au_delayed_kfree(name);
4f0767ce 32636+out:
4a4d8108 32637+ AuTraceErrPtr(dentry);
1facf9fc 32638+ return dentry;
1facf9fc 32639+}
32640+
32641+/*
32642+ * rename the @h_dentry on @br to the whiteouted temporary name.
32643+ */
32644+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
32645+{
32646+ int err;
32647+ struct path h_path = {
86dc4139 32648+ .mnt = au_br_mnt(br)
1facf9fc 32649+ };
523b37e3 32650+ struct inode *h_dir, *delegated;
1facf9fc 32651+ struct dentry *h_parent;
32652+
32653+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 32654+ h_dir = d_inode(h_parent);
1facf9fc 32655+ IMustLock(h_dir);
32656+
32657+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
32658+ err = PTR_ERR(h_path.dentry);
32659+ if (IS_ERR(h_path.dentry))
32660+ goto out;
32661+
32662+ /* under the same dir, no need to lock_rename() */
523b37e3
AM
32663+ delegated = NULL;
32664+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated);
1facf9fc 32665+ AuTraceErr(err);
523b37e3
AM
32666+ if (unlikely(err == -EWOULDBLOCK)) {
32667+ pr_warn("cannot retry for NFSv4 delegation"
32668+ " for an internal rename\n");
32669+ iput(delegated);
32670+ }
1facf9fc 32671+ dput(h_path.dentry);
32672+
4f0767ce 32673+out:
4a4d8108 32674+ AuTraceErr(err);
1facf9fc 32675+ return err;
32676+}
32677+
32678+/* ---------------------------------------------------------------------- */
32679+/*
32680+ * functions for removing a whiteout
32681+ */
32682+
32683+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
32684+{
523b37e3
AM
32685+ int err, force;
32686+ struct inode *delegated;
1facf9fc 32687+
32688+ /*
32689+ * forces superio when the dir has a sticky bit.
32690+ * this may be a violation of unix fs semantics.
32691+ */
32692+ force = (h_dir->i_mode & S_ISVTX)
5527c038 32693+ && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
523b37e3
AM
32694+ delegated = NULL;
32695+ err = vfsub_unlink(h_dir, h_path, &delegated, force);
32696+ if (unlikely(err == -EWOULDBLOCK)) {
32697+ pr_warn("cannot retry for NFSv4 delegation"
32698+ " for an internal unlink\n");
32699+ iput(delegated);
32700+ }
32701+ return err;
1facf9fc 32702+}
32703+
32704+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
32705+ struct dentry *dentry)
32706+{
32707+ int err;
32708+
32709+ err = do_unlink_wh(h_dir, h_path);
32710+ if (!err && dentry)
32711+ au_set_dbwh(dentry, -1);
32712+
32713+ return err;
32714+}
32715+
32716+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
32717+ struct au_branch *br)
32718+{
32719+ int err;
32720+ struct path h_path = {
86dc4139 32721+ .mnt = au_br_mnt(br)
1facf9fc 32722+ };
32723+
32724+ err = 0;
b4510431 32725+ h_path.dentry = vfsub_lkup_one(wh, h_parent);
1facf9fc 32726+ if (IS_ERR(h_path.dentry))
32727+ err = PTR_ERR(h_path.dentry);
32728+ else {
5527c038
JR
32729+ if (d_is_reg(h_path.dentry))
32730+ err = do_unlink_wh(d_inode(h_parent), &h_path);
1facf9fc 32731+ dput(h_path.dentry);
32732+ }
32733+
32734+ return err;
32735+}
32736+
32737+/* ---------------------------------------------------------------------- */
32738+/*
32739+ * initialize/clean whiteout for a branch
32740+ */
32741+
32742+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
32743+ const int isdir)
32744+{
32745+ int err;
523b37e3 32746+ struct inode *delegated;
1facf9fc 32747+
5527c038 32748+ if (d_is_negative(whpath->dentry))
1facf9fc 32749+ return;
32750+
86dc4139
AM
32751+ if (isdir)
32752+ err = vfsub_rmdir(h_dir, whpath);
523b37e3
AM
32753+ else {
32754+ delegated = NULL;
32755+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
32756+ if (unlikely(err == -EWOULDBLOCK)) {
32757+ pr_warn("cannot retry for NFSv4 delegation"
32758+ " for an internal unlink\n");
32759+ iput(delegated);
32760+ }
32761+ }
1facf9fc 32762+ if (unlikely(err))
523b37e3
AM
32763+ pr_warn("failed removing %pd (%d), ignored.\n",
32764+ whpath->dentry, err);
1facf9fc 32765+}
32766+
32767+static int test_linkable(struct dentry *h_root)
32768+{
5527c038 32769+ struct inode *h_dir = d_inode(h_root);
1facf9fc 32770+
32771+ if (h_dir->i_op->link)
32772+ return 0;
32773+
523b37e3
AM
32774+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
32775+ h_root, au_sbtype(h_root->d_sb));
1facf9fc 32776+ return -ENOSYS;
32777+}
32778+
32779+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
32780+static int au_whdir(struct inode *h_dir, struct path *path)
32781+{
32782+ int err;
32783+
32784+ err = -EEXIST;
5527c038 32785+ if (d_is_negative(path->dentry)) {
1facf9fc 32786+ int mode = S_IRWXU;
32787+
32788+ if (au_test_nfs(path->dentry->d_sb))
32789+ mode |= S_IXUGO;
86dc4139 32790+ err = vfsub_mkdir(h_dir, path, mode);
2000de60 32791+ } else if (d_is_dir(path->dentry))
1facf9fc 32792+ err = 0;
32793+ else
523b37e3 32794+ pr_err("unknown %pd exists\n", path->dentry);
1facf9fc 32795+
32796+ return err;
32797+}
32798+
32799+struct au_wh_base {
32800+ const struct qstr *name;
32801+ struct dentry *dentry;
32802+};
32803+
32804+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
32805+ struct path *h_path)
32806+{
32807+ h_path->dentry = base[AuBrWh_BASE].dentry;
32808+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32809+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32810+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32811+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32812+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32813+}
32814+
32815+/*
32816+ * returns tri-state,
c1595e42 32817+ * minus: error, caller should print the message
1facf9fc 32818+ * zero: success
c1595e42 32819+ * plus: error, caller should NOT print the message
1facf9fc 32820+ */
32821+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
32822+ int do_plink, struct au_wh_base base[],
32823+ struct path *h_path)
32824+{
32825+ int err;
32826+ struct inode *h_dir;
32827+
5527c038 32828+ h_dir = d_inode(h_root);
1facf9fc 32829+ h_path->dentry = base[AuBrWh_BASE].dentry;
32830+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32831+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32832+ if (do_plink) {
32833+ err = test_linkable(h_root);
32834+ if (unlikely(err)) {
32835+ err = 1;
32836+ goto out;
32837+ }
32838+
32839+ err = au_whdir(h_dir, h_path);
32840+ if (unlikely(err))
32841+ goto out;
32842+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32843+ } else
32844+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32845+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32846+ err = au_whdir(h_dir, h_path);
32847+ if (unlikely(err))
32848+ goto out;
32849+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32850+
4f0767ce 32851+out:
1facf9fc 32852+ return err;
32853+}
32854+
32855+/*
32856+ * for the moment, aufs supports the branch filesystem which does not support
32857+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
32858+ * copyup failed. finally, such filesystem will not be used as the writable
32859+ * branch.
32860+ *
32861+ * returns tri-state, see above.
32862+ */
32863+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
32864+ int do_plink, struct au_wh_base base[],
32865+ struct path *h_path)
32866+{
32867+ int err;
32868+ struct inode *h_dir;
32869+
1308ab2a 32870+ WbrWhMustWriteLock(wbr);
32871+
1facf9fc 32872+ err = test_linkable(h_root);
32873+ if (unlikely(err)) {
32874+ err = 1;
32875+ goto out;
32876+ }
32877+
32878+ /*
32879+ * todo: should this create be done in /sbin/mount.aufs helper?
32880+ */
32881+ err = -EEXIST;
5527c038
JR
32882+ h_dir = d_inode(h_root);
32883+ if (d_is_negative(base[AuBrWh_BASE].dentry)) {
86dc4139
AM
32884+ h_path->dentry = base[AuBrWh_BASE].dentry;
32885+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
7e9cd9fe 32886+ } else if (d_is_reg(base[AuBrWh_BASE].dentry))
1facf9fc 32887+ err = 0;
32888+ else
523b37e3 32889+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
1facf9fc 32890+ if (unlikely(err))
32891+ goto out;
32892+
32893+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32894+ if (do_plink) {
32895+ err = au_whdir(h_dir, h_path);
32896+ if (unlikely(err))
32897+ goto out;
32898+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32899+ } else
32900+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32901+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
32902+
32903+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32904+ err = au_whdir(h_dir, h_path);
32905+ if (unlikely(err))
32906+ goto out;
32907+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32908+
4f0767ce 32909+out:
1facf9fc 32910+ return err;
32911+}
32912+
32913+/*
32914+ * initialize the whiteout base file/dir for @br.
32915+ */
86dc4139 32916+int au_wh_init(struct au_branch *br, struct super_block *sb)
1facf9fc 32917+{
32918+ int err, i;
32919+ const unsigned char do_plink
32920+ = !!au_opt_test(au_mntflags(sb), PLINK);
1facf9fc 32921+ struct inode *h_dir;
86dc4139
AM
32922+ struct path path = br->br_path;
32923+ struct dentry *h_root = path.dentry;
1facf9fc 32924+ struct au_wbr *wbr = br->br_wbr;
32925+ static const struct qstr base_name[] = {
0c3ec466
AM
32926+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
32927+ sizeof(AUFS_BASE_NAME) - 1),
32928+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
32929+ sizeof(AUFS_PLINKDIR_NAME) - 1),
32930+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
32931+ sizeof(AUFS_ORPHDIR_NAME) - 1)
1facf9fc 32932+ };
32933+ struct au_wh_base base[] = {
32934+ [AuBrWh_BASE] = {
32935+ .name = base_name + AuBrWh_BASE,
32936+ .dentry = NULL
32937+ },
32938+ [AuBrWh_PLINK] = {
32939+ .name = base_name + AuBrWh_PLINK,
32940+ .dentry = NULL
32941+ },
32942+ [AuBrWh_ORPH] = {
32943+ .name = base_name + AuBrWh_ORPH,
32944+ .dentry = NULL
32945+ }
32946+ };
32947+
1308ab2a 32948+ if (wbr)
32949+ WbrWhMustWriteLock(wbr);
1facf9fc 32950+
1facf9fc 32951+ for (i = 0; i < AuBrWh_Last; i++) {
32952+ /* doubly whiteouted */
32953+ struct dentry *d;
32954+
32955+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
32956+ err = PTR_ERR(d);
32957+ if (IS_ERR(d))
32958+ goto out;
32959+
32960+ base[i].dentry = d;
32961+ AuDebugOn(wbr
32962+ && wbr->wbr_wh[i]
32963+ && wbr->wbr_wh[i] != base[i].dentry);
32964+ }
32965+
32966+ if (wbr)
32967+ for (i = 0; i < AuBrWh_Last; i++) {
32968+ dput(wbr->wbr_wh[i]);
32969+ wbr->wbr_wh[i] = NULL;
32970+ }
32971+
32972+ err = 0;
1e00d052 32973+ if (!au_br_writable(br->br_perm)) {
5527c038 32974+ h_dir = d_inode(h_root);
1facf9fc 32975+ au_wh_init_ro(h_dir, base, &path);
1e00d052 32976+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 32977+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
32978+ if (err > 0)
32979+ goto out;
32980+ else if (err)
32981+ goto out_err;
1e00d052 32982+ } else {
1facf9fc 32983+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
32984+ if (err > 0)
32985+ goto out;
32986+ else if (err)
32987+ goto out_err;
1facf9fc 32988+ }
32989+ goto out; /* success */
32990+
4f0767ce 32991+out_err:
523b37e3
AM
32992+ pr_err("an error(%d) on the writable branch %pd(%s)\n",
32993+ err, h_root, au_sbtype(h_root->d_sb));
4f0767ce 32994+out:
1facf9fc 32995+ for (i = 0; i < AuBrWh_Last; i++)
32996+ dput(base[i].dentry);
32997+ return err;
32998+}
32999+
33000+/* ---------------------------------------------------------------------- */
33001+/*
33002+ * whiteouts are all hard-linked usually.
33003+ * when its link count reaches a ceiling, we create a new whiteout base
33004+ * asynchronously.
33005+ */
33006+
33007+struct reinit_br_wh {
33008+ struct super_block *sb;
33009+ struct au_branch *br;
33010+};
33011+
33012+static void reinit_br_wh(void *arg)
33013+{
33014+ int err;
33015+ aufs_bindex_t bindex;
33016+ struct path h_path;
33017+ struct reinit_br_wh *a = arg;
33018+ struct au_wbr *wbr;
523b37e3 33019+ struct inode *dir, *delegated;
1facf9fc 33020+ struct dentry *h_root;
33021+ struct au_hinode *hdir;
33022+
33023+ err = 0;
33024+ wbr = a->br->br_wbr;
33025+ /* big aufs lock */
33026+ si_noflush_write_lock(a->sb);
33027+ if (!au_br_writable(a->br->br_perm))
33028+ goto out;
33029+ bindex = au_br_index(a->sb, a->br->br_id);
33030+ if (unlikely(bindex < 0))
33031+ goto out;
33032+
1308ab2a 33033+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
5527c038 33034+ dir = d_inode(a->sb->s_root);
1facf9fc 33035+ hdir = au_hi(dir, bindex);
33036+ h_root = au_h_dptr(a->sb->s_root, bindex);
86dc4139 33037+ AuDebugOn(h_root != au_br_dentry(a->br));
1facf9fc 33038+
5afbbe0d 33039+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 33040+ wbr_wh_write_lock(wbr);
33041+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
33042+ h_root, a->br);
33043+ if (!err) {
86dc4139
AM
33044+ h_path.dentry = wbr->wbr_whbase;
33045+ h_path.mnt = au_br_mnt(a->br);
523b37e3
AM
33046+ delegated = NULL;
33047+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
33048+ /*force*/0);
33049+ if (unlikely(err == -EWOULDBLOCK)) {
33050+ pr_warn("cannot retry for NFSv4 delegation"
33051+ " for an internal unlink\n");
33052+ iput(delegated);
33053+ }
1facf9fc 33054+ } else {
523b37e3 33055+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
1facf9fc 33056+ err = 0;
33057+ }
33058+ dput(wbr->wbr_whbase);
33059+ wbr->wbr_whbase = NULL;
33060+ if (!err)
86dc4139 33061+ err = au_wh_init(a->br, a->sb);
1facf9fc 33062+ wbr_wh_write_unlock(wbr);
5afbbe0d 33063+ au_hn_inode_unlock(hdir);
1308ab2a 33064+ di_read_unlock(a->sb->s_root, AuLock_IR);
076b876e
AM
33065+ if (!err)
33066+ au_fhsm_wrote(a->sb, bindex, /*force*/0);
1facf9fc 33067+
4f0767ce 33068+out:
1facf9fc 33069+ if (wbr)
33070+ atomic_dec(&wbr->wbr_wh_running);
5afbbe0d 33071+ au_br_put(a->br);
1facf9fc 33072+ si_write_unlock(a->sb);
027c5e7a 33073+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
f0c0a007 33074+ au_delayed_kfree(arg);
1facf9fc 33075+ if (unlikely(err))
33076+ AuIOErr("err %d\n", err);
33077+}
33078+
33079+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
33080+{
33081+ int do_dec, wkq_err;
33082+ struct reinit_br_wh *arg;
33083+
33084+ do_dec = 1;
33085+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
33086+ goto out;
33087+
33088+ /* ignore ENOMEM */
33089+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
33090+ if (arg) {
33091+ /*
33092+ * dec(wh_running), kfree(arg) and dec(br_count)
33093+ * in reinit function
33094+ */
33095+ arg->sb = sb;
33096+ arg->br = br;
5afbbe0d 33097+ au_br_get(br);
53392da6 33098+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 33099+ if (unlikely(wkq_err)) {
33100+ atomic_dec(&br->br_wbr->wbr_wh_running);
5afbbe0d 33101+ au_br_put(br);
f0c0a007 33102+ au_delayed_kfree(arg);
1facf9fc 33103+ }
33104+ do_dec = 0;
33105+ }
33106+
4f0767ce 33107+out:
1facf9fc 33108+ if (do_dec)
33109+ atomic_dec(&br->br_wbr->wbr_wh_running);
33110+}
33111+
33112+/* ---------------------------------------------------------------------- */
33113+
33114+/*
33115+ * create the whiteout @wh.
33116+ */
33117+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
33118+ struct dentry *wh)
33119+{
33120+ int err;
33121+ struct path h_path = {
33122+ .dentry = wh
33123+ };
33124+ struct au_branch *br;
33125+ struct au_wbr *wbr;
33126+ struct dentry *h_parent;
523b37e3 33127+ struct inode *h_dir, *delegated;
1facf9fc 33128+
33129+ h_parent = wh->d_parent; /* dir inode is locked */
5527c038 33130+ h_dir = d_inode(h_parent);
1facf9fc 33131+ IMustLock(h_dir);
33132+
33133+ br = au_sbr(sb, bindex);
86dc4139 33134+ h_path.mnt = au_br_mnt(br);
1facf9fc 33135+ wbr = br->br_wbr;
33136+ wbr_wh_read_lock(wbr);
33137+ if (wbr->wbr_whbase) {
523b37e3
AM
33138+ delegated = NULL;
33139+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
33140+ if (unlikely(err == -EWOULDBLOCK)) {
33141+ pr_warn("cannot retry for NFSv4 delegation"
33142+ " for an internal link\n");
33143+ iput(delegated);
33144+ }
1facf9fc 33145+ if (!err || err != -EMLINK)
33146+ goto out;
33147+
33148+ /* link count full. re-initialize br_whbase. */
33149+ kick_reinit_br_wh(sb, br);
33150+ }
33151+
33152+ /* return this error in this context */
b4510431 33153+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
076b876e
AM
33154+ if (!err)
33155+ au_fhsm_wrote(sb, bindex, /*force*/0);
1facf9fc 33156+
4f0767ce 33157+out:
1facf9fc 33158+ wbr_wh_read_unlock(wbr);
33159+ return err;
33160+}
33161+
33162+/* ---------------------------------------------------------------------- */
33163+
33164+/*
33165+ * create or remove the diropq.
33166+ */
33167+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
33168+ unsigned int flags)
33169+{
33170+ struct dentry *opq_dentry, *h_dentry;
33171+ struct super_block *sb;
33172+ struct au_branch *br;
33173+ int err;
33174+
33175+ sb = dentry->d_sb;
33176+ br = au_sbr(sb, bindex);
33177+ h_dentry = au_h_dptr(dentry, bindex);
b4510431 33178+ opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
1facf9fc 33179+ if (IS_ERR(opq_dentry))
33180+ goto out;
33181+
33182+ if (au_ftest_diropq(flags, CREATE)) {
33183+ err = link_or_create_wh(sb, bindex, opq_dentry);
33184+ if (!err) {
33185+ au_set_dbdiropq(dentry, bindex);
33186+ goto out; /* success */
33187+ }
33188+ } else {
33189+ struct path tmp = {
33190+ .dentry = opq_dentry,
86dc4139 33191+ .mnt = au_br_mnt(br)
1facf9fc 33192+ };
5527c038 33193+ err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
1facf9fc 33194+ if (!err)
33195+ au_set_dbdiropq(dentry, -1);
33196+ }
33197+ dput(opq_dentry);
33198+ opq_dentry = ERR_PTR(err);
33199+
4f0767ce 33200+out:
1facf9fc 33201+ return opq_dentry;
33202+}
33203+
33204+struct do_diropq_args {
33205+ struct dentry **errp;
33206+ struct dentry *dentry;
33207+ aufs_bindex_t bindex;
33208+ unsigned int flags;
33209+};
33210+
33211+static void call_do_diropq(void *args)
33212+{
33213+ struct do_diropq_args *a = args;
33214+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
33215+}
33216+
33217+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33218+ unsigned int flags)
33219+{
33220+ struct dentry *diropq, *h_dentry;
33221+
33222+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 33223+ if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
1facf9fc 33224+ diropq = do_diropq(dentry, bindex, flags);
33225+ else {
33226+ int wkq_err;
33227+ struct do_diropq_args args = {
33228+ .errp = &diropq,
33229+ .dentry = dentry,
33230+ .bindex = bindex,
33231+ .flags = flags
33232+ };
33233+
33234+ wkq_err = au_wkq_wait(call_do_diropq, &args);
33235+ if (unlikely(wkq_err))
33236+ diropq = ERR_PTR(wkq_err);
33237+ }
33238+
33239+ return diropq;
33240+}
33241+
33242+/* ---------------------------------------------------------------------- */
33243+
33244+/*
33245+ * lookup whiteout dentry.
33246+ * @h_parent: lower parent dentry which must exist and be locked
33247+ * @base_name: name of dentry which will be whiteouted
33248+ * returns dentry for whiteout.
33249+ */
33250+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33251+ struct au_branch *br)
33252+{
33253+ int err;
33254+ struct qstr wh_name;
33255+ struct dentry *wh_dentry;
33256+
33257+ err = au_wh_name_alloc(&wh_name, base_name);
33258+ wh_dentry = ERR_PTR(err);
33259+ if (!err) {
b4510431 33260+ wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
f0c0a007 33261+ au_delayed_kfree(wh_name.name);
1facf9fc 33262+ }
33263+ return wh_dentry;
33264+}
33265+
33266+/*
33267+ * link/create a whiteout for @dentry on @bindex.
33268+ */
33269+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33270+ struct dentry *h_parent)
33271+{
33272+ struct dentry *wh_dentry;
33273+ struct super_block *sb;
33274+ int err;
33275+
33276+ sb = dentry->d_sb;
33277+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
5527c038 33278+ if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) {
1facf9fc 33279+ err = link_or_create_wh(sb, bindex, wh_dentry);
076b876e 33280+ if (!err) {
1facf9fc 33281+ au_set_dbwh(dentry, bindex);
076b876e
AM
33282+ au_fhsm_wrote(sb, bindex, /*force*/0);
33283+ } else {
1facf9fc 33284+ dput(wh_dentry);
33285+ wh_dentry = ERR_PTR(err);
33286+ }
33287+ }
33288+
33289+ return wh_dentry;
33290+}
33291+
33292+/* ---------------------------------------------------------------------- */
33293+
33294+/* Delete all whiteouts in this directory on branch bindex. */
33295+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
33296+ aufs_bindex_t bindex, struct au_branch *br)
33297+{
33298+ int err;
33299+ unsigned long ul, n;
33300+ struct qstr wh_name;
33301+ char *p;
33302+ struct hlist_head *head;
c06a8ce3 33303+ struct au_vdir_wh *pos;
1facf9fc 33304+ struct au_vdir_destr *str;
33305+
33306+ err = -ENOMEM;
537831f9 33307+ p = (void *)__get_free_page(GFP_NOFS);
1facf9fc 33308+ wh_name.name = p;
33309+ if (unlikely(!wh_name.name))
33310+ goto out;
33311+
33312+ err = 0;
33313+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
33314+ p += AUFS_WH_PFX_LEN;
33315+ n = whlist->nh_num;
33316+ head = whlist->nh_head;
33317+ for (ul = 0; !err && ul < n; ul++, head++) {
c06a8ce3
AM
33318+ hlist_for_each_entry(pos, head, wh_hash) {
33319+ if (pos->wh_bindex != bindex)
1facf9fc 33320+ continue;
33321+
c06a8ce3 33322+ str = &pos->wh_str;
1facf9fc 33323+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
33324+ memcpy(p, str->name, str->len);
33325+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
33326+ err = unlink_wh_name(h_dentry, &wh_name, br);
33327+ if (!err)
33328+ continue;
33329+ break;
33330+ }
33331+ AuIOErr("whiteout name too long %.*s\n",
33332+ str->len, str->name);
33333+ err = -EIO;
33334+ break;
33335+ }
33336+ }
f0c0a007 33337+ au_delayed_free_page((unsigned long)wh_name.name);
1facf9fc 33338+
4f0767ce 33339+out:
1facf9fc 33340+ return err;
33341+}
33342+
33343+struct del_wh_children_args {
33344+ int *errp;
33345+ struct dentry *h_dentry;
1308ab2a 33346+ struct au_nhash *whlist;
1facf9fc 33347+ aufs_bindex_t bindex;
33348+ struct au_branch *br;
33349+};
33350+
33351+static void call_del_wh_children(void *args)
33352+{
33353+ struct del_wh_children_args *a = args;
1308ab2a 33354+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 33355+}
33356+
33357+/* ---------------------------------------------------------------------- */
33358+
33359+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
33360+{
33361+ struct au_whtmp_rmdir *whtmp;
dece6358 33362+ int err;
1308ab2a 33363+ unsigned int rdhash;
dece6358
AM
33364+
33365+ SiMustAnyLock(sb);
1facf9fc 33366+
be52b249 33367+ whtmp = kzalloc(sizeof(*whtmp), gfp);
dece6358
AM
33368+ if (unlikely(!whtmp)) {
33369+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 33370+ goto out;
dece6358 33371+ }
1facf9fc 33372+
1308ab2a 33373+ /* no estimation for dir size */
33374+ rdhash = au_sbi(sb)->si_rdhash;
33375+ if (!rdhash)
33376+ rdhash = AUFS_RDHASH_DEF;
33377+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
33378+ if (unlikely(err)) {
f0c0a007 33379+ au_delayed_kfree(whtmp);
1308ab2a 33380+ whtmp = ERR_PTR(err);
33381+ }
dece6358 33382+
4f0767ce 33383+out:
dece6358 33384+ return whtmp;
1facf9fc 33385+}
33386+
33387+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
33388+{
027c5e7a 33389+ if (whtmp->br)
5afbbe0d 33390+ au_br_put(whtmp->br);
1facf9fc 33391+ dput(whtmp->wh_dentry);
33392+ iput(whtmp->dir);
dece6358 33393+ au_nhash_wh_free(&whtmp->whlist);
f0c0a007 33394+ au_delayed_kfree(whtmp);
1facf9fc 33395+}
33396+
33397+/*
33398+ * rmdir the whiteouted temporary named dir @h_dentry.
33399+ * @whlist: whiteouted children.
33400+ */
33401+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33402+ struct dentry *wh_dentry, struct au_nhash *whlist)
33403+{
33404+ int err;
2000de60 33405+ unsigned int h_nlink;
1facf9fc 33406+ struct path h_tmp;
33407+ struct inode *wh_inode, *h_dir;
33408+ struct au_branch *br;
33409+
5527c038 33410+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
1facf9fc 33411+ IMustLock(h_dir);
33412+
33413+ br = au_sbr(dir->i_sb, bindex);
5527c038 33414+ wh_inode = d_inode(wh_dentry);
febd17d6 33415+ inode_lock_nested(wh_inode, AuLsc_I_CHILD);
1facf9fc 33416+
33417+ /*
33418+ * someone else might change some whiteouts while we were sleeping.
33419+ * it means this whlist may have an obsoleted entry.
33420+ */
33421+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
33422+ err = del_wh_children(wh_dentry, whlist, bindex, br);
33423+ else {
33424+ int wkq_err;
33425+ struct del_wh_children_args args = {
33426+ .errp = &err,
33427+ .h_dentry = wh_dentry,
1308ab2a 33428+ .whlist = whlist,
1facf9fc 33429+ .bindex = bindex,
33430+ .br = br
33431+ };
33432+
33433+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
33434+ if (unlikely(wkq_err))
33435+ err = wkq_err;
33436+ }
febd17d6 33437+ inode_unlock(wh_inode);
1facf9fc 33438+
33439+ if (!err) {
33440+ h_tmp.dentry = wh_dentry;
86dc4139 33441+ h_tmp.mnt = au_br_mnt(br);
2000de60 33442+ h_nlink = h_dir->i_nlink;
1facf9fc 33443+ err = vfsub_rmdir(h_dir, &h_tmp);
2000de60
JR
33444+ /* some filesystems don't change the parent nlink in some cases */
33445+ h_nlink -= h_dir->i_nlink;
1facf9fc 33446+ }
33447+
33448+ if (!err) {
5afbbe0d 33449+ if (au_ibtop(dir) == bindex) {
7f207e10 33450+ /* todo: dir->i_mutex is necessary */
1facf9fc 33451+ au_cpup_attr_timesizes(dir);
2000de60
JR
33452+ if (h_nlink)
33453+ vfsub_drop_nlink(dir);
1facf9fc 33454+ }
33455+ return 0; /* success */
33456+ }
33457+
523b37e3 33458+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
1facf9fc 33459+ return err;
33460+}
33461+
33462+static void call_rmdir_whtmp(void *args)
33463+{
33464+ int err;
e49829fe 33465+ aufs_bindex_t bindex;
1facf9fc 33466+ struct au_whtmp_rmdir *a = args;
33467+ struct super_block *sb;
33468+ struct dentry *h_parent;
33469+ struct inode *h_dir;
1facf9fc 33470+ struct au_hinode *hdir;
33471+
33472+ /* rmdir by nfsd may cause deadlock with this i_mutex */
febd17d6 33473+ /* inode_lock(a->dir); */
e49829fe 33474+ err = -EROFS;
1facf9fc 33475+ sb = a->dir->i_sb;
e49829fe
JR
33476+ si_read_lock(sb, !AuLock_FLUSH);
33477+ if (!au_br_writable(a->br->br_perm))
33478+ goto out;
33479+ bindex = au_br_index(sb, a->br->br_id);
33480+ if (unlikely(bindex < 0))
1facf9fc 33481+ goto out;
33482+
33483+ err = -EIO;
1facf9fc 33484+ ii_write_lock_parent(a->dir);
33485+ h_parent = dget_parent(a->wh_dentry);
5527c038 33486+ h_dir = d_inode(h_parent);
e49829fe 33487+ hdir = au_hi(a->dir, bindex);
86dc4139
AM
33488+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
33489+ if (unlikely(err))
33490+ goto out_mnt;
5afbbe0d 33491+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
33492+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
33493+ a->br);
86dc4139
AM
33494+ if (!err)
33495+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
5afbbe0d 33496+ au_hn_inode_unlock(hdir);
86dc4139
AM
33497+ vfsub_mnt_drop_write(au_br_mnt(a->br));
33498+
33499+out_mnt:
1facf9fc 33500+ dput(h_parent);
33501+ ii_write_unlock(a->dir);
4f0767ce 33502+out:
febd17d6 33503+ /* inode_unlock(a->dir); */
1facf9fc 33504+ au_whtmp_rmdir_free(a);
027c5e7a
AM
33505+ si_read_unlock(sb);
33506+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33507+ if (unlikely(err))
33508+ AuIOErr("err %d\n", err);
33509+}
33510+
33511+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33512+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
33513+{
33514+ int wkq_err;
e49829fe 33515+ struct super_block *sb;
1facf9fc 33516+
33517+ IMustLock(dir);
33518+
33519+ /* all post-process will be done in call_rmdir_whtmp(). */
e49829fe 33520+ sb = dir->i_sb;
1facf9fc 33521+ args->dir = au_igrab(dir);
e49829fe 33522+ args->br = au_sbr(sb, bindex);
5afbbe0d 33523+ au_br_get(args->br);
1facf9fc 33524+ args->wh_dentry = dget(wh_dentry);
53392da6 33525+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 33526+ if (unlikely(wkq_err)) {
523b37e3 33527+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
1facf9fc 33528+ au_whtmp_rmdir_free(args);
33529+ }
33530+}
7f207e10
AM
33531diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
33532--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 33533+++ linux/fs/aufs/whout.h 2016-10-09 16:55:36.496035060 +0200
076b876e 33534@@ -0,0 +1,85 @@
1facf9fc 33535+/*
8cdd5066 33536+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33537+ *
33538+ * This program, aufs is free software; you can redistribute it and/or modify
33539+ * it under the terms of the GNU General Public License as published by
33540+ * the Free Software Foundation; either version 2 of the License, or
33541+ * (at your option) any later version.
dece6358
AM
33542+ *
33543+ * This program is distributed in the hope that it will be useful,
33544+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33545+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33546+ * GNU General Public License for more details.
33547+ *
33548+ * You should have received a copy of the GNU General Public License
523b37e3 33549+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33550+ */
33551+
33552+/*
33553+ * whiteout for logical deletion and opaque directory
33554+ */
33555+
33556+#ifndef __AUFS_WHOUT_H__
33557+#define __AUFS_WHOUT_H__
33558+
33559+#ifdef __KERNEL__
33560+
1facf9fc 33561+#include "dir.h"
33562+
33563+/* whout.c */
33564+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
076b876e
AM
33565+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
33566+int au_diropq_test(struct dentry *h_dentry);
7e9cd9fe 33567+struct au_branch;
1facf9fc 33568+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
33569+ struct qstr *prefix);
33570+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
33571+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
33572+ struct dentry *dentry);
86dc4139 33573+int au_wh_init(struct au_branch *br, struct super_block *sb);
1facf9fc 33574+
33575+/* diropq flags */
33576+#define AuDiropq_CREATE 1
33577+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
33578+#define au_fset_diropq(flags, name) \
33579+ do { (flags) |= AuDiropq_##name; } while (0)
33580+#define au_fclr_diropq(flags, name) \
33581+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 33582+
33583+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33584+ unsigned int flags);
33585+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33586+ struct au_branch *br);
33587+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33588+ struct dentry *h_parent);
33589+
33590+/* real rmdir for the whiteout-ed dir */
33591+struct au_whtmp_rmdir {
33592+ struct inode *dir;
e49829fe 33593+ struct au_branch *br;
1facf9fc 33594+ struct dentry *wh_dentry;
dece6358 33595+ struct au_nhash whlist;
1facf9fc 33596+};
33597+
33598+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
33599+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
33600+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33601+ struct dentry *wh_dentry, struct au_nhash *whlist);
33602+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33603+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
33604+
33605+/* ---------------------------------------------------------------------- */
33606+
33607+static inline struct dentry *au_diropq_create(struct dentry *dentry,
33608+ aufs_bindex_t bindex)
33609+{
33610+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
33611+}
33612+
33613+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
33614+{
33615+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
33616+}
33617+
33618+#endif /* __KERNEL__ */
33619+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
33620diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
33621--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 33622+++ linux/fs/aufs/wkq.c 2016-10-09 16:55:36.496035060 +0200
f0c0a007 33623@@ -0,0 +1,213 @@
1facf9fc 33624+/*
8cdd5066 33625+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33626+ *
33627+ * This program, aufs is free software; you can redistribute it and/or modify
33628+ * it under the terms of the GNU General Public License as published by
33629+ * the Free Software Foundation; either version 2 of the License, or
33630+ * (at your option) any later version.
dece6358
AM
33631+ *
33632+ * This program is distributed in the hope that it will be useful,
33633+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33634+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33635+ * GNU General Public License for more details.
33636+ *
33637+ * You should have received a copy of the GNU General Public License
523b37e3 33638+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33639+ */
33640+
33641+/*
33642+ * workqueue for asynchronous/super-io operations
33643+ * todo: try new credential scheme
33644+ */
33645+
dece6358 33646+#include <linux/module.h>
1facf9fc 33647+#include "aufs.h"
33648+
9dbd164d 33649+/* internal workqueue named AUFS_WKQ_NAME */
b752ccd1 33650+
9dbd164d 33651+static struct workqueue_struct *au_wkq;
1facf9fc 33652+
33653+struct au_wkinfo {
33654+ struct work_struct wk;
7f207e10 33655+ struct kobject *kobj;
1facf9fc 33656+
33657+ unsigned int flags; /* see wkq.h */
33658+
33659+ au_wkq_func_t func;
33660+ void *args;
33661+
1facf9fc 33662+ struct completion *comp;
33663+};
33664+
33665+/* ---------------------------------------------------------------------- */
33666+
1facf9fc 33667+static void wkq_func(struct work_struct *wk)
33668+{
33669+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
33670+
2dfbb274 33671+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
7f207e10
AM
33672+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
33673+
1facf9fc 33674+ wkinfo->func(wkinfo->args);
1facf9fc 33675+ if (au_ftest_wkq(wkinfo->flags, WAIT))
33676+ complete(wkinfo->comp);
33677+ else {
7f207e10 33678+ kobject_put(wkinfo->kobj);
9dbd164d 33679+ module_put(THIS_MODULE); /* todo: ?? */
f0c0a007 33680+ au_delayed_kfree(wkinfo);
1facf9fc 33681+ }
33682+}
33683+
33684+/*
33685+ * Since struct completion is large, try allocating it dynamically.
33686+ */
c2b27bf2 33687+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
1facf9fc 33688+#define AuWkqCompDeclare(name) struct completion *comp = NULL
33689+
33690+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33691+{
33692+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
33693+ if (*comp) {
33694+ init_completion(*comp);
33695+ wkinfo->comp = *comp;
33696+ return 0;
33697+ }
33698+ return -ENOMEM;
33699+}
33700+
33701+static void au_wkq_comp_free(struct completion *comp)
33702+{
f0c0a007 33703+ au_delayed_kfree(comp);
1facf9fc 33704+}
33705+
33706+#else
33707+
33708+/* no braces */
33709+#define AuWkqCompDeclare(name) \
33710+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
33711+ struct completion *comp = &_ ## name
33712+
33713+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33714+{
33715+ wkinfo->comp = *comp;
33716+ return 0;
33717+}
33718+
33719+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
33720+{
33721+ /* empty */
33722+}
33723+#endif /* 4KSTACKS */
33724+
53392da6 33725+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 33726+{
53392da6
AM
33727+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
33728+ if (au_wkq_test()) {
38d290e6
JR
33729+ AuWarn1("wkq from wkq, unless silly-rename on NFS,"
33730+ " due to a dead dir by UDBA?\n");
53392da6
AM
33731+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
33732+ }
33733+ } else
33734+ au_dbg_verify_kthread();
33735+
33736+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 33737+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
9dbd164d 33738+ queue_work(au_wkq, &wkinfo->wk);
4a4d8108
AM
33739+ } else {
33740+ INIT_WORK(&wkinfo->wk, wkq_func);
33741+ schedule_work(&wkinfo->wk);
33742+ }
1facf9fc 33743+}
33744+
7f207e10
AM
33745+/*
33746+ * Be careful. It is easy to make deadlock happen.
33747+ * processA: lock, wkq and wait
33748+ * processB: wkq and wait, lock in wkq
33749+ * --> deadlock
33750+ */
b752ccd1 33751+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 33752+{
33753+ int err;
33754+ AuWkqCompDeclare(comp);
33755+ struct au_wkinfo wkinfo = {
b752ccd1 33756+ .flags = flags,
1facf9fc 33757+ .func = func,
33758+ .args = args
33759+ };
33760+
33761+ err = au_wkq_comp_alloc(&wkinfo, &comp);
33762+ if (!err) {
53392da6 33763+ au_wkq_run(&wkinfo);
1facf9fc 33764+ /* no timeout, no interrupt */
33765+ wait_for_completion(wkinfo.comp);
33766+ au_wkq_comp_free(comp);
4a4d8108 33767+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 33768+ }
33769+
33770+ return err;
33771+
33772+}
33773+
027c5e7a
AM
33774+/*
33775+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
33776+ * problem in a concurrent umounting.
33777+ */
53392da6
AM
33778+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33779+ unsigned int flags)
1facf9fc 33780+{
33781+ int err;
33782+ struct au_wkinfo *wkinfo;
33783+
f0c0a007 33784+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
1facf9fc 33785+
33786+ /*
33787+ * wkq_func() must free this wkinfo.
33788+ * it highly depends upon the implementation of workqueue.
33789+ */
33790+ err = 0;
33791+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
33792+ if (wkinfo) {
7f207e10 33793+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 33794+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 33795+ wkinfo->func = func;
33796+ wkinfo->args = args;
33797+ wkinfo->comp = NULL;
7f207e10 33798+ kobject_get(wkinfo->kobj);
9dbd164d 33799+ __module_get(THIS_MODULE); /* todo: ?? */
1facf9fc 33800+
53392da6 33801+ au_wkq_run(wkinfo);
1facf9fc 33802+ } else {
33803+ err = -ENOMEM;
e49829fe 33804+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33805+ }
33806+
33807+ return err;
33808+}
33809+
33810+/* ---------------------------------------------------------------------- */
33811+
33812+void au_nwt_init(struct au_nowait_tasks *nwt)
33813+{
f0c0a007
AM
33814+ atomic_set(&nwt->nw_len, 0);
33815+ /* smp_mb(); */ /* atomic_set */
1facf9fc 33816+ init_waitqueue_head(&nwt->nw_wq);
33817+}
33818+
33819+void au_wkq_fin(void)
33820+{
9dbd164d 33821+ destroy_workqueue(au_wkq);
1facf9fc 33822+}
33823+
33824+int __init au_wkq_init(void)
33825+{
9dbd164d 33826+ int err;
b752ccd1
AM
33827+
33828+ err = 0;
86dc4139 33829+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
9dbd164d
AM
33830+ if (IS_ERR(au_wkq))
33831+ err = PTR_ERR(au_wkq);
33832+ else if (!au_wkq)
33833+ err = -ENOMEM;
b752ccd1
AM
33834+
33835+ return err;
1facf9fc 33836+}
7f207e10
AM
33837diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
33838--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 33839+++ linux/fs/aufs/wkq.h 2016-10-09 16:55:36.496035060 +0200
f0c0a007 33840@@ -0,0 +1,93 @@
1facf9fc 33841+/*
8cdd5066 33842+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33843+ *
33844+ * This program, aufs is free software; you can redistribute it and/or modify
33845+ * it under the terms of the GNU General Public License as published by
33846+ * the Free Software Foundation; either version 2 of the License, or
33847+ * (at your option) any later version.
dece6358
AM
33848+ *
33849+ * This program is distributed in the hope that it will be useful,
33850+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33851+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33852+ * GNU General Public License for more details.
33853+ *
33854+ * You should have received a copy of the GNU General Public License
523b37e3 33855+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33856+ */
33857+
33858+/*
33859+ * workqueue for asynchronous/super-io operations
33860+ * todo: try new credentials management scheme
33861+ */
33862+
33863+#ifndef __AUFS_WKQ_H__
33864+#define __AUFS_WKQ_H__
33865+
33866+#ifdef __KERNEL__
33867+
5afbbe0d
AM
33868+#include <linux/percpu_counter.h>
33869+
dece6358
AM
33870+struct super_block;
33871+
1facf9fc 33872+/* ---------------------------------------------------------------------- */
33873+
33874+/*
33875+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
33876+ */
33877+struct au_nowait_tasks {
f0c0a007 33878+ atomic_t nw_len;
1facf9fc 33879+ wait_queue_head_t nw_wq;
33880+};
33881+
33882+/* ---------------------------------------------------------------------- */
33883+
33884+typedef void (*au_wkq_func_t)(void *args);
33885+
33886+/* wkq flags */
33887+#define AuWkq_WAIT 1
9dbd164d 33888+#define AuWkq_NEST (1 << 1)
1facf9fc 33889+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
33890+#define au_fset_wkq(flags, name) \
33891+ do { (flags) |= AuWkq_##name; } while (0)
33892+#define au_fclr_wkq(flags, name) \
33893+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 33894+
9dbd164d
AM
33895+#ifndef CONFIG_AUFS_HNOTIFY
33896+#undef AuWkq_NEST
33897+#define AuWkq_NEST 0
33898+#endif
33899+
1facf9fc 33900+/* wkq.c */
b752ccd1 33901+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
33902+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33903+ unsigned int flags);
1facf9fc 33904+void au_nwt_init(struct au_nowait_tasks *nwt);
33905+int __init au_wkq_init(void);
33906+void au_wkq_fin(void);
33907+
33908+/* ---------------------------------------------------------------------- */
33909+
53392da6
AM
33910+static inline int au_wkq_test(void)
33911+{
33912+ return current->flags & PF_WQ_WORKER;
33913+}
33914+
b752ccd1 33915+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 33916+{
b752ccd1 33917+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 33918+}
33919+
33920+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
33921+{
f0c0a007 33922+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 33923+ wake_up_all(&nwt->nw_wq);
33924+}
33925+
33926+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
33927+{
f0c0a007 33928+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
1facf9fc 33929+ return 0;
33930+}
33931+
33932+#endif /* __KERNEL__ */
33933+#endif /* __AUFS_WKQ_H__ */
c1595e42
JR
33934diff -urN /usr/share/empty/fs/aufs/xattr.c linux/fs/aufs/xattr.c
33935--- /usr/share/empty/fs/aufs/xattr.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 33936+++ linux/fs/aufs/xattr.c 2016-10-09 16:55:36.496035060 +0200
f0c0a007 33937@@ -0,0 +1,347 @@
c1595e42 33938+/*
8cdd5066 33939+ * Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
33940+ *
33941+ * This program, aufs is free software; you can redistribute it and/or modify
33942+ * it under the terms of the GNU General Public License as published by
33943+ * the Free Software Foundation; either version 2 of the License, or
33944+ * (at your option) any later version.
33945+ *
33946+ * This program is distributed in the hope that it will be useful,
33947+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33948+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33949+ * GNU General Public License for more details.
33950+ *
33951+ * You should have received a copy of the GNU General Public License
33952+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
33953+ */
33954+
33955+/*
33956+ * handling xattr functions
33957+ */
33958+
33959+#include <linux/xattr.h>
33960+#include "aufs.h"
33961+
33962+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
33963+{
33964+ if (!ignore_flags)
33965+ goto out;
33966+ switch (err) {
33967+ case -ENOMEM:
33968+ case -EDQUOT:
33969+ goto out;
33970+ }
33971+
33972+ if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
33973+ err = 0;
33974+ goto out;
33975+ }
33976+
33977+#define cmp(brattr, prefix) do { \
33978+ if (!strncmp(name, XATTR_##prefix##_PREFIX, \
33979+ XATTR_##prefix##_PREFIX_LEN)) { \
33980+ if (ignore_flags & AuBrAttr_ICEX_##brattr) \
33981+ err = 0; \
33982+ goto out; \
33983+ } \
33984+ } while (0)
33985+
33986+ cmp(SEC, SECURITY);
33987+ cmp(SYS, SYSTEM);
33988+ cmp(TR, TRUSTED);
33989+ cmp(USR, USER);
33990+#undef cmp
33991+
33992+ if (ignore_flags & AuBrAttr_ICEX_OTH)
33993+ err = 0;
33994+
33995+out:
33996+ return err;
33997+}
33998+
33999+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
34000+
34001+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe
AM
34002+ char *name, char **buf, unsigned int ignore_flags,
34003+ unsigned int verbose)
c1595e42
JR
34004+{
34005+ int err;
34006+ ssize_t ssz;
34007+ struct inode *h_idst;
34008+
34009+ ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
34010+ err = ssz;
34011+ if (unlikely(err <= 0)) {
c1595e42
JR
34012+ if (err == -ENODATA
34013+ || (err == -EOPNOTSUPP
b912730e 34014+ && ((ignore_flags & au_xattr_out_of_list)
5527c038 34015+ || (au_test_nfs_noacl(d_inode(h_src))
b912730e
AM
34016+ && (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
34017+ || !strcmp(name,
34018+ XATTR_NAME_POSIX_ACL_DEFAULT))))
34019+ ))
c1595e42 34020+ err = 0;
b912730e
AM
34021+ if (err && (verbose || au_debug_test()))
34022+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
34023+ goto out;
34024+ }
34025+
34026+ /* unlock the h_dst inode temporarily around vfsub_setxattr(), then re-lock it */
5527c038 34027+ h_idst = d_inode(h_dst);
febd17d6 34028+ inode_unlock(h_idst);
c1595e42 34029+ err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
febd17d6 34030+ inode_lock_nested(h_idst, AuLsc_I_CHILD2);
c1595e42 34031+ if (unlikely(err)) {
7e9cd9fe
AM
34032+ if (verbose || au_debug_test())
34033+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
34034+ err = au_xattr_ignore(err, name, ignore_flags);
34035+ }
34036+
34037+out:
34038+ return err;
34039+}
34040+
7e9cd9fe
AM
34041+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
34042+ unsigned int verbose)
c1595e42
JR
34043+{
34044+ int err, unlocked, acl_access, acl_default;
34045+ ssize_t ssz;
34046+ struct inode *h_isrc, *h_idst;
34047+ char *value, *p, *o, *e;
34048+
34049+ /* try stopping to update the source inode while we are referencing */
7e9cd9fe 34050+ /* there should not be the parent-child relationship between them */
5527c038
JR
34051+ h_isrc = d_inode(h_src);
34052+ h_idst = d_inode(h_dst);
febd17d6
JR
34053+ inode_unlock(h_idst);
34054+ inode_lock_nested(h_isrc, AuLsc_I_CHILD);
34055+ inode_lock_nested(h_idst, AuLsc_I_CHILD2);
c1595e42
JR
34056+ unlocked = 0;
34057+
34058+ /* some filesystems don't list POSIX ACL, for example tmpfs */
34059+ ssz = vfs_listxattr(h_src, NULL, 0);
34060+ err = ssz;
34061+ if (unlikely(err < 0)) {
34062+ AuTraceErr(err);
34063+ if (err == -ENODATA
34064+ || err == -EOPNOTSUPP)
34065+ err = 0; /* ignore */
34066+ goto out;
34067+ }
34068+
34069+ err = 0;
34070+ p = NULL;
34071+ o = NULL;
34072+ if (ssz) {
34073+ err = -ENOMEM;
34074+ p = kmalloc(ssz, GFP_NOFS);
34075+ o = p;
34076+ if (unlikely(!p))
34077+ goto out;
34078+ err = vfs_listxattr(h_src, p, ssz);
34079+ }
febd17d6 34080+ inode_unlock(h_isrc);
c1595e42
JR
34081+ unlocked = 1;
34082+ AuDbg("err %d, ssz %zd\n", err, ssz);
34083+ if (unlikely(err < 0))
34084+ goto out_free;
34085+
34086+ err = 0;
34087+ e = p + ssz;
34088+ value = NULL;
34089+ acl_access = 0;
34090+ acl_default = 0;
34091+ while (!err && p < e) {
34092+ acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
34093+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
34094+ acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
34095+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
34096+ - 1);
7e9cd9fe
AM
34097+ err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
34098+ verbose);
c1595e42
JR
34099+ p += strlen(p) + 1;
34100+ }
34101+ AuTraceErr(err);
34102+ ignore_flags |= au_xattr_out_of_list;
34103+ if (!err && !acl_access) {
34104+ err = au_do_cpup_xattr(h_dst, h_src,
34105+ XATTR_NAME_POSIX_ACL_ACCESS, &value,
7e9cd9fe 34106+ ignore_flags, verbose);
c1595e42
JR
34107+ AuTraceErr(err);
34108+ }
34109+ if (!err && !acl_default) {
34110+ err = au_do_cpup_xattr(h_dst, h_src,
34111+ XATTR_NAME_POSIX_ACL_DEFAULT, &value,
7e9cd9fe 34112+ ignore_flags, verbose);
c1595e42
JR
34113+ AuTraceErr(err);
34114+ }
34115+
f0c0a007
AM
34116+ if (value)
34117+ au_delayed_kfree(value);
c1595e42
JR
34118+
34119+out_free:
f0c0a007
AM
34120+ if (o)
34121+ au_delayed_kfree(o);
c1595e42
JR
34122+out:
34123+ if (!unlocked)
febd17d6 34124+ inode_unlock(h_isrc);
c1595e42
JR
34125+ AuTraceErr(err);
34126+ return err;
34127+}
34128+
34129+/* ---------------------------------------------------------------------- */
34130+
34131+enum {
34132+ AU_XATTR_LIST,
34133+ AU_XATTR_GET
34134+};
34135+
34136+struct au_lgxattr {
34137+ int type;
34138+ union {
34139+ struct {
34140+ char *list;
34141+ size_t size;
34142+ } list;
34143+ struct {
34144+ const char *name;
34145+ void *value;
34146+ size_t size;
34147+ } get;
34148+ } u;
34149+};
34150+
34151+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
34152+{
34153+ ssize_t err;
34154+ struct path h_path;
34155+ struct super_block *sb;
34156+
34157+ sb = dentry->d_sb;
34158+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
34159+ if (unlikely(err))
34160+ goto out;
34161+ err = au_h_path_getattr(dentry, /*force*/1, &h_path);
34162+ if (unlikely(err))
34163+ goto out_si;
34164+ if (unlikely(!h_path.dentry))
34165+ /* illegally overlapped or something */
34166+ goto out_di; /* pretending success */
34167+
34168+ /* always topmost entry only */
34169+ switch (arg->type) {
34170+ case AU_XATTR_LIST:
34171+ err = vfs_listxattr(h_path.dentry,
34172+ arg->u.list.list, arg->u.list.size);
34173+ break;
34174+ case AU_XATTR_GET:
5afbbe0d 34175+ AuDebugOn(d_is_negative(h_path.dentry));
c1595e42
JR
34176+ err = vfs_getxattr(h_path.dentry,
34177+ arg->u.get.name, arg->u.get.value,
34178+ arg->u.get.size);
34179+ break;
34180+ }
34181+
34182+out_di:
34183+ di_read_unlock(dentry, AuLock_IR);
34184+out_si:
34185+ si_read_unlock(sb);
34186+out:
34187+ AuTraceErr(err);
34188+ return err;
34189+}
34190+
34191+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
34192+{
34193+ struct au_lgxattr arg = {
34194+ .type = AU_XATTR_LIST,
34195+ .u.list = {
34196+ .list = list,
34197+ .size = size
34198+ },
34199+ };
34200+
34201+ return au_lgxattr(dentry, &arg);
34202+}
34203+
5afbbe0d
AM
34204+ssize_t aufs_getxattr(struct dentry *dentry, struct inode *inode __maybe_unused,
34205+ const char *name, void *value, size_t size)
c1595e42
JR
34206+{
34207+ struct au_lgxattr arg = {
34208+ .type = AU_XATTR_GET,
34209+ .u.get = {
34210+ .name = name,
34211+ .value = value,
34212+ .size = size
34213+ },
34214+ };
34215+
34216+ return au_lgxattr(dentry, &arg);
34217+}
34218+
5afbbe0d
AM
34219+int aufs_setxattr(struct dentry *dentry, struct inode *inode, const char *name,
34220+ const void *value, size_t size, int flags)
c1595e42
JR
34221+{
34222+ struct au_srxattr arg = {
34223+ .type = AU_XATTR_SET,
34224+ .u.set = {
34225+ .name = name,
34226+ .value = value,
34227+ .size = size,
34228+ .flags = flags
34229+ },
34230+ };
34231+
5afbbe0d 34232+ return au_srxattr(dentry, inode, &arg);
c1595e42
JR
34233+}
34234+
34235+int aufs_removexattr(struct dentry *dentry, const char *name)
34236+{
34237+ struct au_srxattr arg = {
34238+ .type = AU_XATTR_REMOVE,
34239+ .u.remove = {
34240+ .name = name
34241+ },
34242+ };
34243+
5afbbe0d 34244+ return au_srxattr(dentry, d_inode(dentry), &arg);
c1595e42
JR
34245+}
34246+
34247+/* ---------------------------------------------------------------------- */
34248+
34249+#if 0
34250+static size_t au_xattr_list(struct dentry *dentry, char *list, size_t list_size,
34251+ const char *name, size_t name_len, int type)
34252+{
34253+ return aufs_listxattr(dentry, list, list_size);
34254+}
34255+
34256+static int au_xattr_get(struct dentry *dentry, const char *name, void *buffer,
34257+ size_t size, int type)
34258+{
34259+ return aufs_getxattr(dentry, name, buffer, size);
34260+}
34261+
34262+static int au_xattr_set(struct dentry *dentry, const char *name,
34263+ const void *value, size_t size, int flags, int type)
34264+{
34265+ return aufs_setxattr(dentry, name, value, size, flags);
34266+}
34267+
34268+static const struct xattr_handler au_xattr_handler = {
34269+ /* no prefix, no flags */
34270+ .list = au_xattr_list,
34271+ .get = au_xattr_get,
34272+ .set = au_xattr_set
34273+ /* why no remove? */
34274+};
34275+
34276+static const struct xattr_handler *au_xattr_handlers[] = {
34277+ &au_xattr_handler
34278+};
34279+
34280+void au_xattr_init(struct super_block *sb)
34281+{
34282+ /* sb->s_xattr = au_xattr_handlers; */
34283+}
34284+#endif
7f207e10
AM
34285diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
34286--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 34287+++ linux/fs/aufs/xino.c 2016-10-09 16:55:36.496035060 +0200
f0c0a007 34288@@ -0,0 +1,1318 @@
1facf9fc 34289+/*
8cdd5066 34290+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 34291+ *
34292+ * This program, aufs is free software; you can redistribute it and/or modify
34293+ * it under the terms of the GNU General Public License as published by
34294+ * the Free Software Foundation; either version 2 of the License, or
34295+ * (at your option) any later version.
dece6358
AM
34296+ *
34297+ * This program is distributed in the hope that it will be useful,
34298+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34299+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34300+ * GNU General Public License for more details.
34301+ *
34302+ * You should have received a copy of the GNU General Public License
523b37e3 34303+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 34304+ */
34305+
34306+/*
34307+ * external inode number translation table and bitmap
34308+ */
34309+
34310+#include <linux/seq_file.h>
392086de 34311+#include <linux/statfs.h>
1facf9fc 34312+#include "aufs.h"
34313+
9dbd164d 34314+/* todo: unnecessary to support mmap_sem since kernel-space? */
5527c038 34315+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 34316+ loff_t *pos)
34317+{
34318+ ssize_t err;
34319+ mm_segment_t oldfs;
b752ccd1
AM
34320+ union {
34321+ void *k;
34322+ char __user *u;
34323+ } buf;
1facf9fc 34324+
b752ccd1 34325+ buf.k = kbuf;
1facf9fc 34326+ oldfs = get_fs();
34327+ set_fs(KERNEL_DS);
34328+ do {
34329+ /* todo: signal_pending? */
b752ccd1 34330+ err = func(file, buf.u, size, pos);
1facf9fc 34331+ } while (err == -EAGAIN || err == -EINTR);
34332+ set_fs(oldfs);
34333+
34334+#if 0 /* reserved for future use */
34335+ if (err > 0)
2000de60 34336+ fsnotify_access(file->f_path.dentry);
1facf9fc 34337+#endif
34338+
34339+ return err;
34340+}
34341+
34342+/* ---------------------------------------------------------------------- */
34343+
be52b249
AM
34344+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
34345+ size_t size, loff_t *pos);
34346+
5527c038 34347+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
1facf9fc 34348+ size_t size, loff_t *pos)
34349+{
34350+ ssize_t err;
34351+ mm_segment_t oldfs;
b752ccd1
AM
34352+ union {
34353+ void *k;
34354+ const char __user *u;
34355+ } buf;
be52b249
AM
34356+ int i;
34357+ const int prevent_endless = 10;
1facf9fc 34358+
be52b249 34359+ i = 0;
b752ccd1 34360+ buf.k = kbuf;
1facf9fc 34361+ oldfs = get_fs();
34362+ set_fs(KERNEL_DS);
1facf9fc 34363+ do {
b752ccd1 34364+ err = func(file, buf.u, size, pos);
be52b249
AM
34365+ if (err == -EINTR
34366+ && !au_wkq_test()
34367+ && fatal_signal_pending(current)) {
34368+ set_fs(oldfs);
34369+ err = xino_fwrite_wkq(func, file, kbuf, size, pos);
34370+ BUG_ON(err == -EINTR);
34371+ oldfs = get_fs();
34372+ set_fs(KERNEL_DS);
34373+ }
34374+ } while (i++ < prevent_endless
34375+ && (err == -EAGAIN || err == -EINTR));
1facf9fc 34376+ set_fs(oldfs);
34377+
34378+#if 0 /* reserved for future use */
34379+ if (err > 0)
2000de60 34380+ fsnotify_modify(file->f_path.dentry);
1facf9fc 34381+#endif
34382+
34383+ return err;
34384+}
34385+
34386+struct do_xino_fwrite_args {
34387+ ssize_t *errp;
5527c038 34388+ vfs_writef_t func;
1facf9fc 34389+ struct file *file;
34390+ void *buf;
34391+ size_t size;
34392+ loff_t *pos;
34393+};
34394+
34395+static void call_do_xino_fwrite(void *args)
34396+{
34397+ struct do_xino_fwrite_args *a = args;
34398+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
34399+}
34400+
be52b249
AM
34401+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
34402+ size_t size, loff_t *pos)
34403+{
34404+ ssize_t err;
34405+ int wkq_err;
34406+ struct do_xino_fwrite_args args = {
34407+ .errp = &err,
34408+ .func = func,
34409+ .file = file,
34410+ .buf = buf,
34411+ .size = size,
34412+ .pos = pos
34413+ };
34414+
34415+ /*
34416+ * it breaks RLIMIT_FSIZE and normal user's limit,
34417+ * users should care about quota and real 'filesystem full.'
34418+ */
34419+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
34420+ if (unlikely(wkq_err))
34421+ err = wkq_err;
34422+
34423+ return err;
34424+}
34425+
5527c038
JR
34426+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
34427+ size_t size, loff_t *pos)
1facf9fc 34428+{
34429+ ssize_t err;
34430+
b752ccd1
AM
34431+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
34432+ lockdep_off();
34433+ err = do_xino_fwrite(func, file, buf, size, pos);
34434+ lockdep_on();
be52b249
AM
34435+ } else
34436+ err = xino_fwrite_wkq(func, file, buf, size, pos);
1facf9fc 34437+
34438+ return err;
34439+}
34440+
34441+/* ---------------------------------------------------------------------- */
34442+
34443+/*
34444+ * create a new xinofile at the same place/path as @base_file.
34445+ */
34446+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
34447+{
34448+ struct file *file;
4a4d8108 34449+ struct dentry *base, *parent;
523b37e3 34450+ struct inode *dir, *delegated;
1facf9fc 34451+ struct qstr *name;
1308ab2a 34452+ struct path path;
4a4d8108 34453+ int err;
1facf9fc 34454+
2000de60 34455+ base = base_file->f_path.dentry;
1facf9fc 34456+ parent = base->d_parent; /* dir inode is locked */
5527c038 34457+ dir = d_inode(parent);
1facf9fc 34458+ IMustLock(dir);
34459+
34460+ file = ERR_PTR(-EINVAL);
34461+ name = &base->d_name;
4a4d8108
AM
34462+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
34463+ if (IS_ERR(path.dentry)) {
34464+ file = (void *)path.dentry;
523b37e3
AM
34465+ pr_err("%pd lookup err %ld\n",
34466+ base, PTR_ERR(path.dentry));
1facf9fc 34467+ goto out;
34468+ }
34469+
34470+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 34471+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 34472+ if (unlikely(err)) {
34473+ file = ERR_PTR(err);
523b37e3 34474+ pr_err("%pd create err %d\n", base, err);
1facf9fc 34475+ goto out_dput;
34476+ }
34477+
c06a8ce3 34478+ path.mnt = base_file->f_path.mnt;
4a4d8108 34479+ file = vfsub_dentry_open(&path,
7f207e10 34480+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 34481+ /* | __FMODE_NONOTIFY */);
1facf9fc 34482+ if (IS_ERR(file)) {
523b37e3 34483+ pr_err("%pd open err %ld\n", base, PTR_ERR(file));
1facf9fc 34484+ goto out_dput;
34485+ }
34486+
523b37e3
AM
34487+ delegated = NULL;
34488+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
34489+ if (unlikely(err == -EWOULDBLOCK)) {
34490+ pr_warn("cannot retry for NFSv4 delegation"
34491+ " for an internal unlink\n");
34492+ iput(delegated);
34493+ }
1facf9fc 34494+ if (unlikely(err)) {
523b37e3 34495+ pr_err("%pd unlink err %d\n", base, err);
1facf9fc 34496+ goto out_fput;
34497+ }
34498+
34499+ if (copy_src) {
34500+ /* no one can touch copy_src xino */
c06a8ce3 34501+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
1facf9fc 34502+ if (unlikely(err)) {
523b37e3 34503+ pr_err("%pd copy err %d\n", base, err);
1facf9fc 34504+ goto out_fput;
34505+ }
34506+ }
34507+ goto out_dput; /* success */
34508+
4f0767ce 34509+out_fput:
1facf9fc 34510+ fput(file);
34511+ file = ERR_PTR(err);
4f0767ce 34512+out_dput:
4a4d8108 34513+ dput(path.dentry);
4f0767ce 34514+out:
1facf9fc 34515+ return file;
34516+}
34517+
34518+struct au_xino_lock_dir {
34519+ struct au_hinode *hdir;
34520+ struct dentry *parent;
febd17d6 34521+ struct inode *dir;
1facf9fc 34522+};
34523+
34524+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
34525+ struct au_xino_lock_dir *ldir)
34526+{
34527+ aufs_bindex_t brid, bindex;
34528+
34529+ ldir->hdir = NULL;
34530+ bindex = -1;
34531+ brid = au_xino_brid(sb);
34532+ if (brid >= 0)
34533+ bindex = au_br_index(sb, brid);
34534+ if (bindex >= 0) {
5527c038 34535+ ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
5afbbe0d 34536+ au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 34537+ } else {
2000de60 34538+ ldir->parent = dget_parent(xino->f_path.dentry);
febd17d6
JR
34539+ ldir->dir = d_inode(ldir->parent);
34540+ inode_lock_nested(ldir->dir, AuLsc_I_PARENT);
1facf9fc 34541+ }
34542+}
34543+
34544+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
34545+{
34546+ if (ldir->hdir)
5afbbe0d 34547+ au_hn_inode_unlock(ldir->hdir);
1facf9fc 34548+ else {
febd17d6 34549+ inode_unlock(ldir->dir);
1facf9fc 34550+ dput(ldir->parent);
34551+ }
34552+}
34553+
34554+/* ---------------------------------------------------------------------- */
34555+
34556+/* truncate xino files asynchronously */
34557+
34558+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
34559+{
34560+ int err;
392086de
AM
34561+ unsigned long jiffy;
34562+ blkcnt_t blocks;
5afbbe0d 34563+ aufs_bindex_t bi, bbot;
392086de 34564+ struct kstatfs *st;
1facf9fc 34565+ struct au_branch *br;
34566+ struct file *new_xino, *file;
34567+ struct super_block *h_sb;
34568+ struct au_xino_lock_dir ldir;
34569+
392086de 34570+ err = -ENOMEM;
be52b249 34571+ st = kmalloc(sizeof(*st), GFP_NOFS);
392086de
AM
34572+ if (unlikely(!st))
34573+ goto out;
34574+
1facf9fc 34575+ err = -EINVAL;
5afbbe0d
AM
34576+ bbot = au_sbbot(sb);
34577+ if (unlikely(bindex < 0 || bbot < bindex))
392086de 34578+ goto out_st;
1facf9fc 34579+ br = au_sbr(sb, bindex);
34580+ file = br->br_xino.xi_file;
34581+ if (!file)
392086de
AM
34582+ goto out_st;
34583+
34584+ err = vfs_statfs(&file->f_path, st);
34585+ if (unlikely(err))
34586+ AuErr1("statfs err %d, ignored\n", err);
34587+ jiffy = jiffies;
34588+ blocks = file_inode(file)->i_blocks;
34589+ pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34590+ bindex, (u64)blocks, st->f_bfree, st->f_blocks);
1facf9fc 34591+
34592+ au_xino_lock_dir(sb, file, &ldir);
34593+ /* mnt_want_write() is unnecessary here */
34594+ new_xino = au_xino_create2(file, file);
34595+ au_xino_unlock_dir(&ldir);
34596+ err = PTR_ERR(new_xino);
392086de
AM
34597+ if (IS_ERR(new_xino)) {
34598+ pr_err("err %d, ignored\n", err);
34599+ goto out_st;
34600+ }
1facf9fc 34601+ err = 0;
34602+ fput(file);
34603+ br->br_xino.xi_file = new_xino;
34604+
86dc4139 34605+ h_sb = au_br_sb(br);
5afbbe0d 34606+ for (bi = 0; bi <= bbot; bi++) {
1facf9fc 34607+ if (unlikely(bi == bindex))
34608+ continue;
34609+ br = au_sbr(sb, bi);
86dc4139 34610+ if (au_br_sb(br) != h_sb)
1facf9fc 34611+ continue;
34612+
34613+ fput(br->br_xino.xi_file);
34614+ br->br_xino.xi_file = new_xino;
34615+ get_file(new_xino);
34616+ }
34617+
392086de
AM
34618+ err = vfs_statfs(&new_xino->f_path, st);
34619+ if (!err) {
34620+ pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34621+ bindex, (u64)file_inode(new_xino)->i_blocks,
34622+ st->f_bfree, st->f_blocks);
34623+ if (file_inode(new_xino)->i_blocks < blocks)
34624+ au_sbi(sb)->si_xino_jiffy = jiffy;
34625+ } else
34626+ AuErr1("statfs err %d, ignored\n", err);
34627+
34628+out_st:
f0c0a007 34629+ au_delayed_kfree(st);
4f0767ce 34630+out:
1facf9fc 34631+ return err;
34632+}
34633+
34634+struct xino_do_trunc_args {
34635+ struct super_block *sb;
34636+ struct au_branch *br;
34637+};
34638+
34639+static void xino_do_trunc(void *_args)
34640+{
34641+ struct xino_do_trunc_args *args = _args;
34642+ struct super_block *sb;
34643+ struct au_branch *br;
34644+ struct inode *dir;
34645+ int err;
34646+ aufs_bindex_t bindex;
34647+
34648+ err = 0;
34649+ sb = args->sb;
5527c038 34650+ dir = d_inode(sb->s_root);
1facf9fc 34651+ br = args->br;
34652+
34653+ si_noflush_write_lock(sb);
34654+ ii_read_lock_parent(dir);
34655+ bindex = au_br_index(sb, br->br_id);
34656+ err = au_xino_trunc(sb, bindex);
1facf9fc 34657+ ii_read_unlock(dir);
34658+ if (unlikely(err))
392086de 34659+ pr_warn("err b%d, (%d)\n", bindex, err);
1facf9fc 34660+ atomic_dec(&br->br_xino_running);
5afbbe0d 34661+ au_br_put(br);
1facf9fc 34662+ si_write_unlock(sb);
027c5e7a 34663+ au_nwt_done(&au_sbi(sb)->si_nowait);
f0c0a007 34664+ au_delayed_kfree(args);
1facf9fc 34665+}
34666+
392086de
AM
34667+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
34668+{
34669+ int err;
34670+ struct kstatfs st;
34671+ struct au_sbinfo *sbinfo;
34672+
34673+ /* todo: si_xino_expire and the ratio should be customizable */
34674+ sbinfo = au_sbi(sb);
34675+ if (time_before(jiffies,
34676+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
34677+ return 0;
34678+
34679+ /* truncation border */
34680+ err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
34681+ if (unlikely(err)) {
34682+ AuErr1("statfs err %d, ignored\n", err);
34683+ return 0;
34684+ }
34685+ if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
34686+ return 0;
34687+
34688+ return 1;
34689+}
34690+
1facf9fc 34691+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
34692+{
34693+ struct xino_do_trunc_args *args;
34694+ int wkq_err;
34695+
392086de 34696+ if (!xino_trunc_test(sb, br))
1facf9fc 34697+ return;
34698+
34699+ if (atomic_inc_return(&br->br_xino_running) > 1)
34700+ goto out;
34701+
34702+ /* locking and au_delayed_kfree() of @args are done in xino_do_trunc() */
34703+ args = kmalloc(sizeof(*args), GFP_NOFS);
34704+ if (unlikely(!args)) {
34705+ AuErr1("no memory\n");
f0c0a007 34706+ goto out;
1facf9fc 34707+ }
34708+
5afbbe0d 34709+ au_br_get(br);
1facf9fc 34710+ args->sb = sb;
34711+ args->br = br;
53392da6 34712+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 34713+ if (!wkq_err)
34714+ return; /* success */
34715+
4a4d8108 34716+ pr_err("wkq %d\n", wkq_err);
5afbbe0d 34717+ au_br_put(br);
f0c0a007 34718+ au_delayed_kfree(args);
1facf9fc 34719+
4f0767ce 34720+out:
e49829fe 34721+ atomic_dec(&br->br_xino_running);
1facf9fc 34722+}
34723+
34724+/* ---------------------------------------------------------------------- */
34725+
5527c038 34726+static int au_xino_do_write(vfs_writef_t write, struct file *file,
1facf9fc 34727+ ino_t h_ino, ino_t ino)
34728+{
34729+ loff_t pos;
34730+ ssize_t sz;
34731+
34732+ pos = h_ino;
34733+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
34734+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34735+ return -EFBIG;
34736+ }
34737+ pos *= sizeof(ino);
34738+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
34739+ if (sz == sizeof(ino))
34740+ return 0; /* success */
34741+
34742+ AuIOErr("write failed (%zd)\n", sz);
34743+ return -EIO;
34744+}
34745+
34746+/*
34747+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
34748+ * at the position of @h_ino.
34749+ * even if @ino is zero, it is written to the xinofile and means no entry.
34750+ * if the size of the xino file on a specific filesystem exceeds the watermark,
34751+ * try truncating it.
34752+ */
34753+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34754+ ino_t ino)
34755+{
34756+ int err;
34757+ unsigned int mnt_flags;
34758+ struct au_branch *br;
34759+
34760+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
34761+ || ((loff_t)-1) > 0);
dece6358 34762+ SiMustAnyLock(sb);
1facf9fc 34763+
34764+ mnt_flags = au_mntflags(sb);
34765+ if (!au_opt_test(mnt_flags, XINO))
34766+ return 0;
34767+
34768+ br = au_sbr(sb, bindex);
34769+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34770+ h_ino, ino);
34771+ if (!err) {
34772+ if (au_opt_test(mnt_flags, TRUNC_XINO)
86dc4139 34773+ && au_test_fs_trunc_xino(au_br_sb(br)))
1facf9fc 34774+ xino_try_trunc(sb, br);
34775+ return 0; /* success */
34776+ }
34777+
34778+ AuIOErr("write failed (%d)\n", err);
34779+ return -EIO;
34780+}
34781+
34782+/* ---------------------------------------------------------------------- */
34783+
34784+/* aufs inode number bitmap */
34785+
34786+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
34787+static ino_t xib_calc_ino(unsigned long pindex, int bit)
34788+{
34789+ ino_t ino;
34790+
34791+ AuDebugOn(bit < 0 || page_bits <= bit);
34792+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
34793+ return ino;
34794+}
34795+
34796+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
34797+{
34798+ AuDebugOn(ino < AUFS_FIRST_INO);
34799+ ino -= AUFS_FIRST_INO;
34800+ *pindex = ino / page_bits;
34801+ *bit = ino % page_bits;
34802+}
34803+
34804+static int xib_pindex(struct super_block *sb, unsigned long pindex)
34805+{
34806+ int err;
34807+ loff_t pos;
34808+ ssize_t sz;
34809+ struct au_sbinfo *sbinfo;
34810+ struct file *xib;
34811+ unsigned long *p;
34812+
34813+ sbinfo = au_sbi(sb);
34814+ MtxMustLock(&sbinfo->si_xib_mtx);
34815+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
34816+ || !au_opt_test(sbinfo->si_mntflags, XINO));
34817+
34818+ if (pindex == sbinfo->si_xib_last_pindex)
34819+ return 0;
34820+
34821+ xib = sbinfo->si_xib;
34822+ p = sbinfo->si_xib_buf;
34823+ pos = sbinfo->si_xib_last_pindex;
34824+ pos *= PAGE_SIZE;
34825+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34826+ if (unlikely(sz != PAGE_SIZE))
34827+ goto out;
34828+
34829+ pos = pindex;
34830+ pos *= PAGE_SIZE;
c06a8ce3 34831+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
1facf9fc 34832+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
34833+ else {
34834+ memset(p, 0, PAGE_SIZE);
34835+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34836+ }
34837+ if (sz == PAGE_SIZE) {
34838+ sbinfo->si_xib_last_pindex = pindex;
34839+ return 0; /* success */
34840+ }
34841+
4f0767ce 34842+out:
b752ccd1
AM
34843+ AuIOErr1("write failed (%zd)\n", sz);
34844+ err = sz;
34845+ if (sz >= 0)
34846+ err = -EIO;
34847+ return err;
34848+}
34849+
34850+/* ---------------------------------------------------------------------- */
34851+
34852+static void au_xib_clear_bit(struct inode *inode)
34853+{
34854+ int err, bit;
34855+ unsigned long pindex;
34856+ struct super_block *sb;
34857+ struct au_sbinfo *sbinfo;
34858+
34859+ AuDebugOn(inode->i_nlink);
34860+
34861+ sb = inode->i_sb;
34862+ xib_calc_bit(inode->i_ino, &pindex, &bit);
34863+ AuDebugOn(page_bits <= bit);
34864+ sbinfo = au_sbi(sb);
34865+ mutex_lock(&sbinfo->si_xib_mtx);
34866+ err = xib_pindex(sb, pindex);
34867+ if (!err) {
34868+ clear_bit(bit, sbinfo->si_xib_buf);
34869+ sbinfo->si_xib_next_bit = bit;
34870+ }
34871+ mutex_unlock(&sbinfo->si_xib_mtx);
34872+}
34873+
34874+/* for s_op->delete_inode() */
34875+void au_xino_delete_inode(struct inode *inode, const int unlinked)
34876+{
34877+ int err;
34878+ unsigned int mnt_flags;
5afbbe0d 34879+ aufs_bindex_t bindex, bbot, bi;
b752ccd1
AM
34880+ unsigned char try_trunc;
34881+ struct au_iinfo *iinfo;
34882+ struct super_block *sb;
34883+ struct au_hinode *hi;
34884+ struct inode *h_inode;
34885+ struct au_branch *br;
5527c038 34886+ vfs_writef_t xwrite;
b752ccd1 34887+
5afbbe0d
AM
34888+ AuDebugOn(au_is_bad_inode(inode));
34889+
b752ccd1
AM
34890+ sb = inode->i_sb;
34891+ mnt_flags = au_mntflags(sb);
34892+ if (!au_opt_test(mnt_flags, XINO)
34893+ || inode->i_ino == AUFS_ROOT_INO)
34894+ return;
34895+
34896+ if (unlinked) {
34897+ au_xigen_inc(inode);
34898+ au_xib_clear_bit(inode);
34899+ }
34900+
34901+ iinfo = au_ii(inode);
5afbbe0d 34902+ bindex = iinfo->ii_btop;
b752ccd1
AM
34903+ if (bindex < 0)
34904+ return;
1facf9fc 34905+
b752ccd1
AM
34906+ xwrite = au_sbi(sb)->si_xwrite;
34907+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
5afbbe0d
AM
34908+ hi = au_hinode(iinfo, bindex);
34909+ bbot = iinfo->ii_bbot;
34910+ for (; bindex <= bbot; bindex++, hi++) {
b752ccd1
AM
34911+ h_inode = hi->hi_inode;
34912+ if (!h_inode
34913+ || (!unlinked && h_inode->i_nlink))
34914+ continue;
1facf9fc 34915+
b752ccd1
AM
34916+ /* inode may not be revalidated */
34917+ bi = au_br_index(sb, hi->hi_id);
34918+ if (bi < 0)
34919+ continue;
1facf9fc 34920+
b752ccd1
AM
34921+ br = au_sbr(sb, bi);
34922+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
34923+ h_inode->i_ino, /*ino*/0);
34924+ if (!err && try_trunc
86dc4139 34925+ && au_test_fs_trunc_xino(au_br_sb(br)))
b752ccd1 34926+ xino_try_trunc(sb, br);
1facf9fc 34927+ }
1facf9fc 34928+}
34929+
34930+/* get an unused inode number from bitmap */
34931+ino_t au_xino_new_ino(struct super_block *sb)
34932+{
34933+ ino_t ino;
34934+ unsigned long *p, pindex, ul, pend;
34935+ struct au_sbinfo *sbinfo;
34936+ struct file *file;
34937+ int free_bit, err;
34938+
34939+ if (!au_opt_test(au_mntflags(sb), XINO))
34940+ return iunique(sb, AUFS_FIRST_INO);
34941+
34942+ sbinfo = au_sbi(sb);
34943+ mutex_lock(&sbinfo->si_xib_mtx);
34944+ p = sbinfo->si_xib_buf;
34945+ free_bit = sbinfo->si_xib_next_bit;
34946+ if (free_bit < page_bits && !test_bit(free_bit, p))
34947+ goto out; /* success */
34948+ free_bit = find_first_zero_bit(p, page_bits);
34949+ if (free_bit < page_bits)
34950+ goto out; /* success */
34951+
34952+ pindex = sbinfo->si_xib_last_pindex;
34953+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
34954+ err = xib_pindex(sb, ul);
34955+ if (unlikely(err))
34956+ goto out_err;
34957+ free_bit = find_first_zero_bit(p, page_bits);
34958+ if (free_bit < page_bits)
34959+ goto out; /* success */
34960+ }
34961+
34962+ file = sbinfo->si_xib;
c06a8ce3 34963+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
1facf9fc 34964+ for (ul = pindex + 1; ul <= pend; ul++) {
34965+ err = xib_pindex(sb, ul);
34966+ if (unlikely(err))
34967+ goto out_err;
34968+ free_bit = find_first_zero_bit(p, page_bits);
34969+ if (free_bit < page_bits)
34970+ goto out; /* success */
34971+ }
34972+ BUG();
34973+
4f0767ce 34974+out:
1facf9fc 34975+ set_bit(free_bit, p);
7f207e10 34976+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 34977+ pindex = sbinfo->si_xib_last_pindex;
34978+ mutex_unlock(&sbinfo->si_xib_mtx);
34979+ ino = xib_calc_ino(pindex, free_bit);
34980+ AuDbg("i%lu\n", (unsigned long)ino);
34981+ return ino;
4f0767ce 34982+out_err:
1facf9fc 34983+ mutex_unlock(&sbinfo->si_xib_mtx);
34984+ AuDbg("i0\n");
34985+ return 0;
34986+}
34987+
34988+/*
34989+ * read @ino from xinofile for the specified branch{@sb, @bindex}
34990+ * at the position of @h_ino.
34991+ * *@ino is zeroed first; if the xinofile is too short to hold the entry, 0 is returned.
34992+ */
34993+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34994+ ino_t *ino)
34995+{
34996+ int err;
34997+ ssize_t sz;
34998+ loff_t pos;
34999+ struct file *file;
35000+ struct au_sbinfo *sbinfo;
35001+
35002+ *ino = 0;
35003+ if (!au_opt_test(au_mntflags(sb), XINO))
35004+ return 0; /* no xino */
35005+
35006+ err = 0;
35007+ sbinfo = au_sbi(sb);
35008+ pos = h_ino;
35009+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
35010+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
35011+ return -EFBIG;
35012+ }
35013+ pos *= sizeof(*ino);
35014+
35015+ file = au_sbr(sb, bindex)->br_xino.xi_file;
c06a8ce3 35016+ if (vfsub_f_size_read(file) < pos + sizeof(*ino))
1facf9fc 35017+ return 0; /* no ino */
35018+
35019+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
35020+ if (sz == sizeof(*ino))
35021+ return 0; /* success */
35022+
35023+ err = sz;
35024+ if (unlikely(sz >= 0)) {
35025+ err = -EIO;
35026+ AuIOErr("xino read error (%zd)\n", sz);
35027+ }
35028+
35029+ return err;
35030+}
35031+
35032+/* ---------------------------------------------------------------------- */
35033+
35034+/* create and set a new xino file */
35035+
35036+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
35037+{
35038+ struct file *file;
35039+ struct dentry *h_parent, *d;
b912730e 35040+ struct inode *h_dir, *inode;
1facf9fc 35041+ int err;
35042+
35043+ /*
35044+ * at mount-time, and the xino file is the default path,
4a4d8108 35045+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 35046+ * when a user specified the xino, we cannot get au_hdir to be ignored.
35047+ */
7f207e10 35048+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 35049+ /* | __FMODE_NONOTIFY */,
1facf9fc 35050+ S_IRUGO | S_IWUGO);
35051+ if (IS_ERR(file)) {
35052+ if (!silent)
4a4d8108 35053+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 35054+ return file;
35055+ }
35056+
35057+ /* keep file count */
b912730e
AM
35058+ err = 0;
35059+ inode = file_inode(file);
2000de60 35060+ h_parent = dget_parent(file->f_path.dentry);
5527c038 35061+ h_dir = d_inode(h_parent);
febd17d6 35062+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
1facf9fc 35063+ /* mnt_want_write() is unnecessary here */
523b37e3 35064+ /* no delegation since it is just created */
b912730e
AM
35065+ if (inode->i_nlink)
35066+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
35067+ /*force*/0);
febd17d6 35068+ inode_unlock(h_dir);
1facf9fc 35069+ dput(h_parent);
35070+ if (unlikely(err)) {
35071+ if (!silent)
4a4d8108 35072+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 35073+ goto out;
35074+ }
35075+
35076+ err = -EINVAL;
2000de60 35077+ d = file->f_path.dentry;
1facf9fc 35078+ if (unlikely(sb == d->d_sb)) {
35079+ if (!silent)
4a4d8108 35080+ pr_err("%s must be outside\n", fname);
1facf9fc 35081+ goto out;
35082+ }
35083+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
35084+ if (!silent)
4a4d8108
AM
35085+ pr_err("xino doesn't support %s(%s)\n",
35086+ fname, au_sbtype(d->d_sb));
1facf9fc 35087+ goto out;
35088+ }
35089+ return file; /* success */
35090+
4f0767ce 35091+out:
1facf9fc 35092+ fput(file);
35093+ file = ERR_PTR(err);
35094+ return file;
35095+}
35096+
35097+/*
35098+ * find another branch which is on the same filesystem as the specified
5afbbe0d 35099+ * branch{@btgt}. search until @bbot.
1facf9fc 35100+ */
35101+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
5afbbe0d 35102+ aufs_bindex_t bbot)
1facf9fc 35103+{
35104+ aufs_bindex_t bindex;
35105+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
35106+
35107+ for (bindex = 0; bindex < btgt; bindex++)
35108+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
35109+ return bindex;
5afbbe0d 35110+ for (bindex++; bindex <= bbot; bindex++)
1facf9fc 35111+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
35112+ return bindex;
35113+ return -1;
35114+}
35115+
35116+/* ---------------------------------------------------------------------- */
35117+
35118+/*
35119+ * initialize the xinofile for the specified branch @br
35120+ * at the place/path where @base_file indicates.
35121+ * test whether another branch is on the same filesystem or not,
35122+ * if @do_test is true.
35123+ */
35124+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
35125+ struct file *base_file, int do_test)
35126+{
35127+ int err;
35128+ ino_t ino;
5afbbe0d 35129+ aufs_bindex_t bbot, bindex;
1facf9fc 35130+ struct au_branch *shared_br, *b;
35131+ struct file *file;
35132+ struct super_block *tgt_sb;
35133+
35134+ shared_br = NULL;
5afbbe0d 35135+ bbot = au_sbbot(sb);
1facf9fc 35136+ if (do_test) {
86dc4139 35137+ tgt_sb = au_br_sb(br);
5afbbe0d 35138+ for (bindex = 0; bindex <= bbot; bindex++) {
1facf9fc 35139+ b = au_sbr(sb, bindex);
86dc4139 35140+ if (tgt_sb == au_br_sb(b)) {
1facf9fc 35141+ shared_br = b;
35142+ break;
35143+ }
35144+ }
35145+ }
35146+
35147+ if (!shared_br || !shared_br->br_xino.xi_file) {
35148+ struct au_xino_lock_dir ldir;
35149+
35150+ au_xino_lock_dir(sb, base_file, &ldir);
35151+ /* mnt_want_write() is unnecessary here */
35152+ file = au_xino_create2(base_file, NULL);
35153+ au_xino_unlock_dir(&ldir);
35154+ err = PTR_ERR(file);
35155+ if (IS_ERR(file))
35156+ goto out;
35157+ br->br_xino.xi_file = file;
35158+ } else {
35159+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
35160+ get_file(br->br_xino.xi_file);
35161+ }
35162+
35163+ ino = AUFS_ROOT_INO;
35164+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
35165+ h_ino, ino);
b752ccd1
AM
35166+ if (unlikely(err)) {
35167+ fput(br->br_xino.xi_file);
35168+ br->br_xino.xi_file = NULL;
35169+ }
1facf9fc 35170+
4f0767ce 35171+out:
1facf9fc 35172+ return err;
35173+}
35174+
35175+/* ---------------------------------------------------------------------- */
35176+
35177+/* truncate a xino bitmap file */
35178+
35179+/* todo: slow */
35180+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
35181+{
35182+ int err, bit;
35183+ ssize_t sz;
35184+ unsigned long pindex;
35185+ loff_t pos, pend;
35186+ struct au_sbinfo *sbinfo;
5527c038 35187+ vfs_readf_t func;
1facf9fc 35188+ ino_t *ino;
35189+ unsigned long *p;
35190+
35191+ err = 0;
35192+ sbinfo = au_sbi(sb);
dece6358 35193+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 35194+ p = sbinfo->si_xib_buf;
35195+ func = sbinfo->si_xread;
c06a8ce3 35196+ pend = vfsub_f_size_read(file);
1facf9fc 35197+ pos = 0;
35198+ while (pos < pend) {
35199+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
35200+ err = sz;
35201+ if (unlikely(sz <= 0))
35202+ goto out;
35203+
35204+ err = 0;
35205+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
35206+ if (unlikely(*ino < AUFS_FIRST_INO))
35207+ continue;
35208+
35209+ xib_calc_bit(*ino, &pindex, &bit);
35210+ AuDebugOn(page_bits <= bit);
35211+ err = xib_pindex(sb, pindex);
35212+ if (!err)
35213+ set_bit(bit, p);
35214+ else
35215+ goto out;
35216+ }
35217+ }
35218+
4f0767ce 35219+out:
1facf9fc 35220+ return err;
35221+}
35222+
35223+static int xib_restore(struct super_block *sb)
35224+{
35225+ int err;
5afbbe0d 35226+ aufs_bindex_t bindex, bbot;
1facf9fc 35227+ void *page;
35228+
35229+ err = -ENOMEM;
35230+ page = (void *)__get_free_page(GFP_NOFS);
35231+ if (unlikely(!page))
35232+ goto out;
35233+
35234+ err = 0;
5afbbe0d
AM
35235+ bbot = au_sbbot(sb);
35236+ for (bindex = 0; !err && bindex <= bbot; bindex++)
1facf9fc 35237+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
35238+ err = do_xib_restore
35239+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
35240+ else
35241+ AuDbg("b%d\n", bindex);
f0c0a007 35242+ au_delayed_free_page((unsigned long)page);
1facf9fc 35243+
4f0767ce 35244+out:
1facf9fc 35245+ return err;
35246+}
35247+
35248+int au_xib_trunc(struct super_block *sb)
35249+{
35250+ int err;
35251+ ssize_t sz;
35252+ loff_t pos;
35253+ struct au_xino_lock_dir ldir;
35254+ struct au_sbinfo *sbinfo;
35255+ unsigned long *p;
35256+ struct file *file;
35257+
dece6358
AM
35258+ SiMustWriteLock(sb);
35259+
1facf9fc 35260+ err = 0;
35261+ sbinfo = au_sbi(sb);
35262+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
35263+ goto out;
35264+
35265+ file = sbinfo->si_xib;
c06a8ce3 35266+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
1facf9fc 35267+ goto out;
35268+
35269+ au_xino_lock_dir(sb, file, &ldir);
35270+ /* mnt_want_write() is unnecessary here */
35271+ file = au_xino_create2(sbinfo->si_xib, NULL);
35272+ au_xino_unlock_dir(&ldir);
35273+ err = PTR_ERR(file);
35274+ if (IS_ERR(file))
35275+ goto out;
35276+ fput(sbinfo->si_xib);
35277+ sbinfo->si_xib = file;
35278+
35279+ p = sbinfo->si_xib_buf;
35280+ memset(p, 0, PAGE_SIZE);
35281+ pos = 0;
35282+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
35283+ if (unlikely(sz != PAGE_SIZE)) {
35284+ err = sz;
35285+ AuIOErr("err %d\n", err);
35286+ if (sz >= 0)
35287+ err = -EIO;
35288+ goto out;
35289+ }
35290+
35291+ mutex_lock(&sbinfo->si_xib_mtx);
35292+ /* mnt_want_write() is unnecessary here */
35293+ err = xib_restore(sb);
35294+ mutex_unlock(&sbinfo->si_xib_mtx);
35295+
35296+out:
35297+ return err;
35298+}
35299+
35300+/* ---------------------------------------------------------------------- */
35301+
35302+/*
35303+ * xino mount option handlers
35304+ */
1facf9fc 35305+
35306+/* xino bitmap */
35307+static void xino_clear_xib(struct super_block *sb)
35308+{
35309+ struct au_sbinfo *sbinfo;
35310+
dece6358
AM
35311+ SiMustWriteLock(sb);
35312+
1facf9fc 35313+ sbinfo = au_sbi(sb);
35314+ sbinfo->si_xread = NULL;
35315+ sbinfo->si_xwrite = NULL;
35316+ if (sbinfo->si_xib)
35317+ fput(sbinfo->si_xib);
35318+ sbinfo->si_xib = NULL;
f0c0a007
AM
35319+ if (sbinfo->si_xib_buf)
35320+ au_delayed_free_page((unsigned long)sbinfo->si_xib_buf);
1facf9fc 35321+ sbinfo->si_xib_buf = NULL;
35322+}
35323+
35324+static int au_xino_set_xib(struct super_block *sb, struct file *base)
35325+{
35326+ int err;
35327+ loff_t pos;
35328+ struct au_sbinfo *sbinfo;
35329+ struct file *file;
35330+
dece6358
AM
35331+ SiMustWriteLock(sb);
35332+
1facf9fc 35333+ sbinfo = au_sbi(sb);
35334+ file = au_xino_create2(base, sbinfo->si_xib);
35335+ err = PTR_ERR(file);
35336+ if (IS_ERR(file))
35337+ goto out;
35338+ if (sbinfo->si_xib)
35339+ fput(sbinfo->si_xib);
35340+ sbinfo->si_xib = file;
5527c038
JR
35341+ sbinfo->si_xread = vfs_readf(file);
35342+ sbinfo->si_xwrite = vfs_writef(file);
1facf9fc 35343+
35344+ err = -ENOMEM;
35345+ if (!sbinfo->si_xib_buf)
35346+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
35347+ if (unlikely(!sbinfo->si_xib_buf))
35348+ goto out_unset;
35349+
35350+ sbinfo->si_xib_last_pindex = 0;
35351+ sbinfo->si_xib_next_bit = 0;
c06a8ce3 35352+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
1facf9fc 35353+ pos = 0;
35354+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
35355+ PAGE_SIZE, &pos);
35356+ if (unlikely(err != PAGE_SIZE))
35357+ goto out_free;
35358+ }
35359+ err = 0;
35360+ goto out; /* success */
35361+
4f0767ce 35362+out_free:
f0c0a007
AM
35363+ if (sbinfo->si_xib_buf)
35364+ au_delayed_free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
35365+ sbinfo->si_xib_buf = NULL;
35366+ if (err >= 0)
35367+ err = -EIO;
4f0767ce 35368+out_unset:
b752ccd1
AM
35369+ fput(sbinfo->si_xib);
35370+ sbinfo->si_xib = NULL;
35371+ sbinfo->si_xread = NULL;
35372+ sbinfo->si_xwrite = NULL;
4f0767ce 35373+out:
b752ccd1 35374+ return err;
1facf9fc 35375+}
35376+
b752ccd1
AM
35377+/* xino for each branch */
35378+static void xino_clear_br(struct super_block *sb)
35379+{
5afbbe0d 35380+ aufs_bindex_t bindex, bbot;
b752ccd1 35381+ struct au_branch *br;
1facf9fc 35382+
5afbbe0d
AM
35383+ bbot = au_sbbot(sb);
35384+ for (bindex = 0; bindex <= bbot; bindex++) {
b752ccd1
AM
35385+ br = au_sbr(sb, bindex);
35386+ if (!br || !br->br_xino.xi_file)
35387+ continue;
35388+
35389+ fput(br->br_xino.xi_file);
35390+ br->br_xino.xi_file = NULL;
35391+ }
35392+}
35393+
35394+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 35395+{
35396+ int err;
b752ccd1 35397+ ino_t ino;
5afbbe0d 35398+ aufs_bindex_t bindex, bbot, bshared;
b752ccd1
AM
35399+ struct {
35400+ struct file *old, *new;
35401+ } *fpair, *p;
35402+ struct au_branch *br;
35403+ struct inode *inode;
5527c038 35404+ vfs_writef_t writef;
1facf9fc 35405+
b752ccd1
AM
35406+ SiMustWriteLock(sb);
35407+
35408+ err = -ENOMEM;
5afbbe0d
AM
35409+ bbot = au_sbbot(sb);
35410+ fpair = kcalloc(bbot + 1, sizeof(*fpair), GFP_NOFS);
b752ccd1 35411+ if (unlikely(!fpair))
1facf9fc 35412+ goto out;
35413+
5527c038 35414+ inode = d_inode(sb->s_root);
b752ccd1
AM
35415+ ino = AUFS_ROOT_INO;
35416+ writef = au_sbi(sb)->si_xwrite;
5afbbe0d 35417+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
b752ccd1
AM
35418+ bshared = is_sb_shared(sb, bindex, bindex - 1);
35419+ if (bshared >= 0) {
35420+ /* shared xino */
35421+ *p = fpair[bshared];
35422+ get_file(p->new);
35423+ }
35424+
35425+ if (!p->new) {
35426+ /* new xino */
5afbbe0d 35427+ br = au_sbr(sb, bindex);
b752ccd1
AM
35428+ p->old = br->br_xino.xi_file;
35429+ p->new = au_xino_create2(base, br->br_xino.xi_file);
35430+ err = PTR_ERR(p->new);
35431+ if (IS_ERR(p->new)) {
35432+ p->new = NULL;
35433+ goto out_pair;
35434+ }
35435+ }
35436+
35437+ err = au_xino_do_write(writef, p->new,
35438+ au_h_iptr(inode, bindex)->i_ino, ino);
35439+ if (unlikely(err))
35440+ goto out_pair;
35441+ }
35442+
5afbbe0d 35443+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
b752ccd1
AM
35444+ br = au_sbr(sb, bindex);
35445+ if (br->br_xino.xi_file)
35446+ fput(br->br_xino.xi_file);
35447+ get_file(p->new);
35448+ br->br_xino.xi_file = p->new;
35449+ }
1facf9fc 35450+
4f0767ce 35451+out_pair:
5afbbe0d 35452+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++)
b752ccd1
AM
35453+ if (p->new)
35454+ fput(p->new);
35455+ else
35456+ break;
f0c0a007 35457+ au_delayed_kfree(fpair);
4f0767ce 35458+out:
1facf9fc 35459+ return err;
35460+}
b752ccd1
AM
35461+
35462+void au_xino_clr(struct super_block *sb)
35463+{
35464+ struct au_sbinfo *sbinfo;
35465+
35466+ au_xigen_clr(sb);
35467+ xino_clear_xib(sb);
35468+ xino_clear_br(sb);
35469+ sbinfo = au_sbi(sb);
35470+ /* lvalue, do not call au_mntflags() */
35471+ au_opt_clr(sbinfo->si_mntflags, XINO);
35472+}
35473+
35474+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
35475+{
35476+ int err, skip;
35477+ struct dentry *parent, *cur_parent;
35478+ struct qstr *dname, *cur_name;
35479+ struct file *cur_xino;
35480+ struct inode *dir;
35481+ struct au_sbinfo *sbinfo;
35482+
35483+ SiMustWriteLock(sb);
35484+
35485+ err = 0;
35486+ sbinfo = au_sbi(sb);
2000de60 35487+ parent = dget_parent(xino->file->f_path.dentry);
b752ccd1
AM
35488+ if (remount) {
35489+ skip = 0;
2000de60 35490+ dname = &xino->file->f_path.dentry->d_name;
b752ccd1
AM
35491+ cur_xino = sbinfo->si_xib;
35492+ if (cur_xino) {
2000de60
JR
35493+ cur_parent = dget_parent(cur_xino->f_path.dentry);
35494+ cur_name = &cur_xino->f_path.dentry->d_name;
b752ccd1 35495+ skip = (cur_parent == parent
38d290e6 35496+ && au_qstreq(dname, cur_name));
b752ccd1
AM
35497+ dput(cur_parent);
35498+ }
35499+ if (skip)
35500+ goto out;
35501+ }
35502+
35503+ au_opt_set(sbinfo->si_mntflags, XINO);
5527c038 35504+ dir = d_inode(parent);
febd17d6 35505+ inode_lock_nested(dir, AuLsc_I_PARENT);
b752ccd1
AM
35506+ /* mnt_want_write() is unnecessary here */
35507+ err = au_xino_set_xib(sb, xino->file);
35508+ if (!err)
35509+ err = au_xigen_set(sb, xino->file);
35510+ if (!err)
35511+ err = au_xino_set_br(sb, xino->file);
febd17d6 35512+ inode_unlock(dir);
b752ccd1
AM
35513+ if (!err)
35514+ goto out; /* success */
35515+
35516+ /* reset all */
35517+ AuIOErr("failed creating xino(%d).\n", err);
c1595e42
JR
35518+ au_xigen_clr(sb);
35519+ xino_clear_xib(sb);
b752ccd1 35520+
4f0767ce 35521+out:
b752ccd1
AM
35522+ dput(parent);
35523+ return err;
35524+}
35525+
35526+/* ---------------------------------------------------------------------- */
35527+
35528+/*
35529+ * create a xinofile at the default place/path.
35530+ */
35531+struct file *au_xino_def(struct super_block *sb)
35532+{
35533+ struct file *file;
35534+ char *page, *p;
35535+ struct au_branch *br;
35536+ struct super_block *h_sb;
35537+ struct path path;
5afbbe0d 35538+ aufs_bindex_t bbot, bindex, bwr;
b752ccd1
AM
35539+
35540+ br = NULL;
5afbbe0d 35541+ bbot = au_sbbot(sb);
b752ccd1 35542+ bwr = -1;
5afbbe0d 35543+ for (bindex = 0; bindex <= bbot; bindex++) {
b752ccd1
AM
35544+ br = au_sbr(sb, bindex);
35545+ if (au_br_writable(br->br_perm)
86dc4139 35546+ && !au_test_fs_bad_xino(au_br_sb(br))) {
b752ccd1
AM
35547+ bwr = bindex;
35548+ break;
35549+ }
35550+ }
35551+
7f207e10
AM
35552+ if (bwr >= 0) {
35553+ file = ERR_PTR(-ENOMEM);
537831f9 35554+ page = (void *)__get_free_page(GFP_NOFS);
7f207e10
AM
35555+ if (unlikely(!page))
35556+ goto out;
86dc4139 35557+ path.mnt = au_br_mnt(br);
7f207e10
AM
35558+ path.dentry = au_h_dptr(sb->s_root, bwr);
35559+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
35560+ file = (void *)p;
35561+ if (!IS_ERR(p)) {
35562+ strcat(p, "/" AUFS_XINO_FNAME);
35563+ AuDbg("%s\n", p);
35564+ file = au_xino_create(sb, p, /*silent*/0);
35565+ if (!IS_ERR(file))
35566+ au_xino_brid_set(sb, br->br_id);
35567+ }
f0c0a007 35568+ au_delayed_free_page((unsigned long)page);
7f207e10
AM
35569+ } else {
35570+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
35571+ if (IS_ERR(file))
35572+ goto out;
2000de60 35573+ h_sb = file->f_path.dentry->d_sb;
7f207e10
AM
35574+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
35575+ pr_err("xino doesn't support %s(%s)\n",
35576+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
35577+ fput(file);
35578+ file = ERR_PTR(-EINVAL);
35579+ }
35580+ if (!IS_ERR(file))
35581+ au_xino_brid_set(sb, -1);
35582+ }
0c5527e5 35583+
7f207e10
AM
35584+out:
35585+ return file;
35586+}
35587+
35588+/* ---------------------------------------------------------------------- */
35589+
35590+int au_xino_path(struct seq_file *seq, struct file *file)
35591+{
35592+ int err;
35593+
35594+ err = au_seq_path(seq, &file->f_path);
79b8bda9 35595+ if (unlikely(err))
7f207e10
AM
35596+ goto out;
35597+
7f207e10
AM
35598+#define Deleted "\\040(deleted)"
35599+ seq->count -= sizeof(Deleted) - 1;
35600+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
35601+ sizeof(Deleted) - 1));
35602+#undef Deleted
35603+
35604+out:
35605+ return err;
35606+}
537831f9
AM
35607diff -urN /usr/share/empty/include/uapi/linux/aufs_type.h linux/include/uapi/linux/aufs_type.h
35608--- /usr/share/empty/include/uapi/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
106341ce 35609+++ linux/include/uapi/linux/aufs_type.h 2016-12-11 21:22:05.903059512 +0100
c1595e42 35610@@ -0,0 +1,419 @@
7f207e10 35611+/*
8cdd5066 35612+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
35613+ *
35614+ * This program, aufs is free software; you can redistribute it and/or modify
35615+ * it under the terms of the GNU General Public License as published by
35616+ * the Free Software Foundation; either version 2 of the License, or
35617+ * (at your option) any later version.
35618+ *
35619+ * This program is distributed in the hope that it will be useful,
35620+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35621+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35622+ * GNU General Public License for more details.
35623+ *
35624+ * You should have received a copy of the GNU General Public License
523b37e3 35625+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
35626+ */
35627+
35628+#ifndef __AUFS_TYPE_H__
35629+#define __AUFS_TYPE_H__
35630+
f6c5ef8b
AM
35631+#define AUFS_NAME "aufs"
35632+
9dbd164d 35633+#ifdef __KERNEL__
f6c5ef8b
AM
35634+/*
35635+ * define it before including all other headers.
35636+ * sched.h may use pr_* macros before defining "current", so define the
35637+ * no-current version first, and re-define later.
35638+ */
35639+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
35640+#include <linux/sched.h>
35641+#undef pr_fmt
a2a7ad62
AM
35642+#define pr_fmt(fmt) \
35643+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
35644+ (int)sizeof(current->comm), current->comm, current->pid
9dbd164d
AM
35645+#else
35646+#include <stdint.h>
35647+#include <sys/types.h>
f6c5ef8b 35648+#endif /* __KERNEL__ */
7f207e10 35649+
f6c5ef8b
AM
35650+#include <linux/limits.h>
35651+
106341ce 35652+#define AUFS_VERSION "4.x-rcN-20161010"
7f207e10
AM
35653+
35654+/* todo? move this to linux-2.6.19/include/magic.h */
35655+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
35656+
35657+/* ---------------------------------------------------------------------- */
35658+
35659+#ifdef CONFIG_AUFS_BRANCH_MAX_127
9dbd164d 35660+typedef int8_t aufs_bindex_t;
7f207e10
AM
35661+#define AUFS_BRANCH_MAX 127
35662+#else
9dbd164d 35663+typedef int16_t aufs_bindex_t;
7f207e10
AM
35664+#ifdef CONFIG_AUFS_BRANCH_MAX_511
35665+#define AUFS_BRANCH_MAX 511
35666+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
35667+#define AUFS_BRANCH_MAX 1023
35668+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
35669+#define AUFS_BRANCH_MAX 32767
35670+#endif
35671+#endif
35672+
35673+#ifdef __KERNEL__
35674+#ifndef AUFS_BRANCH_MAX
35675+#error unknown CONFIG_AUFS_BRANCH_MAX value
35676+#endif
35677+#endif /* __KERNEL__ */
35678+
35679+/* ---------------------------------------------------------------------- */
35680+
7f207e10
AM
35681+#define AUFS_FSTYPE AUFS_NAME
35682+
35683+#define AUFS_ROOT_INO 2
35684+#define AUFS_FIRST_INO 11
35685+
35686+#define AUFS_WH_PFX ".wh."
35687+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
35688+#define AUFS_WH_TMP_LEN 4
86dc4139 35689+/* a limit for rmdir/rename a dir and copyup */
7f207e10
AM
35690+#define AUFS_MAX_NAMELEN (NAME_MAX \
35691+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
35692+ - 1 /* dot */\
35693+ - AUFS_WH_TMP_LEN) /* hex */
35694+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
35695+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
392086de
AM
35696+#define AUFS_XINO_DEF_SEC 30 /* seconds */
35697+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
7f207e10
AM
35698+#define AUFS_DIRWH_DEF 3
35699+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 35700+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
35701+#define AUFS_RDBLK_DEF 512 /* bytes */
35702+#define AUFS_RDHASH_DEF 32
35703+#define AUFS_WKQ_NAME AUFS_NAME "d"
027c5e7a
AM
35704+#define AUFS_MFS_DEF_SEC 30 /* seconds */
35705+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
076b876e 35706+#define AUFS_FHSM_CACHE_DEF_SEC 30 /* seconds */
86dc4139 35707+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
7f207e10
AM
35708+
35709+/* pseudo-link maintenance under /proc */
35710+#define AUFS_PLINK_MAINT_NAME "plink_maint"
35711+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
35712+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
35713+
35714+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
35715+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
35716+
35717+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
35718+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
35719+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
35720+
35721+/* doubly whiteouted */
35722+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
35723+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
35724+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
35725+
1e00d052 35726+/* branch permissions and attributes */
7f207e10
AM
35727+#define AUFS_BRPERM_RW "rw"
35728+#define AUFS_BRPERM_RO "ro"
35729+#define AUFS_BRPERM_RR "rr"
076b876e
AM
35730+#define AUFS_BRATTR_COO_REG "coo_reg"
35731+#define AUFS_BRATTR_COO_ALL "coo_all"
35732+#define AUFS_BRATTR_FHSM "fhsm"
35733+#define AUFS_BRATTR_UNPIN "unpin"
c1595e42
JR
35734+#define AUFS_BRATTR_ICEX "icex"
35735+#define AUFS_BRATTR_ICEX_SEC "icexsec"
35736+#define AUFS_BRATTR_ICEX_SYS "icexsys"
35737+#define AUFS_BRATTR_ICEX_TR "icextr"
35738+#define AUFS_BRATTR_ICEX_USR "icexusr"
35739+#define AUFS_BRATTR_ICEX_OTH "icexoth"
1e00d052
AM
35740+#define AUFS_BRRATTR_WH "wh"
35741+#define AUFS_BRWATTR_NLWH "nolwh"
076b876e
AM
35742+#define AUFS_BRWATTR_MOO "moo"
35743+
35744+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
35745+#define AuBrPerm_RO (1 << 1) /* readonly */
35746+#define AuBrPerm_RR (1 << 2) /* natively readonly */
35747+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
35748+
35749+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */
35750+#define AuBrAttr_COO_ALL (1 << 4)
35751+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
35752+
35753+#define AuBrAttr_FHSM (1 << 5) /* file-based hsm */
35754+#define AuBrAttr_UNPIN (1 << 6) /* rename-able top dir of
c1595e42
JR
35755+ branch. meaningless since
35756+ linux-3.18-rc1 */
35757+
35758+/* ignore error in copying XATTR */
35759+#define AuBrAttr_ICEX_SEC (1 << 7)
35760+#define AuBrAttr_ICEX_SYS (1 << 8)
35761+#define AuBrAttr_ICEX_TR (1 << 9)
35762+#define AuBrAttr_ICEX_USR (1 << 10)
35763+#define AuBrAttr_ICEX_OTH (1 << 11)
35764+#define AuBrAttr_ICEX (AuBrAttr_ICEX_SEC \
35765+ | AuBrAttr_ICEX_SYS \
35766+ | AuBrAttr_ICEX_TR \
35767+ | AuBrAttr_ICEX_USR \
35768+ | AuBrAttr_ICEX_OTH)
35769+
35770+#define AuBrRAttr_WH (1 << 12) /* whiteout-able */
076b876e
AM
35771+#define AuBrRAttr_Mask AuBrRAttr_WH
35772+
c1595e42
JR
35773+#define AuBrWAttr_NoLinkWH (1 << 13) /* un-hardlinkable whiteouts */
35774+#define AuBrWAttr_MOO (1 << 14) /* move-up on open */
076b876e
AM
35775+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
35776+
35777+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | AuBrWAttr_MOO)
35778+
c1595e42 35779+/* #warning test userspace */
076b876e
AM
35780+#ifdef __KERNEL__
35781+#ifndef CONFIG_AUFS_FHSM
35782+#undef AuBrAttr_FHSM
35783+#define AuBrAttr_FHSM 0
35784+#endif
c1595e42
JR
35785+#ifndef CONFIG_AUFS_XATTR
35786+#undef AuBrAttr_ICEX
35787+#define AuBrAttr_ICEX 0
35788+#undef AuBrAttr_ICEX_SEC
35789+#define AuBrAttr_ICEX_SEC 0
35790+#undef AuBrAttr_ICEX_SYS
35791+#define AuBrAttr_ICEX_SYS 0
35792+#undef AuBrAttr_ICEX_TR
35793+#define AuBrAttr_ICEX_TR 0
35794+#undef AuBrAttr_ICEX_USR
35795+#define AuBrAttr_ICEX_USR 0
35796+#undef AuBrAttr_ICEX_OTH
35797+#define AuBrAttr_ICEX_OTH 0
35798+#endif
076b876e
AM
35799+#endif
35800+
35801+/* the longest combination */
c1595e42
JR
35802+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR don't affect here */
35803+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \
35804+ "+" AUFS_BRATTR_COO_REG \
35805+ "+" AUFS_BRATTR_FHSM \
35806+ "+" AUFS_BRATTR_UNPIN \
7e9cd9fe
AM
35807+ "+" AUFS_BRATTR_ICEX_SEC \
35808+ "+" AUFS_BRATTR_ICEX_SYS \
35809+ "+" AUFS_BRATTR_ICEX_USR \
35810+ "+" AUFS_BRATTR_ICEX_OTH \
076b876e
AM
35811+ "+" AUFS_BRWATTR_NLWH)
35812+
35813+typedef struct {
35814+ char a[AuBrPermStrSz];
35815+} au_br_perm_str_t;
35816+
35817+static inline int au_br_writable(int brperm)
35818+{
35819+ return brperm & AuBrPerm_RW;
35820+}
35821+
35822+static inline int au_br_whable(int brperm)
35823+{
35824+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
35825+}
35826+
35827+static inline int au_br_wh_linkable(int brperm)
35828+{
35829+ return !(brperm & AuBrWAttr_NoLinkWH);
35830+}
35831+
35832+static inline int au_br_cmoo(int brperm)
35833+{
35834+ return brperm & AuBrAttr_CMOO_Mask;
35835+}
35836+
35837+static inline int au_br_fhsm(int brperm)
35838+{
35839+ return brperm & AuBrAttr_FHSM;
35840+}
7f207e10
AM
35841+
35842+/* ---------------------------------------------------------------------- */
35843+
35844+/* ioctl */
35845+enum {
35846+ /* readdir in userspace */
35847+ AuCtl_RDU,
35848+ AuCtl_RDU_INO,
35849+
076b876e
AM
35850+ AuCtl_WBR_FD, /* pathconf wrapper */
35851+ AuCtl_IBUSY, /* busy inode */
35852+ AuCtl_MVDOWN, /* move-down */
35853+ AuCtl_BR, /* info about branches */
35854+ AuCtl_FHSM_FD /* connection for fhsm */
7f207e10
AM
35855+};
35856+
35857+/* borrowed from linux/include/linux/kernel.h */
35858+#ifndef ALIGN
35859+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
35860+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
35861+#endif
35862+
35863+/* borrowed from linux/include/linux/compiler-gcc3.h */
35864+#ifndef __aligned
35865+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
35866+#endif
35867+
35868+#ifdef __KERNEL__
35869+#ifndef __packed
7f207e10
AM
35870+#define __packed __attribute__((packed))
35871+#endif
53392da6 35872+#endif
7f207e10
AM
35873+
35874+struct au_rdu_cookie {
9dbd164d
AM
35875+ uint64_t h_pos;
35876+ int16_t bindex;
35877+ uint8_t flags;
35878+ uint8_t pad;
35879+ uint32_t generation;
7f207e10
AM
35880+} __aligned(8);
35881+
35882+struct au_rdu_ent {
9dbd164d
AM
35883+ uint64_t ino;
35884+ int16_t bindex;
35885+ uint8_t type;
35886+ uint8_t nlen;
35887+ uint8_t wh;
7f207e10
AM
35888+ char name[0];
35889+} __aligned(8);
35890+
35891+static inline int au_rdu_len(int nlen)
35892+{
35893+ /* include the terminating NUL */
35894+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
9dbd164d 35895+ sizeof(uint64_t));
7f207e10
AM
35896+}
35897+
35898+union au_rdu_ent_ul {
35899+ struct au_rdu_ent __user *e;
9dbd164d 35900+ uint64_t ul;
7f207e10
AM
35901+};
35902+
35903+enum {
35904+ AufsCtlRduV_SZ,
35905+ AufsCtlRduV_End
35906+};
35907+
35908+struct aufs_rdu {
35909+ /* input */
35910+ union {
9dbd164d
AM
35911+ uint64_t sz; /* AuCtl_RDU */
35912+ uint64_t nent; /* AuCtl_RDU_INO */
7f207e10
AM
35913+ };
35914+ union au_rdu_ent_ul ent;
9dbd164d 35915+ uint16_t verify[AufsCtlRduV_End];
7f207e10
AM
35916+
35917+ /* input/output */
9dbd164d 35918+ uint32_t blk;
7f207e10
AM
35919+
35920+ /* output */
35921+ union au_rdu_ent_ul tail;
35922+ /* number of entries which were added in a single call */
9dbd164d
AM
35923+ uint64_t rent;
35924+ uint8_t full;
35925+ uint8_t shwh;
7f207e10
AM
35926+
35927+ struct au_rdu_cookie cookie;
35928+} __aligned(8);
35929+
1e00d052
AM
35930+/* ---------------------------------------------------------------------- */
35931+
35932+struct aufs_wbr_fd {
9dbd164d
AM
35933+ uint32_t oflags;
35934+ int16_t brid;
1e00d052
AM
35935+} __aligned(8);
35936+
35937+/* ---------------------------------------------------------------------- */
35938+
027c5e7a 35939+struct aufs_ibusy {
9dbd164d
AM
35940+ uint64_t ino, h_ino;
35941+ int16_t bindex;
027c5e7a
AM
35942+} __aligned(8);
35943+
1e00d052
AM
35944+/* ---------------------------------------------------------------------- */
35945+
392086de
AM
35946+/* error code for move-down */
35947+/* the actual message strings are implemented in aufs-util.git */
35948+enum {
35949+ EAU_MVDOWN_OPAQUE = 1,
35950+ EAU_MVDOWN_WHITEOUT,
35951+ EAU_MVDOWN_UPPER,
35952+ EAU_MVDOWN_BOTTOM,
35953+ EAU_MVDOWN_NOUPPER,
35954+ EAU_MVDOWN_NOLOWERBR,
35955+ EAU_Last
35956+};
35957+
c2b27bf2 35958+/* flags for move-down */
392086de
AM
35959+#define AUFS_MVDOWN_DMSG 1
35960+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */
35961+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */
35962+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */
35963+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */
35964+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */
35965+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */
35966+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */
35967+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */
076b876e
AM
35968+#define AUFS_MVDOWN_FHSM_LOWER (1 << 9) /* find fhsm attr for lower */
35969+#define AUFS_MVDOWN_STFS (1 << 10) /* req. stfs */
35970+#define AUFS_MVDOWN_STFS_FAILED (1 << 11) /* output: stfs is unusable */
35971+#define AUFS_MVDOWN_BOTTOM (1 << 12) /* output: no more lowers */
c2b27bf2 35972+
076b876e 35973+/* index for move-down */
392086de
AM
35974+enum {
35975+ AUFS_MVDOWN_UPPER,
35976+ AUFS_MVDOWN_LOWER,
35977+ AUFS_MVDOWN_NARRAY
35978+};
35979+
076b876e
AM
35980+/*
35981+ * additional info of move-down
35982+ * number of free blocks and inodes.
35983+ * subset of struct kstatfs, but smaller and always 64bit.
35984+ */
35985+struct aufs_stfs {
35986+ uint64_t f_blocks;
35987+ uint64_t f_bavail;
35988+ uint64_t f_files;
35989+ uint64_t f_ffree;
35990+};
35991+
35992+struct aufs_stbr {
35993+ int16_t brid; /* optional input */
35994+ int16_t bindex; /* output */
35995+ struct aufs_stfs stfs; /* output when AUFS_MVDOWN_STFS set */
35996+} __aligned(8);
35997+
c2b27bf2 35998+struct aufs_mvdown {
076b876e
AM
35999+ uint32_t flags; /* input/output */
36000+ struct aufs_stbr stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
36001+ int8_t au_errno; /* output */
36002+} __aligned(8);
36003+
36004+/* ---------------------------------------------------------------------- */
36005+
36006+union aufs_brinfo {
36007+ /* PATH_MAX may differ between kernel-space and user-space */
36008+ char _spacer[4096];
392086de 36009+ struct {
076b876e
AM
36010+ int16_t id;
36011+ int perm;
36012+ char path[0];
36013+ };
c2b27bf2
AM
36014+} __aligned(8);
36015+
36016+/* ---------------------------------------------------------------------- */
36017+
7f207e10
AM
36018+#define AuCtlType 'A'
36019+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
36020+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1e00d052
AM
36021+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
36022+ struct aufs_wbr_fd)
027c5e7a 36023+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
392086de
AM
36024+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \
36025+ struct aufs_mvdown)
076b876e
AM
36026+#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
36027+#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int)
7f207e10
AM
36028+
36029+#endif /* __AUFS_TYPE_H__ */
e2f27e51 36030aufs4.x-rcN loopback patch
5527c038
JR
36031
36032diff --git a/drivers/block/loop.c b/drivers/block/loop.c
e2f27e51 36033index 005e292..e9e517d 100644
5527c038
JR
36034--- a/drivers/block/loop.c
36035+++ b/drivers/block/loop.c
e2f27e51 36036@@ -551,7 +551,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
5527c038
JR
36037 }
36038
36039 struct switch_request {
36040- struct file *file;
36041+ struct file *file, *virt_file;
36042 struct completion wait;
36043 };
36044
e2f27e51 36045@@ -577,6 +577,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
5527c038
JR
36046 mapping = file->f_mapping;
36047 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
36048 lo->lo_backing_file = file;
36049+ lo->lo_backing_virt_file = p->virt_file;
36050 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
36051 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
36052 lo->old_gfp_mask = mapping_gfp_mask(mapping);
e2f27e51 36053@@ -589,11 +590,13 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
5527c038
JR
36054 * First it needs to flush existing IO, it does this by sending a magic
36055 * BIO down the pipe. The completion of this BIO does the actual switch.
36056 */
36057-static int loop_switch(struct loop_device *lo, struct file *file)
36058+static int loop_switch(struct loop_device *lo, struct file *file,
36059+ struct file *virt_file)
36060 {
36061 struct switch_request w;
36062
36063 w.file = file;
36064+ w.virt_file = virt_file;
36065
36066 /* freeze queue and wait for completion of scheduled requests */
36067 blk_mq_freeze_queue(lo->lo_queue);
e2f27e51 36068@@ -612,7 +615,16 @@ static int loop_switch(struct loop_device *lo, struct file *file)
5527c038
JR
36069 */
36070 static int loop_flush(struct loop_device *lo)
36071 {
36072- return loop_switch(lo, NULL);
36073+ return loop_switch(lo, NULL, NULL);
36074+}
36075+
36076+static struct file *loop_real_file(struct file *file)
36077+{
36078+ struct file *f = NULL;
36079+
36080+ if (file->f_path.dentry->d_sb->s_op->real_loop)
36081+ f = file->f_path.dentry->d_sb->s_op->real_loop(file);
36082+ return f;
36083 }
36084
c2c0f25c 36085 static void loop_reread_partitions(struct loop_device *lo,
e2f27e51 36086@@ -649,6 +661,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
36087 unsigned int arg)
36088 {
36089 struct file *file, *old_file;
36090+ struct file *f, *virt_file = NULL, *old_virt_file;
36091 struct inode *inode;
36092 int error;
36093
e2f27e51 36094@@ -665,9 +678,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
36095 file = fget(arg);
36096 if (!file)
36097 goto out;
36098+ f = loop_real_file(file);
36099+ if (f) {
36100+ virt_file = file;
36101+ file = f;
36102+ get_file(file);
36103+ }
36104
36105 inode = file->f_mapping->host;
36106 old_file = lo->lo_backing_file;
36107+ old_virt_file = lo->lo_backing_virt_file;
36108
36109 error = -EINVAL;
36110
e2f27e51 36111@@ -679,17 +699,21 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
36112 goto out_putf;
36113
36114 /* and ... switch */
36115- error = loop_switch(lo, file);
36116+ error = loop_switch(lo, file, virt_file);
36117 if (error)
36118 goto out_putf;
36119
36120 fput(old_file);
36121+ if (old_virt_file)
36122+ fput(old_virt_file);
36123 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
c2c0f25c 36124 loop_reread_partitions(lo, bdev);
5527c038
JR
36125 return 0;
36126
36127 out_putf:
36128 fput(file);
36129+ if (virt_file)
36130+ fput(virt_file);
36131 out:
36132 return error;
36133 }
e2f27e51 36134@@ -876,7 +900,7 @@ static int loop_prepare_queue(struct loop_device *lo)
5527c038
JR
36135 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
36136 struct block_device *bdev, unsigned int arg)
36137 {
36138- struct file *file, *f;
36139+ struct file *file, *f, *virt_file = NULL;
36140 struct inode *inode;
36141 struct address_space *mapping;
36142 unsigned lo_blocksize;
e2f27e51 36143@@ -891,6 +915,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
36144 file = fget(arg);
36145 if (!file)
36146 goto out;
36147+ f = loop_real_file(file);
36148+ if (f) {
36149+ virt_file = file;
36150+ file = f;
36151+ get_file(file);
36152+ }
36153
36154 error = -EBUSY;
36155 if (lo->lo_state != Lo_unbound)
e2f27e51 36156@@ -943,6 +973,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
36157 lo->lo_device = bdev;
36158 lo->lo_flags = lo_flags;
36159 lo->lo_backing_file = file;
36160+ lo->lo_backing_virt_file = virt_file;
36161 lo->transfer = NULL;
36162 lo->ioctl = NULL;
36163 lo->lo_sizelimit = 0;
e2f27e51 36164@@ -975,6 +1006,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
36165
36166 out_putf:
36167 fput(file);
36168+ if (virt_file)
36169+ fput(virt_file);
36170 out:
36171 /* This is safe: open() is still holding a reference. */
36172 module_put(THIS_MODULE);
e2f27e51 36173@@ -1021,6 +1054,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
5527c038
JR
36174 static int loop_clr_fd(struct loop_device *lo)
36175 {
36176 struct file *filp = lo->lo_backing_file;
36177+ struct file *virt_filp = lo->lo_backing_virt_file;
36178 gfp_t gfp = lo->old_gfp_mask;
36179 struct block_device *bdev = lo->lo_device;
36180
e2f27e51 36181@@ -1052,6 +1086,7 @@ static int loop_clr_fd(struct loop_device *lo)
5527c038
JR
36182 spin_lock_irq(&lo->lo_lock);
36183 lo->lo_state = Lo_rundown;
36184 lo->lo_backing_file = NULL;
36185+ lo->lo_backing_virt_file = NULL;
36186 spin_unlock_irq(&lo->lo_lock);
36187
36188 loop_release_xfer(lo);
e2f27e51 36189@@ -1096,6 +1131,8 @@ static int loop_clr_fd(struct loop_device *lo)
5527c038
JR
36190 * bd_mutex which is usually taken before lo_ctl_mutex.
36191 */
36192 fput(filp);
36193+ if (virt_filp)
36194+ fput(virt_filp);
36195 return 0;
36196 }
36197
36198diff --git a/drivers/block/loop.h b/drivers/block/loop.h
be52b249 36199index fb2237c..c3888c5 100644
5527c038
JR
36200--- a/drivers/block/loop.h
36201+++ b/drivers/block/loop.h
36202@@ -46,7 +46,7 @@ struct loop_device {
36203 int (*ioctl)(struct loop_device *, int cmd,
36204 unsigned long arg);
36205
36206- struct file * lo_backing_file;
36207+ struct file * lo_backing_file, *lo_backing_virt_file;
36208 struct block_device *lo_device;
36209 unsigned lo_blocksize;
36210 void *key_data;
36211diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
f0c0a007 36212index 00475fb..01390e1 100644
5527c038
JR
36213--- a/fs/aufs/f_op.c
36214+++ b/fs/aufs/f_op.c
f0c0a007 36215@@ -348,7 +348,7 @@ static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
5527c038
JR
36216 if (IS_ERR(h_file))
36217 goto out;
36218
36219- if (au_test_loopback_kthread()) {
36220+ if (0 && au_test_loopback_kthread()) {
36221 au_warn_loopback(h_file->f_path.dentry->d_sb);
36222 if (file->f_mapping != h_file->f_mapping) {
36223 file->f_mapping = h_file->f_mapping;
36224diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
e2f27e51 36225index c3ca50f..a3dbdaf 100644
5527c038
JR
36226--- a/fs/aufs/loop.c
36227+++ b/fs/aufs/loop.c
e2f27e51 36228@@ -132,3 +132,19 @@ void au_loopback_fin(void)
79b8bda9 36229 symbol_put(loop_backing_file);
f0c0a007 36230 au_delayed_kfree(au_warn_loopback_array);
5527c038
JR
36231 }
36232+
36233+/* ---------------------------------------------------------------------- */
36234+
36235+/* support the loopback block device inside aufs */
36236+
36237+struct file *aufs_real_loop(struct file *file)
36238+{
36239+ struct file *f;
36240+
36241+ BUG_ON(!au_test_aufs(file->f_path.dentry->d_sb));
36242+ fi_read_lock(file);
36243+ f = au_hf_top(file);
36244+ fi_read_unlock(file);
36245+ AuDebugOn(!f);
36246+ return f;
36247+}
36248diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
8cdd5066 36249index 48bf070..66afec7 100644
5527c038
JR
36250--- a/fs/aufs/loop.h
36251+++ b/fs/aufs/loop.h
36252@@ -25,7 +25,11 @@ void au_warn_loopback(struct super_block *h_sb);
36253
36254 int au_loopback_init(void);
36255 void au_loopback_fin(void);
36256+
36257+struct file *aufs_real_loop(struct file *file);
36258 #else
36259+AuStub(struct file *, loop_backing_file, return NULL)
36260+
36261 AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
36262 struct dentry *h_adding)
36263 AuStubInt0(au_test_loopback_kthread, void)
36264@@ -33,6 +37,8 @@ AuStubVoid(au_warn_loopback, struct super_block *h_sb)
36265
36266 AuStubInt0(au_loopback_init, void)
36267 AuStubVoid(au_loopback_fin, void)
36268+
36269+AuStub(struct file *, aufs_real_loop, return NULL, struct file *file)
36270 #endif /* BLK_DEV_LOOP */
36271
36272 #endif /* __KERNEL__ */
36273diff --git a/fs/aufs/super.c b/fs/aufs/super.c
f0c0a007 36274index 58a773c..75f212c 100644
5527c038
JR
36275--- a/fs/aufs/super.c
36276+++ b/fs/aufs/super.c
f0c0a007 36277@@ -831,7 +831,10 @@ static const struct super_operations aufs_sop = {
5527c038
JR
36278 .statfs = aufs_statfs,
36279 .put_super = aufs_put_super,
36280 .sync_fs = aufs_sync_fs,
36281- .remount_fs = aufs_remount_fs
36282+ .remount_fs = aufs_remount_fs,
36283+#ifdef CONFIG_AUFS_BDEV_LOOP
36284+ .real_loop = aufs_real_loop
36285+#endif
36286 };
36287
36288 /* ---------------------------------------------------------------------- */
36289diff --git a/include/linux/fs.h b/include/linux/fs.h
e2f27e51 36290index fc80663..0c4c6fa 100644
5527c038
JR
36291--- a/include/linux/fs.h
36292+++ b/include/linux/fs.h
e2f27e51 36293@@ -1814,6 +1814,10 @@ struct super_operations {
5527c038
JR
36294 struct shrink_control *);
36295 long (*free_cached_objects)(struct super_block *,
36296 struct shrink_control *);
36297+#if defined(CONFIG_BLK_DEV_LOOP) || defined(CONFIG_BLK_DEV_LOOP_MODULE)
36298+ /* and aufs */
36299+ struct file *(*real_loop)(struct file *);
36300+#endif
36301 };
36302
36303 /*
This page took 8.361524 seconds and 4 git commands to generate.