]> git.pld-linux.org Git - packages/kernel.git/blame_incremental - kernel-aufs4.patch
- up to 4.9.84
[packages/kernel.git] / kernel-aufs4.patch
... / ...
CommitLineData
1aufs4.9 kbuild patch
2
3diff --git a/fs/Kconfig b/fs/Kconfig
4index 4bd03a2..620e01b 100644
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
7@@ -249,6 +249,7 @@ source "fs/pstore/Kconfig"
8 source "fs/sysv/Kconfig"
9 source "fs/ufs/Kconfig"
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
16index ed2b632..aa6d14b 100644
17--- a/fs/Makefile
18+++ b/fs/Makefile
19@@ -129,3 +129,4 @@ obj-y += exofs/ # Multiple modules
20 obj-$(CONFIG_CEPH_FS) += ceph/
21 obj-$(CONFIG_PSTORE) += pstore/
22 obj-$(CONFIG_EFIVAR_FS) += efivarfs/
23+obj-$(CONFIG_AUFS_FS) += aufs/
24diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
25index cd2be1c..78f3c68 100644
26--- a/include/uapi/linux/Kbuild
27+++ b/include/uapi/linux/Kbuild
28@@ -59,6 +59,7 @@ header-y += atmsvc.h
29 header-y += atm_tcp.h
30 header-y += atm_zatm.h
31 header-y += audit.h
32+header-y += aufs_type.h
33 header-y += auto_fs4.h
34 header-y += auto_fs.h
35 header-y += auxvec.h
36aufs4.9 base patch
37
38diff --git a/MAINTAINERS b/MAINTAINERS
39index 63cefa6..d78b954 100644
40--- a/MAINTAINERS
41+++ b/MAINTAINERS
42@@ -2293,6 +2293,19 @@ F: include/linux/audit.h
43 F: include/uapi/linux/audit.h
44 F: kernel/audit*
45
46+AUFS (advanced multi layered unification filesystem) FILESYSTEM
47+M: "J. R. Okajima" <hooanon05g@gmail.com>
48+L: linux-unionfs@vger.kernel.org
49+L: aufs-users@lists.sourceforge.net (members only)
50+W: http://aufs.sourceforge.net
51+T: git://github.com/sfjro/aufs4-linux.git
52+S: Supported
53+F: Documentation/filesystems/aufs/
54+F: Documentation/ABI/testing/debugfs-aufs
55+F: Documentation/ABI/testing/sysfs-aufs
56+F: fs/aufs/
57+F: include/uapi/linux/aufs_type.h
58+
59 AUXILIARY DISPLAY DRIVERS
60 M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
61 W: http://miguelojeda.es/auxdisplay.htm
62diff --git a/drivers/block/loop.c b/drivers/block/loop.c
63index fa1b7a9..6ee9235 100644
64--- a/drivers/block/loop.c
65+++ b/drivers/block/loop.c
66@@ -701,6 +701,24 @@ static inline int is_loop_device(struct file *file)
67 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
68 }
69
70+/*
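71+ * for AUFS: return the backing file of the loop device @sb sits on, or NULL
72+ * when @sb is not on a loop device. No get/put is done for the file.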
73+ */
74+struct file *loop_backing_file(struct super_block *sb)
75+{
76+ struct file *ret;
77+ struct loop_device *l;
78+
79+ ret = NULL;
80+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
81+ l = sb->s_bdev->bd_disk->private_data;
82+ ret = l->lo_backing_file;
83+ }
84+ return ret;
85+}
86+EXPORT_SYMBOL_GPL(loop_backing_file);
87+
88 /* loop sysfs attributes */
89
90 static ssize_t loop_attr_show(struct device *dev, char *page,
91diff --git a/fs/dcache.c b/fs/dcache.c
92index 5c7cc95..df0268c 100644
93--- a/fs/dcache.c
94+++ b/fs/dcache.c
95@@ -1164,7 +1164,7 @@ enum d_walk_ret {
96 *
97 * The @enter() and @finish() callbacks are called with d_lock held.
98 */
99-static void d_walk(struct dentry *parent, void *data,
100+void d_walk(struct dentry *parent, void *data,
101 enum d_walk_ret (*enter)(void *, struct dentry *),
102 void (*finish)(void *))
103 {
104diff --git a/fs/fcntl.c b/fs/fcntl.c
105index 350a2c8..6f42279 100644
106--- a/fs/fcntl.c
107+++ b/fs/fcntl.c
108@@ -29,7 +29,7 @@
109
110 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
111
112-static int setfl(int fd, struct file * filp, unsigned long arg)
113+int setfl(int fd, struct file * filp, unsigned long arg)
114 {
115 struct inode * inode = file_inode(filp);
116 int error = 0;
117@@ -60,6 +60,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
118
119 if (filp->f_op->check_flags)
120 error = filp->f_op->check_flags(arg);
121+ if (!error && filp->f_op->setfl)
122+ error = filp->f_op->setfl(filp, arg);
123 if (error)
124 return error;
125
126diff --git a/fs/inode.c b/fs/inode.c
127index 88110fd..9a9ba3a 100644
128--- a/fs/inode.c
129+++ b/fs/inode.c
130@@ -1642,7 +1642,7 @@ int generic_update_time(struct inode *inode, struct timespec *time, int flags)
131 * This does the actual work of updating an inodes time or version. Must have
132 * had called mnt_want_write() before calling this.
133 */
134-static int update_time(struct inode *inode, struct timespec *time, int flags)
135+int update_time(struct inode *inode, struct timespec *time, int flags)
136 {
137 int (*update_time)(struct inode *, struct timespec *, int);
138
139diff --git a/fs/read_write.c b/fs/read_write.c
140index 190e0d36..4052813 100644
141--- a/fs/read_write.c
142+++ b/fs/read_write.c
143@@ -515,6 +515,28 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
144 }
145 EXPORT_SYMBOL(__vfs_write);
146
147+vfs_readf_t vfs_readf(struct file *file)
148+{
149+ const struct file_operations *fop = file->f_op;
150+
151+ if (fop->read)
152+ return fop->read;
153+ if (fop->read_iter)
154+ return new_sync_read;
155+ return ERR_PTR(-ENOSYS);
156+}
157+
158+vfs_writef_t vfs_writef(struct file *file)
159+{
160+ const struct file_operations *fop = file->f_op;
161+
162+ if (fop->write)
163+ return fop->write;
164+ if (fop->write_iter)
165+ return new_sync_write;
166+ return ERR_PTR(-ENOSYS);
167+}
168+
169 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
170 {
171 mm_segment_t old_fs;
172diff --git a/fs/splice.c b/fs/splice.c
173index 5a7750b..28160a7 100644
174--- a/fs/splice.c
175+++ b/fs/splice.c
176@@ -855,8 +855,8 @@ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
177 /*
178 * Attempt to initiate a splice from pipe to file.
179 */
180-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
181- loff_t *ppos, size_t len, unsigned int flags)
182+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
183+ loff_t *ppos, size_t len, unsigned int flags)
184 {
185 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
186 loff_t *, size_t, unsigned int);
187@@ -872,9 +872,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
188 /*
189 * Attempt to initiate a splice from a file to a pipe.
190 */
191-static long do_splice_to(struct file *in, loff_t *ppos,
192- struct pipe_inode_info *pipe, size_t len,
193- unsigned int flags)
194+long do_splice_to(struct file *in, loff_t *ppos,
195+ struct pipe_inode_info *pipe, size_t len,
196+ unsigned int flags)
197 {
198 ssize_t (*splice_read)(struct file *, loff_t *,
199 struct pipe_inode_info *, size_t, unsigned int);
200diff --git a/include/linux/file.h b/include/linux/file.h
201index 7444f5f..bdac0be 100644
202--- a/include/linux/file.h
203+++ b/include/linux/file.h
204@@ -19,6 +19,7 @@
205 struct path;
206 extern struct file *alloc_file(struct path *, fmode_t mode,
207 const struct file_operations *fop);
208+extern struct file *get_empty_filp(void);
209
210 static inline void fput_light(struct file *file, int fput_needed)
211 {
212diff --git a/include/linux/fs.h b/include/linux/fs.h
213index dc0478c..27c05e7 100644
214--- a/include/linux/fs.h
215+++ b/include/linux/fs.h
216@@ -1291,6 +1291,7 @@ struct fasync_struct {
217 /* can be called from interrupts */
218 extern void kill_fasync(struct fasync_struct **, int, int);
219
220+extern int setfl(int fd, struct file * filp, unsigned long arg);
221 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
222 extern void f_setown(struct file *filp, unsigned long arg, int force);
223 extern void f_delown(struct file *filp);
224@@ -1715,6 +1716,7 @@ struct file_operations {
225 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
226 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
227 int (*check_flags)(int);
228+ int (*setfl)(struct file *, unsigned long);
229 int (*flock) (struct file *, int, struct file_lock *);
230 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
231 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
232@@ -1768,6 +1770,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
233 struct iovec *fast_pointer,
234 struct iovec **ret_pointer);
235
236+typedef ssize_t (*vfs_readf_t)(struct file *, char __user *, size_t, loff_t *);
237+typedef ssize_t (*vfs_writef_t)(struct file *, const char __user *, size_t,
238+ loff_t *);
239+vfs_readf_t vfs_readf(struct file *file);
240+vfs_writef_t vfs_writef(struct file *file);
241+
242 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
243 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
244 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
245@@ -2140,6 +2148,7 @@ extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
246 extern void ihold(struct inode * inode);
247 extern void iput(struct inode *);
248 extern int generic_update_time(struct inode *, struct timespec *, int);
249+extern int update_time(struct inode *, struct timespec *, int);
250
251 /* /sys/fs */
252 extern struct kobject *fs_kobj;
253diff --git a/include/linux/splice.h b/include/linux/splice.h
254index 00a2116..1f0a4a2 100644
255--- a/include/linux/splice.h
256+++ b/include/linux/splice.h
257@@ -86,4 +86,10 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
258
259 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
260 extern const struct pipe_buf_operations default_pipe_buf_ops;
261+
262+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
263+ loff_t *ppos, size_t len, unsigned int flags);
264+extern long do_splice_to(struct file *in, loff_t *ppos,
265+ struct pipe_inode_info *pipe, size_t len,
266+ unsigned int flags);
267 #endif
268aufs4.9 mmap patch
269
270diff --git a/fs/proc/base.c b/fs/proc/base.c
271index ca651ac..0e8551a 100644
272--- a/fs/proc/base.c
273+++ b/fs/proc/base.c
274@@ -1953,7 +1953,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
275 down_read(&mm->mmap_sem);
276 vma = find_exact_vma(mm, vm_start, vm_end);
277 if (vma && vma->vm_file) {
278- *path = vma->vm_file->f_path;
279+ *path = vma_pr_or_file(vma)->f_path;
280 path_get(path);
281 rc = 0;
282 }
283diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
284index f8595e8..cb8eda0 100644
285--- a/fs/proc/nommu.c
286+++ b/fs/proc/nommu.c
287@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
288 file = region->vm_file;
289
290 if (file) {
291- struct inode *inode = file_inode(region->vm_file);
292+ struct inode *inode;
293+
294+ file = vmr_pr_or_file(region);
295+ inode = file_inode(file);
296 dev = inode->i_sb->s_dev;
297 ino = inode->i_ino;
298 }
299diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
300index 35b92d8..5b981db 100644
301--- a/fs/proc/task_mmu.c
302+++ b/fs/proc/task_mmu.c
303@@ -291,7 +291,10 @@ static int is_stack(struct proc_maps_private *priv,
304 const char *name = NULL;
305
306 if (file) {
307- struct inode *inode = file_inode(vma->vm_file);
308+ struct inode *inode;
309+
310+ file = vma_pr_or_file(vma);
311+ inode = file_inode(file);
312 dev = inode->i_sb->s_dev;
313 ino = inode->i_ino;
314 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
315@@ -1627,7 +1630,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
316 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
317 struct vm_area_struct *vma = v;
318 struct numa_maps *md = &numa_priv->md;
319- struct file *file = vma->vm_file;
320+ struct file *file = vma_pr_or_file(vma);
321 struct mm_struct *mm = vma->vm_mm;
322 struct mm_walk walk = {
323 .hugetlb_entry = gather_hugetlb_stats,
324diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
325index 3717562..6a328f1 100644
326--- a/fs/proc/task_nommu.c
327+++ b/fs/proc/task_nommu.c
328@@ -155,7 +155,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
329 file = vma->vm_file;
330
331 if (file) {
332- struct inode *inode = file_inode(vma->vm_file);
333+ struct inode *inode;
334+
335+ file = vma_pr_or_file(vma);
336+ inode = file_inode(file);
337 dev = inode->i_sb->s_dev;
338 ino = inode->i_ino;
339 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
340diff --git a/include/linux/mm.h b/include/linux/mm.h
341index a92c8d7..1d83a2a 100644
342--- a/include/linux/mm.h
343+++ b/include/linux/mm.h
344@@ -1266,6 +1266,28 @@ static inline int fixup_user_fault(struct task_struct *tsk,
345 }
346 #endif
347
348+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
349+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
350+ int);
351+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
352+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
353+
354+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \
355+ __LINE__)
356+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \
357+ __LINE__)
358+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__)
359+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__)
360+
361+#ifndef CONFIG_MMU
362+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
363+extern void vmr_do_fput(struct vm_region *, const char[], int);
364+
365+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \
366+ __LINE__)
367+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__)
368+#endif /* !CONFIG_MMU */
369+
370 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
371 unsigned int gup_flags);
372 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
373diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
374index 4a8aced..badd16b 100644
375--- a/include/linux/mm_types.h
376+++ b/include/linux/mm_types.h
377@@ -275,6 +275,7 @@ struct vm_region {
378 unsigned long vm_top; /* region allocated to here */
379 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
380 struct file *vm_file; /* the backing file or NULL */
381+ struct file *vm_prfile; /* the virtual backing file or NULL */
382
383 int vm_usage; /* region usage count (access under nommu_region_sem) */
384 bool vm_icache_flushed : 1; /* true if the icache has been flushed for
385@@ -349,6 +350,7 @@ struct vm_area_struct {
386 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
387 units */
388 struct file * vm_file; /* File we map to (can be NULL). */
389+ struct file *vm_prfile; /* shadow of vm_file */
390 void * vm_private_data; /* was vm_pte (shared mem) */
391
392 #ifndef CONFIG_MMU
393diff --git a/kernel/fork.c b/kernel/fork.c
394index 997ac1d..4d0131b 100644
395--- a/kernel/fork.c
396+++ b/kernel/fork.c
397@@ -624,7 +624,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
398 struct inode *inode = file_inode(file);
399 struct address_space *mapping = file->f_mapping;
400
401- get_file(file);
402+ vma_get_file(tmp);
403 if (tmp->vm_flags & VM_DENYWRITE)
404 atomic_dec(&inode->i_writecount);
405 i_mmap_lock_write(mapping);
406diff --git a/mm/Makefile b/mm/Makefile
407index 295bd7a..14fa1c8 100644
408--- a/mm/Makefile
409+++ b/mm/Makefile
410@@ -37,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
411 mm_init.o mmu_context.o percpu.o slab_common.o \
412 compaction.o vmacache.o \
413 interval_tree.o list_lru.o workingset.o \
414- debug.o $(mmu-y)
415+ prfile.o debug.o $(mmu-y)
416
417 obj-y += init-mm.o
418
419diff --git a/mm/filemap.c b/mm/filemap.c
420index 50b52fe..9e607f9 100644
421--- a/mm/filemap.c
422+++ b/mm/filemap.c
423@@ -2304,7 +2304,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
424 int ret = VM_FAULT_LOCKED;
425
426 sb_start_pagefault(inode->i_sb);
427- file_update_time(vma->vm_file);
428+ vma_file_update_time(vma);
429 lock_page(page);
430 if (page->mapping != inode->i_mapping) {
431 unlock_page(page);
432diff --git a/mm/memory.c b/mm/memory.c
433index e18c57b..7be4a39 100644
434--- a/mm/memory.c
435+++ b/mm/memory.c
436@@ -2117,7 +2117,7 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
437 }
438
439 if (!page_mkwrite)
440- file_update_time(vma->vm_file);
441+ vma_file_update_time(vma);
442 }
443
444 return VM_FAULT_WRITE;
445diff --git a/mm/mmap.c b/mm/mmap.c
446index 1af87c1..95b0ff4 100644
447--- a/mm/mmap.c
448+++ b/mm/mmap.c
449@@ -170,7 +170,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
450 if (vma->vm_ops && vma->vm_ops->close)
451 vma->vm_ops->close(vma);
452 if (vma->vm_file)
453- fput(vma->vm_file);
454+ vma_fput(vma);
455 mpol_put(vma_policy(vma));
456 kmem_cache_free(vm_area_cachep, vma);
457 return next;
458@@ -879,7 +879,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
459 if (remove_next) {
460 if (file) {
461 uprobe_munmap(next, next->vm_start, next->vm_end);
462- fput(file);
463+ vma_fput(vma);
464 }
465 if (next->anon_vma)
466 anon_vma_merge(vma, next);
467@@ -1727,8 +1727,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
468 return addr;
469
470 unmap_and_free_vma:
471+ vma_fput(vma);
472 vma->vm_file = NULL;
473- fput(file);
474
475 /* Undo any partial mapping done by a device driver. */
476 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
477@@ -2533,7 +2533,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
478 goto out_free_mpol;
479
480 if (new->vm_file)
481- get_file(new->vm_file);
482+ vma_get_file(new);
483
484 if (new->vm_ops && new->vm_ops->open)
485 new->vm_ops->open(new);
486@@ -2552,7 +2552,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
487 if (new->vm_ops && new->vm_ops->close)
488 new->vm_ops->close(new);
489 if (new->vm_file)
490- fput(new->vm_file);
491+ vma_fput(new);
492 unlink_anon_vmas(new);
493 out_free_mpol:
494 mpol_put(vma_policy(new));
495@@ -2703,7 +2703,7 @@ int vm_munmap(unsigned long start, size_t len)
496 struct vm_area_struct *vma;
497 unsigned long populate = 0;
498 unsigned long ret = -EINVAL;
499- struct file *file;
500+ struct file *file, *prfile;
501
502 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
503 current->comm, current->pid);
504@@ -2778,10 +2778,27 @@ int vm_munmap(unsigned long start, size_t len)
505 }
506 }
507
508- file = get_file(vma->vm_file);
509+ vma_get_file(vma);
510+ file = vma->vm_file;
511+ prfile = vma->vm_prfile;
512 ret = do_mmap_pgoff(vma->vm_file, start, size,
513 prot, flags, pgoff, &populate);
514+ if (!IS_ERR_VALUE(ret) && file && prfile) {
515+ struct vm_area_struct *new_vma;
516+
517+ new_vma = find_vma(mm, ret);
518+ if (!new_vma->vm_prfile)
519+ new_vma->vm_prfile = prfile;
520+ if (new_vma != vma)
521+ get_file(prfile);
522+ }
523+ /*
524+ * two fput()s instead of vma_fput(vma),
525+ * because vma may not be available anymore.
526+ */
527 fput(file);
528+ if (prfile)
529+ fput(prfile);
530 out:
531 up_write(&mm->mmap_sem);
532 if (populate)
533@@ -3056,7 +3073,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
534 if (anon_vma_clone(new_vma, vma))
535 goto out_free_mempol;
536 if (new_vma->vm_file)
537- get_file(new_vma->vm_file);
538+ vma_get_file(new_vma);
539 if (new_vma->vm_ops && new_vma->vm_ops->open)
540 new_vma->vm_ops->open(new_vma);
541 vma_link(mm, new_vma, prev, rb_link, rb_parent);
542diff --git a/mm/nommu.c b/mm/nommu.c
543index 8b8faaf..5d26ed94 100644
544--- a/mm/nommu.c
545+++ b/mm/nommu.c
546@@ -636,7 +636,7 @@ static void __put_nommu_region(struct vm_region *region)
547 up_write(&nommu_region_sem);
548
549 if (region->vm_file)
550- fput(region->vm_file);
551+ vmr_fput(region);
552
553 /* IO memory and memory shared directly out of the pagecache
554 * from ramfs/tmpfs mustn't be released here */
555@@ -794,7 +794,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
556 if (vma->vm_ops && vma->vm_ops->close)
557 vma->vm_ops->close(vma);
558 if (vma->vm_file)
559- fput(vma->vm_file);
560+ vma_fput(vma);
561 put_nommu_region(vma->vm_region);
562 kmem_cache_free(vm_area_cachep, vma);
563 }
564@@ -1320,7 +1320,7 @@ unsigned long do_mmap(struct file *file,
565 goto error_just_free;
566 }
567 }
568- fput(region->vm_file);
569+ vmr_fput(region);
570 kmem_cache_free(vm_region_jar, region);
571 region = pregion;
572 result = start;
573@@ -1395,10 +1395,10 @@ unsigned long do_mmap(struct file *file,
574 up_write(&nommu_region_sem);
575 error:
576 if (region->vm_file)
577- fput(region->vm_file);
578+ vmr_fput(region);
579 kmem_cache_free(vm_region_jar, region);
580 if (vma->vm_file)
581- fput(vma->vm_file);
582+ vma_fput(vma);
583 kmem_cache_free(vm_area_cachep, vma);
584 return ret;
585
586diff --git a/mm/prfile.c b/mm/prfile.c
587new file mode 100644
588index 0000000..b323b8a
589--- /dev/null
590+++ b/mm/prfile.c
591@@ -0,0 +1,86 @@
592+/*
593+ * Mainly for aufs, which mmap(2)s a different file and wants to print a
594+ * different path in /proc/PID/maps.
595+ * Call these functions via macros defined in linux/mm.h.
596+ *
597+ * See Documentation/filesystems/aufs/design/06mmap.txt
598+ *
599+ * Copyright (c) 2014 Junjro R. Okajima
600+ * Copyright (c) 2014 Ian Campbell
601+ */
602+
603+#include <linux/mm.h>
604+#include <linux/file.h>
605+#include <linux/fs.h>
606+
607+/* #define PRFILE_TRACE */
608+static inline void prfile_trace(struct file *f, struct file *pr,
609+ const char func[], int line, const char func2[])
610+{
611+#ifdef PRFILE_TRACE
612+ if (pr)
613+ pr_info("%s:%d: %s, %s\n", func, line, func2,
614+ f ? (char *)f->f_path.dentry->d_name.name : "(null)");
615+#endif
616+}
617+
618+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
619+ int line)
620+{
621+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
622+
623+ prfile_trace(f, pr, func, line, __func__);
624+ file_update_time(f);
625+ if (f && pr)
626+ file_update_time(pr);
627+}
628+
629+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
630+ int line)
631+{
632+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
633+
634+ prfile_trace(f, pr, func, line, __func__);
635+ return (f && pr) ? pr : f;
636+}
637+
638+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
639+{
640+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
641+
642+ prfile_trace(f, pr, func, line, __func__);
643+ get_file(f);
644+ if (f && pr)
645+ get_file(pr);
646+}
647+
648+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
649+{
650+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
651+
652+ prfile_trace(f, pr, func, line, __func__);
653+ fput(f);
654+ if (f && pr)
655+ fput(pr);
656+}
657+
658+#ifndef CONFIG_MMU
659+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
660+ int line)
661+{
662+ struct file *f = region->vm_file, *pr = region->vm_prfile;
663+
664+ prfile_trace(f, pr, func, line, __func__);
665+ return (f && pr) ? pr : f;
666+}
667+
668+void vmr_do_fput(struct vm_region *region, const char func[], int line)
669+{
670+ struct file *f = region->vm_file, *pr = region->vm_prfile;
671+
672+ prfile_trace(f, pr, func, line, __func__);
673+ fput(f);
674+ if (f && pr)
675+ fput(pr);
676+}
677+#endif /* !CONFIG_MMU */
678aufs4.9 standalone patch
679
680diff --git a/fs/dcache.c b/fs/dcache.c
681index df0268c..755fea1 100644
682--- a/fs/dcache.c
683+++ b/fs/dcache.c
684@@ -1272,6 +1272,7 @@ void d_walk(struct dentry *parent, void *data,
685 seq = 1;
686 goto again;
687 }
688+EXPORT_SYMBOL_GPL(d_walk);
689
690 /*
691 * Search for at least 1 mount point in the dentry's subdirs.
692@@ -2855,6 +2856,7 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
693
694 write_sequnlock(&rename_lock);
695 }
696+EXPORT_SYMBOL_GPL(d_exchange);
697
698 /**
699 * d_ancestor - search for an ancestor
700diff --git a/fs/exec.c b/fs/exec.c
701index 4e497b9..e27d323 100644
702--- a/fs/exec.c
703+++ b/fs/exec.c
704@@ -104,6 +104,7 @@ bool path_noexec(const struct path *path)
705 return (path->mnt->mnt_flags & MNT_NOEXEC) ||
706 (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
707 }
708+EXPORT_SYMBOL_GPL(path_noexec);
709
710 #ifdef CONFIG_USELIB
711 /*
712diff --git a/fs/fcntl.c b/fs/fcntl.c
713index 6f42279..04fd33c 100644
714--- a/fs/fcntl.c
715+++ b/fs/fcntl.c
716@@ -82,6 +82,7 @@ int setfl(int fd, struct file * filp, unsigned long arg)
717 out:
718 return error;
719 }
720+EXPORT_SYMBOL_GPL(setfl);
721
722 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
723 int force)
724diff --git a/fs/file_table.c b/fs/file_table.c
725index ad17e05..ae9f267 100644
726--- a/fs/file_table.c
727+++ b/fs/file_table.c
728@@ -147,6 +147,7 @@ struct file *get_empty_filp(void)
729 }
730 return ERR_PTR(-ENFILE);
731 }
732+EXPORT_SYMBOL_GPL(get_empty_filp);
733
734 /**
735 * alloc_file - allocate and initialize a 'struct file'
736@@ -258,6 +259,7 @@ void flush_delayed_fput(void)
737 {
738 delayed_fput(NULL);
739 }
740+EXPORT_SYMBOL_GPL(flush_delayed_fput);
741
742 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
743
744@@ -300,6 +302,7 @@ void __fput_sync(struct file *file)
745 }
746
747 EXPORT_SYMBOL(fput);
748+EXPORT_SYMBOL_GPL(__fput_sync);
749
750 void put_filp(struct file *file)
751 {
752@@ -308,6 +311,7 @@ void put_filp(struct file *file)
753 file_free(file);
754 }
755 }
756+EXPORT_SYMBOL_GPL(put_filp);
757
758 void __init files_init(void)
759 {
760diff --git a/fs/inode.c b/fs/inode.c
761index 9a9ba3a..a3a18d8 100644
762--- a/fs/inode.c
763+++ b/fs/inode.c
764@@ -1651,6 +1651,7 @@ int update_time(struct inode *inode, struct timespec *time, int flags)
765
766 return update_time(inode, time, flags);
767 }
768+EXPORT_SYMBOL_GPL(update_time);
769
770 /**
771 * touch_atime - update the access time
772diff --git a/fs/namespace.c b/fs/namespace.c
773index e6c234b..8d13f7b 100644
774--- a/fs/namespace.c
775+++ b/fs/namespace.c
776@@ -466,6 +466,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
777 mnt_dec_writers(real_mount(mnt));
778 preempt_enable();
779 }
780+EXPORT_SYMBOL_GPL(__mnt_drop_write);
781
782 /**
783 * mnt_drop_write - give up write access to a mount
784@@ -1823,6 +1824,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
785 }
786 return 0;
787 }
788+EXPORT_SYMBOL_GPL(iterate_mounts);
789
790 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
791 {
792diff --git a/fs/notify/group.c b/fs/notify/group.c
793index fbe3cbe..bdfc61e 100644
794--- a/fs/notify/group.c
795+++ b/fs/notify/group.c
796@@ -22,6 +22,7 @@
797 #include <linux/srcu.h>
798 #include <linux/rculist.h>
799 #include <linux/wait.h>
800+#include <linux/module.h>
801
802 #include <linux/fsnotify_backend.h>
803 #include "fsnotify.h"
804@@ -100,6 +101,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
805 {
806 atomic_inc(&group->refcnt);
807 }
808+EXPORT_SYMBOL_GPL(fsnotify_get_group);
809
810 /*
811 * Drop a reference to a group. Free it if it's through.
812@@ -109,6 +111,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
813 if (atomic_dec_and_test(&group->refcnt))
814 fsnotify_final_destroy_group(group);
815 }
816+EXPORT_SYMBOL_GPL(fsnotify_put_group);
817
818 /*
819 * Create a new fsnotify_group and hold a reference for the group returned.
820@@ -137,6 +140,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
821
822 return group;
823 }
824+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
825
826 int fsnotify_fasync(int fd, struct file *file, int on)
827 {
828diff --git a/fs/notify/mark.c b/fs/notify/mark.c
829index d3fea0b..5fc06ad 100644
830--- a/fs/notify/mark.c
831+++ b/fs/notify/mark.c
832@@ -113,6 +113,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
833 mark->free_mark(mark);
834 }
835 }
836+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
837
838 /* Calculate mask of events for a list of marks */
839 u32 fsnotify_recalc_mask(struct hlist_head *head)
840@@ -230,6 +231,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
841 mutex_unlock(&group->mark_mutex);
842 fsnotify_free_mark(mark);
843 }
844+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
845
846 void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
847 {
848@@ -415,6 +417,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
849
850 return ret;
851 }
852+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
853
854 int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
855 struct inode *inode, struct vfsmount *mnt, int allow_dups)
856@@ -533,6 +536,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
857 atomic_set(&mark->refcnt, 1);
858 mark->free_mark = free_mark;
859 }
860+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
861
862 /*
863 * Destroy all marks in destroy_list, waits for SRCU period to finish before
864diff --git a/fs/open.c b/fs/open.c
865index d3ed817..20d2494 100644
866--- a/fs/open.c
867+++ b/fs/open.c
868@@ -64,6 +64,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
869 inode_unlock(dentry->d_inode);
870 return ret;
871 }
872+EXPORT_SYMBOL_GPL(do_truncate);
873
874 long vfs_truncate(const struct path *path, loff_t length)
875 {
876@@ -695,6 +696,7 @@ int open_check_o_direct(struct file *f)
877 }
878 return 0;
879 }
880+EXPORT_SYMBOL_GPL(open_check_o_direct);
881
882 static int do_dentry_open(struct file *f,
883 struct inode *inode,
884diff --git a/fs/read_write.c b/fs/read_write.c
885index 4052813..7dfd732 100644
886--- a/fs/read_write.c
887+++ b/fs/read_write.c
888@@ -525,6 +525,7 @@ vfs_readf_t vfs_readf(struct file *file)
889 return new_sync_read;
890 return ERR_PTR(-ENOSYS);
891 }
892+EXPORT_SYMBOL_GPL(vfs_readf);
893
894 vfs_writef_t vfs_writef(struct file *file)
895 {
896@@ -536,6 +537,7 @@ vfs_writef_t vfs_writef(struct file *file)
897 return new_sync_write;
898 return ERR_PTR(-ENOSYS);
899 }
900+EXPORT_SYMBOL_GPL(vfs_writef);
901
902 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
903 {
904diff --git a/fs/splice.c b/fs/splice.c
905index 28160a7..98c1902 100644
906--- a/fs/splice.c
907+++ b/fs/splice.c
908@@ -868,6 +868,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
909
910 return splice_write(pipe, out, ppos, len, flags);
911 }
912+EXPORT_SYMBOL_GPL(do_splice_from);
913
914 /*
915 * Attempt to initiate a splice from a file to a pipe.
916@@ -897,6 +898,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
917
918 return splice_read(in, ppos, pipe, len, flags);
919 }
920+EXPORT_SYMBOL_GPL(do_splice_to);
921
922 /**
923 * splice_direct_to_actor - splices data directly between two non-pipes
924diff --git a/fs/xattr.c b/fs/xattr.c
925index 2d13b4e..41c2bcd 100644
926--- a/fs/xattr.c
927+++ b/fs/xattr.c
928@@ -296,6 +296,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
929 *xattr_value = value;
930 return error;
931 }
932+EXPORT_SYMBOL_GPL(vfs_getxattr_alloc);
933
934 ssize_t
935 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
936diff --git a/kernel/task_work.c b/kernel/task_work.c
937index d513051..e056d54 100644
938--- a/kernel/task_work.c
939+++ b/kernel/task_work.c
940@@ -119,3 +119,4 @@ void task_work_run(void)
941 } while (work);
942 }
943 }
944+EXPORT_SYMBOL_GPL(task_work_run);
945diff --git a/security/commoncap.c b/security/commoncap.c
946index 8df676f..6b5cc07 100644
947--- a/security/commoncap.c
948+++ b/security/commoncap.c
949@@ -1061,12 +1061,14 @@ int cap_mmap_addr(unsigned long addr)
950 }
951 return ret;
952 }
953+EXPORT_SYMBOL_GPL(cap_mmap_addr);
954
955 int cap_mmap_file(struct file *file, unsigned long reqprot,
956 unsigned long prot, unsigned long flags)
957 {
958 return 0;
959 }
960+EXPORT_SYMBOL_GPL(cap_mmap_file);
961
962 #ifdef CONFIG_SECURITY
963
964diff --git a/security/device_cgroup.c b/security/device_cgroup.c
965index 03c1652..f88c84b 100644
966--- a/security/device_cgroup.c
967+++ b/security/device_cgroup.c
968@@ -7,6 +7,7 @@
969 #include <linux/device_cgroup.h>
970 #include <linux/cgroup.h>
971 #include <linux/ctype.h>
972+#include <linux/export.h>
973 #include <linux/list.h>
974 #include <linux/uaccess.h>
975 #include <linux/seq_file.h>
976@@ -849,6 +850,7 @@ int __devcgroup_inode_permission(struct inode *inode, int mask)
977 return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
978 access);
979 }
980+EXPORT_SYMBOL_GPL(__devcgroup_inode_permission);
981
982 int devcgroup_inode_mknod(int mode, dev_t dev)
983 {
984diff --git a/security/security.c b/security/security.c
985index f825304..8dd441d 100644
986--- a/security/security.c
987+++ b/security/security.c
988@@ -443,6 +443,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry)
989 return 0;
990 return call_int_hook(path_rmdir, 0, dir, dentry);
991 }
992+EXPORT_SYMBOL_GPL(security_path_rmdir);
993
994 int security_path_unlink(const struct path *dir, struct dentry *dentry)
995 {
996@@ -459,6 +460,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry,
997 return 0;
998 return call_int_hook(path_symlink, 0, dir, dentry, old_name);
999 }
1000+EXPORT_SYMBOL_GPL(security_path_symlink);
1001
1002 int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
1003 struct dentry *new_dentry)
1004@@ -467,6 +469,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
1005 return 0;
1006 return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
1007 }
1008+EXPORT_SYMBOL_GPL(security_path_link);
1009
1010 int security_path_rename(const struct path *old_dir, struct dentry *old_dentry,
1011 const struct path *new_dir, struct dentry *new_dentry,
1012@@ -494,6 +497,7 @@ int security_path_truncate(const struct path *path)
1013 return 0;
1014 return call_int_hook(path_truncate, 0, path);
1015 }
1016+EXPORT_SYMBOL_GPL(security_path_truncate);
1017
1018 int security_path_chmod(const struct path *path, umode_t mode)
1019 {
1020@@ -501,6 +505,7 @@ int security_path_chmod(const struct path *path, umode_t mode)
1021 return 0;
1022 return call_int_hook(path_chmod, 0, path, mode);
1023 }
1024+EXPORT_SYMBOL_GPL(security_path_chmod);
1025
1026 int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
1027 {
1028@@ -508,6 +513,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
1029 return 0;
1030 return call_int_hook(path_chown, 0, path, uid, gid);
1031 }
1032+EXPORT_SYMBOL_GPL(security_path_chown);
1033
1034 int security_path_chroot(const struct path *path)
1035 {
1036@@ -593,6 +599,7 @@ int security_inode_readlink(struct dentry *dentry)
1037 return 0;
1038 return call_int_hook(inode_readlink, 0, dentry);
1039 }
1040+EXPORT_SYMBOL_GPL(security_inode_readlink);
1041
1042 int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
1043 bool rcu)
1044@@ -608,6 +615,7 @@ int security_inode_permission(struct inode *inode, int mask)
1045 return 0;
1046 return call_int_hook(inode_permission, 0, inode, mask);
1047 }
1048+EXPORT_SYMBOL_GPL(security_inode_permission);
1049
1050 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
1051 {
1052@@ -779,6 +787,7 @@ int security_file_permission(struct file *file, int mask)
1053
1054 return fsnotify_perm(file, mask);
1055 }
1056+EXPORT_SYMBOL_GPL(security_file_permission);
1057
1058 int security_file_alloc(struct file *file)
1059 {
1060@@ -838,6 +847,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
1061 return ret;
1062 return ima_file_mmap(file, prot);
1063 }
1064+EXPORT_SYMBOL_GPL(security_mmap_file);
1065
1066 int security_mmap_addr(unsigned long addr)
1067 {
1068diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
1069--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
1070+++ linux/Documentation/ABI/testing/debugfs-aufs 2016-10-09 16:55:36.476034536 +0200
1071@@ -0,0 +1,50 @@
1072+What: /debug/aufs/si_<id>/
1073+Date: March 2009
1074+Contact: J. R. Okajima <hooanon05g@gmail.com>
1075+Description:
1076+ Under /debug/aufs, a directory named si_<id> is created
1077+ per aufs mount, where <id> is a unique id generated
1078+ internally.
1079+
1080+What: /debug/aufs/si_<id>/plink
1081+Date: Apr 2013
1082+Contact: J. R. Okajima <hooanon05g@gmail.com>
1083+Description:
1084+ It has three lines and shows the information about the
1085+ pseudo-link. The first line is a single number
1086+ representing a number of buckets. The second line is a
1087+ number of pseudo-links per bucket (separated by a
1088+ blank). The last line is a single number representing the
1089+ total number of pseudo-links.
1090+ When the aufs mount option 'noplink' is specified, it
1091+ will show "1\n0\n0\n".
1092+
1093+What: /debug/aufs/si_<id>/xib
1094+Date: March 2009
1095+Contact: J. R. Okajima <hooanon05g@gmail.com>
1096+Description:
1097+ It shows the consumed blocks by xib (External Inode Number
1098+ Bitmap), its block size and file size.
1099+ When the aufs mount option 'noxino' is specified, it
1100+ will be empty. About XINO files, see the aufs manual.
1101+
1102+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
1103+Date: March 2009
1104+Contact: J. R. Okajima <hooanon05g@gmail.com>
1105+Description:
1106+ It shows the consumed blocks by xino (External Inode Number
1107+ Translation Table), its link count, block size and file
1108+ size.
1109+ When the aufs mount option 'noxino' is specified, it
1110+ will be empty. About XINO files, see the aufs manual.
1111+
1112+What: /debug/aufs/si_<id>/xigen
1113+Date: March 2009
1114+Contact: J. R. Okajima <hooanon05g@gmail.com>
1115+Description:
1116+ It shows the consumed blocks by xigen (External Inode
1117+ Generation Table), its block size and file size.
1118+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
1119+ be created.
1120+ When the aufs mount option 'noxino' is specified, it
1121+ will be empty. About XINO files, see the aufs manual.
1122diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
1123--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
1124+++ linux/Documentation/ABI/testing/sysfs-aufs 2016-10-09 16:55:36.476034536 +0200
1125@@ -0,0 +1,31 @@
1126+What: /sys/fs/aufs/si_<id>/
1127+Date: March 2009
1128+Contact: J. R. Okajima <hooanon05g@gmail.com>
1129+Description:
1130+ Under /sys/fs/aufs, a directory named si_<id> is created
1131+ per aufs mount, where <id> is a unique id generated
1132+ internally.
1133+
1134+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
1135+Date: March 2009
1136+Contact: J. R. Okajima <hooanon05g@gmail.com>
1137+Description:
1138+ It shows the absolute path of a member directory (which
1139+ is called branch) in aufs, and its permission.
1140+
1141+What: /sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
1142+Date: July 2013
1143+Contact: J. R. Okajima <hooanon05g@gmail.com>
1144+Description:
1145+ It shows the id of a member directory (which is called
1146+ branch) in aufs.
1147+
1148+What: /sys/fs/aufs/si_<id>/xi_path
1149+Date: March 2009
1150+Contact: J. R. Okajima <hooanon05g@gmail.com>
1151+Description:
1152+ It shows the absolute path of XINO (External Inode Number
1153+ Bitmap, Translation Table and Generation Table) file
1154+ even if it is the default path.
1155+ When the aufs mount option 'noxino' is specified, it
1156+ will be empty. About XINO files, see the aufs manual.
1157diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
1158--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
1159+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2016-10-09 16:55:36.479367956 +0200
1160@@ -0,0 +1,170 @@
1161+
1162+# Copyright (C) 2005-2016 Junjiro R. Okajima
1163+#
1164+# This program is free software; you can redistribute it and/or modify
1165+# it under the terms of the GNU General Public License as published by
1166+# the Free Software Foundation; either version 2 of the License, or
1167+# (at your option) any later version.
1168+#
1169+# This program is distributed in the hope that it will be useful,
1170+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1171+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1172+# GNU General Public License for more details.
1173+#
1174+# You should have received a copy of the GNU General Public License
1175+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1176+
1177+Introduction
1178+----------------------------------------
1179+
1180+aufs [ei ju: ef es] | [a u f s]
1181+1. abbrev. for "advanced multi-layered unification filesystem".
1182+2. abbrev. for "another unionfs".
1183+3. abbrev. for "auf das" in German which means "on the" in English.
1184+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
1185+ But "Filesystem aufs Filesystem" is hard to understand.
1186+
1187+AUFS is a filesystem with features:
1188+- multi layered stackable unification filesystem, the member directory
1189+ is called as a branch.
1190+- branch permission and attribute, 'readonly', 'real-readonly',
1191+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
1192+ combination.
1193+- internal "file copy-on-write".
1194+- logical deletion, whiteout.
1195+- dynamic branch manipulation, adding, deleting and changing permission.
1196+- allow bypassing aufs, user's direct branch access.
1197+- external inode number translation table and bitmap which maintains the
1198+ persistent aufs inode number.
1199+- seekable directory, including NFS readdir.
1200+- file mapping, mmap and sharing pages.
1201+- pseudo-link, hardlink over branches.
1202+- loopback mounted filesystem as a branch.
1203+- several policies to select one among multiple writable branches.
1204+- revert a single systemcall when an error occurs in aufs.
1205+- and more...
1206+
1207+
1208+Multi Layered Stackable Unification Filesystem
1209+----------------------------------------------------------------------
1210+Most people already know what it is.
1211+It is a filesystem which unifies several directories and provides a
1212+merged single directory. When users access a file, the access will be
1213+passed/re-directed/converted (sorry, I am not sure which English word is
1214+correct) to the real file on the member filesystem. The member
1215+filesystem is called 'lower filesystem' or 'branch' and has a mode
1216+'readonly' and 'readwrite.' And the deletion for a file on the lower
1217+readonly branch is handled by creating 'whiteout' on the upper writable
1218+branch.
1219+
1220+On LKML, there have been discussions about UnionMount (Jan Blunck,
1221+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
1222+different approaches to implement the merged-view.
1223+The former tries putting it into VFS, and the latter implements as a
1224+separate filesystem.
1225+(If I misunderstand about these implementations, please let me know and
1226+I shall correct it. Because it is a long time ago when I read their
1227+source files last time).
1228+
1229+UnionMount's approach will be able to be small, but may be hard to share
1230+branches between several UnionMounts since the whiteout in it is
1231+implemented in the inode on branch filesystem and always
1232+shared. According to Bharata's post, readdir does not seem to be
1233+finished yet.
1234+There are several missing features known in this implementation such as
1235+- for users, the inode number may change silently. eg. copy-up.
1236+- link(2) may break by copy-up.
1237+- read(2) may get an obsoleted filedata (fstat(2) too).
1238+- fcntl(F_SETLK) may be broken by copy-up.
1239+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
1240+ open(O_RDWR).
1241+
1242+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
1243+merged into mainline. This is another implementation of UnionMount as a
1244+separate filesystem. All the limitations and known problems which
1245+UnionMount has are equally inherited by the "overlay" filesystem.
1246+
1247+Unionfs has a longer history. When I started implementing a stackable
1248+filesystem (Aug 2005), it already existed. It has virtual super_block,
1249+inode, dentry and file objects and they have an array pointing lower
1250+same kind objects. After contributing many patches for Unionfs, I
1251+re-started my project AUFS (Jun 2006).
1252+
1253+In AUFS, the structure of the filesystem resembles Unionfs, but I
1254+implemented my own ideas, approaches and enhancements and it became
1255+a totally different one.
1256+
1257+Comparing DM snapshot and fs based implementation
1258+- the number of bytes to be copied between devices is much smaller.
1259+- the type of filesystem must be one and only.
1260+- the fs must be writable, no readonly fs, even for the lower original
1261+ device. so the compression fs will not be usable. but if we use
1262+ loopback mount, we may address this issue.
1263+ for instance,
1264+ mount /cdrom/squashfs.img /sq
1265+ losetup /sq/ext2.img
1266+ losetup /somewhere/cow
1267+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
1268+- it will be difficult (or needs more operations) to extract the
1269+ difference between the original device and COW.
1270+- DM snapshot-merge may help a lot when users try merging. in the
1271+ fs-layer union, users will use rsync(1).
1272+
1273+You may want to read my old paper "Filesystems in LiveCD"
1274+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
1275+
1276+
1277+Several characters/aspects/persona of aufs
1278+----------------------------------------------------------------------
1279+
1280+Aufs has several characters, aspects or persona.
1281+1. a filesystem, callee of VFS helper
1282+2. sub-VFS, caller of VFS helper for branches
1283+3. a virtual filesystem which maintains persistent inode number
1284+4. reader/writer of files on branches such like an application
1285+
1286+1. Callee of VFS Helper
1287+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
1288+unlink(2) from an application reaches sys_unlink() kernel function and
1289+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
1290+calls filesystem specific unlink operation. Actually aufs implements the
1291+unlink operation but it behaves like a redirector.
1292+
1293+2. Caller of VFS Helper for Branches
1294+aufs_unlink() passes the unlink request to the branch filesystem as if
1295+it were called from VFS. So the called unlink operation of the branch
1296+filesystem acts as usual. As a caller of VFS helper, aufs should handle
1297+every necessary pre/post operation for the branch filesystem.
1298+- acquire the lock for the parent dir on a branch
1299+- lookup in a branch
1300+- revalidate dentry on a branch
1301+- mnt_want_write() for a branch
1302+- vfs_unlink() for a branch
1303+- mnt_drop_write() for a branch
1304+- release the lock on a branch
1305+
1306+3. Persistent Inode Number
1307+One of the most important issues for a filesystem is to maintain inode
1308+numbers. This is particularly important to support exporting a
1309+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
1310+backend block device for its own. But some storage is necessary to
1311+keep and maintain the inode numbers. It may be a large space and may not
1312+suit to keep in memory. Aufs rents some space from its first writable
1313+branch filesystem (by default) and creates file(s) on it. These files
1314+are created by aufs internally and removed soon (currently) keeping
1315+opened.
1316+Note: Because these files are removed, they are totally gone after
1317+ unmounting aufs. It means the inode numbers are not persistent
1318+ across unmount or reboot. I have a plan to make them really
1319+ persistent which will be important for aufs on NFS server.
1320+
1321+4. Read/Write Files Internally (copy-on-write)
1322+Because a branch can be readonly, when you write a file on it, aufs will
1323+"copy-up" it to the upper writable branch internally. And then write the
1324+originally requested thing to the file. Generally kernel doesn't
1325+open/read/write file actively. In aufs, even a single write may cause an
1326+internal "file copy". This behaviour is very similar to the cp(1) command.
1327+
1328+Some people may think it is better to pass such work to user space
1329+helper, instead of doing in kernel space. Actually I am still thinking
1330+about it. But currently I have implemented it in kernel space.
1331diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
1332--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
1333+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2016-10-09 16:55:36.479367956 +0200
1334@@ -0,0 +1,258 @@
1335+
1336+# Copyright (C) 2005-2016 Junjiro R. Okajima
1337+#
1338+# This program is free software; you can redistribute it and/or modify
1339+# it under the terms of the GNU General Public License as published by
1340+# the Free Software Foundation; either version 2 of the License, or
1341+# (at your option) any later version.
1342+#
1343+# This program is distributed in the hope that it will be useful,
1344+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1345+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1346+# GNU General Public License for more details.
1347+#
1348+# You should have received a copy of the GNU General Public License
1349+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1350+
1351+Basic Aufs Internal Structure
1352+
1353+Superblock/Inode/Dentry/File Objects
1354+----------------------------------------------------------------------
1355+As like an ordinary filesystem, aufs has its own
1356+superblock/inode/dentry/file objects. All these objects have a
1357+dynamically allocated array and store the same kind of pointers to the
1358+lower filesystem, branch.
1359+For example, when you build a union with one readwrite branch and one
1360+readonly, mounted /au, /rw and /ro respectively.
1361+- /au = /rw + /ro
1362+- /ro/fileA exists but /rw/fileA does not
1363+
1364+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
1365+pointers are stored in an aufs dentry. The array in aufs dentry will be,
1366+- [0] = NULL (because /rw/fileA doesn't exist)
1367+- [1] = /ro/fileA
1368+
1369+This style of an array is essentially same to the aufs
1370+superblock/inode/dentry/file objects.
1371+
1372+Because aufs supports manipulating branches, ie. add/delete/change
1373+branches dynamically, each of these objects has its own generation. When
1374+branches are changed, the generation in aufs superblock is
1375+incremented. And the generation in another object is compared when it is
1376+accessed. When the generation in another object is obsolete, aufs
1377+refreshes the internal array.
1378+
1379+
1380+Superblock
1381+----------------------------------------------------------------------
1382+Additionally aufs superblock has some data for policies to select one
1383+among multiple writable branches, XIB files, pseudo-links and kobject.
1384+See below in detail.
1385+About the policies which support copy-down of a directory, see
1386+wbr_policy.txt too.
1387+
1388+
1389+Branch and XINO(External Inode Number Translation Table)
1390+----------------------------------------------------------------------
1391+Every branch has its own xino (external inode number translation table)
1392+file. The xino file is created and unlinked by aufs internally. When two
1393+members of a union exist on the same filesystem, they share the single
1394+xino file.
1395+The struct of a xino file is simple, just a sequence of aufs inode
1396+numbers which is indexed by the lower inode number.
1397+In the above sample, assume the inode number of /ro/fileA is i111 and
1398+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
1399+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
1400+
1401+When the inode numbers are not contiguous, the xino file will be sparse
1402+which has a hole in it and doesn't consume as much disk space as it
1403+might appear. If your branch filesystem consumes disk space for such
1404+holes, then you should specify 'xino=' option at mounting aufs.
1405+
1406+Aufs has a mount option to free the disk blocks for such holes in XINO
1407+files on tmpfs or ramdisk. But it is not so effective actually. If you
1408+meet a problem of disk shortage due to XINO files, then you should try
1409+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
1410+The patch localizes the assignment of inumbers per tmpfs-mount and avoids
1411+the holes in XINO files.
1412+
1413+Also a writable branch has three kinds of "whiteout bases". All these
1414+exist when the branch is joined to aufs, and their names are
1415+whiteout-ed doubly, so that users will never see their names in aufs
1416+hierarchy.
1417+1. a regular file which will be hardlinked to all whiteouts.
1418+2. a directory to store a pseudo-link.
1419+3. a directory to store an "orphan"-ed file temporarily.
1420+
1421+1. Whiteout Base
1422+ When you remove a file on a readonly branch, aufs handles it as a
1423+ logical deletion and creates a whiteout on the upper writable branch
1424+ as a hardlink of this file in order not to consume inode on the
1425+ writable branch.
1426+2. Pseudo-link Dir
1427+ See below, Pseudo-link.
1428+3. Step-Parent Dir
1429+ When "fileC" exists on the lower readonly branch only and it is
1430+ opened and removed with its parent dir, and then user writes
1431+ something into it, then aufs copies-up fileC to this
1432+ directory. Because there is no other dir to store fileC. After
1433+ creating a file under this dir, the file is unlinked.
1434+
1435+Because aufs supports manipulating branches, ie. add/delete/change
1436+dynamically, a branch has its own id. When the branch order changes,
1437+aufs finds the new index by searching the branch id.
1438+
1439+
1440+Pseudo-link
1441+----------------------------------------------------------------------
1442+Assume "fileA" exists on the lower readonly branch only and it is
1443+hardlinked to "fileB" on the branch. When you write something to fileA,
1444+aufs copies-up it to the upper writable branch. Additionally aufs
1445+creates a hardlink under the Pseudo-link Directory of the writable
1446+branch. The inode of a pseudo-link is kept in aufs super_block as a
1447+simple list. If fileB is read after unlinking fileA, aufs returns
1448+filedata from the pseudo-link instead of the lower readonly
1449+branch. Because the pseudo-link is based upon the inode, to keep the
1450+inode number by xino (see above) is essentially necessary.
1451+
1452+All the hardlinks under the Pseudo-link Directory of the writable branch
1453+should be restored in a proper location later. Aufs provides a utility
1454+to do this. The userspace helpers are executed at remounting and unmounting
1455+aufs by default.
1456+While this utility is running, it puts aufs into the pseudo-link
1457+maintenance mode. In this mode, only the process which began the
1458+maintenance mode (and its child processes) is allowed to operate in
1459+aufs. Some other processes which are not related to the pseudo-link will
1460+be allowed to run too, but the rest have to return an error or wait
1461+until the maintenance mode ends. If a process already acquires an inode
1462+mutex (in VFS), it has to return an error.
1463+
1464+
1465+XIB(external inode number bitmap)
1466+----------------------------------------------------------------------
1467+In addition to the xino file per branch, aufs has an external inode number
1468+bitmap in a superblock object. It is also an internal file like a
1469+xino file.
1470+It is a simple bitmap to mark whether the aufs inode number is in-use or
1471+not.
1472+To reduce the file I/O, aufs prepares a single memory page to cache xib.
1473+
1474+As well as XINO files, aufs has a feature to truncate/refresh XIB to
1475+reduce the number of consumed disk blocks for these files.
1476+
1477+
1478+Virtual or Vertical Dir, and Readdir in Userspace
1479+----------------------------------------------------------------------
1480+In order to support multiple layers (branches), aufs readdir operation
1481+constructs a virtual dir block on memory. For readdir, aufs calls
1482+vfs_readdir() internally for each dir on branches, merges their entries
1483+with eliminating the whiteout-ed ones, and sets it to file (dir)
1484+object. So the file object has its entry list until it is closed. The
1485+entry list will be updated when the file position is zero and becomes
1486+obsoleted. This decision is made in aufs automatically.
1487+
1488+The dynamically allocated memory block for the name of entries has a
1489+unit of 512 bytes (by default) and stores the names contiguously (no
1490+padding). Another block for each entry is handled by kmem_cache too.
1491+During building dir blocks, aufs creates hash list and judging whether
1492+the entry is whiteouted by its upper branch or already listed.
1493+The merged result is cached in the corresponding inode object and
1494+maintained by a customizable life-time option.
1495+
1496+Some people may call it can be a security hole or invite DoS attack
1497+since the opened and once readdir-ed dir (file object) holds its entry
1498+list and becomes a pressure for system memory. But I'd say it is similar
1499+to files under /proc or /sys. The virtual files in them also holds a
1500+memory page (generally) while they are opened. When an idea to reduce
1501+memory for them is introduced, it will be applied to aufs too.
1502+For those who really hate this situation, I've developed readdir(3)
1503+library which operates this merging in userspace. You just need to set
1504+LD_PRELOAD environment variable, and aufs will not consume any memory in
1505+kernel space for readdir(3).
1506+
1507+
1508+Workqueue
1509+----------------------------------------------------------------------
1510+Aufs sometimes requires privilege access to a branch. For instance,
1511+in copy-up/down operation. When a user process is going to make changes
1512+to a file which exists in the lower readonly branch only, and the mode
1513+of one of ancestor directories may not be writable by a user
1514+process. Here aufs copies-up the file with its ancestors and they may
1515+require privilege to set its owner/group/mode/etc.
1516+This is a typical case of an application character of aufs (see
1517+Introduction).
1518+
1519+Aufs uses workqueue synchronously for this case. It creates its own
1520+workqueue. The workqueue is a kernel thread and has privilege. Aufs
1521+passes the request to call mkdir or write (for example), and wait for
1522+its completion. This approach solves a problem of a signal handler
1523+simply.
1524+If aufs didn't adopt the workqueue and changed the privilege of the
1525+process, then the process may receive the unexpected SIGXFSZ or other
1526+signals.
1527+
1528+Also aufs uses the system global workqueue ("events" kernel thread) too
1529+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
1530+whiteout base and etc. This is unrelated to a privilege.
1531+Most of aufs operation tries acquiring a rw_semaphore for aufs
1532+superblock at the beginning, at the same time waits for the completion
1533+of all queued asynchronous tasks.
1534+
1535+
1536+Whiteout
1537+----------------------------------------------------------------------
1538+The whiteout in aufs is very similar to Unionfs's. That is represented
1539+by its filename. UnionMount takes an approach of a file mode, but I am
1540+afraid several utilities (find(1) or something) will have to support it.
1541+
1542+Basically the whiteout represents "logical deletion" which stops aufs from
1543+looking up further, but it also represents "dir is opaque" which also stops
1544+further lookup.
1545+
1546+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
1547+In order to make several functions in a single systemcall to be
1548+revertible, aufs adopts an approach to rename a directory to a temporary
1549+unique whiteouted name.
1550+For example, in rename(2) dir where the target dir already existed, aufs
1551+renames the target dir to a temporary unique whiteouted name before the
1552+actual rename on a branch, and then handles other actions (make it opaque,
1553+update the attributes, etc). If an error happens in these actions, aufs
1554+simply renames the whiteouted name back and returns an error. If all are
1555+succeeded, aufs registers a function to remove the whiteouted unique
1556+temporary name completely and asynchronously to the system global
1557+workqueue.
1558+
1559+
1560+Copy-up
1561+----------------------------------------------------------------------
1562+It is a well-known feature or concept.
1563+When a user modifies a file on a readonly branch, aufs operates "copy-up"
1564+internally and makes change to the new file on the upper writable branch.
1565+When the trigger systemcall does not update the timestamps of the parent
1566+dir, aufs reverts it after copy-up.
1567+
1568+
1569+Move-down (aufs3.9 and later)
1570+----------------------------------------------------------------------
1571+"Copy-up" is one of the essential features in aufs. It copies a file from
1572+the lower readonly branch to the upper writable branch when a user
1573+changes something about the file.
1574+"Move-down" is an opposite action of copy-up. Basically this action is
1575+run manually instead of automatically and internally.
1576+For design and implementation, aufs has to consider these issues.
1577+- whiteout for the file may exist on the lower branch.
1578+- ancestor directories may not exist on the lower branch.
1579+- diropq for the ancestor directories may exist on the upper branch.
1580+- free space on the lower branch will reduce.
1581+- another access to the file may happen during moving-down, including
1582+ UDBA (see "Revalidate Dentry and UDBA").
1583+- the file should not be hard-linked nor pseudo-linked. they should be
1584+ handled by auplink utility later.
1585+
1586+Sometimes users want to move-down a file from the upper writable branch
1587+to the lower readonly or writable branch. For instance,
1588+- the free space of the upper writable branch is going to run out.
1589+- create a new intermediate branch between the upper and lower branch.
1590+- etc.
1591+
1592+For this purpose, use "aumvdown" command in aufs-util.git.
1593diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt linux/Documentation/filesystems/aufs/design/03atomic_open.txt
1594--- /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 01:00:00.000000000 +0100
1595+++ linux/Documentation/filesystems/aufs/design/03atomic_open.txt 2016-10-09 16:55:36.479367956 +0200
1596@@ -0,0 +1,85 @@
1597+
1598+# Copyright (C) 2015-2016 Junjiro R. Okajima
1599+#
1600+# This program is free software; you can redistribute it and/or modify
1601+# it under the terms of the GNU General Public License as published by
1602+# the Free Software Foundation; either version 2 of the License, or
1603+# (at your option) any later version.
1604+#
1605+# This program is distributed in the hope that it will be useful,
1606+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1607+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1608+# GNU General Public License for more details.
1609+#
1610+# You should have received a copy of the GNU General Public License
1611+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1612+
1613+Support for a branch who has its ->atomic_open()
1614+----------------------------------------------------------------------
1615+The filesystems who implement its ->atomic_open() are not majority. For
1616+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
1617+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
1618+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
1619+sure whether all filesystems who have ->atomic_open() behave like this,
1620+but NFSv4 surely returns the error.
1621+
1622+In order to support ->atomic_open() for aufs, there are a few
1623+approaches.
1624+
1625+A. Introduce aufs_atomic_open()
1626+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
1627+ branch fs.
1628+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
1629+ an aufs user Pip Cet's approach
1630+ - calls aufs_create(), VFS finish_open() and notify_change().
1631+ - pass fake-mode to finish_open(), and then correct the mode by
1632+ notify_change().
1633+C. Extend aufs_open() to call branch fs's ->atomic_open()
1634+ - no aufs_atomic_open().
1635+ - aufs_lookup() registers the TID to an aufs internal object.
1636+ - aufs_create() does nothing when the matching TID is registered, but
1637+ registers the mode.
1638+ - aufs_open() calls branch fs's ->atomic_open() when the matching
1639+ TID is registered.
1640+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
1641+ credential
1642+ - no aufs_atomic_open().
1643+ - aufs_create() registers the TID to an internal object. this info
1644+ represents "this process created this file just now."
1645+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
1646+ registered TID and re-try open() with superuser's credential.
1647+
1648+Pros and cons for each approach.
1649+
1650+A.
1651+ - straightforward but highly depends upon VFS internal.
1652+ - the atomic behaviour is kept.
1653+ - some of parameters such as nameidata are hard to reproduce for
1654+ branch fs.
1655+ - large overhead.
1656+B.
1657+ - easy to implement.
1658+ - the atomic behaviour is lost.
1659+C.
1660+ - the atomic behaviour is kept.
1661+ - dirty and tricky.
1662+ - VFS checks whether the file is created correctly after calling
1663+ ->create(), which means this approach doesn't work.
1664+D.
1665+ - easy to implement.
1666+ - the atomic behaviour is lost.
1667+ - to open a file with superuser's credential and give it to a user
1668+ process is a bad idea, since the file object keeps the credential
1669+ in it. It may affect LSM or something. This approach doesn't work
1670+ either.
1671+
1672+The approach A is ideal, but it is hard to implement. So here is a
1673+variation of A, which is to be implemented.
1674+
1675+A-1. Introduce aufs_atomic_open()
1676+ - calls branch fs ->atomic_open() if exists. otherwise calls
1677+ vfs_create() and finish_open().
1678+ - the demerit is that the several checks after branch fs
1679+ ->atomic_open() are lost. in the ordinary case, the checks are
1680+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
1681+ be implemented in aufs, but not all I am afraid.
1682diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
1683--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
1684+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2016-10-09 16:55:36.479367956 +0200
1685@@ -0,0 +1,113 @@
1686+
1687+# Copyright (C) 2005-2016 Junjiro R. Okajima
1688+#
1689+# This program is free software; you can redistribute it and/or modify
1690+# it under the terms of the GNU General Public License as published by
1691+# the Free Software Foundation; either version 2 of the License, or
1692+# (at your option) any later version.
1693+#
1694+# This program is distributed in the hope that it will be useful,
1695+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1696+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1697+# GNU General Public License for more details.
1698+#
1699+# You should have received a copy of the GNU General Public License
1700+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1701+
1702+Lookup in a Branch
1703+----------------------------------------------------------------------
1704+Since aufs has a character of sub-VFS (see Introduction), it operates
1705+lookup for branches as VFS does. It may be a heavy work. But almost all
1706+lookup operation in aufs is the simplest case, ie. lookup only an entry
1707+directly connected to its parent. Digging down the directory hierarchy
1708+is unnecessary. VFS has a function lookup_one_len() for that use, and
1709+aufs calls it.
1710+
1711+When a branch is a remote filesystem, aufs basically relies upon its
1712+->d_revalidate(), also aufs forces the hardest revalidate tests for
1713+them.
1714+For d_revalidate, aufs implements three levels of revalidate tests. See
1715+"Revalidate Dentry and UDBA" in detail.
1716+
1717+
1718+Test Only the Highest One for the Directory Permission (dirperm1 option)
1719+----------------------------------------------------------------------
1720+Let's try case study.
1721+- aufs has two branches, upper readwrite and lower readonly.
1722+ /au = /rw + /ro
1723+- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1724+- user invoked "chmod a+rx /au/dirA"
1725+- the internal copy-up is activated and "/rw/dirA" is created and its
1726+ permission bits are set to world readable.
1727+- then "/au/dirA" becomes world readable?
1728+
1729+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1730+or it may be a natively readonly filesystem. If aufs respects the lower
1731+branch, it should not respond readdir request from other users. But user
1732+allowed it by chmod. Should aufs really reject showing the entries
1733+under /ro/dirA?
1734+
1735+To be honest, I don't have a good solution for this case. So aufs
1736+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
1737+users.
1738+When dirperm1 is specified, aufs checks only the highest one for the
1739+directory permission, and shows the entries. Otherwise, as usual, checks
1740+every dir existing on all branches and rejects the request.
1741+
1742+As a side effect, dirperm1 option improves the performance of aufs
1743+because the number of permission check is reduced when the number of
1744+branch is many.
1745+
1746+
1747+Revalidate Dentry and UDBA (User's Direct Branch Access)
1748+----------------------------------------------------------------------
1749+Generally VFS helpers re-validate a dentry as a part of lookup.
1750+0. digging down the directory hierarchy.
1751+1. lock the parent dir by its i_mutex.
1752+2. lookup the final (child) entry.
1753+3. revalidate it.
1754+4. call the actual operation (create, unlink, etc.)
1755+5. unlock the parent dir
1756+
1757+If the filesystem implements its ->d_revalidate() (step 3), then it is
1758+called. Actually aufs implements it and checks the dentry on a branch is
1759+still valid.
1760+But it is not enough. Because aufs has to release the lock for the
1761+parent dir on a branch at the end of ->lookup() (step 2) and
1762+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1763+held by VFS.
1764+If the file on a branch is changed directly, eg. bypassing aufs, after
1765+aufs released the lock, then the subsequent operation may cause
1766+some unpleasant result.
1767+
1768+This situation is a result of VFS architecture, ->lookup() and
1769+->d_revalidate() are separated. But I never say it is wrong. It is a good
1770+design from VFS's point of view. It is just not suitable for sub-VFS
1771+character in aufs.
1772+
1773+Aufs supports such case by three level of revalidation which is
1774+selectable by user.
1775+1. Simple Revalidate
1776+ Addition to the native flow in VFS's, confirm the child-parent
1777+ relationship on the branch just after locking the parent dir on the
1778+ branch in the "actual operation" (step 4). When this validation
1779+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1780+ checks the validation of the dentry on branches.
1781+2. Monitor Changes Internally by Inotify/Fsnotify
1782+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
1783+ the dentry on the branch, and returns EBUSY if it finds different
1784+ dentry.
1785+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
1786+ during it is in cache. When the event is notified, aufs registers a
1787+ function to kernel 'events' thread by schedule_work(). And the
1788+ function sets some special status to the cached aufs dentry and inode
1789+ private data. If they are not cached, then aufs has nothing to
1790+ do. When the same file is accessed through aufs (step 0-3) later,
1791+ aufs will detect the status and refresh all necessary data.
1792+ In this mode, aufs has to ignore the event which is fired by aufs
1793+ itself.
1794+3. No Extra Validation
1795+ This is the simplest test and doesn't add any additional revalidation
1796+ test, and skip the revalidation in step 4. It is useful and improves
1797+ aufs performance when system surely hide the aufs branches from user,
1798+ by over-mounting something (or another method).
1799diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
1800--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
1801+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2016-10-09 16:55:36.482701377 +0200
1802@@ -0,0 +1,74 @@
1803+
1804+# Copyright (C) 2005-2016 Junjiro R. Okajima
1805+#
1806+# This program is free software; you can redistribute it and/or modify
1807+# it under the terms of the GNU General Public License as published by
1808+# the Free Software Foundation; either version 2 of the License, or
1809+# (at your option) any later version.
1810+#
1811+# This program is distributed in the hope that it will be useful,
1812+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1813+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1814+# GNU General Public License for more details.
1815+#
1816+# You should have received a copy of the GNU General Public License
1817+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1818+
1819+Branch Manipulation
1820+
1821+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1822+and changing its permission/attribute, there are a lot of works to do.
1823+
1824+
1825+Add a Branch
1826+----------------------------------------------------------------------
1827+o Confirm the adding dir exists outside of aufs, including loopback
1828+ mount, and its various attributes.
1829+o Initialize the xino file and whiteout bases if necessary.
1830+ See struct.txt.
1831+
1832+o Check the owner/group/mode of the directory
1833+ When the owner/group/mode of the adding directory differs from the
1834+ existing branch, aufs issues a warning because it may impose a
1835+ security risk.
1836+ For example, when a upper writable branch has a world writable empty
1837+ top directory, a malicious user can create any files on the writable
1838+ branch directly, like copy-up and modify manually. If something like
1839+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper
1840+ writable branch, and the writable branch is world-writable, then a
1841+ malicious guy may create /etc/passwd on the writable branch directly
1842+ and the infected file will be valid in aufs.
1843+ I am afraid it can be a security issue, but aufs can do nothing except
1844+ producing a warning.
1845+
1846+
1847+Delete a Branch
1848+----------------------------------------------------------------------
1849+o Confirm the deleting branch is not busy
1850+ To be general, there is one merit to adopt "remount" interface to
1851+ manipulate branches. It is to discard caches. At deleting a branch,
1852+ aufs checks the still cached (and connected) dentries and inodes. If
1853+ there are any, then they are all in-use. An inode without its
1854+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
1855+
1856+ For the cached one, aufs checks whether the same named entry exists on
1857+ other branches.
1858+ If the cached one is a directory, because aufs provides a merged view
1859+ to users, as long as one dir is left on any branch aufs can show the
1860+ dir to users. In this case, the branch can be removed from aufs.
1861+ Otherwise aufs rejects deleting the branch.
1862+
1863+ If any file on the deleting branch is opened by aufs, then aufs
1864+ rejects deleting.
1865+
1866+
1867+Modify the Permission of a Branch
1868+----------------------------------------------------------------------
1869+o Re-initialize or remove the xino file and whiteout bases if necessary.
1870+ See struct.txt.
1871+
1872+o rw --> ro: Confirm the modifying branch is not busy
1873+ Aufs rejects the request if any of these conditions are true.
1874+ - a file on the branch is mmap-ed.
1875+ - a regular file on the branch is opened for write and there is no
1876+ same named entry on the upper branch.
1877diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1878--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
1879+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2016-10-09 16:55:36.482701377 +0200
1880@@ -0,0 +1,64 @@
1881+
1882+# Copyright (C) 2005-2016 Junjiro R. Okajima
1883+#
1884+# This program is free software; you can redistribute it and/or modify
1885+# it under the terms of the GNU General Public License as published by
1886+# the Free Software Foundation; either version 2 of the License, or
1887+# (at your option) any later version.
1888+#
1889+# This program is distributed in the hope that it will be useful,
1890+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1891+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1892+# GNU General Public License for more details.
1893+#
1894+# You should have received a copy of the GNU General Public License
1895+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1896+
1897+Policies to Select One among Multiple Writable Branches
1898+----------------------------------------------------------------------
1899+When the number of writable branch is more than one, aufs has to decide
1900+the target branch for file creation or copy-up. By default, the highest
1901+writable branch which has the parent (or ancestor) dir of the target
1902+file is chosen (top-down-parent policy).
1903+By user's request, aufs implements some other policies to select the
1904+writable branch, for file creation several policies, round-robin,
1905+most-free-space, and other policies. For copy-up, top-down-parent,
1906+bottom-up-parent, bottom-up and others.
1907+
1908+As expected, the round-robin policy selects the branch in circular. When
1909+you have two writable branches and creates 10 new files, 5 files will be
1910+created for each branch. mkdir(2) systemcall is an exception. When you
1911+create 10 new directories, all will be created on the same branch.
1912+And the most-free-space policy selects the one which has most free
1913+space among the writable branches. The amount of free space will be
1914+checked by aufs internally, and users can specify its time interval.
1915+
1916+The policies for copy-up are simpler,
1917+top-down-parent is equivalent to the same named one in create policy,
1918+bottom-up-parent selects the writable branch where the parent dir
1919+exists and the nearest upper one from the copyup-source,
1920+bottom-up selects the nearest upper writable branch from the
1921+copyup-source, regardless the existence of the parent dir.
1922+
1923+There are some rules or exceptions to apply these policies.
1924+- If there is a readonly branch above the policy-selected branch and
1925+ the parent dir is marked as opaque (a variation of whiteout), or the
1926+ target (creating) file is whiteout-ed on the upper readonly branch,
1927+ then the result of the policy is ignored and the target file will be
1928+ created on the nearest upper writable branch than the readonly branch.
1929+- If there is a writable branch above the policy-selected branch and
1930+ the parent dir is marked as opaque or the target file is whiteouted
1931+ on the branch, then the result of the policy is ignored and the target
1932+ file will be created on the highest one among the upper writable
1933+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1934+ it as usual.
1935+- link(2) and rename(2) systemcalls are exceptions in every policy.
1936+ They try selecting the branch where the source exists as possible
1937+ since copyup a large file will take long time. If it can't be,
1938+ ie. the branch where the source exists is readonly, then they will
1939+ follow the copyup policy.
1940+- There is an exception for rename(2) when the target exists.
1941+ If the rename target exists, aufs compares the index of the branches
1942+ where the source and the target exists and selects the higher
1943+ one. If the selected branch is readonly, then aufs follows the
1944+ copyup policy.
1945diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt linux/Documentation/filesystems/aufs/design/06fhsm.txt
1946--- /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 01:00:00.000000000 +0100
1947+++ linux/Documentation/filesystems/aufs/design/06fhsm.txt 2016-10-09 16:55:36.482701377 +0200
1948@@ -0,0 +1,120 @@
1949+
1950+# Copyright (C) 2011-2016 Junjiro R. Okajima
1951+#
1952+# This program is free software; you can redistribute it and/or modify
1953+# it under the terms of the GNU General Public License as published by
1954+# the Free Software Foundation; either version 2 of the License, or
1955+# (at your option) any later version.
1956+#
1957+# This program is distributed in the hope that it will be useful,
1958+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1959+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1960+# GNU General Public License for more details.
1961+#
1962+# You should have received a copy of the GNU General Public License
1963+# along with this program; if not, write to the Free Software
1964+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1965+
1966+
1967+File-based Hierarchical Storage Management (FHSM)
1968+----------------------------------------------------------------------
1969+Hierarchical Storage Management (or HSM) is a well-known feature in the
1970+storage world. Aufs provides this feature as file-based with multiple
1971+writable branches, based upon the principle of "Colder, the Lower".
1972+Here the word "colder" means that the less used files, and "lower" means
1973+that the position in the order of the stacked branches vertically.
1974+These multiple writable branches are prioritized, ie. the topmost one
1975+should be the fastest drive and be used heavily.
1976+
1977+o Characters in aufs FHSM story
1978+- aufs itself and a new branch attribute.
1979+- a new ioctl interface to move-down and to establish a connection with
1980+ the daemon ("move-down" is a converse of "copy-up").
1981+- userspace tool and daemon.
1982+
1983+The userspace daemon establishes a connection with aufs and waits for
1984+the notification. The notified information is very similar to struct
1985+statfs containing the number of consumed blocks and inodes.
1986+When the consumed blocks/inodes of a branch exceeds the user-specified
1987+upper watermark, the daemon activates its move-down process until the
1988+consumed blocks/inodes reaches the user-specified lower watermark.
1989+
1990+The actual move-down is done by aufs based upon the request from
1991+user-space since we need to maintain the inode number and the internal
1992+pointer arrays in aufs.
1993+
1994+Currently aufs FHSM handles the regular files only. Additionally they
1995+must not be hard-linked nor pseudo-linked.
1996+
1997+
1998+o Cowork of aufs and the user-space daemon
1999+ During the userspace daemon established the connection, aufs sends a
2000+ small notification to it whenever aufs writes something into the
2001+ writable branch. But it may cost high since aufs issues statfs(2)
2002+ internally. So user can specify a new option to cache the
2003+ info. Actually the notification is controlled by these factors.
2004+ + the specified cache time.
2005+ + classified as "force" by aufs internally.
2006+ Until the specified time expires, aufs doesn't send the info
2007+ except the forced cases. When aufs decide forcing, the info is always
2008+ notified to userspace.
2009+ For example, the number of free inodes is generally large enough and
2010+ the shortage of it happens rarely. So aufs doesn't force the
2011+ notification when creating a new file, directory and others. This is
2012+ the typical case which aufs doesn't force.
2013+ When aufs writes the actual filedata and the files consumes any of new
2014+ blocks, the aufs forces notifying.
2015+
2016+
2017+o Interfaces in aufs
2018+- New branch attribute.
2019+ + fhsm
2020+ Specifies that the branch is managed by FHSM feature. In other word,
2021+ participant in the FHSM.
2022+ When nofhsm is set to the branch, it will not be the source/target
2023+ branch of the move-down operation. This attribute is set
2024+ independently from coo and moo attributes, and if you want full
2025+ FHSM, you should specify them as well.
2026+- New mount option.
2027+ + fhsm_sec
2028+ Specifies a second to suppress many less important info to be
2029+ notified.
2030+- New ioctl.
2031+ + AUFS_CTL_FHSM_FD
2032+ create a new file descriptor which userspace can read the notification
2033+ (a subset of struct statfs) from aufs.
2034+- Module parameter 'brs'
2035+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
2036+ be set.
2037+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
2038+ When there are two or more branches with fhsm attributes,
2039+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
2040+ terminates it. As a result of remounting and branch-manipulation, the
2041+ number of branches with fhsm attribute can be one. In this case,
2042+ /sbin/mount.aufs will terminate the user-space daemon.
2043+
2044+
2045+Finally the operation is done as these steps in kernel-space.
2046+- make sure that,
2047+ + no one else is using the file.
2048+ + the file is not hard-linked.
2049+ + the file is not pseudo-linked.
2050+ + the file is a regular file.
2051+ + the parent dir is not opaqued.
2052+- find the target writable branch.
2053+- make sure the file is not whiteout-ed by the upper (than the target)
2054+ branch.
2055+- make the parent dir on the target branch.
2056+- mutex lock the inode on the branch.
2057+- unlink the whiteout on the target branch (if exists).
2058+- lookup and create the whiteout-ed temporary name on the target branch.
2059+- copy the file as the whiteout-ed temporary name on the target branch.
2060+- rename the whiteout-ed temporary name to the original name.
2061+- unlink the file on the source branch.
2062+- maintain the internal pointer array and the external inode number
2063+ table (XINO).
2064+- maintain the timestamps and other attributes of the parent dir and the
2065+ file.
2066+
2067+And of course, in every step, an error may happen. So the operation
2068+should restore the original file state after an error happens.
2069diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
2070--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
2071+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2016-10-09 16:55:36.482701377 +0200
2072@@ -0,0 +1,72 @@
2073+
2074+# Copyright (C) 2005-2016 Junjiro R. Okajima
2075+#
2076+# This program is free software; you can redistribute it and/or modify
2077+# it under the terms of the GNU General Public License as published by
2078+# the Free Software Foundation; either version 2 of the License, or
2079+# (at your option) any later version.
2080+#
2081+# This program is distributed in the hope that it will be useful,
2082+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2083+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2084+# GNU General Public License for more details.
2085+#
2086+# You should have received a copy of the GNU General Public License
2087+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2088+
2089+mmap(2) -- File Memory Mapping
2090+----------------------------------------------------------------------
2091+In aufs, the file-mapped pages are handled by a branch fs directly, no
2092+interaction with aufs. It means aufs_mmap() calls the branch fs's
2093+->mmap().
2094+This approach is simple and good, but there is one problem.
2095+Under /proc, several entries show the mmapped files by its path (with
2096+device and inode number), and the printed path will be the path on the
2097+branch fs's instead of virtual aufs's.
2098+This is not a problem in most cases, but some utilities lsof(1) (and its
2099+user) may expect the path on aufs.
2100+
2101+To address this issue, aufs adds a new member called vm_prfile in struct
2102+vm_area_struct (and struct vm_region). The original vm_file points to
2103+the file on the branch fs in order to handle everything correctly as
2104+usual. The new vm_prfile points to a virtual file in aufs, and the
2105+show-functions in procfs refers to vm_prfile if it is set.
2106+Also we need to maintain several other places where touching vm_file
2107+such like
2108+- fork()/clone() copies vma and the reference count of vm_file is
2109+ incremented.
2110+- merging vma maintains the ref count too.
2111+
2112+This is not a good approach. It just fakes the printed path. But it
2113+leaves all behaviour around f_mapping unchanged. This is surely an
2114+advantage.
2115+Actually aufs had adopted another complicated approach which calls
2116+generic_file_mmap() and handles struct vm_operations_struct. In this
2117+approach, aufs met a hard problem and I could not solve it without
2118+switching the approach.
2119+
2120+There may be one more approach which is
2121+- bind-mount the branch-root onto the aufs-root internally
2122+- grab the new vfsmount (ie. struct mount)
2123+- lazy-umount the branch-root internally
2124+- in open(2) the aufs-file, open the branch-file with the hidden
2125+ vfsmount (instead of the original branch's vfsmount)
2126+- ideally this "bind-mount and lazy-umount" should be done atomically,
2127+ but it may be possible from userspace by the mount helper.
2128+
2129+Adding the internal hidden vfsmount and using it in opening a file, the
2130+file path under /proc will be printed correctly. This approach looks
2131+smarter, but is not possible I am afraid.
2132+- aufs-root may be bind-mount later. when it happens, another hidden
2133+ vfsmount will be required.
2134+- it is hard to get the chance to bind-mount and lazy-umount
2135+ + in kernel-space, FS can have vfsmount in open(2) via
2136+ file->f_path, and aufs can know its vfsmount. But several locks are
2137+ already acquired, and if aufs tries to bind-mount and lazy-umount
2138+ here, then it may cause a deadlock.
2139+ + in user-space, bind-mount doesn't invoke the mount helper.
2140+- since /proc shows dev and ino, aufs has to give vma these info. it
2141+ means a new member vm_prinode will be necessary. this is essentially
2142+ equivalent to vm_prfile described above.
2143+
2144+I have to give up this "looks-smarter" approach.
2145diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt linux/Documentation/filesystems/aufs/design/06xattr.txt
2146--- /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 01:00:00.000000000 +0100
2147+++ linux/Documentation/filesystems/aufs/design/06xattr.txt 2016-10-09 16:55:36.482701377 +0200
2148@@ -0,0 +1,96 @@
2149+
2150+# Copyright (C) 2014-2016 Junjiro R. Okajima
2151+#
2152+# This program is free software; you can redistribute it and/or modify
2153+# it under the terms of the GNU General Public License as published by
2154+# the Free Software Foundation; either version 2 of the License, or
2155+# (at your option) any later version.
2156+#
2157+# This program is distributed in the hope that it will be useful,
2158+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2159+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2160+# GNU General Public License for more details.
2161+#
2162+# You should have received a copy of the GNU General Public License
2163+# along with this program; if not, write to the Free Software
2164+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2165+
2166+
2167+Listing XATTR/EA and getting the value
2168+----------------------------------------------------------------------
2169+For the inode standard attributes (owner, group, timestamps, etc.), aufs
2170+shows the values from the topmost existing file. This behaviour is good
2171+for the non-dir entries since the behaviour exactly matches the shown
2172+information. But for the directories, aufs considers all the same named
2173+entries on the lower branches. Which means, if one of the lower entry
2174+rejects readdir call, then aufs returns an error even if the topmost
2175+entry allows it. This behaviour is necessary to respect the branch fs's
2176+security, but can make users confused since the user-visible standard
2177+attributes don't match the behaviour.
2178+To address this issue, aufs has a mount option called dirperm1 which
2179+checks the permission for the topmost entry only, and ignores the lower
2180+entry's permission.
2181+
2182+A similar issue can happen around XATTR.
2183+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
2184+always set. Otherwise these very unpleasant situations would happen.
2185+- listxattr(2) may return the duplicated entries.
2186+- users may not be able to remove or reset the XATTR forever.
2187+
2188+
2189+XATTR/EA support in the internal (copy,move)-(up,down)
2190+----------------------------------------------------------------------
2191+Generally the extended attributes of inode are categorized as these.
2192+- "security" for LSM and capability.
2193+- "system" for posix ACL, 'acl' mount option is required for the branch
2194+ fs generally.
2195+- "trusted" for userspace, CAP_SYS_ADMIN is required.
2196+- "user" for userspace, 'user_xattr' mount option is required for the
2197+ branch fs generally.
2198+
2199+Moreover there are some other categories. Aufs handles these rather
2200+unpopular categories as the ordinary ones, ie. there is no special
2201+condition nor exception.
2202+
2203+In copy-up, the support for XATTR on the dst branch may differ from the
2204+src branch. In this case, the copy-up operation will get an error and
2205+the original user operation which triggered the copy-up will fail. It
2206+can happen that even all copy-up will fail.
2207+When both of src and dst branches support XATTR and if an error occurs
2208+during copying XATTR, then the copy-up should fail obviously. That is a
2209+good reason and aufs should return an error to userspace. But when only
2210+the src branch support that XATTR, aufs should not return an error.
2211+For example, the src branch supports ACL but the dst branch doesn't
2212+because the dst branch may natively un-support it or temporarily
2213+un-support it due to "noacl" mount option. Of course, the dst branch fs
2214+may NOT return an error even if the XATTR is not supported. It is
2215+totally up to the branch fs.
2216+
2217+Anyway when the aufs internal copy-up gets an error from the dst branch
2218+fs, then aufs tries removing the just copied entry and returns the error
2219+to the userspace. The worst case of this situation will be all copy-up
2220+will fail.
2221+
2222+For the copy-up operation, there are two basic approaches.
2223+- copy the specified XATTR only (by category above), and return the
2224+ error unconditionally if it happens.
2225+- copy all XATTR, and ignore the error on the specified category only.
2226+
2227+In order to support XATTR and to implement the correct behaviour, aufs
2228+chooses the latter approach and introduces some new branch attributes,
2229+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
2230+They correspond to the XATTR namespaces (see above). Additionally, to be
2231+convenient, "icex" is also provided which means all "icex*" attributes
2232+are set (here the word "icex" stands for "ignore copy-error on XATTR").
2233+
2234+The meaning of these attributes is to ignore the error from setting
2235+XATTR on that branch.
2236+Note that aufs tries copying all XATTR unconditionally, and ignores the
2237+error from the dst branch according to the specified attributes.
2238+
2239+Some XATTR may have its default value. The default value may come from
2240+the parent dir or the environment. If the default value is set at the
2241+file creating-time, it will be overwritten by copy-up.
2242+Some contradiction may happen I am afraid.
2243+Do we need another attribute to stop copying XATTR? I am unsure. For
2244+now, aufs implements the branch attributes to ignore the error.
2245diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
2246--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
2247+++ linux/Documentation/filesystems/aufs/design/07export.txt 2016-10-09 16:55:36.482701377 +0200
2248@@ -0,0 +1,58 @@
2249+
2250+# Copyright (C) 2005-2016 Junjiro R. Okajima
2251+#
2252+# This program is free software; you can redistribute it and/or modify
2253+# it under the terms of the GNU General Public License as published by
2254+# the Free Software Foundation; either version 2 of the License, or
2255+# (at your option) any later version.
2256+#
2257+# This program is distributed in the hope that it will be useful,
2258+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2259+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2260+# GNU General Public License for more details.
2261+#
2262+# You should have received a copy of the GNU General Public License
2263+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2264+
2265+Export Aufs via NFS
2266+----------------------------------------------------------------------
2267+Here is an approach.
2268+- like xino/xib, add a new file 'xigen' which stores aufs inode
2269+ generation.
2270+- iget_locked(): initialize aufs inode generation for a new inode, and
2271+ store it in xigen file.
2272+- destroy_inode(): increment aufs inode generation and store it in xigen
2273+ file. it is necessary even if it is not unlinked, because any data of
2274+ inode may be changed by UDBA.
2275+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
2276+ build file handle by
2277+ + branch id (4 bytes)
2278+ + superblock generation (4 bytes)
2279+ + inode number (4 or 8 bytes)
2280+ + parent dir inode number (4 or 8 bytes)
2281+ + inode generation (4 bytes)
2282+ + return value of exportfs_encode_fh() for the parent on a branch (4
2283+ bytes)
2284+ + file handle for a branch (by exportfs_encode_fh())
2285+- fh_to_dentry():
2286+ + find the index of a branch from its id in handle, and check it
2287+ still exists in aufs.
2288+ + 1st level: get the inode number from handle and search it in cache.
2289+ + 2nd level: if not found in cache, get the parent inode number from
2290+ the handle and search it in cache. and then open the found parent
2291+ dir, find the matching inode number by vfs_readdir() and get its
2292+ name, and call lookup_one_len() for the target dentry.
2293+ + 3rd level: if the parent dir is not cached, call
2294+ exportfs_decode_fh() for a branch and get the parent on a branch,
2295+ build a pathname of it, convert it to a pathname in aufs, call
2296+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
2297+ the 2nd level.
2298+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
2299+ for every branch, but not itself. to get this, (currently) aufs
2300+ searches in current->nsproxy->mnt_ns list. it may not be a good
2301+ idea, but I didn't get other approach.
2302+ + test the generation of the gotten inode.
2303+- every inode operation: they may get EBUSY due to UDBA. in this case,
2304+ convert it into ESTALE for NFSD.
2305+- readdir(): call lockdep_on/off() because filldir in NFSD calls
2306+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
2307diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
2308--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
2309+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2016-10-09 16:55:36.482701377 +0200
2310@@ -0,0 +1,52 @@
2311+
2312+# Copyright (C) 2005-2016 Junjiro R. Okajima
2313+#
2314+# This program is free software; you can redistribute it and/or modify
2315+# it under the terms of the GNU General Public License as published by
2316+# the Free Software Foundation; either version 2 of the License, or
2317+# (at your option) any later version.
2318+#
2319+# This program is distributed in the hope that it will be useful,
2320+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2321+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2322+# GNU General Public License for more details.
2323+#
2324+# You should have received a copy of the GNU General Public License
2325+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2326+
2327+Show Whiteout Mode (shwh)
2328+----------------------------------------------------------------------
2329+Generally aufs hides the name of whiteouts. But in some cases, to show
2330+them is very useful for users. For instance, creating a new middle layer
2331+(branch) by merging existing layers.
2332+
2333+(borrowing aufs1 HOW-TO from a user, Michael Towers)
2334+When you have three branches,
2335+- Bottom: 'system', squashfs (underlying base system), read-only
2336+- Middle: 'mods', squashfs, read-only
2337+- Top: 'overlay', ram (tmpfs), read-write
2338+
2339+The top layer is loaded at boot time and saved at shutdown, to preserve
2340+the changes made to the system during the session.
2341+When larger changes have been made, or smaller changes have accumulated,
2342+the size of the saved top layer data grows. At this point, it would be
2343+nice to be able to merge the two overlay branches ('mods' and 'overlay')
2344+and rewrite the 'mods' squashfs, clearing the top layer and thus
2345+restoring save and load speed.
2346+
2347+This merging is simplified by the use of another aufs mount, of just the
2348+two overlay branches using the 'shwh' option.
2349+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
2350+ aufs /livesys/merge_union
2351+
2352+A merged view of these two branches is then available at
2353+/livesys/merge_union, and the new feature is that the whiteouts are
2354+visible!
2355+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
2356+writing to all branches. Also the default mode for all branches is 'ro'.
2357+It is now possible to save the combined contents of the two overlay
2358+branches to a new squashfs, e.g.:
2359+# mksquashfs /livesys/merge_union /path/to/newmods.squash
2360+
2361+This new squashfs archive can be stored on the boot device and the
2362+initramfs will use it to replace the old one at the next boot.
2363diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
2364--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
2365+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2016-10-09 16:55:36.482701377 +0200
2366@@ -0,0 +1,47 @@
2367+
2368+# Copyright (C) 2010-2016 Junjiro R. Okajima
2369+#
2370+# This program is free software; you can redistribute it and/or modify
2371+# it under the terms of the GNU General Public License as published by
2372+# the Free Software Foundation; either version 2 of the License, or
2373+# (at your option) any later version.
2374+#
2375+# This program is distributed in the hope that it will be useful,
2376+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2377+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2378+# GNU General Public License for more details.
2379+#
2380+# You should have received a copy of the GNU General Public License
2381+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2382+
2383+Dynamically customizable FS operations
2384+----------------------------------------------------------------------
2385+Generally FS operations (struct inode_operations, struct
2386+address_space_operations, struct file_operations, etc.) are defined as
2387+"static const", but it never means that FS have only one set of
2388+operation. Some FS have multiple sets of them. For instance, ext2 has
2389+three sets, one for XIP, for NOBH, and for normal.
2390+Since aufs overrides and redirects these operations, sometimes aufs has
2391+to change its behaviour according to the branch FS type. More importantly
2392+VFS acts differently if a function (member in the struct) is set or
2393+not. It means aufs should have several sets of operations and select one
2394+among them according to the branch FS definition.
2395+
2396+In order to solve this problem and not to affect the behaviour of VFS,
2397+aufs defines these operations dynamically. For instance, aufs defines
2398+dummy direct_IO function for struct address_space_operations, but it may
2399+not be set to the address_space_operations actually. When the branch FS
2400+doesn't have it, aufs doesn't set it to its address_space_operations
2401+while the function definition itself is still alive. So the behaviour
2402+itself will not change, and it will return an error when direct_IO is
2403+not set.
2404+
2405+The lifetime of these dynamically generated operation object is
2406+maintained by aufs branch object. When the branch is removed from aufs,
2407+the reference counter of the object is decremented. When it reaches
2408+zero, the dynamically generated operation object will be freed.
2409+
2410+This approach is designed to support AIO (io_submit), Direct I/O and
2411+XIP (DAX) mainly.
2412+Currently this approach is applied to address_space_operations for
2413+regular files only.
2414diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
2415--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
2416+++ linux/Documentation/filesystems/aufs/README 2016-12-17 12:28:17.595211562 +0100
2417@@ -0,0 +1,393 @@
2418+
2419+Aufs4 -- advanced multi layered unification filesystem version 4.x
2420+http://aufs.sf.net
2421+Junjiro R. Okajima
2422+
2423+
2424+0. Introduction
2425+----------------------------------------
2426+In the early days, aufs was entirely re-designed and re-implemented
2427+Unionfs Version 1.x series. Adding many original ideas, approaches,
2428+improvements and implementations, it becomes totally different from
2429+Unionfs while keeping the basic features.
2430+Recently, Unionfs Version 2.x series begin taking some of the same
2431+approaches to aufs1's.
2432+Unionfs is being developed by Professor Erez Zadok at Stony Brook
2433+University and his team.
2434+
2435+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
2436+If you want older kernel version support, try aufs2-2.6.git or
2437+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
2438+
2439+Note: it becomes clear that "Aufs was rejected. Let's give it up."
2440+ According to Christoph Hellwig, linux rejects all union-type
2441+ filesystems but UnionMount.
2442+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
2443+
2444+PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
2445+ UnionMount, and he pointed out an issue around a directory mutex
2446+ lock and aufs addressed it. But it is still unsure whether aufs will
2447+ be merged (or any other union solution).
2448+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
2449+
2450+
2451+1. Features
2452+----------------------------------------
2453+- unite several directories into a single virtual filesystem. The member
2454+ directory is called as a branch.
2455+- you can specify the permission flags to the branch, which are 'readonly',
2456+ 'readwrite' and 'whiteout-able.'
2457+- by upper writable branch, internal copyup and whiteout, files/dirs on
2458+ readonly branch are modifiable logically.
2459+- dynamic branch manipulation, add, del.
2460+- etc...
2461+
2462+Also there are many enhancements in aufs, such as:
2463+- test only the highest one for the directory permission (dirperm1)
2464+- copyup on open (coo=)
2465+- 'move' policy for copy-up between two writable branches, after
2466+ checking free space.
2467+- xattr, acl
2468+- readdir(3) in userspace.
2469+- keep inode number by external inode number table
2470+- keep the timestamps of file/dir in internal copyup operation
2471+- seekable directory, supporting NFS readdir.
2472+- whiteout is hardlinked in order to reduce the consumption of inodes
2473+ on branch
2474+- do not copyup, nor create a whiteout when it is unnecessary
2475+- revert a single systemcall when an error occurs in aufs
2476+- remount interface instead of ioctl
2477+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
2478+- loopback mounted filesystem as a branch
2479+- kernel thread for removing the dir which has plenty of whiteouts
2480+- support copyup sparse file (a file which has a 'hole' in it)
2481+- default permission flags for branches
2482+- selectable permission flags for ro branch, whether whiteout can
2483+ exist or not
2484+- export via NFS.
2485+- support <sysfs>/fs/aufs and <debugfs>/aufs.
2486+- support multiple writable branches, some policies to select one
2487+ among multiple writable branches.
2488+- a new semantics for link(2) and rename(2) to support multiple
2489+ writable branches.
2490+- no glibc changes are required.
2491+- pseudo hardlink (hardlink over branches)
2492+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
2493+ including NFS or remote filesystem branch.
2494+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
2495+- and more...
2496+
2497+Currently these features are dropped temporarily from aufs4.
2498+See design/08plan.txt in detail.
2499+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
2500+ (robr)
2501+- statistics of aufs thread (/sys/fs/aufs/stat)
2502+
2503+Features or just an idea in the future (see also design/*.txt),
2504+- reorder the branch index without del/re-add.
2505+- permanent xino files for NFSD
2506+- an option for refreshing the opened files after add/del branches
2507+- light version, without branch manipulation. (unnecessary?)
2508+- copyup in userspace
2509+- inotify in userspace
2510+- readv/writev
2511+
2512+
2513+2. Download
2514+----------------------------------------
2515+There are three GIT trees for aufs4, aufs4-linux.git,
2516+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
2517+"aufs-util.git."
2518+While the aufs-util is always necessary, you need either of aufs4-linux
2519+or aufs4-standalone.
2520+
2521+The aufs4-linux tree includes the whole linux mainline GIT tree,
2522+git://git.kernel.org/.../torvalds/linux.git.
2523+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
2524+build aufs4 as an external kernel module.
2525+Several extra patches are not included in this tree. Only
2526+aufs4-standalone tree contains them. They are described in the later
2527+section "Configuration and Compilation."
2528+
2529+On the other hand, the aufs4-standalone tree has only aufs source files
2530+and necessary patches, and you can select CONFIG_AUFS_FS=m.
2531+But you need to apply all aufs patches manually.
2532+
2533+You will find GIT branches whose name is in form of "aufs4.x" where "x"
2534+represents the linux kernel version, "linux-4.x". For instance,
2535+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
2536+"aufs4.x-rcN" branch.
2537+
2538+o aufs4-linux tree
2539+$ git clone --reference /your/linux/git/tree \
2540+ git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
2541+- if you don't have linux GIT tree, then remove "--reference ..."
2542+$ cd aufs4-linux.git
2543+$ git checkout origin/aufs4.0
2544+
2545+Or you may want to directly git-pull aufs into your linux GIT tree, and
2546+leave the patch-work to GIT.
2547+$ cd /your/linux/git/tree
2548+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
2549+$ git fetch aufs4
2550+$ git checkout -b my4.0 v4.0
2551+$ (add your local change...)
2552+$ git pull aufs4 aufs4.0
2553+- now you have v4.0 + your_changes + aufs4.0 in your my4.0 branch.
2554+- you may need to solve some conflicts between your_changes and
2555+ aufs4.0. in this case, git-rerere is recommended so that you can
2556+ solve the similar conflicts automatically when you upgrade to 4.1 or
2557+ later in the future.
2558+
2559+o aufs4-standalone tree
2560+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
2561+$ cd aufs4-standalone.git
2562+$ git checkout origin/aufs4.0
2563+
2564+o aufs-util tree
2565+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
2566+- note that the public aufs-util.git is on SourceForge instead of
2567+ GitHUB.
2568+$ cd aufs-util.git
2569+$ git checkout origin/aufs4.0
2570+
2571+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
2572+The minor version number, 'x' in '4.x', of aufs may not always
2573+follow the minor version number of the kernel.
2574+Because changes in the kernel that cause the use of a new
2575+minor version number do not always require changes to aufs-util.
2576+
2577+Since aufs-util has its own minor version number, you may not be
2578+able to find a GIT branch in aufs-util for your kernel's
2579+exact minor version number.
2580+In this case, you should git-checkout the branch for the
2581+nearest lower number.
2582+
2583+For (an unreleased) example:
2584+If you are using "linux-4.10" and the "aufs4.10" branch
2585+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
2586+or something numerically smaller is the branch for your kernel.
2587+
2588+Also you can view all branches by
2589+ $ git branch -a
2590+
2591+
2592+3. Configuration and Compilation
2593+----------------------------------------
2594+Make sure you have git-checkout'ed the correct branch.
2595+
2596+For aufs4-linux tree,
2597+- enable CONFIG_AUFS_FS.
2598+- set other aufs configurations if necessary.
2599+
2600+For aufs4-standalone tree,
2601+There are several ways to build.
2602+
2603+1.
2604+- apply ./aufs4-kbuild.patch to your kernel source files.
2605+- apply ./aufs4-base.patch too.
2606+- apply ./aufs4-mmap.patch too.
2607+- apply ./aufs4-standalone.patch too, if you have a plan to set
2608+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
2609+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
2610+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
2611+- enable CONFIG_AUFS_FS, you can select either
2612+ =m or =y.
2613+- and build your kernel as usual.
2614+- install the built kernel.
2615+ Note: Since linux-3.9, every filesystem module requires an alias
2616+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2617+ modules.aliases file if you set CONFIG_AUFS_FS=m.
2618+- install the header files too by "make headers_install" to the
2619+ directory where you specify. By default, it is $PWD/usr.
2620+ "make help" shows a brief note for headers_install.
2621+- and reboot your system.
2622+
2623+2.
2624+- module only (CONFIG_AUFS_FS=m).
2625+- apply ./aufs4-base.patch to your kernel source files.
2626+- apply ./aufs4-mmap.patch too.
2627+- apply ./aufs4-standalone.patch too.
2628+- build your kernel, don't forget "make headers_install", and reboot.
2629+- edit ./config.mk and set other aufs configurations if necessary.
2630+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
2631+ every aufs configuration.
2632+- build the module by simple "make".
2633+ Note: Since linux-3.9, every filesystem module requires an alias
2634+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2635+ modules.aliases file.
2636+- you can specify ${KDIR} make variable which points to your kernel
2637+ source tree.
2638+- install the files
2639+ + run "make install" to install the aufs module, or copy the built
2640+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
2641+ + run "make install_headers" (instead of headers_install) to install
2642+ the modified aufs header file (you can specify DESTDIR which is
2643+ available in aufs standalone version's Makefile only), or copy
2644+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
2645+ you like manually. By default, the target directory is $PWD/usr.
2646+- no need to apply aufs4-kbuild.patch, nor copying source files to your
2647+ kernel source tree.
2648+
2649+Note: The header file aufs_type.h is necessary to build aufs-util
2650+ as well as "make headers_install" in the kernel source tree.
2651+ headers_install is subject to be forgotten, but it is essentially
2652+ necessary, not only for building aufs-util.
2653+ You may not meet problems without headers_install in some older
2654+ version though.
2655+
2656+And then,
2657+- read README in aufs-util, build and install it
2658+- note that your distribution may contain an obsoleted version of
2659+ aufs_type.h in /usr/include/linux or something. When you build aufs
2660+ utilities, make sure that your compiler refers the correct aufs header
2661+ file which is built by "make headers_install."
2662+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
2663+ then run "make install_ulib" too. And refer to the aufs manual in
2664+ detail.
2665+
2666+There are several other patches in aufs4-standalone.git. They are all
2667+optional. When you meet some problems, they will help you.
2668+- aufs4-loopback.patch
2669+ Supports a nested loopback mount in a branch-fs. This patch is
2670+ unnecessary until aufs produces a message like "you may want to try
2671+ another patch for loopback file".
2672+- vfs-ino.patch
2673+ Modifies a system global kernel internal function get_next_ino() in
2674+ order to stop assigning 0 for an inode-number. Not directly related to
2675+ aufs, but recommended generally.
2676+- tmpfs-idr.patch
2677+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
2678+ the size of aufs XINO files for tmpfs branch. Also it prevents the
2679+ duplication of inode number, which is important for backup tools and
2680+ other utilities. When you find aufs XINO files for tmpfs branch
2681+ growing too much, try this patch.
2682+- lockdep-debug.patch
2683+ Because aufs is not only an ordinary filesystem (callee of VFS), but
2684+ also a caller of VFS functions for branch filesystems, subclassing of
2685+ the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging
2686+ feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will
2687+ need to apply this debug patch to expand several constant values.
2688+ If you don't know what LOCKDEP is, you don't have to apply this patch.
2689+
2690+
2691+4. Usage
2692+----------------------------------------
2693+At first, make sure aufs-util are installed, and please read the aufs
2694+manual, aufs.5 in aufs-util.git tree.
2695+$ man -l aufs.5
2696+
2697+And then,
2698+$ mkdir /tmp/rw /tmp/aufs
2699+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
2700+
2701+Here is another example. The result is equivalent.
2702+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
2703+ Or
2704+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
2705+# mount -o remount,append:${HOME} /tmp/aufs
2706+
2707+Then, you can see whole tree of your home dir through /tmp/aufs. If
2708+you modify a file under /tmp/aufs, the one on your home directory is
2709+not affected, instead the same named file will be newly created under
2710+/tmp/rw. And all of your modification to a file will be applied to
2711+the one under /tmp/rw. This is called the file based Copy on Write
2712+(COW) method.
2713+Aufs mount options are described in aufs.5.
2714+If you run chroot or something and make your aufs as a root directory,
2715+then you need to customize the shutdown script. See the aufs manual in
2716+detail.
2717+
2718+Additionally, there are some sample usages of aufs which are a
2719+diskless system with network booting, and LiveCD over NFS.
2720+See sample dir in CVS tree on SourceForge.
2721+
2722+
2723+5. Contact
2724+----------------------------------------
2725+When you have any problems or strange behaviour in aufs, please let me
2726+know with:
2727+- /proc/mounts (instead of the output of mount(8))
2728+- /sys/module/aufs/*
2729+- /sys/fs/aufs/* (if you have them)
2730+- /debug/aufs/* (if you have them)
2731+- linux kernel version
2732+ if your kernel is not plain, for example modified by distributor,
2733+ the url where i can download its source is necessary too.
2734+- aufs version which was printed at loading the module or booting the
2735+ system, instead of the date you downloaded.
2736+- configuration (define/undefine CONFIG_AUFS_xxx)
2737+- kernel configuration or /proc/config.gz (if you have it)
2738+- behaviour which you think to be incorrect
2739+- actual operation, reproducible one is better
2740+- mailto: aufs-users at lists.sourceforge.net
2741+
2742+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
2743+and Feature Requests) on SourceForge. Please join and write to
2744+aufs-users ML.
2745+
2746+
2747+6. Acknowledgements
2748+----------------------------------------
2749+Thanks to everyone who have tried and are using aufs, whoever
2750+have reported a bug or any feedback.
2751+
2752+Especially donators:
2753+Tomas Matejicek(slax.org) made a donation (much more than once).
2754+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
2755+ scripts) is making "doubling" donations.
2756+ Unfortunately I cannot list all of the donators, but I really
2757+ appreciate.
2758+ It ends Aug 2010, but the ordinary donation URL is still available.
2759+ <http://sourceforge.net/donate/index.php?group_id=167503>
2760+Dai Itasaka made a donation (2007/8).
2761+Chuck Smith made a donation (2008/4, 10 and 12).
2762+Henk Schoneveld made a donation (2008/9).
2763+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
2764+Francois Dupoux made a donation (2008/11).
2765+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
2766+ aufs2 GIT tree (2009/2).
2767+William Grant made a donation (2009/3).
2768+Patrick Lane made a donation (2009/4).
2769+The Mail Archive (mail-archive.com) made donations (2009/5).
2770+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
2771+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
2772+Pavel Pronskiy made a donation (2011/2).
2773+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
2774+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
2775+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
2776+11).
2777+Sam Liddicott made a donation (2011/9).
2778+Era Scarecrow made a donation (2013/4).
2779+Bor Ratajc made a donation (2013/4).
2780+Alessandro Gorreta made a donation (2013/4).
2781+POIRETTE Marc made a donation (2013/4).
2782+Alessandro Gorreta made a donation (2013/4).
2783+lauri kasvandik made a donation (2013/5).
2784+"pemasu from Finland" made a donation (2013/7).
2785+The Parted Magic Project made a donation (2013/9 and 11).
2786+Pavel Barta made a donation (2013/10).
2787+Nikolay Pertsev made a donation (2014/5).
2788+James B made a donation (2014/7 and 2015/7).
2789+Stefano Di Biase made a donation (2014/8).
2790+Daniel Epellei made a donation (2015/1).
2791+OmegaPhil made a donation (2016/1).
2792+Tomasz Szewczyk made a donation (2016/4).
2793+James Burry made a donation (2016/12).
2794+
2795+Thank you very much.
2796+Donations are always, including future donations, very important and
2797+helpful for me to keep on developing aufs.
2798+
2799+
2800+7.
2801+----------------------------------------
2802+If you are an experienced user, no explanation is needed. Aufs is
2803+just a linux filesystem.
2804+
2805+
2806+Enjoy!
2807+
2808+# Local variables: ;
2809+# mode: text;
2810+# End: ;
2811diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
2812--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
2813+++ linux/fs/aufs/aufs.h 2016-10-09 16:55:36.486034798 +0200
2814@@ -0,0 +1,59 @@
2815+/*
2816+ * Copyright (C) 2005-2016 Junjiro R. Okajima
2817+ *
2818+ * This program, aufs is free software; you can redistribute it and/or modify
2819+ * it under the terms of the GNU General Public License as published by
2820+ * the Free Software Foundation; either version 2 of the License, or
2821+ * (at your option) any later version.
2822+ *
2823+ * This program is distributed in the hope that it will be useful,
2824+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2825+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2826+ * GNU General Public License for more details.
2827+ *
2828+ * You should have received a copy of the GNU General Public License
2829+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
2830+ */
2831+
2832+/*
2833+ * all header files, and the AuStub*() macros generating static inline stubs
2834+ */
2835+
2836+#ifndef __AUFS_H__
2837+#define __AUFS_H__
2838+
2839+#ifdef __KERNEL__
2840+
2841+#define AuStub(type, name, body, ...) \
2842+ static inline type name(__VA_ARGS__) { body; }
2843+
2844+#define AuStubVoid(name, ...) \
2845+ AuStub(void, name, , __VA_ARGS__)
2846+#define AuStubInt0(name, ...) \
2847+ AuStub(int, name, return 0, __VA_ARGS__)
2848+
2849+#include "debug.h"
2850+
2851+#include "branch.h"
2852+#include "cpup.h"
2853+#include "dcsub.h"
2854+#include "dbgaufs.h"
2855+#include "dentry.h"
2856+#include "dir.h"
2857+#include "dynop.h"
2858+#include "file.h"
2859+#include "fstype.h"
2860+#include "inode.h"
2861+#include "loop.h"
2862+#include "module.h"
2863+#include "opts.h"
2864+#include "rwsem.h"
2865+#include "spl.h"
2866+#include "super.h"
2867+#include "sysaufs.h"
2868+#include "vfsub.h"
2869+#include "whout.h"
2870+#include "wkq.h"
2871+
2872+#endif /* __KERNEL__ */
2873+#endif /* __AUFS_H__ */
2874diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
2875--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
2876+++ linux/fs/aufs/branch.c 2016-10-09 16:55:38.886097714 +0200
2877@@ -0,0 +1,1412 @@
2878+/*
2879+ * Copyright (C) 2005-2016 Junjiro R. Okajima
2880+ *
2881+ * This program, aufs is free software; you can redistribute it and/or modify
2882+ * it under the terms of the GNU General Public License as published by
2883+ * the Free Software Foundation; either version 2 of the License, or
2884+ * (at your option) any later version.
2885+ *
2886+ * This program is distributed in the hope that it will be useful,
2887+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2888+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2889+ * GNU General Public License for more details.
2890+ *
2891+ * You should have received a copy of the GNU General Public License
2892+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
2893+ */
2894+
2895+/*
2896+ * branch management
2897+ */
2898+
2899+#include <linux/compat.h>
2900+#include <linux/statfs.h>
2901+#include "aufs.h"
2902+
2903+/*
2904+ * free a single branch and what it holds (xino file, wbr, fhsm, dykeys, path)
2905+ */
2906+static void au_br_do_free(struct au_branch *br)
2907+{
2908+ int i;
2909+ struct au_wbr *wbr;
2910+ struct au_dykey **key;
2911+
2912+ au_hnotify_fin_br(br);
2913+
2914+ if (br->br_xino.xi_file)
2915+ fput(br->br_xino.xi_file);
2916+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
2917+
2918+ AuDebugOn(au_br_count(br));
2919+ au_br_count_fin(br);
2920+
2921+ wbr = br->br_wbr;
2922+ if (wbr) {
2923+ for (i = 0; i < AuBrWh_Last; i++)
2924+ dput(wbr->wbr_wh[i]);
2925+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
2926+ AuRwDestroy(&wbr->wbr_wh_rwsem);
2927+ }
2928+
2929+ if (br->br_fhsm) {
2930+ au_br_fhsm_fin(br->br_fhsm);
2931+ au_delayed_kfree(br->br_fhsm);
2932+ }
2933+
2934+ key = br->br_dykey;
2935+ for (i = 0; i < AuBrDynOp; i++, key++)
2936+ if (*key)
2937+ au_dy_put(*key);
2938+ else
2939+ break; /* the first NULL entry ends the scan */
2940+
2941+ /* recursive lock, s_umount of branch's */
2942+ lockdep_off();
2943+ path_put(&br->br_path);
2944+ lockdep_on();
2945+ if (wbr)
2946+ au_delayed_kfree(wbr); /* the wbr struct itself, its members are done above */
2947+ au_delayed_kfree(br);
2948+}
2949+
2950+/*
2951+ * frees all branches, si_branch[0] to si_branch[si_bbot]
2952+ */
2953+void au_br_free(struct au_sbinfo *sbinfo)
2954+{
2955+ aufs_bindex_t bmax;
2956+ struct au_branch **br;
2957+
2958+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2959+
2960+ bmax = sbinfo->si_bbot + 1; /* the number of entries to free */
2961+ br = sbinfo->si_branch;
2962+ while (bmax--)
2963+ au_br_do_free(*br++);
2964+}
2965+
2966+/*
2967+ * find the index of a branch which is specified by @br_id, -1 if not found.
2968+ */
2969+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
2970+{
2971+ aufs_bindex_t bindex, bbot;
2972+
2973+ bbot = au_sbbot(sb);
2974+ for (bindex = 0; bindex <= bbot; bindex++)
2975+ if (au_sbr_id(sb, bindex) == br_id)
2976+ return bindex;
2977+ return -1;
2978+}
2979+
2980+/* ---------------------------------------------------------------------- */
2981+
2982+/*
2983+ * add a branch
2984+ */
2985+/* nonzero if @h_adding is @h_root itself or overlaps it (see the tests below) */
2986+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
2987+ struct dentry *h_root)
2988+{
2989+ if (unlikely(h_adding == h_root
2990+ || au_test_loopback_overlap(sb, h_adding)))
2991+ return 1;
2992+ if (h_adding->d_sb != h_root->d_sb)
2993+ return 0; /* the subdir tests below are for the same sb only */
2994+ return au_test_subdir(h_adding, h_root)
2995+ || au_test_subdir(h_root, h_adding);
2996+}
2997+
2998+/*
2999+ * returns a newly allocated branch. @new_nbranch is a number of branches
3000+ * after adding a branch. returns ERR_PTR(-errno) when it fails.
3001+ */
3002+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
3003+ int perm)
3004+{
3005+ struct au_branch *add_branch;
3006+ struct dentry *root;
3007+ struct inode *inode;
3008+ int err;
3009+
3010+ err = -ENOMEM;
3011+ root = sb->s_root;
3012+ add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
3013+ if (unlikely(!add_branch))
3014+ goto out;
3015+
3016+ err = au_hnotify_init_br(add_branch, perm);
3017+ if (unlikely(err))
3018+ goto out_br;
3019+ err = -ENOMEM; /* err is 0 here, never return ERR_PTR(0) for br_wbr */
3020+ if (au_br_writable(perm)) {
3021+ /* may be freed separately at changing the branch permission */
3022+ add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
3023+ GFP_NOFS);
3024+ if (unlikely(!add_branch->br_wbr))
3025+ goto out_hnotify;
3026+ }
3027+
3028+ if (au_br_fhsm(perm)) {
3029+ err = au_fhsm_br_alloc(add_branch);
3030+ if (unlikely(err))
3031+ goto out_wbr;
3032+ }
3033+
3034+ err = au_sbr_realloc(au_sbi(sb), new_nbranch, /*may_shrink*/0); /* always overwrites err */
3035+ if (!err)
3036+ err = au_di_realloc(au_di(root), new_nbranch, /*may_shrink*/0);
3037+ if (!err) {
3038+ inode = d_inode(root);
3039+ err = au_hinode_realloc(au_ii(inode), new_nbranch, /*may_shrink*/0);
3040+ }
3041+ if (!err)
3042+ return add_branch; /* success */
3043+
3044+out_wbr:
3045+ if (add_branch->br_wbr)
3046+ au_delayed_kfree(add_branch->br_wbr);
3047+out_hnotify:
3048+ au_hnotify_fin_br(add_branch);
3049+out_br:
3050+ au_delayed_kfree(add_branch);
3051+out:
3052+ return ERR_PTR(err);
3053+}
3054+
3055+/*
3056+ * test if the branch permission is legal or not: 0 if legal, else -EINVAL.
3057+ */
3058+static int test_br(struct inode *inode, int brperm, char *path)
3059+{
3060+ int err;
3061+
3062+ err = (au_br_writable(brperm) && IS_RDONLY(inode)); /* illegal combination */
3063+ if (!err)
3064+ goto out;
3065+
3066+ err = -EINVAL;
3067+ pr_err("write permission for readonly mount or inode, %s\n", path);
3068+
3069+out:
3070+ return err;
3071+}
3072+
3073+/*
3074+ * returns:
3075+ * 0: success, the caller will add it
3076+ * plus: success, it is already unified, the caller should ignore it
3077+ * minus: error
3078+ */
3079+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
3080+{
3081+ int err;
3082+ aufs_bindex_t bbot, bindex;
3083+ struct dentry *root, *h_dentry;
3084+ struct inode *inode, *h_inode;
3085+
3086+ root = sb->s_root;
3087+ bbot = au_sbbot(sb);
3088+ if (unlikely(bbot >= 0
3089+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
3090+ err = 1; /* already a branch, tolerated only at remount */
3091+ if (!remount) {
3092+ err = -EINVAL;
3093+ pr_err("%s duplicated\n", add->pathname);
3094+ }
3095+ goto out;
3096+ }
3097+
3098+ err = -ENOSPC; /* -E2BIG; */
3099+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
3100+ || AUFS_BRANCH_MAX - 1 <= bbot)) {
3101+ pr_err("number of branches exceeded %s\n", add->pathname);
3102+ goto out;
3103+ }
3104+
3105+ err = -EDOM;
3106+ if (unlikely(add->bindex < 0 || bbot + 1 < add->bindex)) {
3107+ pr_err("bad index %d\n", add->bindex);
3108+ goto out;
3109+ }
3110+
3111+ inode = d_inode(add->path.dentry);
3112+ err = -ENOENT;
3113+ if (unlikely(!inode->i_nlink)) {
3114+ pr_err("no existence %s\n", add->pathname);
3115+ goto out;
3116+ }
3117+
3118+ err = -EINVAL; /* for the three tests which follow */
3119+ if (unlikely(inode->i_sb == sb)) {
3120+ pr_err("%s must be outside\n", add->pathname);
3121+ goto out;
3122+ }
3123+
3124+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
3125+ pr_err("unsupported filesystem, %s (%s)\n",
3126+ add->pathname, au_sbtype(inode->i_sb));
3127+ goto out;
3128+ }
3129+
3130+ if (unlikely(inode->i_sb->s_stack_depth)) {
3131+ pr_err("already stacked, %s (%s)\n",
3132+ add->pathname, au_sbtype(inode->i_sb));
3133+ goto out;
3134+ }
3135+
3136+ err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
3137+ if (unlikely(err))
3138+ goto out;
3139+
3140+ if (bbot < 0) /* no branch yet, nothing to compare with */
3141+ return 0; /* success */
3142+
3143+ err = -EINVAL;
3144+ for (bindex = 0; bindex <= bbot; bindex++)
3145+ if (unlikely(test_overlap(sb, add->path.dentry,
3146+ au_h_dptr(root, bindex)))) {
3147+ pr_err("%s is overlapped\n", add->pathname);
3148+ goto out;
3149+ }
3150+
3151+ err = 0;
3152+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
3153+ h_dentry = au_h_dptr(root, 0); /* compare with the branch at index 0 */
3154+ h_inode = d_inode(h_dentry);
3155+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
3156+ || !uid_eq(h_inode->i_uid, inode->i_uid)
3157+ || !gid_eq(h_inode->i_gid, inode->i_gid))
3158+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
3159+ add->pathname,
3160+ i_uid_read(inode), i_gid_read(inode),
3161+ (inode->i_mode & S_IALLUGO),
3162+ i_uid_read(h_inode), i_gid_read(h_inode),
3163+ (h_inode->i_mode & S_IALLUGO));
3164+ }
3165+
3166+out:
3167+ return err;
3168+}
3169+
3170+/*
3171+ * initialize or clean the whiteouts for an adding branch. returns 0 or errno.
3172+ */
3173+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
3174+ int new_perm)
3175+{
3176+ int err, old_perm;
3177+ aufs_bindex_t bindex;
3178+ struct inode *h_inode;
3179+ struct au_wbr *wbr;
3180+ struct au_hinode *hdir;
3181+ struct dentry *h_dentry;
3182+
3183+ err = vfsub_mnt_want_write(au_br_mnt(br));
3184+ if (unlikely(err))
3185+ goto out;
3186+
3187+ wbr = br->br_wbr;
3188+ old_perm = br->br_perm;
3189+ br->br_perm = new_perm; /* only during au_wh_init(), restored below */
3190+ hdir = NULL;
3191+ h_inode = NULL;
3192+ bindex = au_br_index(sb, br->br_id);
3193+ if (0 <= bindex) { /* @br is found in @sb, lock via the root's hinode */
3194+ hdir = au_hi(d_inode(sb->s_root), bindex);
3195+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
3196+ } else { /* not found in @sb, lock the branch root inode directly */
3197+ h_dentry = au_br_dentry(br);
3198+ h_inode = d_inode(h_dentry);
3199+ inode_lock_nested(h_inode, AuLsc_I_PARENT);
3200+ }
3201+ if (!wbr)
3202+ err = au_wh_init(br, sb);
3203+ else {
3204+ wbr_wh_write_lock(wbr);
3205+ err = au_wh_init(br, sb);
3206+ wbr_wh_write_unlock(wbr);
3207+ }
3208+ if (hdir)
3209+ au_hn_inode_unlock(hdir);
3210+ else
3211+ inode_unlock(h_inode);
3212+ vfsub_mnt_drop_write(au_br_mnt(br));
3213+ br->br_perm = old_perm;
3214+
3215+ if (!err && wbr && !au_br_writable(new_perm)) { /* wbr is not needed anymore */
3216+ au_delayed_kfree(wbr);
3217+ br->br_wbr = NULL;
3218+ }
3219+
3220+out:
3221+ return err;
3222+}
3223+/* init br->br_wbr, check the namelen of the branch fs, then au_br_init_wh() */
3224+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
3225+ int perm)
3226+{
3227+ int err;
3228+ struct kstatfs kst;
3229+ struct au_wbr *wbr;
3230+
3231+ wbr = br->br_wbr; /* dereferenced without a NULL test */
3232+ au_rw_init(&wbr->wbr_wh_rwsem);
3233+ atomic_set(&wbr->wbr_wh_running, 0);
3234+
3235+ /*
3236+ * a limit for rmdir/rename a dir
3237+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
3238+ */
3239+ err = vfs_statfs(&br->br_path, &kst);
3240+ if (unlikely(err))
3241+ goto out;
3242+ err = -EINVAL; /* returned when f_namelen is shorter than NAME_MAX */
3243+ if (kst.f_namelen >= NAME_MAX)
3244+ err = au_br_init_wh(sb, br, perm);
3245+ else
3246+ pr_err("%pd(%s), unsupported namelen %ld\n",
3247+ au_br_dentry(br),
3248+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
3249+
3250+out:
3251+ return err;
3252+}
3253+
3254+/* initialize a new branch from @add, path_get() is done only when it succeeds */
3255+static int au_br_init(struct au_branch *br, struct super_block *sb,
3256+ struct au_opt_add *add)
3257+{
3258+ int err;
3259+ struct inode *h_inode;
3260+
3261+ err = 0;
3262+ mutex_init(&br->br_xino.xi_nondir_mtx);
3263+ br->br_perm = add->perm;
3264+ br->br_path = add->path; /* set first, path_get() later */
3265+ spin_lock_init(&br->br_dykey_lock);
3266+ au_br_count_init(br);
3267+ atomic_set(&br->br_xino_running, 0);
3268+ br->br_id = au_new_br_id(sb);
3269+ AuDebugOn(br->br_id < 0);
3270+
3271+ if (au_br_writable(add->perm)) {
3272+ err = au_wbr_init(br, sb, add->perm);
3273+ if (unlikely(err))
3274+ goto out_err;
3275+ }
3276+
3277+ if (au_opt_test(au_mntflags(sb), XINO)) {
3278+ h_inode = d_inode(add->path.dentry);
3279+ err = au_xino_br(sb, br, h_inode->i_ino,
3280+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
3281+ if (unlikely(err)) {
3282+ AuDebugOn(br->br_xino.xi_file);
3283+ goto out_err;
3284+ }
3285+ }
3286+
3287+ sysaufs_br_init(br);
3288+ path_get(&br->br_path);
3289+ goto out; /* success */
3290+
3291+out_err:
3292+ memset(&br->br_path, 0, sizeof(br->br_path)); /* no reference was taken */
3293+out:
3294+ return err;
3295+}
3296+/* insert @br at @bindex of si_branch[], moving @amount entries by one */
3297+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
3298+ struct au_branch *br, aufs_bindex_t bbot,
3299+ aufs_bindex_t amount)
3300+{
3301+ struct au_branch **brp;
3302+
3303+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3304+
3305+ brp = sbinfo->si_branch + bindex;
3306+ memmove(brp + 1, brp, sizeof(*brp) * amount);
3307+ *brp = br;
3308+ sbinfo->si_bbot++;
3309+ if (unlikely(bbot < 0)) /* @bbot was negative, i.e. the first branch */
3310+ sbinfo->si_bbot = 0;
3311+}
3312+/* make room at @bindex of the hdentry array of @dinfo, and init the new entry */
3313+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
3314+ aufs_bindex_t bbot, aufs_bindex_t amount)
3315+{
3316+ struct au_hdentry *hdp;
3317+
3318+ AuRwMustWriteLock(&dinfo->di_rwsem);
3319+
3320+ hdp = au_hdentry(dinfo, bindex);
3321+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
3322+ au_h_dentry_init(hdp);
3323+ dinfo->di_bbot++;
3324+ if (unlikely(bbot < 0)) /* @bbot was negative, i.e. the first branch */
3325+ dinfo->di_btop = 0;
3326+}
3327+/* make room at @bindex of the hinode array of @iinfo, and init the new entry */
3328+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
3329+ aufs_bindex_t bbot, aufs_bindex_t amount)
3330+{
3331+ struct au_hinode *hip;
3332+
3333+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3334+
3335+ hip = au_hinode(iinfo, bindex);
3336+ memmove(hip + 1, hip, sizeof(*hip) * amount);
3337+ au_hinode_init(hip);
3338+ iinfo->ii_bbot++;
3339+ if (unlikely(bbot < 0)) /* @bbot was negative, i.e. the first branch */
3340+ iinfo->ii_btop = 0;
3341+}
3342+/* put @br at @bindex in sbinfo, and its dentry/inode into the aufs root */
3343+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
3344+ aufs_bindex_t bindex)
3345+{
3346+ struct dentry *root, *h_dentry;
3347+ struct inode *root_inode, *h_inode;
3348+ aufs_bindex_t bbot, amount;
3349+
3350+ root = sb->s_root;
3351+ root_inode = d_inode(root);
3352+ bbot = au_sbbot(sb);
3353+ amount = bbot + 1 - bindex; /* entries at and after @bindex */
3354+ h_dentry = au_br_dentry(br);
3355+ au_sbilist_lock();
3356+ au_br_do_add_brp(au_sbi(sb), bindex, br, bbot, amount);
3357+ au_br_do_add_hdp(au_di(root), bindex, bbot, amount);
3358+ au_br_do_add_hip(au_ii(root_inode), bindex, bbot, amount);
3359+ au_set_h_dptr(root, bindex, dget(h_dentry));
3360+ h_inode = d_inode(h_dentry);
3361+ au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
3362+ au_sbilist_unlock();
3363+}
3364+/* add the branch described by @add to @sb at add->bindex, 0 or an error */
3365+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
3366+{
3367+ int err;
3368+ aufs_bindex_t bbot, add_bindex;
3369+ struct dentry *root, *h_dentry;
3370+ struct inode *root_inode;
3371+ struct au_branch *add_branch;
3372+
3373+ root = sb->s_root;
3374+ root_inode = d_inode(root);
3375+ IMustLock(root_inode);
3376+ IiMustWriteLock(root_inode);
3377+ err = test_add(sb, add, remount);
3378+ if (unlikely(err < 0))
3379+ goto out;
3380+ if (err) { /* plus: already unified, ignore it */
3381+ err = 0;
3382+ goto out; /* success */
3383+ }
3384+
3385+ bbot = au_sbbot(sb);
3386+ add_branch = au_br_alloc(sb, bbot + 2, add->perm); /* bbot + 2: the new count */
3387+ err = PTR_ERR(add_branch);
3388+ if (IS_ERR(add_branch))
3389+ goto out;
3390+
3391+ err = au_br_init(add_branch, sb, add);
3392+ if (unlikely(err)) {
3393+ au_br_do_free(add_branch);
3394+ goto out;
3395+ }
3396+
3397+ add_bindex = add->bindex;
3398+ if (!remount)
3399+ au_br_do_add(sb, add_branch, add_bindex);
3400+ else {
3401+ sysaufs_brs_del(sb, add_bindex);
3402+ au_br_do_add(sb, add_branch, add_bindex);
3403+ sysaufs_brs_add(sb, add_bindex);
3404+ }
3405+
3406+ h_dentry = add->path.dentry;
3407+ if (!add_bindex) { /* the new branch is at index 0 */
3408+ au_cpup_attr_all(root_inode, /*force*/1);
3409+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
3410+ } else
3411+ au_add_nlink(root_inode, d_inode(h_dentry));
3412+
3413+ /*
3414+ * this test/set prevents aufs from handling unnecessary notify events
3415+ * of xino files, in case of re-adding a writable branch which was
3416+ * once detached from aufs.
3417+ */
3418+ if (au_xino_brid(sb) < 0
3419+ && au_br_writable(add_branch->br_perm)
3420+ && !au_test_fs_bad_xino(h_dentry->d_sb)
3421+ && add_branch->br_xino.xi_file
3422+ && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
3423+ au_xino_brid_set(sb, add_branch->br_id);
3424+
3425+out:
3426+ return err;
3427+}
3428+
3429+/* ---------------------------------------------------------------------- */
3430+/* au_array_alloc() callback: get_file() the non-special files and put them in @a */
3431+static unsigned long long au_farray_cb(struct super_block *sb, void *a,
3432+ unsigned long long max __maybe_unused,
3433+ void *arg)
3434+{
3435+ unsigned long long n;
3436+ struct file **p, *f;
3437+ struct au_sphlhead *files;
3438+ struct au_finfo *finfo;
3439+
3440+ n = 0;
3441+ p = a;
3442+ files = &au_sbi(sb)->si_files;
3443+ spin_lock(&files->spin);
3444+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
3445+ f = finfo->fi_file;
3446+ if (file_count(f)
3447+ && !special_file(file_inode(f)->i_mode)) {
3448+ get_file(f); /* dropped by fput() in au_farray_free() */
3449+ *p++ = f;
3450+ n++;
3451+ AuDebugOn(n > max);
3452+ }
3453+ }
3454+ spin_unlock(&files->spin);
3455+
3456+ return n; /* the number of stored files */
3457+}
3458+/* build a file array by au_farray_cb(), *max is set from au_nfiles() first */
3459+static struct file **au_farray_alloc(struct super_block *sb,
3460+ unsigned long long *max)
3461+{
3462+ *max = au_nfiles(sb);
3463+ return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
3464+}
3465+/* fput() every non-NULL entry in a[0] to a[max - 1], then free the array */
3466+static void au_farray_free(struct file **a, unsigned long long max)
3467+{
3468+ unsigned long long ull;
3469+
3470+ for (ull = 0; ull < max; ull++)
3471+ if (a[ull])
3472+ fput(a[ull]);
3473+ kvfree(a);
3474+}
3475+
3476+/* ---------------------------------------------------------------------- */
3477+
3478+/*
3479+ * delete a branch
3480+ */
3481+
3482+/* to show the line number, do not turn this macro into an inline function */
3483+#define AuVerbose(do_info, fmt, ...) do { \
3484+ if (do_info) \
3485+ pr_info(fmt, ##__VA_ARGS__); \
3486+} while (0)
3487+/* busy if @inode is a non-directory, or if @btop equals @bbot */
3488+static int au_test_ibusy(struct inode *inode, aufs_bindex_t btop,
3489+ aufs_bindex_t bbot)
3490+{
3491+ return (inode && !S_ISDIR(inode->i_mode)) || btop == bbot;
3492+}
3493+/* the dentry version of au_test_ibusy(), tests d_inode(@dentry) */
3494+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t btop,
3495+ aufs_bindex_t bbot)
3496+{
3497+ return au_test_ibusy(d_inode(dentry), btop, bbot);
3498+}
3499+
3500+/*
3501+ * test if the branch is deletable or not, -EBUSY if a dentry still uses it.
3502+ */
3503+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
3504+ unsigned int sigen, const unsigned int verbose)
3505+{
3506+ int err, i, j, ndentry;
3507+ aufs_bindex_t btop, bbot;
3508+ struct au_dcsub_pages dpages;
3509+ struct au_dpage *dpage;
3510+ struct dentry *d;
3511+
3512+ err = au_dpages_init(&dpages, GFP_NOFS);
3513+ if (unlikely(err))
3514+ goto out;
3515+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
3516+ if (unlikely(err))
3517+ goto out_dpages;
3518+
3519+ for (i = 0; !err && i < dpages.ndpage; i++) {
3520+ dpage = dpages.dpages + i;
3521+ ndentry = dpage->ndentry;
3522+ for (j = 0; !err && j < ndentry; j++) {
3523+ d = dpage->dentries[j];
3524+ AuDebugOn(au_dcount(d) <= 0);
3525+ if (!au_digen_test(d, sigen)) {
3526+ di_read_lock_child(d, AuLock_IR);
3527+ if (unlikely(au_dbrange_test(d))) {
3528+ di_read_unlock(d, AuLock_IR);
3529+ continue;
3530+ }
3531+ } else {
3532+ di_write_lock_child(d);
3533+ if (unlikely(au_dbrange_test(d))) {
3534+ di_write_unlock(d);
3535+ continue;
3536+ }
3537+ err = au_reval_dpath(d, sigen);
3538+ if (!err)
3539+ di_downgrade_lock(d, AuLock_IR);
3540+ else {
3541+ di_write_unlock(d);
3542+ break; /* the outer loop stops too since err is set */
3543+ }
3544+ }
3545+
3546+ /* AuDbgDentry(d); */
3547+ btop = au_dbtop(d); /* @d is read-locked on both paths here */
3548+ bbot = au_dbbot(d);
3549+ if (btop <= bindex
3550+ && bindex <= bbot
3551+ && au_h_dptr(d, bindex)
3552+ && au_test_dbusy(d, btop, bbot)) {
3553+ err = -EBUSY;
3554+ AuVerbose(verbose, "busy %pd\n", d);
3555+ AuDbgDentry(d);
3556+ }
3557+ di_read_unlock(d, AuLock_IR);
3558+ }
3559+ }
3560+
3561+out_dpages:
3562+ au_dpages_free(&dpages);
3563+out:
3564+ return err;
3565+}
3566+/* -EBUSY if a non-root inode of @sb has branch @bindex and is busy on it */
3567+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
3568+ unsigned int sigen, const unsigned int verbose)
3569+{
3570+ int err;
3571+ unsigned long long max, ull;
3572+ struct inode *i, **array;
3573+ aufs_bindex_t btop, bbot;
3574+
3575+ array = au_iarray_alloc(sb, &max);
3576+ err = PTR_ERR(array);
3577+ if (IS_ERR(array))
3578+ goto out;
3579+
3580+ err = 0;
3581+ AuDbg("b%d\n", bindex);
3582+ for (ull = 0; !err && ull < max; ull++) {
3583+ i = array[ull];
3584+ if (unlikely(!i))
3585+ break;
3586+ if (i->i_ino == AUFS_ROOT_INO)
3587+ continue;
3588+
3589+ /* AuDbgInode(i); */
3590+ if (au_iigen(i, NULL) == sigen)
3591+ ii_read_lock_child(i);
3592+ else {
3593+ ii_write_lock_child(i);
3594+ err = au_refresh_hinode_self(i);
3595+ au_iigen_dec(i);
3596+ if (!err)
3597+ ii_downgrade_lock(i);
3598+ else {
3599+ ii_write_unlock(i);
3600+ break;
3601+ }
3602+ }
3603+
3604+ btop = au_ibtop(i); /* @i is read-locked on both paths here */
3605+ bbot = au_ibbot(i);
3606+ if (btop <= bindex
3607+ && bindex <= bbot
3608+ && au_h_iptr(i, bindex)
3609+ && au_test_ibusy(i, btop, bbot)) {
3610+ err = -EBUSY;
3611+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
3612+ AuDbgInode(i);
3613+ }
3614+ ii_read_unlock(i);
3615+ }
3616+ au_iarray_free(array, max);
3617+
3618+out:
3619+ return err;
3620+}
3621+/* run the dentry and inode busy tests while the di lock of @root is released */
3622+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
3623+ const unsigned int verbose)
3624+{
3625+ int err;
3626+ unsigned int sigen;
3627+
3628+ sigen = au_sigen(root->d_sb);
3629+ DiMustNoWaiters(root);
3630+ IiMustNoWaiters(d_inode(root));
3631+ di_write_unlock(root);
3632+ err = test_dentry_busy(root, bindex, sigen, verbose);
3633+ if (!err)
3634+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
3635+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
3636+
3637+ return err;
3638+}
3639+/* the root dir is kept in @to_free, other dirs are -EBUSY if opened on @br_id */
3640+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
3641+ struct file **to_free, int *idx)
3642+{
3643+ int err;
3644+ unsigned char matched, root;
3645+ aufs_bindex_t bindex, bbot;
3646+ struct au_fidir *fidir;
3647+ struct au_hfile *hfile;
3648+
3649+ err = 0;
3650+ root = IS_ROOT(file->f_path.dentry);
3651+ if (root) {
3652+ get_file(file); /* this reference is held by @to_free */
3653+ to_free[*idx] = file;
3654+ (*idx)++;
3655+ goto out;
3656+ }
3657+
3658+ matched = 0;
3659+ fidir = au_fi(file)->fi_hdir;
3660+ AuDebugOn(!fidir);
3661+ bbot = au_fbbot_dir(file);
3662+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++) {
3663+ hfile = fidir->fd_hfile + bindex;
3664+ if (!hfile->hf_file)
3665+ continue;
3666+
3667+ if (hfile->hf_br->br_id == br_id) {
3668+ matched = 1;
3669+ break;
3670+ }
3671+ }
3672+ if (matched)
3673+ err = -EBUSY;
3674+
3675+out:
3676+ return err;
3677+}
3678+/* -EBUSY if a file is opened on @br_id, opened root dirs are put in @to_free */
3679+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
3680+ struct file **to_free, int opened)
3681+{
3682+ int err, idx;
3683+ unsigned long long ull, max;
3684+ aufs_bindex_t btop;
3685+ struct file *file, **array;
3686+ struct dentry *root;
3687+ struct au_hfile *hfile;
3688+
3689+ array = au_farray_alloc(sb, &max);
3690+ err = PTR_ERR(array);
3691+ if (IS_ERR(array))
3692+ goto out;
3693+
3694+ err = 0;
3695+ idx = 0;
3696+ root = sb->s_root;
3697+ di_write_unlock(root);
3698+ for (ull = 0; ull < max; ull++) {
3699+ file = array[ull];
3700+ if (unlikely(!file))
3701+ break;
3702+
3703+ /* AuDbg("%pD\n", file); */
3704+ fi_read_lock(file);
3705+ btop = au_fbtop(file); /* NOTE(review): btop is not read in this function */
3706+ if (!d_is_dir(file->f_path.dentry)) {
3707+ hfile = &au_fi(file)->fi_htop;
3708+ if (hfile->hf_br->br_id == br_id)
3709+ err = -EBUSY;
3710+ } else
3711+ err = test_dir_busy(file, br_id, to_free, &idx);
3712+ fi_read_unlock(file);
3713+ if (unlikely(err))
3714+ break;
3715+ }
3716+ di_write_lock_child(root);
3717+ au_farray_free(array, max);
3718+ AuDebugOn(idx > opened); /* NOTE(review): the caller passes unsigned long long */
3719+
3720+out:
3721+ return err;
3722+}
3723+/* for each file in @to_free, set its lower file on branch @br_id to NULL */
3724+static void br_del_file(struct file **to_free, unsigned long long opened,
3725+ aufs_bindex_t br_id)
3726+{
3727+ unsigned long long ull;
3728+ aufs_bindex_t bindex, btop, bbot, bfound;
3729+ struct file *file;
3730+ struct au_fidir *fidir;
3731+ struct au_hfile *hfile;
3732+
3733+ for (ull = 0; ull < opened; ull++) {
3734+ file = to_free[ull];
3735+ if (unlikely(!file))
3736+ break;
3737+
3738+ /* AuDbg("%pD\n", file); */
3739+ AuDebugOn(!d_is_dir(file->f_path.dentry));
3740+ bfound = -1;
3741+ fidir = au_fi(file)->fi_hdir;
3742+ AuDebugOn(!fidir);
3743+ fi_write_lock(file);
3744+ btop = au_fbtop(file);
3745+ bbot = au_fbbot_dir(file);
3746+ for (bindex = btop; bindex <= bbot; bindex++) {
3747+ hfile = fidir->fd_hfile + bindex;
3748+ if (!hfile->hf_file)
3749+ continue;
3750+
3751+ if (hfile->hf_br->br_id == br_id) {
3752+ bfound = bindex;
3753+ break;
3754+ }
3755+ }
3756+ AuDebugOn(bfound < 0);
3757+ au_set_h_fptr(file, bfound, NULL);
3758+ if (bfound == btop) { /* the top was removed, find the next one */
3759+ for (btop++; btop <= bbot; btop++)
3760+ if (au_hf_dir(file, btop)) {
3761+ au_set_fbtop(file, btop);
3762+ break;
3763+ }
3764+ }
3765+ fi_write_unlock(file);
3766+ }
3767+}
3768+/* remove the entry at @bindex from si_branch[], and try to shrink the array */
3769+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
3770+ const aufs_bindex_t bindex,
3771+ const aufs_bindex_t bbot)
3772+{
3773+ struct au_branch **brp, **p;
3774+
3775+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3776+
3777+ brp = sbinfo->si_branch + bindex;
3778+ if (bindex < bbot)
3779+ memmove(brp, brp + 1, sizeof(*brp) * (bbot - bindex));
3780+ sbinfo->si_branch[0 + bbot] = NULL;
3781+ sbinfo->si_bbot--;
3782+
3783+ p = au_krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST,
3784+ /*may_shrink*/1);
3785+ if (p)
3786+ sbinfo->si_branch = p;
3787+ /* harmless error */
3788+}
3789+/* remove the entry at @bindex from the hdentry array, and try to shrink it */
3790+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
3791+ const aufs_bindex_t bbot)
3792+{
3793+ struct au_hdentry *hdp, *p;
3794+
3795+ AuRwMustWriteLock(&dinfo->di_rwsem);
3796+
3797+ hdp = au_hdentry(dinfo, bindex);
3798+ if (bindex < bbot)
3799+ memmove(hdp, hdp + 1, sizeof(*hdp) * (bbot - bindex));
3800+ /* au_h_dentry_init(au_hdentry(dinfo, bbot)); */
3801+ dinfo->di_bbot--;
3802+
3803+ p = au_krealloc(dinfo->di_hdentry, sizeof(*p) * bbot, AuGFP_SBILIST,
3804+ /*may_shrink*/1);
3805+ if (p)
3806+ dinfo->di_hdentry = p;
3807+ /* harmless error */
3808+}
3809+/* remove the entry at @bindex from the hinode array, and try to shrink it */
3810+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
3811+ const aufs_bindex_t bbot)
3812+{
3813+ struct au_hinode *hip, *p;
3814+
3815+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3816+
3817+ hip = au_hinode(iinfo, bindex);
3818+ if (bindex < bbot)
3819+ memmove(hip, hip + 1, sizeof(*hip) * (bbot - bindex));
3820+ /* au_hinode_init(au_hinode(iinfo, bbot)); */
3821+ iinfo->ii_bbot--;
3822+
3823+ p = au_krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST,
3824+ /*may_shrink*/1);
3825+ if (p)
3826+ iinfo->ii_hinode = p;
3827+ /* harmless error */
3828+}
3829+/* detach the branch at @bindex from sbinfo and the aufs root, then free @br */
3830+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
3831+ struct au_branch *br)
3832+{
3833+ aufs_bindex_t bbot;
3834+ struct au_sbinfo *sbinfo;
3835+ struct dentry *root, *h_root;
3836+ struct inode *inode, *h_inode;
3837+ struct au_hinode *hinode;
3838+
3839+ SiMustWriteLock(sb);
3840+
3841+ root = sb->s_root;
3842+ inode = d_inode(root);
3843+ sbinfo = au_sbi(sb);
3844+ bbot = sbinfo->si_bbot;
3845+
3846+ h_root = au_h_dptr(root, bindex);
3847+ hinode = au_hi(inode, bindex);
3848+ h_inode = au_igrab(hinode->hi_inode); /* released by iput() below */
3849+ au_hiput(hinode);
3850+
3851+ au_sbilist_lock();
3852+ au_br_do_del_brp(sbinfo, bindex, bbot);
3853+ au_br_do_del_hdp(au_di(root), bindex, bbot);
3854+ au_br_do_del_hip(au_ii(inode), bindex, bbot);
3855+ au_sbilist_unlock();
3856+
3857+ dput(h_root);
3858+ iput(h_inode);
3859+ au_br_do_free(br);
3860+}
3861+/* au_array_alloc() callback which stores nothing and returns @max as it is */
3862+static unsigned long long empty_cb(struct super_block *sb, void *array,
3863+ unsigned long long max, void *arg)
3864+{
3865+ return max;
3866+}
3867+/* delete the branch del->h_path from @sb unless it is busy, 0 or an error */
3868+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
3869+{
3870+ int err, rerr, i;
3871+ unsigned long long opened;
3872+ unsigned int mnt_flags;
3873+ aufs_bindex_t bindex, bbot, br_id;
3874+ unsigned char do_wh, verbose;
3875+ struct au_branch *br;
3876+ struct au_wbr *wbr;
3877+ struct dentry *root;
3878+ struct file **to_free;
3879+
3880+ err = 0;
3881+ opened = 0;
3882+ to_free = NULL;
3883+ root = sb->s_root;
3884+ bindex = au_find_dbindex(root, del->h_path.dentry);
3885+ if (bindex < 0) {
3886+ if (remount)
3887+ goto out; /* success */
3888+ err = -ENOENT;
3889+ pr_err("%s no such branch\n", del->pathname);
3890+ goto out;
3891+ }
3892+ AuDbg("bindex b%d\n", bindex);
3893+
3894+ err = -EBUSY;
3895+ mnt_flags = au_mntflags(sb);
3896+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3897+ bbot = au_sbbot(sb);
3898+ if (unlikely(!bbot)) { /* the last branch is never deleted */
3899+ AuVerbose(verbose, "no more branches left\n");
3900+ goto out;
3901+ }
3902+ br = au_sbr(sb, bindex);
3903+ AuDebugOn(!path_equal(&br->br_path, &del->h_path));
3904+
3905+ br_id = br->br_id;
3906+ opened = au_br_count(br);
3907+ if (unlikely(opened)) {
3908+ to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
3909+ err = PTR_ERR(to_free);
3910+ if (IS_ERR(to_free))
3911+ goto out; /* NOTE(review): to_free is an ERR_PTR at out: — verify */
3912+
3913+ err = test_file_busy(sb, br_id, to_free, opened);
3914+ if (unlikely(err)) {
3915+ AuVerbose(verbose, "%llu file(s) opened\n", opened);
3916+ goto out;
3917+ }
3918+ }
3919+
3920+ wbr = br->br_wbr;
3921+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
3922+ if (do_wh) {
3923+ /* instead of WbrWhMustWriteLock(wbr) */
3924+ SiMustWriteLock(sb);
3925+ for (i = 0; i < AuBrWh_Last; i++) {
3926+ dput(wbr->wbr_wh[i]);
3927+ wbr->wbr_wh[i] = NULL;
3928+ }
3929+ }
3930+
3931+ err = test_children_busy(root, bindex, verbose);
3932+ if (unlikely(err)) {
3933+ if (do_wh)
3934+ goto out_wh; /* re-create what was put just above */
3935+ goto out;
3936+ }
3937+
3938+ err = 0;
3939+ if (to_free) {
3940+ /*
3941+ * now we confirmed the branch is deletable.
3942+ * let's free the remaining opened dirs on the branch.
3943+ */
3944+ di_write_unlock(root);
3945+ br_del_file(to_free, opened, br_id);
3946+ di_write_lock_child(root);
3947+ }
3948+
3949+ if (!remount)
3950+ au_br_do_del(sb, bindex, br);
3951+ else {
3952+ sysaufs_brs_del(sb, bindex);
3953+ au_br_do_del(sb, bindex, br);
3954+ sysaufs_brs_add(sb, bindex);
3955+ }
3956+
3957+ if (!bindex) { /* the branch at index 0 was deleted */
3958+ au_cpup_attr_all(d_inode(root), /*force*/1);
3959+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
3960+ } else
3961+ au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
3962+ if (au_opt_test(mnt_flags, PLINK))
3963+ au_plink_half_refresh(sb, br_id);
3964+
3965+ if (au_xino_brid(sb) == br_id)
3966+ au_xino_brid_set(sb, -1);
3967+ goto out; /* success */
3968+
3969+out_wh:
3970+ /* revert */
3971+ rerr = au_br_init_wh(sb, br, br->br_perm);
3972+ if (rerr)
3973+ pr_warn("failed re-creating base whiteout, %s. (%d)\n",
3974+ del->pathname, rerr);
3975+out:
3976+ if (to_free)
3977+ au_farray_free(to_free, opened);
3978+ return err;
3979+}
3980+
3981+/* ---------------------------------------------------------------------- */
3982+/* store to arg->h_ino the branch inode number if the inode is busy, else 0 */
3983+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
3984+{
3985+ int err;
3986+ aufs_bindex_t btop, bbot;
3987+ struct aufs_ibusy ibusy;
3988+ struct inode *inode, *h_inode;
3989+
3990+ err = -EPERM;
3991+ if (unlikely(!capable(CAP_SYS_ADMIN)))
3992+ goto out;
3993+
3994+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
3995+ if (!err)
3996+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
3997+ if (unlikely(err)) { /* either of the two tests above failed */
3998+ err = -EFAULT;
3999+ AuTraceErr(err);
4000+ goto out;
4001+ }
4002+
4003+ err = -EINVAL;
4004+ si_read_lock(sb, AuLock_FLUSH);
4005+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbbot(sb)))
4006+ goto out_unlock;
4007+
4008+ err = 0;
4009+ ibusy.h_ino = 0; /* invalid */
4010+ inode = ilookup(sb, ibusy.ino);
4011+ if (!inode
4012+ || inode->i_ino == AUFS_ROOT_INO
4013+ || au_is_bad_inode(inode))
4014+ goto out_unlock; /* NOTE(review): no iput() here for a non-NULL inode — verify */
4015+
4016+ ii_read_lock_child(inode);
4017+ btop = au_ibtop(inode);
4018+ bbot = au_ibbot(inode);
4019+ if (btop <= ibusy.bindex && ibusy.bindex <= bbot) {
4020+ h_inode = au_h_iptr(inode, ibusy.bindex);
4021+ if (h_inode && au_test_ibusy(inode, btop, bbot))
4022+ ibusy.h_ino = h_inode->i_ino;
4023+ }
4024+ ii_read_unlock(inode);
4025+ iput(inode);
4026+
4027+out_unlock:
4028+ si_read_unlock(sb);
4029+ if (!err) {
4030+ err = __put_user(ibusy.h_ino, &arg->h_ino); /* access_ok() was done above */
4031+ if (unlikely(err)) {
4032+ err = -EFAULT;
4033+ AuTraceErr(err);
4034+ }
4035+ }
4036+out:
4037+ return err;
4038+}
4039+/* ioctl entry for au_ibusy(), @arg is handled as a user pointer */
4040+long au_ibusy_ioctl(struct file *file, unsigned long arg)
4041+{
4042+ return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
4043+}
4044+/* compat ioctl entry for au_ibusy(), @arg is converted by compat_ptr() */
4045+#ifdef CONFIG_COMPAT
4046+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
4047+{
4048+ return au_ibusy(file->f_path.dentry->d_sb, compat_ptr(arg));
4049+}
4050+#endif
4051+
4052+/* ---------------------------------------------------------------------- */
4053+
4054+/*
4055+ * change a branch permission
4056+ */
4057+/* print a warning when CONFIG_IMA is enabled, otherwise do nothing */
4058+static void au_warn_ima(void)
4059+{
4060+#ifdef CONFIG_IMA
4061+ /* since it doesn't support mark_files_ro() */
4062+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
4063+#endif
4064+}
4065+/* true when au_br_whable() holds for perm @a but not for perm @b */
4066+static int do_need_sigen_inc(int a, int b)
4067+{
4068+ return au_br_whable(a) && !au_br_whable(b);
4069+}
4070+
4071+static int need_sigen_inc(int old, int new)
4072+{
4073+ return do_need_sigen_inc(old, new)
4074+ || do_need_sigen_inc(new, old);
4075+}
4076+
4077+ static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
4078+ {
4079+ int err, do_warn;
4080+ unsigned int mnt_flags;
4081+ unsigned long long ull, max;
4082+ aufs_bindex_t br_id;
4083+ unsigned char verbose, writer;
4084+ struct file *file, *hf, **array;
4085+ struct au_hfile *hfile;
4086+ /* drop write access of the lower files opened for write on branch @bindex; -EBUSY if a file is mmapped */
4087+ mnt_flags = au_mntflags(sb);
4088+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4089+
4090+ array = au_farray_alloc(sb, &max);
4091+ err = PTR_ERR(array);
4092+ if (IS_ERR(array))
4093+ goto out;
4094+
4095+ do_warn = 0;
4096+ br_id = au_sbr_id(sb, bindex);
4097+ for (ull = 0; ull < max; ull++) {
4098+ file = array[ull];
4099+ if (unlikely(!file))
4100+ break;
4101+
4102+ /* AuDbg("%pD\n", file); */
4103+ fi_read_lock(file);
4104+ if (unlikely(au_test_mmapped(file))) {
4105+ err = -EBUSY;
4106+ AuVerbose(verbose, "mmapped %pD\n", file);
4107+ AuDbgFile(file);
4108+ FiMustNoWaiters(file);
4109+ fi_read_unlock(file);
4110+ goto out_array;
4111+ }
4112+ /* keep only a regular file opened for write whose top lower file is writable and on this branch */
4113+ hfile = &au_fi(file)->fi_htop;
4114+ hf = hfile->hf_file;
4115+ if (!d_is_reg(file->f_path.dentry)
4116+ || !(file->f_mode & FMODE_WRITE)
4117+ || hfile->hf_br->br_id != br_id
4118+ || !(hf->f_mode & FMODE_WRITE))
4119+ array[ull] = NULL;
4120+ else {
4121+ do_warn = 1;
4122+ get_file(file);
4123+ }
4124+
4125+ FiMustNoWaiters(file);
4126+ fi_read_unlock(file);
4127+ fput(file);
4128+ }
4129+
4130+ err = 0;
4131+ if (do_warn)
4132+ au_warn_ima();
4133+
4134+ for (ull = 0; ull < max; ull++) {
4135+ file = array[ull];
4136+ if (!file)
4137+ continue;
4138+
4139+ /* todo: already flushed? */
4140+ /*
4141+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its
4142+ * approach: clear the write bits of f_mode and, for a writer,
4143+ * call put_write_access() and __mnt_drop_write(), because the
4144+ * branch attribute in aufs world is totally different from the
4145+ * native fs rw/ro mode.
4146+ */
4147+ /* fi_read_lock(file); */
4148+ hfile = &au_fi(file)->fi_htop;
4149+ hf = hfile->hf_file;
4150+ /* fi_read_unlock(file); */
4151+ spin_lock(&hf->f_lock);
4152+ writer = !!(hf->f_mode & FMODE_WRITER);
4153+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
4154+ spin_unlock(&hf->f_lock);
4155+ if (writer) {
4156+ put_write_access(file_inode(hf));
4157+ __mnt_drop_write(hf->f_path.mnt);
4158+ }
4159+ }
4160+
4161+out_array:
4162+ au_farray_free(array, max);
4163+out:
4164+ AuTraceErr(err);
4165+ return err;
4166+ }
4167+
4168+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
4169+ int *do_refresh)
4170+{
4171+ int err, rerr;
4172+ aufs_bindex_t bindex;
4173+ struct dentry *root;
4174+ struct au_branch *br;
4175+ struct au_br_fhsm *bf;
4176+
4177+ root = sb->s_root;
4178+ bindex = au_find_dbindex(root, mod->h_root);
4179+ if (bindex < 0) {
4180+ if (remount)
4181+ return 0; /* success */
4182+ err = -ENOENT;
4183+ pr_err("%s no such branch\n", mod->path);
4184+ goto out;
4185+ }
4186+ AuDbg("bindex b%d\n", bindex);
4187+
4188+ err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
4189+ if (unlikely(err))
4190+ goto out;
4191+
4192+ br = au_sbr(sb, bindex);
4193+ AuDebugOn(mod->h_root != au_br_dentry(br));
4194+ if (br->br_perm == mod->perm)
4195+ return 0; /* success */
4196+
4197+ /* pre-allocate for non-fhsm --> fhsm */
4198+ bf = NULL;
4199+ if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
4200+ err = au_fhsm_br_alloc(br);
4201+ if (unlikely(err))
4202+ goto out;
4203+ bf = br->br_fhsm;
4204+ br->br_fhsm = NULL;
4205+ }
4206+
4207+ if (au_br_writable(br->br_perm)) {
4208+ /* remove whiteout base */
4209+ err = au_br_init_wh(sb, br, mod->perm);
4210+ if (unlikely(err))
4211+ goto out_bf;
4212+
4213+ if (!au_br_writable(mod->perm)) {
4214+ /* rw --> ro, file might be mmapped */
4215+ DiMustNoWaiters(root);
4216+ IiMustNoWaiters(d_inode(root));
4217+ di_write_unlock(root);
4218+ err = au_br_mod_files_ro(sb, bindex);
4219+ /* aufs_write_lock() calls ..._child() */
4220+ di_write_lock_child(root);
4221+
4222+ if (unlikely(err)) {
4223+ rerr = -ENOMEM;
4224+ br->br_wbr = kzalloc(sizeof(*br->br_wbr),
4225+ GFP_NOFS);
4226+ if (br->br_wbr)
4227+ rerr = au_wbr_init(br, sb, br->br_perm);
4228+ if (unlikely(rerr)) {
4229+ AuIOErr("nested error %d (%d)\n",
4230+ rerr, err);
4231+ br->br_perm = mod->perm;
4232+ }
4233+ }
4234+ }
4235+ } else if (au_br_writable(mod->perm)) {
4236+ /* ro --> rw */
4237+ err = -ENOMEM;
4238+ br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
4239+ if (br->br_wbr) {
4240+ err = au_wbr_init(br, sb, mod->perm);
4241+ if (unlikely(err)) {
4242+ au_delayed_kfree(br->br_wbr);
4243+ br->br_wbr = NULL;
4244+ }
4245+ }
4246+ }
4247+ if (unlikely(err))
4248+ goto out_bf;
4249+
4250+ if (au_br_fhsm(br->br_perm)) {
4251+ if (!au_br_fhsm(mod->perm)) {
4252+ /* fhsm --> non-fhsm */
4253+ au_br_fhsm_fin(br->br_fhsm);
4254+ au_delayed_kfree(br->br_fhsm);
4255+ br->br_fhsm = NULL;
4256+ }
4257+ } else if (au_br_fhsm(mod->perm))
4258+ /* non-fhsm --> fhsm */
4259+ br->br_fhsm = bf;
4260+
4261+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
4262+ br->br_perm = mod->perm;
4263+ goto out; /* success */
4264+
4265+out_bf:
4266+ if (bf)
4267+ au_delayed_kfree(bf);
4268+out:
4269+ AuTraceErr(err);
4270+ return err;
4271+}
4272+
4273+/* ---------------------------------------------------------------------- */
4274+
4275+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
4276+{
4277+ int err;
4278+ struct kstatfs kstfs;
4279+
4280+ err = vfs_statfs(&br->br_path, &kstfs);
4281+ if (!err) {
4282+ stfs->f_blocks = kstfs.f_blocks;
4283+ stfs->f_bavail = kstfs.f_bavail;
4284+ stfs->f_files = kstfs.f_files;
4285+ stfs->f_ffree = kstfs.f_ffree;
4286+ }
4287+
4288+ return err;
4289+}
4290diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
4291--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
4292+++ linux/fs/aufs/branch.h 2016-10-09 16:55:36.486034798 +0200
4293@@ -0,0 +1,309 @@
4294+/*
4295+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4296+ *
4297+ * This program, aufs is free software; you can redistribute it and/or modify
4298+ * it under the terms of the GNU General Public License as published by
4299+ * the Free Software Foundation; either version 2 of the License, or
4300+ * (at your option) any later version.
4301+ *
4302+ * This program is distributed in the hope that it will be useful,
4303+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4304+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4305+ * GNU General Public License for more details.
4306+ *
4307+ * You should have received a copy of the GNU General Public License
4308+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4309+ */
4310+
4311+/*
4312+ * branch filesystems and xino for them
4313+ */
4314+
4315+#ifndef __AUFS_BRANCH_H__
4316+#define __AUFS_BRANCH_H__
4317+
4318+#ifdef __KERNEL__
4319+
4320+#include <linux/mount.h>
4321+#include "dynop.h"
4322+#include "rwsem.h"
4323+#include "super.h"
4324+
4325+/* ---------------------------------------------------------------------- */
4326+
4327+/* a xino file */
4328+struct au_xino_file {
4329+ struct file *xi_file;
4330+ struct mutex xi_nondir_mtx;
4331+
4332+ /* todo: make xino files an array to support huge inode number */
4333+
4334+#ifdef CONFIG_DEBUG_FS
4335+ struct dentry *xi_dbgaufs;
4336+#endif
4337+};
4338+
4339+/* File-based Hierarchical Storage Management */
4340+struct au_br_fhsm {
4341+#ifdef CONFIG_AUFS_FHSM
4342+ struct mutex bf_lock;
4343+ unsigned long bf_jiffy;
4344+ struct aufs_stfs bf_stfs;
4345+ int bf_readable;
4346+#endif
4347+};
4348+
4349+/* members for writable branch only */
4350+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
4351+struct au_wbr {
4352+ struct au_rwsem wbr_wh_rwsem;
4353+ struct dentry *wbr_wh[AuBrWh_Last];
4354+ atomic_t wbr_wh_running;
4355+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
4356+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
4357+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
4358+
4359+ /* mfs mode */
4360+ unsigned long long wbr_bytes;
4361+};
4362+
4363+/* ext2 has 3 types of operations at least, ext3 has 4 */
4364+#define AuBrDynOp (AuDyLast * 4)
4365+
4366+#ifdef CONFIG_AUFS_HFSNOTIFY
4367+/* support for asynchronous destruction */
4368+struct au_br_hfsnotify {
4369+ struct fsnotify_group *hfsn_group;
4370+};
4371+#endif
4372+
4373+/* sysfs entries */
4374+struct au_brsysfs {
4375+ char name[16];
4376+ struct attribute attr;
4377+};
4378+
4379+enum {
4380+ AuBrSysfs_BR,
4381+ AuBrSysfs_BRID,
4382+ AuBrSysfs_Last
4383+};
4384+
4385+/* protected by superblock rwsem */
4386+struct au_branch {
4387+ struct au_xino_file br_xino;
4388+
4389+ aufs_bindex_t br_id;
4390+
4391+ int br_perm;
4392+ struct path br_path;
4393+ spinlock_t br_dykey_lock;
4394+ struct au_dykey *br_dykey[AuBrDynOp];
4395+ struct percpu_counter br_count;
4396+
4397+ struct au_wbr *br_wbr;
4398+ struct au_br_fhsm *br_fhsm;
4399+
4400+ /* xino truncation */
4401+ atomic_t br_xino_running;
4402+
4403+#ifdef CONFIG_AUFS_HFSNOTIFY
4404+ struct au_br_hfsnotify *br_hfsn;
4405+#endif
4406+
4407+#ifdef CONFIG_SYSFS
4408+ /* entries under sysfs per mount-point */
4409+ struct au_brsysfs br_sysfs[AuBrSysfs_Last];
4410+#endif
4411+};
4412+
4413+/* ---------------------------------------------------------------------- */
4414+
4415+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
4416+{
4417+ return br->br_path.mnt;
4418+}
4419+
4420+static inline struct dentry *au_br_dentry(struct au_branch *br)
4421+{
4422+ return br->br_path.dentry;
4423+}
4424+
4425+static inline struct super_block *au_br_sb(struct au_branch *br)
4426+{
4427+ return au_br_mnt(br)->mnt_sb;
4428+}
4429+
4430+static inline void au_br_get(struct au_branch *br)
4431+{
4432+ percpu_counter_inc(&br->br_count);
4433+}
4434+
4435+static inline void au_br_put(struct au_branch *br)
4436+{
4437+ percpu_counter_dec(&br->br_count);
4438+}
4439+
4440+static inline s64 au_br_count(struct au_branch *br)
4441+{
4442+ return percpu_counter_sum(&br->br_count);
4443+}
4444+
4445+static inline void au_br_count_init(struct au_branch *br)
4446+{
4447+ percpu_counter_init(&br->br_count, 0, GFP_NOFS);
4448+}
4449+
4450+static inline void au_br_count_fin(struct au_branch *br)
4451+{
4452+ percpu_counter_destroy(&br->br_count);
4453+}
4454+
4455+static inline int au_br_rdonly(struct au_branch *br)
4456+{
4457+ return ((au_br_sb(br)->s_flags & MS_RDONLY)
4458+ || !au_br_writable(br->br_perm))
4459+ ? -EROFS : 0;
4460+}
4461+
4462+static inline int au_br_hnotifyable(int brperm __maybe_unused)
4463+{
4464+#ifdef CONFIG_AUFS_HNOTIFY
4465+ return !(brperm & AuBrPerm_RR);
4466+#else
4467+ return 0;
4468+#endif
4469+}
4470+
4471+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
4472+{
4473+ int err, exec_flag;
4474+
4475+ err = 0;
4476+ exec_flag = oflag & __FMODE_EXEC;
4477+ if (unlikely(exec_flag && path_noexec(&br->br_path)))
4478+ err = -EACCES;
4479+
4480+ return err;
4481+}
4482+
4483+/* ---------------------------------------------------------------------- */
4484+
4485+/* branch.c */
4486+struct au_sbinfo;
4487+void au_br_free(struct au_sbinfo *sinfo);
4488+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
4489+struct au_opt_add;
4490+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
4491+struct au_opt_del;
4492+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
4493+long au_ibusy_ioctl(struct file *file, unsigned long arg);
4494+#ifdef CONFIG_COMPAT
4495+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
4496+#endif
4497+struct au_opt_mod;
4498+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
4499+ int *do_refresh);
4500+struct aufs_stfs;
4501+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
4502+
4503+/* xino.c */
4504+static const loff_t au_loff_max = LLONG_MAX;
4505+
4506+int au_xib_trunc(struct super_block *sb);
4507+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
4508+ loff_t *pos);
4509+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
4510+ size_t size, loff_t *pos);
4511+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
4512+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
4513+ino_t au_xino_new_ino(struct super_block *sb);
4514+void au_xino_delete_inode(struct inode *inode, const int unlinked);
4515+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4516+ ino_t ino);
4517+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4518+ ino_t *ino);
4519+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
4520+ struct file *base_file, int do_test);
4521+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
4522+
4523+struct au_opt_xino;
4524+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
4525+void au_xino_clr(struct super_block *sb);
4526+struct file *au_xino_def(struct super_block *sb);
4527+int au_xino_path(struct seq_file *seq, struct file *file);
4528+
4529+/* ---------------------------------------------------------------------- */
4530+
4531+/* Superblock to branch */
4532+static inline
4533+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
4534+{
4535+ return au_sbr(sb, bindex)->br_id;
4536+}
4537+
4538+static inline
4539+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
4540+{
4541+ return au_br_mnt(au_sbr(sb, bindex));
4542+}
4543+
4544+static inline
4545+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
4546+{
4547+ return au_br_sb(au_sbr(sb, bindex));
4548+}
4549+
4550+static inline void au_sbr_get(struct super_block *sb, aufs_bindex_t bindex)
4551+{
4552+ au_br_get(au_sbr(sb, bindex));
4553+}
4554+
4555+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
4556+{
4557+ au_br_put(au_sbr(sb, bindex));
4558+}
4559+
4560+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
4561+{
4562+ return au_sbr(sb, bindex)->br_perm;
4563+}
4564+
4565+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
4566+{
4567+ return au_br_whable(au_sbr_perm(sb, bindex));
4568+}
4569+
4570+/* ---------------------------------------------------------------------- */
4571+
4572+/*
4573+ * wbr_wh_read_lock, wbr_wh_write_lock
4574+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
4575+ */
4576+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
4577+
4578+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
4579+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
4580+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
4581+
4582+/* ---------------------------------------------------------------------- */
4583+
4584+#ifdef CONFIG_AUFS_FHSM
4585+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
4586+{
4587+ mutex_init(&brfhsm->bf_lock);
4588+ brfhsm->bf_jiffy = 0;
4589+ brfhsm->bf_readable = 0;
4590+}
4591+
4592+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
4593+{
4594+ mutex_destroy(&brfhsm->bf_lock);
4595+}
4596+#else
4597+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
4598+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
4599+#endif
4600+
4601+#endif /* __KERNEL__ */
4602+#endif /* __AUFS_BRANCH_H__ */
4603diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
4604--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
4605+++ linux/fs/aufs/conf.mk 2016-10-09 16:55:36.486034798 +0200
4606@@ -0,0 +1,38 @@
4607+
4608+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
4609+
4610+define AuConf
4611+ifdef ${1}
4612+AuConfStr += ${1}=${${1}}
4613+endif
4614+endef
4615+
4616+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
4617+ SBILIST \
4618+ HNOTIFY HFSNOTIFY \
4619+ EXPORT INO_T_64 \
4620+ XATTR \
4621+ FHSM \
4622+ RDU \
4623+ SHWH \
4624+ BR_RAMFS \
4625+ BR_FUSE POLL \
4626+ BR_HFSPLUS \
4627+ BDEV_LOOP \
4628+ DEBUG MAGIC_SYSRQ
4629+$(foreach i, ${AuConfAll}, \
4630+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
4631+
4632+AuConfName = ${obj}/conf.str
4633+${AuConfName}.tmp: FORCE
4634+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
4635+${AuConfName}: ${AuConfName}.tmp
4636+ @diff -q $< $@ > /dev/null 2>&1 || { \
4637+ echo ' GEN ' $@; \
4638+ cp -p $< $@; \
4639+ }
4640+FORCE:
4641+clean-files += ${AuConfName} ${AuConfName}.tmp
4642+${obj}/sysfs.o: ${AuConfName}
4643+
4644+-include ${srctree}/${src}/conf_priv.mk
4645diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
4646--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
4647+++ linux/fs/aufs/cpup.c 2016-12-17 12:28:17.595211562 +0100
4648@@ -0,0 +1,1394 @@
4649+/*
4650+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4651+ *
4652+ * This program, aufs is free software; you can redistribute it and/or modify
4653+ * it under the terms of the GNU General Public License as published by
4654+ * the Free Software Foundation; either version 2 of the License, or
4655+ * (at your option) any later version.
4656+ *
4657+ * This program is distributed in the hope that it will be useful,
4658+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4659+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4660+ * GNU General Public License for more details.
4661+ *
4662+ * You should have received a copy of the GNU General Public License
4663+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4664+ */
4665+
4666+/*
4667+ * copy-up functions, see wbr_policy.c for copy-down
4668+ */
4669+
4670+#include <linux/fs_stack.h>
4671+#include <linux/mm.h>
4672+#include <linux/task_work.h>
4673+#include "aufs.h"
4674+
4675+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
4676+{
4677+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
4678+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
4679+
4680+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
4681+
4682+ dst->i_flags |= iflags & ~mask;
4683+ if (au_test_fs_notime(dst->i_sb))
4684+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
4685+}
4686+
4687+void au_cpup_attr_timesizes(struct inode *inode)
4688+{
4689+ struct inode *h_inode;
4690+
4691+ h_inode = au_h_iptr(inode, au_ibtop(inode));
4692+ fsstack_copy_attr_times(inode, h_inode);
4693+ fsstack_copy_inode_size(inode, h_inode);
4694+}
4695+
4696+ void au_cpup_attr_nlink(struct inode *inode, int force)
4697+ {
4698+ struct inode *h_inode;
4699+ struct super_block *sb;
4700+ aufs_bindex_t bindex, bbot;
4701+ /* set the link count of @inode from its top lower inode; for a dir, the lower dirs below are added too */
4702+ sb = inode->i_sb;
4703+ bindex = au_ibtop(inode);
4704+ h_inode = au_h_iptr(inode, bindex);
4705+ if (!force
4706+ && !S_ISDIR(h_inode->i_mode)
4707+ && au_opt_test(au_mntflags(sb), PLINK)
4708+ && au_plink_test(inode))
4709+ return; /* without @force, a non-dir passing au_plink_test() keeps its current count */
4710+
4711+ /*
4712+ * an i_nlink of 0 can happen while revalidating.
4713+ * h_inode->i_mutex may not be held here, but it is harmless since once
4714+ * i_nlink reaches 0, it will never become positive again except in the
4715+ * O_TMPFILE case.
4716+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
4717+ * an incorrect link count.
4718+ */
4719+ set_nlink(inode, h_inode->i_nlink);
4720+
4721+ /*
4722+ * a smaller nlink makes find(1) noisy, but a larger one doesn't.
4723+ * it may include the whplink directory.
4724+ */
4725+ if (S_ISDIR(h_inode->i_mode)) {
4726+ bbot = au_ibbot(inode);
4727+ for (bindex++; bindex <= bbot; bindex++) {
4728+ h_inode = au_h_iptr(inode, bindex);
4729+ if (h_inode)
4730+ au_add_nlink(inode, h_inode);
4731+ }
4732+ }
4733+ }
4734+
4735+void au_cpup_attr_changeable(struct inode *inode)
4736+{
4737+ struct inode *h_inode;
4738+
4739+ h_inode = au_h_iptr(inode, au_ibtop(inode));
4740+ inode->i_mode = h_inode->i_mode;
4741+ inode->i_uid = h_inode->i_uid;
4742+ inode->i_gid = h_inode->i_gid;
4743+ au_cpup_attr_timesizes(inode);
4744+ au_cpup_attr_flags(inode, h_inode->i_flags);
4745+}
4746+
4747+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
4748+{
4749+ struct au_iinfo *iinfo = au_ii(inode);
4750+
4751+ IiMustWriteLock(inode);
4752+
4753+ iinfo->ii_higen = h_inode->i_generation;
4754+ iinfo->ii_hsb1 = h_inode->i_sb;
4755+}
4756+
4757+void au_cpup_attr_all(struct inode *inode, int force)
4758+{
4759+ struct inode *h_inode;
4760+
4761+ h_inode = au_h_iptr(inode, au_ibtop(inode));
4762+ au_cpup_attr_changeable(inode);
4763+ if (inode->i_nlink > 0)
4764+ au_cpup_attr_nlink(inode, force);
4765+ inode->i_rdev = h_inode->i_rdev;
4766+ inode->i_blkbits = h_inode->i_blkbits;
4767+ au_cpup_igen(inode, h_inode);
4768+}
4769+
4770+/* ---------------------------------------------------------------------- */
4771+
4772+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
4773+
4774+/* keep the timestamps of the parent dir when cpup */
4775+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4776+ struct path *h_path)
4777+{
4778+ struct inode *h_inode;
4779+
4780+ dt->dt_dentry = dentry;
4781+ dt->dt_h_path = *h_path;
4782+ h_inode = d_inode(h_path->dentry);
4783+ dt->dt_atime = h_inode->i_atime;
4784+ dt->dt_mtime = h_inode->i_mtime;
4785+ /* smp_mb(); */
4786+}
4787+
4788+void au_dtime_revert(struct au_dtime *dt)
4789+{
4790+ struct iattr attr;
4791+ int err;
4792+
4793+ attr.ia_atime = dt->dt_atime;
4794+ attr.ia_mtime = dt->dt_mtime;
4795+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
4796+ | ATTR_ATIME | ATTR_ATIME_SET;
4797+
4798+ /* no delegation since this is a directory */
4799+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
4800+ if (unlikely(err))
4801+ pr_warn("restoring timestamps failed(%d). ignored\n", err);
4802+}
4803+
4804+/* ---------------------------------------------------------------------- */
4805+
4806+/* internal use only */
4807+struct au_cpup_reg_attr {
4808+ int valid;
4809+ struct kstat st;
4810+ unsigned int iflags; /* inode->i_flags */
4811+};
4812+
4813+static noinline_for_stack
4814+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
4815+ struct au_cpup_reg_attr *h_src_attr)
4816+{
4817+ int err, sbits, icex;
4818+ unsigned int mnt_flags;
4819+ unsigned char verbose;
4820+ struct iattr ia;
4821+ struct path h_path;
4822+ struct inode *h_isrc, *h_idst;
4823+ struct kstat *h_st;
4824+ struct au_branch *br;
4825+
4826+ h_path.dentry = au_h_dptr(dst, bindex);
4827+ h_idst = d_inode(h_path.dentry);
4828+ br = au_sbr(dst->d_sb, bindex);
4829+ h_path.mnt = au_br_mnt(br);
4830+ h_isrc = d_inode(h_src);
4831+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
4832+ | ATTR_ATIME | ATTR_MTIME
4833+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
4834+ if (h_src_attr && h_src_attr->valid) {
4835+ h_st = &h_src_attr->st;
4836+ ia.ia_uid = h_st->uid;
4837+ ia.ia_gid = h_st->gid;
4838+ ia.ia_atime = h_st->atime;
4839+ ia.ia_mtime = h_st->mtime;
4840+ if (h_idst->i_mode != h_st->mode
4841+ && !S_ISLNK(h_idst->i_mode)) {
4842+ ia.ia_valid |= ATTR_MODE;
4843+ ia.ia_mode = h_st->mode;
4844+ }
4845+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
4846+ au_cpup_attr_flags(h_idst, h_src_attr->iflags);
4847+ } else {
4848+ ia.ia_uid = h_isrc->i_uid;
4849+ ia.ia_gid = h_isrc->i_gid;
4850+ ia.ia_atime = h_isrc->i_atime;
4851+ ia.ia_mtime = h_isrc->i_mtime;
4852+ if (h_idst->i_mode != h_isrc->i_mode
4853+ && !S_ISLNK(h_idst->i_mode)) {
4854+ ia.ia_valid |= ATTR_MODE;
4855+ ia.ia_mode = h_isrc->i_mode;
4856+ }
4857+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
4858+ au_cpup_attr_flags(h_idst, h_isrc->i_flags);
4859+ }
4860+ /* no delegation since it is just created */
4861+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
4862+
4863+ /* is this nfs only? */
4864+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
4865+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
4866+ ia.ia_mode = h_isrc->i_mode;
4867+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
4868+ }
4869+
4870+ icex = br->br_perm & AuBrAttr_ICEX;
4871+ if (!err) {
4872+ mnt_flags = au_mntflags(dst->d_sb);
4873+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4874+ err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
4875+ }
4876+
4877+ return err;
4878+}
4879+
4880+/* ---------------------------------------------------------------------- */
4881+
4882+ static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
4883+ char *buf, unsigned long blksize)
4884+ {
4885+ int err;
4886+ size_t sz, rbytes, wbytes;
4887+ unsigned char all_zero;
4888+ char *p, *zp;
4889+ struct inode *h_inode;
4890+ /* reduce stack usage: @buf is reused as the iattr at the end */
4891+ struct iattr *ia;
4892+ /* copy @len bytes from @src to @dst in chunks of up to @blksize via @buf; an all-zero full chunk is seeked over */
4893+ zp = page_address(ZERO_PAGE(0));
4894+ if (unlikely(!zp))
4895+ return -ENOMEM; /* possible? */
4896+
4897+ err = 0;
4898+ all_zero = 0;
4899+ while (len) {
4900+ AuDbg("len %lld\n", len);
4901+ sz = blksize;
4902+ if (len < blksize)
4903+ sz = len;
4904+
4905+ rbytes = 0;
4906+ /* todo: signal_pending? NOTE(review): a read returning 0 keeps !rbytes true, so this never ends if src hits EOF early */
4907+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
4908+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
4909+ err = rbytes;
4910+ }
4911+ if (unlikely(err < 0))
4912+ break;
4913+ /* only a full blksize chunk is compared against the zero page */
4914+ all_zero = 0;
4915+ if (len >= rbytes && rbytes == blksize)
4916+ all_zero = !memcmp(buf, zp, rbytes);
4917+ if (!all_zero) {
4918+ wbytes = rbytes;
4919+ p = buf;
4920+ while (wbytes) {
4921+ size_t b;
4922+
4923+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
4924+ err = b;
4925+ /* todo: signal_pending? */
4926+ if (unlikely(err == -EAGAIN || err == -EINTR))
4927+ continue;
4928+ if (unlikely(err < 0))
4929+ break;
4930+ wbytes -= b;
4931+ p += b;
4932+ }
4933+ if (unlikely(err < 0))
4934+ break;
4935+ } else {
4936+ loff_t res;
4937+
4938+ AuLabel(hole);
4939+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
4940+ err = res;
4941+ if (unlikely(res < 0))
4942+ break;
4943+ }
4944+ len -= rbytes;
4945+ err = 0;
4946+ }
4947+
4948+ /* the last chunk was only seeked over as a hole: set the size of dst to f_pos explicitly */
4949+ if (!err && all_zero) {
4950+ AuLabel(last hole);
4951+
4952+ err = 1;
4953+ if (au_test_nfs(dst->f_path.dentry->d_sb)) {
4954+ /* nfs requires writing a byte first to make the last hole */
4955+ /* is this only nfs? */
4956+ do {
4957+ /* todo: signal_pending? */
4958+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
4959+ } while (err == -EAGAIN || err == -EINTR);
4960+ if (err == 1)
4961+ dst->f_pos--;
4962+ }
4963+
4964+ if (err == 1) {
4965+ ia = (void *)buf;
4966+ ia->ia_size = dst->f_pos;
4967+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
4968+ ia->ia_file = dst;
4969+ h_inode = file_inode(dst);
4970+ inode_lock_nested(h_inode, AuLsc_I_CHILD2);
4971+ /* no delegation since it is just created */
4972+ err = vfsub_notify_change(&dst->f_path, ia,
4973+ /*delegated*/NULL);
4974+ inode_unlock(h_inode);
4975+ }
4976+ }
4977+
4978+ return err;
4979+ }
4980+
4981+ int au_copy_file(struct file *dst, struct file *src, loff_t len)
4982+ {
4983+ int err;
4984+ unsigned long blksize;
4985+ unsigned char do_kfree;
4986+ char *buf;
4987+
4988+ /* copy @len bytes from @src to @dst, both rewound to offset 0; returns what au_do_copy_file() returns or -ENOMEM */
4989+ err = -ENOMEM;
4990+ blksize = dst->f_path.dentry->d_sb->s_blocksize;
4991+ if (!blksize || PAGE_SIZE < blksize)
4992+ blksize = PAGE_SIZE;
4993+ AuDbg("blksize %lu\n", blksize);
4994+ /* au_do_copy_file() reuses buf as a struct iattr, so a kmalloc-ed buf must be able to hold one */
4995+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr));
4996+ if (do_kfree)
4997+ buf = kmalloc(blksize, GFP_NOFS);
4998+ else
4999+ buf = (void *)__get_free_page(GFP_NOFS);
5000+ if (unlikely(!buf))
5001+ goto out;
5002+ if (len > (1 << 22))
5003+ AuDbg("copying a large file %lld\n", (long long)len);
5004+
5005+ src->f_pos = 0;
5006+ dst->f_pos = 0;
5007+ err = au_do_copy_file(dst, src, len, buf, blksize);
5008+ if (do_kfree)
5009+ au_delayed_kfree(buf);
5010+ else
5011+ au_delayed_free_page((unsigned long)buf);
5012+out:
5013+ return err;
5014+ }
5015+
5016+/*
5017+ * to support a sparse file which is opened with O_APPEND,
5018+ * we need to close the file.
5019+ */
5020+ static int au_cp_regular(struct au_cp_generic *cpg)
5021+ {
5022+ int err, i;
5023+ enum { SRC, DST };
5024+ struct {
5025+ aufs_bindex_t bindex;
5026+ unsigned int flags;
5027+ struct dentry *dentry;
5028+ int force_wr;
5029+ struct file *file;
5030+ void *label;
5031+ } *f, file[] = {
5032+ {
5033+ .bindex = cpg->bsrc,
5034+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
5035+ .label = &&out
5036+ },
5037+ {
5038+ .bindex = cpg->bdst,
5039+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
5040+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
5041+ .label = &&out_src
5042+ }
5043+ };
5044+ struct super_block *sb;
5045+ struct inode *h_src_inode;
5046+ struct task_struct *tsk = current;
5047+ /* open the lower SRC and DST files, copy cpg->len bytes, then put both files and their branch references */
5048+ /* the bsrc branch can be either ro or rw. */
5049+ sb = cpg->dentry->d_sb;
5050+ f = file;
5051+ for (i = 0; i < 2; i++, f++) {
5052+ f->dentry = au_h_dptr(cpg->dentry, f->bindex);
5053+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
5054+ /*file*/NULL, f->force_wr);
5055+ err = PTR_ERR(f->file);
5056+ if (IS_ERR(f->file))
5057+ goto *f->label; /* computed goto: out for SRC, out_src for DST */
5058+ }
5059+
5060+ /* try to stop updates to the source while we copy up (IMustLock() presumably asserts the lock; skipped on nfs) */
5061+ h_src_inode = d_inode(file[SRC].dentry);
5062+ if (!au_test_nfs(h_src_inode->i_sb))
5063+ IMustLock(h_src_inode);
5064+ err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
5065+
5066+ /* if only we had an O_NO_DELAY_FPUT flag */
5067+ if (tsk->flags & PF_KTHREAD)
5068+ __fput_sync(file[DST].file);
5069+ else {
5070+ WARN(1, "%pD\nPlease report this warning to aufs-users ML",
5071+ file[DST].file);
5072+ fput(file[DST].file);
5073+ /*
5074+ * too bad.
5075+ * we have to call both since we don't know which place the file
5076+ * was added to.
5077+ */
5078+ task_work_run();
5079+ flush_delayed_fput();
5080+ }
5081+ au_sbr_put(sb, file[DST].bindex);
5082+
5083+out_src:
5084+ fput(file[SRC].file);
5085+ au_sbr_put(sb, file[SRC].bindex);
5086+out:
5087+ return err;
5088+ }
5089+
5090+static int au_do_cpup_regular(struct au_cp_generic *cpg,
5091+ struct au_cpup_reg_attr *h_src_attr)
5092+{
5093+ int err, rerr;
5094+ loff_t l;
5095+ struct path h_path;
5096+ struct inode *h_src_inode, *h_dst_inode;
5097+
5098+ err = 0;
5099+ h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
5100+ l = i_size_read(h_src_inode);
5101+ if (cpg->len == -1 || l < cpg->len)
5102+ cpg->len = l;
5103+ if (cpg->len) {
5104+ /* try stopping to update while we are referencing */
5105+ inode_lock_nested(h_src_inode, AuLsc_I_CHILD);
5106+ au_pin_hdir_unlock(cpg->pin);
5107+
5108+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5109+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
5110+ h_src_attr->iflags = h_src_inode->i_flags;
5111+ if (!au_test_nfs(h_src_inode->i_sb))
5112+ err = vfs_getattr(&h_path, &h_src_attr->st);
5113+ else {
5114+ inode_unlock(h_src_inode);
5115+ err = vfs_getattr(&h_path, &h_src_attr->st);
5116+ inode_lock_nested(h_src_inode, AuLsc_I_CHILD);
5117+ }
5118+ if (unlikely(err)) {
5119+ inode_unlock(h_src_inode);
5120+ goto out;
5121+ }
5122+ h_src_attr->valid = 1;
5123+ if (!au_test_nfs(h_src_inode->i_sb)) {
5124+ err = au_cp_regular(cpg);
5125+ inode_unlock(h_src_inode);
5126+ } else {
5127+ inode_unlock(h_src_inode);
5128+ err = au_cp_regular(cpg);
5129+ }
5130+ rerr = au_pin_hdir_relock(cpg->pin);
5131+ if (!err && rerr)
5132+ err = rerr;
5133+ }
5134+ if (!err && (h_src_inode->i_state & I_LINKABLE)) {
5135+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
5136+ h_dst_inode = d_inode(h_path.dentry);
5137+ spin_lock(&h_dst_inode->i_lock);
5138+ h_dst_inode->i_state |= I_LINKABLE;
5139+ spin_unlock(&h_dst_inode->i_lock);
5140+ }
5141+
5142+out:
5143+ return err;
5144+}
5145+
5146+ static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
5147+ struct inode *h_dir)
5148+ {
5149+ int err, symlen;
5150+ mm_segment_t old_fs;
5151+ union {
5152+ char *k;
5153+ char __user *u;
5154+ } sym;
5155+ struct inode *h_inode = d_inode(h_src);
5156+ const struct inode_operations *h_iop = h_inode->i_op;
5157+
5158+ /* read the link target of @h_src into a page and pass it to vfsub_symlink() for @h_path under @h_dir */
5159+ err = -ENOSYS;
5160+ if (unlikely(!h_iop->readlink))
5161+ goto out;
5162+
5163+ err = -ENOMEM;
5164+ sym.k = (void *)__get_free_page(GFP_NOFS);
5165+ if (unlikely(!sym.k))
5166+ goto out;
5167+
5168+ /* unnecessary to support mmap_sem since symlink is not mmap-able */
5169+ old_fs = get_fs();
5170+ set_fs(KERNEL_DS);
5171+ /* keep one byte for the NUL stored at sym.k[symlen] below; the page may be no larger than PATH_MAX */
5172+ symlen = h_iop->readlink(h_src, sym.u, PATH_MAX - 1);
5173+ err = symlen;
5174+ set_fs(old_fs);
5175+ if (symlen > 0) {
5176+ sym.k[symlen] = 0;
5177+ err = vfsub_symlink(h_dir, h_path, sym.k);
5178+ }
5179+ au_delayed_free_page((unsigned long)sym.k);
5180+out:
5181+ return err;
5182+ }
5183+
5184+/*
5185+ * regardless of the 'acl' option, reset all ACLs.
5186+ * All ACLs will be copied up later from the original entry on the lower branch.
5187+ */
5188+static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode)
5189+{ /* h_dir is not referenced in this body */
5190+ int err;
5191+ struct dentry *h_dentry;
5192+ struct inode *h_inode;
5193+
5194+ h_dentry = h_path->dentry;
5195+ h_inode = d_inode(h_dentry);
5196+ /* forget_all_cached_acls(h_inode)); */
5197+ err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
5198+ AuTraceErr(err);
5199+ if (err == -EOPNOTSUPP) /* -EOPNOTSUPP from removexattr is treated as success */
5200+ err = 0;
5201+ if (!err)
5202+ err = vfsub_acl_chmod(h_inode, mode);
5203+
5204+ AuTraceErr(err);
5205+ return err;
5206+}
5207+
5208+static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
5209+ struct inode *h_dir, struct path *h_path)
5210+{ /* after mkdir: drop the default ACL xattr and refresh the aufs link counts */
5211+ int err;
5212+ struct inode *dir, *inode;
5213+
5214+ err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT);
5215+ AuTraceErr(err);
5216+ if (err == -EOPNOTSUPP) /* -EOPNOTSUPP from removexattr is treated as success */
5217+ err = 0;
5218+ if (unlikely(err))
5219+ goto out;
5220+
5221+ /*
5222+ * strange behaviour from the user's view,
5223+ * particularly in the setattr case
5224+ */
5225+ dir = d_inode(dst_parent);
5226+ if (au_ibtop(dir) == cpg->bdst) /* the parent is refreshed only when bdst is its top branch */
5227+ au_cpup_attr_nlink(dir, /*force*/1);
5228+ inode = d_inode(cpg->dentry);
5229+ au_cpup_attr_nlink(inode, /*force*/1);
5230+
5231+out:
5232+ return err;
5233+}
5234+
5235+static noinline_for_stack
5236+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
5237+ struct au_cpup_reg_attr *h_src_attr)
5238+{ /* create the entry on branch bdst, dispatching on the file type of the source inode */
5239+ int err;
5240+ umode_t mode;
5241+ unsigned int mnt_flags;
5242+ unsigned char isdir, isreg, force;
5243+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
5244+ struct au_dtime dt;
5245+ struct path h_path;
5246+ struct dentry *h_src, *h_dst, *h_parent;
5247+ struct inode *h_inode, *h_dir;
5248+ struct super_block *sb;
5249+
5250+ /* bsrc branch can be ro/rw. */
5251+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5252+ h_inode = d_inode(h_src);
5253+ AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
5254+
5255+ /* try to stop it being referenced while we are creating */
5256+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5257+ if (au_ftest_cpup(cpg->flags, RENAME)) /* with RENAME, the target name must carry the whiteout prefix */
5258+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
5259+ AUFS_WH_PFX_LEN));
5260+ h_parent = h_dst->d_parent; /* dir inode is locked */
5261+ h_dir = d_inode(h_parent);
5262+ IMustLock(h_dir);
5263+ AuDebugOn(h_parent != h_dst->d_parent);
5264+
5265+ sb = cpg->dentry->d_sb;
5266+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
5267+ if (do_dt) { /* save the parent dir times; reverted at the end of this function */
5268+ h_path.dentry = h_parent;
5269+ au_dtime_store(&dt, dst_parent, &h_path);
5270+ }
5271+ h_path.dentry = h_dst;
5272+
5273+ isreg = 0;
5274+ isdir = 0;
5275+ mode = h_inode->i_mode;
5276+ switch (mode & S_IFMT) {
5277+ case S_IFREG:
5278+ isreg = 1;
5279+ err = vfsub_create(h_dir, &h_path, S_IRUSR | S_IWUSR,
5280+ /*want_excl*/true); /* created with owner read/write only */
5281+ if (!err)
5282+ err = au_do_cpup_regular(cpg, h_src_attr);
5283+ break;
5284+ case S_IFDIR:
5285+ isdir = 1;
5286+ err = vfsub_mkdir(h_dir, &h_path, mode);
5287+ if (!err)
5288+ err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
5289+ break;
5290+ case S_IFLNK:
5291+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
5292+ break;
5293+ case S_IFCHR:
5294+ case S_IFBLK:
5295+ AuDebugOn(!capable(CAP_MKNOD));
5296+ /*FALLTHROUGH*/
5297+ case S_IFIFO:
5298+ case S_IFSOCK:
5299+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
5300+ break;
5301+ default:
5302+ AuIOErr("Unknown inode type 0%o\n", mode);
5303+ err = -EIO;
5304+ }
5305+ if (!err) /* reached for every file type that was created successfully */
5306+ err = au_reset_acl(h_dir, &h_path, mode);
5307+
5308+ mnt_flags = au_mntflags(sb);
5309+ if (!au_opt_test(mnt_flags, UDBA_NONE)
5310+ && !isdir
5311+ && au_opt_test(mnt_flags, XINO)
5312+ && (h_inode->i_nlink == 1
5313+ || (h_inode->i_state & I_LINKABLE))
5314+ /* todo: unnecessary? */
5315+ /* && d_inode(cpg->dentry)->i_nlink == 1 */
5316+ && cpg->bdst < cpg->bsrc
5317+ && !au_ftest_cpup(cpg->flags, KEEPLINO))
5318+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0); /* runs regardless of err above */
5319+ /* ignore this error */
5320+
5321+ if (!err) {
5322+ force = 0;
5323+ if (isreg) { /* force is set when there was data to copy */
5324+ force = !!cpg->len;
5325+ if (cpg->len == -1) /* -1: decide by the source file size instead */
5326+ force = !!i_size_read(h_inode);
5327+ }
5328+ au_fhsm_wrote(sb, cpg->bdst, force);
5329+ }
5330+
5331+ if (do_dt)
5332+ au_dtime_revert(&dt);
5333+ return err;
5334+}
5335+
5336+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
5337+{ /* rename the just-created lower entry on bdst to a freshly looked-up target within the same dir */
5338+ int err;
5339+ struct dentry *dentry, *h_dentry, *h_parent, *parent;
5340+ struct inode *h_dir;
5341+ aufs_bindex_t bdst;
5342+
5343+ dentry = cpg->dentry;
5344+ bdst = cpg->bdst;
5345+ h_dentry = au_h_dptr(dentry, bdst);
5346+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
5347+ dget(h_dentry); /* presumably keeps h_dentry alive while its slot is cleared -- confirm */
5348+ au_set_h_dptr(dentry, bdst, NULL);
5349+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
5350+ if (!err) /* the dentry found by the lookup becomes the rename target */
5351+ h_path->dentry = dget(au_h_dptr(dentry, bdst));
5352+ au_set_h_dptr(dentry, bdst, h_dentry); /* put the original lower dentry back */
5353+ } else { /* OVERWRITE: look the name up under the parent's lower dentry */
5354+ err = 0;
5355+ parent = dget_parent(dentry);
5356+ h_parent = au_h_dptr(parent, bdst);
5357+ dput(parent);
5358+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
5359+ if (IS_ERR(h_path->dentry))
5360+ err = PTR_ERR(h_path->dentry);
5361+ }
5362+ if (unlikely(err))
5363+ goto out;
5364+
5365+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5366+ h_dir = d_inode(h_parent);
5367+ IMustLock(h_dir);
5368+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
5369+ /* no delegation since it is just created */
5370+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL,
5371+ /*flags*/0);
5372+ dput(h_path->dentry); /* drop the reference taken on the rename target above */
5373+
5374+out:
5375+ return err;
5376+}
5377+
5378+/*
5379+ * copy up cpg->dentry from branch cpg->bsrc to branch cpg->bdst.
5380+ * the caller must set both of the lower dentries.
5381+ * cpg->len is for truncating; when it is -1, copy up the entire file.
5382+ * in link/rename cases, @dst_parent may be different from the real one.
5383+ * cpg->bsrc can be larger than cpg->bdst.
5384+ * aufs doesn't touch the credential, so
5385+ * security_inode_copy_up{,_xattr}() are unnecessary.
5386+ */
5387+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
5388+{
5389+ int err, rerr;
5390+ aufs_bindex_t old_ibtop;
5391+ unsigned char isdir, plink;
5392+ struct dentry *h_src, *h_dst, *h_parent;
5393+ struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
5394+ struct super_block *sb;
5395+ struct au_branch *br;
5396+ /* to reduce stack size */
5397+ struct {
5398+ struct au_dtime dt;
5399+ struct path h_path;
5400+ struct au_cpup_reg_attr h_src_attr;
5401+ } *a;
5402+
5403+ err = -ENOMEM;
5404+ a = kmalloc(sizeof(*a), GFP_NOFS);
5405+ if (unlikely(!a))
5406+ goto out;
5407+ a->h_src_attr.valid = 0; /* set to 1 by au_do_cpup_regular() once the attributes are read */
5408+
5409+ sb = cpg->dentry->d_sb;
5410+ br = au_sbr(sb, cpg->bdst);
5411+ a->h_path.mnt = au_br_mnt(br);
5412+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5413+ h_parent = h_dst->d_parent; /* dir inode is locked */
5414+ h_dir = d_inode(h_parent);
5415+ IMustLock(h_dir);
5416+
5417+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5418+ inode = d_inode(cpg->dentry);
5419+
5420+ if (!dst_parent) /* either way a reference is held; dropped at out_parent */
5421+ dst_parent = dget_parent(cpg->dentry);
5422+ else
5423+ dget(dst_parent);
5424+
5425+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
5426+ dst_inode = au_h_iptr(inode, cpg->bdst);
5427+ if (dst_inode) { /* the aufs inode already has a lower inode on bdst */
5428+ if (unlikely(!plink)) {
5429+ err = -EIO;
5430+ AuIOErr("hi%lu(i%lu) exists on b%d "
5431+ "but plink is disabled\n",
5432+ dst_inode->i_ino, inode->i_ino, cpg->bdst);
5433+ goto out_parent;
5434+ }
5435+
5436+ if (dst_inode->i_nlink) { /* still linked: hard-link to it instead of copying */
5437+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
5438+
5439+ h_src = au_plink_lkup(inode, cpg->bdst);
5440+ err = PTR_ERR(h_src);
5441+ if (IS_ERR(h_src))
5442+ goto out_parent;
5443+ if (unlikely(d_is_negative(h_src))) {
5444+ err = -EIO;
5445+ AuIOErr("i%lu exists on b%d "
5446+ "but not pseudo-linked\n",
5447+ inode->i_ino, cpg->bdst);
5448+ dput(h_src);
5449+ goto out_parent;
5450+ }
5451+
5452+ if (do_dt) {
5453+ a->h_path.dentry = h_parent;
5454+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5455+ }
5456+
5457+ a->h_path.dentry = h_dst;
5458+ delegated = NULL;
5459+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
5460+ if (!err && au_ftest_cpup(cpg->flags, RENAME))
5461+ err = au_do_ren_after_cpup(cpg, &a->h_path);
5462+ if (do_dt)
5463+ au_dtime_revert(&a->dt);
5464+ if (unlikely(err == -EWOULDBLOCK)) {
5465+ pr_warn("cannot retry for NFSv4 delegation"
5466+ " for an internal link\n");
5467+ iput(delegated);
5468+ }
5469+ dput(h_src);
5470+ goto out_parent; /* the link path ends here, success or not */
5471+ } else
5472+ /* todo: cpup_wh_file? */
5473+ /* udba work */
5474+ au_update_ibrange(inode, /*do_put_zero*/1);
5475+ }
5476+
5477+ isdir = S_ISDIR(inode->i_mode);
5478+ old_ibtop = au_ibtop(inode);
5479+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
5480+ if (unlikely(err))
5481+ goto out_rev;
5482+ dst_inode = d_inode(h_dst);
5483+ inode_lock_nested(dst_inode, AuLsc_I_CHILD2);
5484+ /* todo: necessary? */
5485+ /* au_pin_hdir_unlock(cpg->pin); */
5486+
5487+ err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
5488+ if (unlikely(err)) {
5489+ /* todo: necessary? */
5490+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
5491+ inode_unlock(dst_inode);
5492+ goto out_rev;
5493+ }
5494+
5495+ if (cpg->bdst < old_ibtop) { /* bdst becomes the new top branch of the inode */
5496+ if (S_ISREG(inode->i_mode)) {
5497+ err = au_dy_iaop(inode, cpg->bdst, dst_inode);
5498+ if (unlikely(err)) {
5499+ /* ignore an error */
5500+ /* au_pin_hdir_relock(cpg->pin); */
5501+ inode_unlock(dst_inode);
5502+ goto out_rev;
5503+ }
5504+ }
5505+ au_set_ibtop(inode, cpg->bdst);
5506+ } else /* otherwise bdst is recorded as the bottom branch */
5507+ au_set_ibbot(inode, cpg->bdst);
5508+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
5509+ au_hi_flags(inode, isdir));
5510+
5511+ /* todo: necessary? */
5512+ /* err = au_pin_hdir_relock(cpg->pin); */
5513+ inode_unlock(dst_inode);
5514+ if (unlikely(err)) /* err is only assigned by the commented-out relock above */
5515+ goto out_rev;
5516+
5517+ src_inode = d_inode(h_src);
5518+ if (!isdir
5519+ && (src_inode->i_nlink > 1
5520+ || src_inode->i_state & I_LINKABLE)
5521+ && plink)
5522+ au_plink_append(inode, cpg->bdst, h_dst);
5523+
5524+ if (au_ftest_cpup(cpg->flags, RENAME)) {
5525+ a->h_path.dentry = h_dst;
5526+ err = au_do_ren_after_cpup(cpg, &a->h_path);
5527+ }
5528+ if (!err)
5529+ goto out_parent; /* success */
5530+
5531+ /* revert */
5532+out_rev: /* remove the entry made on bdst, keeping the parent dir times */
5533+ a->h_path.dentry = h_parent;
5534+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5535+ a->h_path.dentry = h_dst;
5536+ rerr = 0;
5537+ if (d_is_positive(h_dst)) {
5538+ if (!isdir) {
5539+ /* no delegation since it is just created */
5540+ rerr = vfsub_unlink(h_dir, &a->h_path,
5541+ /*delegated*/NULL, /*force*/0);
5542+ } else
5543+ rerr = vfsub_rmdir(h_dir, &a->h_path);
5544+ }
5545+ au_dtime_revert(&a->dt);
5546+ if (rerr) { /* a failed removal replaces the original error with -EIO */
5547+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
5548+ err = -EIO;
5549+ }
5550+out_parent:
5551+ dput(dst_parent);
5552+ au_delayed_kfree(a);
5553+out:
5554+ return err;
5555+}
5556+
5557+#if 0 /* reserved */
5558+struct au_cpup_single_args { /* argument bundle for au_call_cpup_single() */
5559+ int *errp; /* where the result of au_cpup_single() is stored */
5560+ struct au_cp_generic *cpg;
5561+ struct dentry *dst_parent;
5562+};
5563+
5564+static void au_call_cpup_single(void *args)
5565+{ /* runs au_cpup_single() between au_pin_hdir_acquire_nest() and au_pin_hdir_release() */
5566+ struct au_cpup_single_args *a = args;
5567+
5568+ au_pin_hdir_acquire_nest(a->cpg->pin);
5569+ *a->errp = au_cpup_single(a->cpg, a->dst_parent);
5570+ au_pin_hdir_release(a->cpg->pin);
5571+}
5572+#endif
5573+
5574+/*
5575+ * prevent SIGXFSZ in copy-up.
5576+ * testing CAP_MKNOD is for generic fs,
5577+ * but CAP_FSETID is for xfs only, currently.
5578+ */
5579+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
5580+{ /* returns non-zero when the callers in this file hand the work to au_wkq_wait() */
5581+ int do_sio;
5582+ struct super_block *sb;
5583+ struct inode *h_dir;
5584+
5585+ do_sio = 0;
5586+ sb = au_pinned_parent(pin)->d_sb;
5587+ if (!au_wkq_test()
5588+ && (!au_sbi(sb)->si_plink_maint_pid
5589+ || au_plink_maint(sb, AuLock_NOPLM))) {
5590+ switch (mode & S_IFMT) {
5591+ case S_IFREG:
5592+ /* no condition about RLIMIT_FSIZE and the file size */
5593+ do_sio = 1;
5594+ break;
5595+ case S_IFCHR:
5596+ case S_IFBLK:
5597+ do_sio = !capable(CAP_MKNOD);
5598+ break;
5599+ }
5600+ if (!do_sio) /* setuid/setgid bits without CAP_FSETID */
5601+ do_sio = ((mode & (S_ISUID | S_ISGID))
5602+ && !capable(CAP_FSETID));
5603+ /* this workaround may be removed in the future */
5604+ if (!do_sio) { /* sticky bit set on the pinned lower dir */
5605+ h_dir = au_pinned_h_dir(pin);
5606+ do_sio = h_dir->i_mode & S_ISVTX;
5607+ }
5608+ }
5609+
5610+ return do_sio;
5611+}
5612+
5613+#if 0 /* reserved */
5614+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
5615+{ /* run au_cpup_single() directly, or via au_wkq_wait() when au_cpup_sio_test() asks for it */
5616+ int err, wkq_err;
5617+ struct dentry *h_dentry;
5618+
5619+ h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5620+ if (!au_cpup_sio_test(cpg->pin, d_inode(h_dentry)->i_mode)) /* the pin lives in cpg; no local 'pin' exists */
5621+ err = au_cpup_single(cpg, dst_parent);
5622+ else {
5623+ struct au_cpup_single_args args = {
5624+ .errp = &err,
5625+ .cpg = cpg,
5626+ .dst_parent = dst_parent
5627+ };
5628+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
5629+ if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself overrides err */
5630+ err = wkq_err;
5631+ }
5632+
5633+ return err;
5634+}
5635+#endif
5636+
5637+/*
5638+ * copy up the @dentry from the first active lower branch to @bdst,
5639+ * using au_cpup_single().
5640+ */
5641+static int au_cpup_simple(struct au_cp_generic *cpg)
5642+{
5643+ int err;
5644+ unsigned int flags_orig;
5645+ struct dentry *dentry;
5646+
5647+ AuDebugOn(cpg->bsrc < 0);
5648+
5649+ dentry = cpg->dentry;
5650+ DiMustWriteLock(dentry);
5651+
5652+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
5653+ if (!err) {
5654+ flags_orig = cpg->flags;
5655+ au_fset_cpup(cpg->flags, RENAME); /* RENAME is forced for this call only */
5656+ err = au_cpup_single(cpg, NULL);
5657+ cpg->flags = flags_orig; /* the caller's flags are restored */
5658+ if (!err)
5659+ return 0; /* success */
5660+
5661+ /* revert */
5662+ au_set_h_dptr(dentry, cpg->bdst, NULL);
5663+ au_set_dbtop(dentry, cpg->bsrc);
5664+ }
5665+
5666+ return err;
5667+}
5668+
5669+struct au_cpup_simple_args { /* argument bundle for au_call_cpup_simple() */
5670+ int *errp; /* where the result of au_cpup_simple() is stored */
5671+ struct au_cp_generic *cpg;
5672+};
5673+
5674+static void au_call_cpup_simple(void *args)
5675+{ /* callback passed to au_wkq_wait(); args is a struct au_cpup_simple_args */
5676+ struct au_cpup_simple_args *a = args;
5677+
5678+ au_pin_hdir_acquire_nest(a->cpg->pin);
5679+ *a->errp = au_cpup_simple(a->cpg);
5680+ au_pin_hdir_release(a->cpg->pin);
5681+}
5682+
5683+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
5684+{ /* run au_cpup_simple() directly or via au_wkq_wait(), optionally wrapped in au_h_open_pre/post() */
5685+ int err, wkq_err;
5686+ struct dentry *dentry, *parent;
5687+ struct file *h_file;
5688+ struct inode *h_dir;
5689+
5690+ dentry = cpg->dentry;
5691+ h_file = NULL;
5692+ if (au_ftest_cpup(cpg->flags, HOPEN)) {
5693+ AuDebugOn(cpg->bsrc < 0);
5694+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
5695+ err = PTR_ERR(h_file);
5696+ if (IS_ERR(h_file))
5697+ goto out;
5698+ }
5699+
5700+ parent = dget_parent(dentry);
5701+ h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
5702+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
5703+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
5704+ err = au_cpup_simple(cpg); /* neither test asked for delegation */
5705+ else {
5706+ struct au_cpup_simple_args args = {
5707+ .errp = &err,
5708+ .cpg = cpg
5709+ };
5710+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
5711+ if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself overrides err */
5712+ err = wkq_err;
5713+ }
5714+
5715+ dput(parent);
5716+ if (h_file) /* h_file is non-NULL only when HOPEN was set */
5717+ au_h_open_post(dentry, cpg->bsrc, h_file);
5718+
5719+out:
5720+ return err;
5721+}
5722+
5723+int au_sio_cpup_simple(struct au_cp_generic *cpg)
5724+{ /* copy-up entry point: fills in cpg->bsrc when it is negative, then au_do_sio_cpup_simple() */
5725+ aufs_bindex_t bsrc, bbot;
5726+ struct dentry *dentry, *h_dentry;
5727+
5728+ if (cpg->bsrc < 0) { /* pick the first branch index after bdst that has a lower dentry */
5729+ dentry = cpg->dentry;
5730+ bbot = au_dbbot(dentry);
5731+ for (bsrc = cpg->bdst + 1; bsrc <= bbot; bsrc++) {
5732+ h_dentry = au_h_dptr(dentry, bsrc);
5733+ if (h_dentry) {
5734+ AuDebugOn(d_is_negative(h_dentry));
5735+ break;
5736+ }
5737+ }
5738+ AuDebugOn(bsrc > bbot); /* the search is expected to find one */
5739+ cpg->bsrc = bsrc;
5740+ }
5741+ AuDebugOn(cpg->bsrc <= cpg->bdst); /* copy-up: the source index is larger than the destination */
5742+ return au_do_sio_cpup_simple(cpg);
5743+}
5744+
5745+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
5746+{ /* copy-down entry point: same worker, but the destination index must be larger than the source */
5747+ AuDebugOn(cpg->bdst <= cpg->bsrc);
5748+ return au_do_sio_cpup_simple(cpg);
5749+}
5750+
5751+/* ---------------------------------------------------------------------- */
5752+
5753+/*
5754+ * copy up the deleted file for writing.
5755+ */
5756+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
5757+ struct file *file)
5758+{ /* temporarily swaps the dinfo slots so that au_cpup_single() writes onto wh_dentry */
5759+ int err;
5760+ unsigned int flags_orig;
5761+ aufs_bindex_t bsrc_orig;
5762+ struct au_dinfo *dinfo;
5763+ struct { /* a dinfo slot and the dentry it held before the swap */
5764+ struct au_hdentry *hd;
5765+ struct dentry *h_dentry;
5766+ } hdst, hsrc;
5767+
5768+ dinfo = au_di(cpg->dentry);
5769+ AuRwMustWriteLock(&dinfo->di_rwsem);
5770+
5771+ bsrc_orig = cpg->bsrc;
5772+ cpg->bsrc = dinfo->di_btop; /* the current top branch is used as the source */
5773+ hdst.hd = au_hdentry(dinfo, cpg->bdst);
5774+ hdst.h_dentry = hdst.hd->hd_dentry;
5775+ hdst.hd->hd_dentry = wh_dentry;
5776+ dinfo->di_btop = cpg->bdst;
5777+
5778+ hsrc.h_dentry = NULL;
5779+ if (file) { /* with a file, its top lower file's dentry stands in as the source */
5780+ hsrc.hd = au_hdentry(dinfo, cpg->bsrc);
5781+ hsrc.h_dentry = hsrc.hd->hd_dentry;
5782+ hsrc.hd->hd_dentry = au_hf_top(file)->f_path.dentry;
5783+ }
5784+ flags_orig = cpg->flags;
5785+ cpg->flags = !AuCpup_DTIME; /* evaluates to 0, i.e. no cpup flag is set */
5786+ err = au_cpup_single(cpg, /*dst_parent*/NULL);
5787+ cpg->flags = flags_orig;
5788+ if (file) {
5789+ if (!err)
5790+ err = au_reopen_nondir(file);
5791+ hsrc.hd->hd_dentry = hsrc.h_dentry;
5792+ }
5793+ hdst.hd->hd_dentry = hdst.h_dentry; /* undo the swaps made above */
5794+ dinfo->di_btop = cpg->bsrc;
5795+ cpg->bsrc = bsrc_orig;
5796+
5797+ return err;
5798+}
5799+
5800+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
5801+{ /* copy up onto a name from au_whtmp_lkup(), remove that name, and record the dentry in the inode */
5802+ int err;
5803+ aufs_bindex_t bdst;
5804+ struct au_dtime dt;
5805+ struct dentry *dentry, *parent, *h_parent, *wh_dentry;
5806+ struct au_branch *br;
5807+ struct path h_path;
5808+
5809+ dentry = cpg->dentry;
5810+ bdst = cpg->bdst;
5811+ br = au_sbr(dentry->d_sb, bdst);
5812+ parent = dget_parent(dentry);
5813+ h_parent = au_h_dptr(parent, bdst);
5814+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
5815+ err = PTR_ERR(wh_dentry);
5816+ if (IS_ERR(wh_dentry))
5817+ goto out;
5818+
5819+ h_path.dentry = h_parent;
5820+ h_path.mnt = au_br_mnt(br);
5821+ au_dtime_store(&dt, parent, &h_path); /* reverted below, on the success path only */
5822+ err = au_do_cpup_wh(cpg, wh_dentry, file);
5823+ if (unlikely(err))
5824+ goto out_wh;
5825+
5826+ dget(wh_dentry); /* extra reference, presumably taken over by au_set_hi_wh() below -- confirm */
5827+ h_path.dentry = wh_dentry;
5828+ if (!d_is_dir(wh_dentry)) {
5829+ /* no delegation since it is just created */
5830+ err = vfsub_unlink(d_inode(h_parent), &h_path,
5831+ /*delegated*/NULL, /*force*/0);
5832+ } else
5833+ err = vfsub_rmdir(d_inode(h_parent), &h_path);
5834+ if (unlikely(err)) {
5835+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
5836+ wh_dentry, err);
5837+ err = -EIO;
5838+ }
5839+ au_dtime_revert(&dt);
5840+ au_set_hi_wh(d_inode(dentry), bdst, wh_dentry); /* called even when the removal above failed */
5841+
5842+out_wh:
5843+ dput(wh_dentry);
5844+out:
5845+ dput(parent);
5846+ return err;
5847+}
5848+
5849+struct au_cpup_wh_args { /* argument bundle for au_call_cpup_wh() */
5850+ int *errp; /* where the result of au_cpup_wh() is stored */
5851+ struct au_cp_generic *cpg;
5852+ struct file *file;
5853+};
5854+
5855+static void au_call_cpup_wh(void *args)
5856+{ /* callback passed to au_wkq_wait(); args is a struct au_cpup_wh_args */
5857+ struct au_cpup_wh_args *a = args;
5858+
5859+ au_pin_hdir_acquire_nest(a->cpg->pin);
5860+ *a->errp = au_cpup_wh(a->cpg, a->file);
5861+ au_pin_hdir_release(a->cpg->pin);
5862+}
5863+
5864+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
5865+{ /* run au_cpup_wh() directly or via au_wkq_wait(); uses wbr_orph as parent when the lower dir has no links */
5866+ int err, wkq_err;
5867+ aufs_bindex_t bdst;
5868+ struct dentry *dentry, *parent, *h_orph, *h_parent;
5869+ struct inode *dir, *h_dir, *h_tmpdir;
5870+ struct au_wbr *wbr;
5871+ struct au_pin wh_pin, *pin_orig;
5872+
5873+ dentry = cpg->dentry;
5874+ bdst = cpg->bdst;
5875+ parent = dget_parent(dentry);
5876+ dir = d_inode(parent);
5877+ h_orph = NULL;
5878+ h_parent = NULL;
5879+ h_dir = au_igrab(au_h_iptr(dir, bdst));
5880+ h_tmpdir = h_dir;
5881+ pin_orig = NULL;
5882+ if (!h_dir->i_nlink) { /* the lower parent dir on bdst has no links left */
5883+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
5884+ h_orph = wbr->wbr_orph;
5885+
5886+ h_parent = dget(au_h_dptr(parent, bdst)); /* saved; put back after the copy-up */
5887+ au_set_h_dptr(parent, bdst, dget(h_orph));
5888+ h_tmpdir = d_inode(h_orph);
5889+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
5890+
5891+ inode_lock_nested(h_tmpdir, AuLsc_I_PARENT3);
5892+ /* todo: au_h_open_pre()? */
5893+
5894+ pin_orig = cpg->pin; /* a local pin replaces cpg->pin for the duration */
5895+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
5896+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
5897+ cpg->pin = &wh_pin;
5898+ }
5899+
5900+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
5901+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
5902+ err = au_cpup_wh(cpg, file); /* neither test asked for delegation */
5903+ else {
5904+ struct au_cpup_wh_args args = {
5905+ .errp = &err,
5906+ .cpg = cpg,
5907+ .file = file
5908+ };
5909+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
5910+ if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself overrides err */
5911+ err = wkq_err;
5912+ }
5913+
5914+ if (h_orph) { /* undo the substitution made above */
5915+ inode_unlock(h_tmpdir);
5916+ /* todo: au_h_open_post()? */
5917+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
5918+ au_set_h_dptr(parent, bdst, h_parent);
5919+ AuDebugOn(!pin_orig);
5920+ cpg->pin = pin_orig;
5921+ }
5922+ iput(h_dir); /* pairs with the au_igrab() at the top */
5923+ dput(parent);
5924+
5925+ return err;
5926+}
5927+
5928+/* ---------------------------------------------------------------------- */
5929+
5930+/*
5931+ * generic routine for both copy-up and copy-down.
5932+ */
5933+/* cf. revalidate function in file.c */
5934+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5935+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
5936+ struct au_pin *pin,
5937+ struct dentry *h_parent, void *arg),
5938+ void *arg)
5939+{ /* calls @cp on ancestors of @dentry, topmost first, until the parent has a lower dentry on bdst */
5940+ int err;
5941+ struct au_pin pin;
5942+ struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
5943+
5944+ err = 0;
5945+ parent = dget_parent(dentry);
5946+ if (IS_ROOT(parent)) /* nothing is done when the parent is a root dentry */
5947+ goto out;
5948+
5949+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
5950+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
5951+
5952+ /* do not use au_dpage */
5953+ real_parent = parent;
5954+ while (1) { /* one ancestor is handled per iteration */
5955+ dput(parent);
5956+ parent = dget_parent(dentry);
5957+ h_parent = au_h_dptr(parent, bdst);
5958+ if (h_parent)
5959+ goto out; /* success */
5960+
5961+ /* find the topmost dir that needs to be copied up */
5962+ do {
5963+ d = parent;
5964+ dput(parent);
5965+ parent = dget_parent(d);
5966+ di_read_lock_parent3(parent, !AuLock_IR);
5967+ h_parent = au_h_dptr(parent, bdst);
5968+ di_read_unlock(parent, !AuLock_IR);
5969+ } while (!h_parent);
5970+
5971+ if (d != real_parent) /* real_parent is not locked here */
5972+ di_write_lock_child3(d);
5973+
5974+ /* somebody else might create while we were sleeping */
5975+ h_dentry = au_h_dptr(d, bdst);
5976+ if (!h_dentry || d_is_negative(h_dentry)) {
5977+ if (h_dentry)
5978+ au_update_dbtop(d);
5979+
5980+ au_pin_set_dentry(&pin, d);
5981+ err = au_do_pin(&pin);
5982+ if (!err) { /* @cp runs with d pinned */
5983+ err = cp(d, bdst, &pin, h_parent, arg);
5984+ au_unpin(&pin);
5985+ }
5986+ }
5987+
5988+ if (d != real_parent)
5989+ di_write_unlock(d);
5990+ if (unlikely(err))
5991+ break;
5992+ }
5993+
5994+out:
5995+ dput(parent);
5996+ return err;
5997+}
5998+
5999+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
6000+ struct au_pin *pin,
6001+ struct dentry *h_parent __maybe_unused,
6002+ void *arg __maybe_unused)
6003+{ /* au_cp_dirs() callback: copy up one directory through au_sio_cpup_simple() */
6004+ struct au_cp_generic cpg = {
6005+ .dentry = dentry,
6006+ .bdst = bdst,
6007+ .bsrc = -1, /* negative: au_sio_cpup_simple() searches for the source branch */
6008+ .len = 0,
6009+ .pin = pin,
6010+ .flags = AuCpup_DTIME
6011+ };
6012+ return au_sio_cpup_simple(&cpg);
6013+}
6014+
6015+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
6016+{ /* au_cp_dirs() with au_cpup_dir() as the per-directory callback and no extra argument */
6017+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
6018+}
6019+
6020+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
6021+{ /* call au_cpup_dirs() only when the parent has no lower inode on bdst */
6022+ int err;
6023+ struct dentry *parent;
6024+ struct inode *dir;
6025+
6026+ parent = dget_parent(dentry);
6027+ dir = d_inode(parent);
6028+ err = 0;
6029+ if (au_h_iptr(dir, bdst)) /* already present on bdst: nothing to do */
6030+ goto out;
6031+
6032+ di_read_unlock(parent, AuLock_IR); /* swap the parent's read lock for a write lock */
6033+ di_write_lock_parent(parent);
6034+ /* someone else might change our inode while we were sleeping */
6035+ if (!au_h_iptr(dir, bdst))
6036+ err = au_cpup_dirs(dentry, bdst);
6037+ di_downgrade_lock(parent, AuLock_IR); /* back to a read lock before returning */
6038+
6039+out:
6040+ dput(parent);
6041+ return err;
6042+}
6043diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
6044--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
6045+++ linux/fs/aufs/cpup.h 2016-10-09 16:55:36.486034798 +0200
6046@@ -0,0 +1,94 @@
6047+/*
6048+ * Copyright (C) 2005-2016 Junjiro R. Okajima
6049+ *
6050+ * This program, aufs is free software; you can redistribute it and/or modify
6051+ * it under the terms of the GNU General Public License as published by
6052+ * the Free Software Foundation; either version 2 of the License, or
6053+ * (at your option) any later version.
6054+ *
6055+ * This program is distributed in the hope that it will be useful,
6056+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6057+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6058+ * GNU General Public License for more details.
6059+ *
6060+ * You should have received a copy of the GNU General Public License
6061+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
6062+ */
6063+
6064+/*
6065+ * copy-up/down functions
6066+ */
6067+
6068+#ifndef __AUFS_CPUP_H__
6069+#define __AUFS_CPUP_H__
6070+
6071+#ifdef __KERNEL__
6072+
6073+#include <linux/path.h>
6074+
6075+struct inode;
6076+struct file;
6077+struct au_pin;
6078+
6079+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
6080+void au_cpup_attr_timesizes(struct inode *inode);
6081+void au_cpup_attr_nlink(struct inode *inode, int force);
6082+void au_cpup_attr_changeable(struct inode *inode);
6083+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
6084+void au_cpup_attr_all(struct inode *inode, int force);
6085+
6086+/* ---------------------------------------------------------------------- */
6087+
6088+struct au_cp_generic { /* parameters shared by the copy-up/down routines */
6089+ struct dentry *dentry; /* the aufs dentry being copied */
6090+ aufs_bindex_t bdst, bsrc; /* destination and source branch indices */
6091+ loff_t len; /* -1 means the entire file */
6092+ struct au_pin *pin;
6093+ unsigned int flags; /* AuCpup_* bits, see below */
6094+};
6095+
6096+/* cpup flags */
6097+#define AuCpup_DTIME 1 /* do dtime_store/revert */
6098+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
6099+ for link(2) */
6100+#define AuCpup_RENAME (1 << 2) /* rename after cpup */
6101+#define AuCpup_HOPEN (1 << 3) /* call h_open_pre/post() in
6102+ cpup */
6103+#define AuCpup_OVERWRITE (1 << 4) /* allow overwriting the
6104+ existing entry */
6105+#define AuCpup_RWDST (1 << 5) /* force write target even if
6106+ the branch is marked as RO */
6107+
6108+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
6109+#define au_fset_cpup(flags, name) \
6110+ do { (flags) |= AuCpup_##name; } while (0)
6111+#define au_fclr_cpup(flags, name) \
6112+ do { (flags) &= ~AuCpup_##name; } while (0)
6113+
6114+int au_copy_file(struct file *dst, struct file *src, loff_t len);
6115+int au_sio_cpup_simple(struct au_cp_generic *cpg);
6116+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
6117+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
6118+
6119+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
6120+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
6121+ struct au_pin *pin,
6122+ struct dentry *h_parent, void *arg),
6123+ void *arg);
6124+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6125+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6126+
6127+/* ---------------------------------------------------------------------- */
6128+
6129+/* keep timestamps across a copy-up */
6130+struct au_dtime { /* filled by au_dtime_store(), consumed by au_dtime_revert() */
6131+ struct dentry *dt_dentry;
6132+ struct path dt_h_path;
6133+ struct timespec dt_atime, dt_mtime;
6134+};
6135+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
6136+ struct path *h_path);
6137+void au_dtime_revert(struct au_dtime *dt);
6138+
6139+#endif /* __KERNEL__ */
6140+#endif /* __AUFS_CPUP_H__ */
6141diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
6142--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
6143+++ linux/fs/aufs/dbgaufs.c 2016-10-09 16:55:38.886097714 +0200
6144@@ -0,0 +1,438 @@
6145+/*
6146+ * Copyright (C) 2005-2016 Junjiro R. Okajima
6147+ *
6148+ * This program, aufs is free software; you can redistribute it and/or modify
6149+ * it under the terms of the GNU General Public License as published by
6150+ * the Free Software Foundation; either version 2 of the License, or
6151+ * (at your option) any later version.
6152+ *
6153+ * This program is distributed in the hope that it will be useful,
6154+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6155+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6156+ * GNU General Public License for more details.
6157+ *
6158+ * You should have received a copy of the GNU General Public License
6159+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
6160+ */
6161+
6162+/*
6163+ * debugfs interface
6164+ */
6165+
6166+#include <linux/debugfs.h>
6167+#include "aufs.h"
6168+
6169+#ifndef CONFIG_SYSFS
6170+#error DEBUG_FS depends upon SYSFS
6171+#endif
6172+
6173+static struct dentry *dbgaufs;
6174+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
6175+
6176+/* 20 is the maximum number of decimal digits of a 64-bit unsigned long */
6177+struct dbgaufs_arg {
6178+ int n; /* length of the text held in a[] */
6179+ char a[20 * 4];
6180+};
6181+
6182+/*
6183+ * common functions for all XINO files
6184+ */
6185+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
6186+ struct file *file)
6187+{ /* frees the buffer that dbgaufs_xi_open() stored in file->private_data */
6188+ au_delayed_kfree(file->private_data);
6189+ return 0;
6190+}
6191+
6192+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
6193+{ /* format the stat of xf as text into a buffer kept in file->private_data */
6194+ int err;
6195+ struct kstat st;
6196+ struct dbgaufs_arg *p;
6197+
6198+ err = -ENOMEM;
6199+ p = kmalloc(sizeof(*p), GFP_NOFS);
6200+ if (unlikely(!p))
6201+ goto out;
6202+
6203+ err = 0;
6204+ p->n = 0;
6205+ file->private_data = p;
6206+ if (!xf) /* no file given: succeed with an empty buffer */
6207+ goto out;
6208+
6209+ err = vfs_getattr(&xf->f_path, &st);
6210+ if (!err) {
6211+ if (do_fcnt) /* with do_fcnt, the file reference count is printed first */
6212+ p->n = snprintf
6213+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
6214+ (long)file_count(xf), st.blocks, st.blksize,
6215+ (long long)st.size);
6216+ else
6217+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
6218+ st.blocks, st.blksize,
6219+ (long long)st.size);
6220+ AuDebugOn(p->n >= sizeof(p->a)); /* the text is expected to fit in a[] */
6221+ } else { /* the getattr failure is reported as text, and the open still succeeds */
6222+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
6223+ err = 0;
6224+ }
6225+
6226+out:
6227+ return err;
6228+
6229+}
6230+
6231+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
6232+ size_t count, loff_t *ppos)
6233+{ /* hands out the p->n bytes of text stored in file->private_data */
6234+ struct dbgaufs_arg *p;
6235+
6236+ p = file->private_data;
6237+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6238+}
6239+
6240+/* ---------------------------------------------------------------------- */
6241+
6242+struct dbgaufs_plink_arg { /* laid over one page by dbgaufs_plink_open() */
6243+ int n; /* length of the text held in a[] */
6244+ char a[];
6245+};
6246+
6247+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
6248+ struct file *file)
6249+{ /* frees the page that dbgaufs_plink_open() stored in file->private_data */
6250+ au_delayed_free_page((unsigned long)file->private_data);
6251+ return 0;
6252+}
6253+
6254+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
6255+{ /* format the plink bucket counts as text into a page kept in file->private_data */
6256+ int err, i, limit;
6257+ unsigned long n, sum;
6258+ struct dbgaufs_plink_arg *p;
6259+ struct au_sbinfo *sbinfo;
6260+ struct super_block *sb;
6261+ struct au_sphlhead *sphl;
6262+
6263+ err = -ENOMEM;
6264+ p = (void *)get_zeroed_page(GFP_NOFS); /* zeroed, so p->n starts at 0 */
6265+ if (unlikely(!p))
6266+ goto out;
6267+
6268+ err = -EFBIG; /* returned when the text does not fit into the page */
6269+ sbinfo = inode->i_private;
6270+ sb = sbinfo->si_sb;
6271+ si_noflush_read_lock(sb); /* released on both the success path and out_free */
6272+ if (au_opt_test(au_mntflags(sb), PLINK)) {
6273+ limit = PAGE_SIZE - sizeof(p->n);
6274+
6275+ /* the number of buckets */
6276+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
6277+ p->n += n;
6278+ limit -= n;
6279+
6280+ sum = 0;
6281+ for (i = 0, sphl = sbinfo->si_plink;
6282+ i < AuPlink_NHASH;
6283+ i++, sphl++) {
6284+ n = au_sphl_count(sphl);
6285+ sum += n;
6286+
6287+ n = snprintf(p->a + p->n, limit, "%lu ", n);
6288+ p->n += n;
6289+ limit -= n;
6290+ if (unlikely(limit <= 0))
6291+ goto out_free;
6292+ }
6293+ p->a[p->n - 1] = '\n'; /* the trailing space of the last count becomes a newline */
6294+
6295+ /* the sum of plinks */
6296+ n = snprintf(p->a + p->n, limit, "%lu\n", sum);
6297+ p->n += n;
6298+ limit -= n;
6299+ if (unlikely(limit <= 0))
6300+ goto out_free;
6301+ } else {
6302+#define str "1\n0\n0\n"
6303+ p->n = sizeof(str) - 1;
6304+ strcpy(p->a, str);
6305+#undef str
6306+ }
6307+ si_read_unlock(sb);
6308+ err = 0;
6309+ file->private_data = p;
6310+ goto out; /* success */
6311+
6312+out_free:
6313+ si_read_unlock(sb); /* the -EFBIG path still holds the si read lock taken above */
6314+ au_delayed_free_page((unsigned long)p);
6315+out:
6316+ return err;
6317+}
6318+
6319+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
6320+ size_t count, loff_t *ppos)
6321+{
6322+ struct dbgaufs_plink_arg *p;
6323+
6324+ p = file->private_data;
6325+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6326+}
6327+
6328+static const struct file_operations dbgaufs_plink_fop = {
6329+ .owner = THIS_MODULE,
6330+ .open = dbgaufs_plink_open,
6331+ .release = dbgaufs_plink_release,
6332+ .read = dbgaufs_plink_read
6333+};
6334+
6335+/* ---------------------------------------------------------------------- */
6336+
6337+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
6338+{
6339+ int err;
6340+ struct au_sbinfo *sbinfo;
6341+ struct super_block *sb;
6342+
6343+ sbinfo = inode->i_private;
6344+ sb = sbinfo->si_sb;
6345+ si_noflush_read_lock(sb);
6346+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
6347+ si_read_unlock(sb);
6348+ return err;
6349+}
6350+
6351+static const struct file_operations dbgaufs_xib_fop = {
6352+ .owner = THIS_MODULE,
6353+ .open = dbgaufs_xib_open,
6354+ .release = dbgaufs_xi_release,
6355+ .read = dbgaufs_xi_read
6356+};
6357+
6358+/* ---------------------------------------------------------------------- */
6359+
6360+#define DbgaufsXi_PREFIX "xi"
6361+/* open "xi<bindex>": report the xino file of the branch at <bindex> */
6362+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
6363+{
6364+ int err;
6365+ long l;
6366+ struct au_sbinfo *sbinfo;
6367+ struct super_block *sb;
6368+ struct file *xf;
6369+ struct qstr *name;
6370+
6371+ err = -ENOENT;
6372+ xf = NULL;
6373+ name = &file->f_path.dentry->d_name;
6374+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
6375+ || memcmp(name->name, DbgaufsXi_PREFIX,
6376+ sizeof(DbgaufsXi_PREFIX) - 1)))
6377+ goto out;
6378+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
6379+ if (unlikely(err))
6380+ goto out;
6381+
6382+ sbinfo = inode->i_private;
6383+ sb = sbinfo->si_sb;
6384+ si_noflush_read_lock(sb);
6385+ if (l >= 0 && l <= au_sbbot(sb)) { /* kstrtol() also parses "-N" */
6386+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
6387+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
6388+ } else
6389+ err = -ENOENT;
6390+ si_read_unlock(sb);
6391+
6392+out:
6393+ return err;
6394+}
6395+
6396+static const struct file_operations dbgaufs_xino_fop = {
6397+ .owner = THIS_MODULE,
6398+ .open = dbgaufs_xino_open,
6399+ .release = dbgaufs_xi_release,
6400+ .read = dbgaufs_xi_read
6401+};
6402+
6403+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
6404+{
6405+ aufs_bindex_t bbot;
6406+ struct au_branch *br;
6407+ struct au_xino_file *xi;
6408+
6409+ if (!au_sbi(sb)->si_dbgaufs)
6410+ return;
6411+
6412+ bbot = au_sbbot(sb);
6413+ for (; bindex <= bbot; bindex++) {
6414+ br = au_sbr(sb, bindex);
6415+ xi = &br->br_xino;
6416+ /* debugfs acquires the parent i_mutex */
6417+ lockdep_off();
6418+ debugfs_remove(xi->xi_dbgaufs);
6419+ lockdep_on();
6420+ xi->xi_dbgaufs = NULL;
6421+ }
6422+}
6423+
6424+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
6425+{
6426+ struct au_sbinfo *sbinfo;
6427+ struct dentry *parent;
6428+ struct au_branch *br;
6429+ struct au_xino_file *xi;
6430+ aufs_bindex_t bbot;
6431+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
6432+
6433+ sbinfo = au_sbi(sb);
6434+ parent = sbinfo->si_dbgaufs;
6435+ if (!parent)
6436+ return;
6437+
6438+ bbot = au_sbbot(sb);
6439+ for (; bindex <= bbot; bindex++) {
6440+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
6441+ br = au_sbr(sb, bindex);
6442+ xi = &br->br_xino;
6443+ AuDebugOn(xi->xi_dbgaufs);
6444+ /* debugfs acquires the parent i_mutex */
6445+ lockdep_off();
6446+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
6447+ sbinfo, &dbgaufs_xino_fop);
6448+ lockdep_on();
6449+ /* ignore an error */
6450+ if (unlikely(!xi->xi_dbgaufs))
6451+ AuWarn1("failed %s under debugfs\n", name);
6452+ }
6453+}
6454+
6455+/* ---------------------------------------------------------------------- */
6456+
6457+#ifdef CONFIG_AUFS_EXPORT
6458+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
6459+{
6460+ int err;
6461+ struct au_sbinfo *sbinfo;
6462+ struct super_block *sb;
6463+
6464+ sbinfo = inode->i_private;
6465+ sb = sbinfo->si_sb;
6466+ si_noflush_read_lock(sb);
6467+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
6468+ si_read_unlock(sb);
6469+ return err;
6470+}
6471+
6472+static const struct file_operations dbgaufs_xigen_fop = {
6473+ .owner = THIS_MODULE,
6474+ .open = dbgaufs_xigen_open,
6475+ .release = dbgaufs_xi_release,
6476+ .read = dbgaufs_xi_read
6477+};
6478+
6479+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6480+{
6481+ int err;
6482+
6483+ /*
6484+ * This function is a dynamic '__init' function actually,
6485+ * so the tiny check for si_rwsem is unnecessary.
6486+ */
6487+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6488+
6489+ err = -EIO;
6490+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
6491+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6492+ &dbgaufs_xigen_fop);
6493+ if (sbinfo->si_dbgaufs_xigen)
6494+ err = 0;
6495+
6496+ return err;
6497+}
6498+#else
6499+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6500+{
6501+ return 0;
6502+}
6503+#endif /* CONFIG_AUFS_EXPORT */
6504+
6505+/* ---------------------------------------------------------------------- */
6506+
6507+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
6508+{
6509+ /*
6510+ * This function is a dynamic '__fin' function actually,
6511+ * so the tiny check for si_rwsem is unnecessary.
6512+ */
6513+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6514+
6515+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
6516+ sbinfo->si_dbgaufs = NULL;
6517+ kobject_put(&sbinfo->si_kobj);
6518+}
6519+
6520+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
6521+{
6522+ int err;
6523+ char name[SysaufsSiNameLen];
6524+
6525+ /*
6526+ * This function is a dynamic '__init' function actually,
6527+ * so the tiny check for si_rwsem is unnecessary.
6528+ */
6529+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6530+
6531+ err = -ENOENT;
6532+ if (!dbgaufs) {
6533+ AuErr1("/debug/aufs is uninitialized\n");
6534+ goto out;
6535+ }
6536+
6537+ err = -EIO;
6538+ sysaufs_name(sbinfo, name);
6539+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
6540+ if (unlikely(!sbinfo->si_dbgaufs))
6541+ goto out;
6542+ kobject_get(&sbinfo->si_kobj);
6543+
6544+ sbinfo->si_dbgaufs_xib = debugfs_create_file
6545+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6546+ &dbgaufs_xib_fop);
6547+ if (unlikely(!sbinfo->si_dbgaufs_xib))
6548+ goto out_dir;
6549+
6550+ sbinfo->si_dbgaufs_plink = debugfs_create_file
6551+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6552+ &dbgaufs_plink_fop);
6553+ if (unlikely(!sbinfo->si_dbgaufs_plink))
6554+ goto out_dir;
6555+
6556+ err = dbgaufs_xigen_init(sbinfo);
6557+ if (!err)
6558+ goto out; /* success */
6559+
6560+out_dir:
6561+ dbgaufs_si_fin(sbinfo);
6562+out:
6563+ return err;
6564+}
6565+
6566+/* ---------------------------------------------------------------------- */
6567+
6568+void dbgaufs_fin(void)
6569+{
6570+ debugfs_remove(dbgaufs);
6571+}
6572+
6573+int __init dbgaufs_init(void)
6574+{
6575+ int err;
6576+
6577+ err = -EIO;
6578+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
6579+ if (dbgaufs)
6580+ err = 0;
6581+ return err;
6582+}
6583diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
6584--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
6585+++ linux/fs/aufs/dbgaufs.h 2016-10-09 16:55:36.486034798 +0200
6586@@ -0,0 +1,48 @@
6587+/*
6588+ * Copyright (C) 2005-2016 Junjiro R. Okajima
6589+ *
6590+ * This program, aufs is free software; you can redistribute it and/or modify
6591+ * it under the terms of the GNU General Public License as published by
6592+ * the Free Software Foundation; either version 2 of the License, or
6593+ * (at your option) any later version.
6594+ *
6595+ * This program is distributed in the hope that it will be useful,
6596+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6597+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6598+ * GNU General Public License for more details.
6599+ *
6600+ * You should have received a copy of the GNU General Public License
6601+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
6602+ */
6603+
6604+/*
6605+ * debugfs interface
6606+ */
6607+
6608+#ifndef __DBGAUFS_H__
6609+#define __DBGAUFS_H__
6610+
6611+#ifdef __KERNEL__
6612+
6613+struct super_block;
6614+struct au_sbinfo;
6615+
6616+#ifdef CONFIG_DEBUG_FS
6617+/* dbgaufs.c */
6618+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
6619+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
6620+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
6621+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
6622+void dbgaufs_fin(void);
6623+int __init dbgaufs_init(void);
6624+#else
6625+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
6626+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
6627+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
6628+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
6629+AuStubVoid(dbgaufs_fin, void)
6630+AuStubInt0(__init dbgaufs_init, void)
6631+#endif /* CONFIG_DEBUG_FS */
6632+
6633+#endif /* __KERNEL__ */
6634+#endif /* __DBGAUFS_H__ */
6635diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
6636--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
6637+++ linux/fs/aufs/dcsub.c 2016-10-09 16:55:38.886097714 +0200
6638@@ -0,0 +1,225 @@
6639+/*
6640+ * Copyright (C) 2005-2016 Junjiro R. Okajima
6641+ *
6642+ * This program, aufs is free software; you can redistribute it and/or modify
6643+ * it under the terms of the GNU General Public License as published by
6644+ * the Free Software Foundation; either version 2 of the License, or
6645+ * (at your option) any later version.
6646+ *
6647+ * This program is distributed in the hope that it will be useful,
6648+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6649+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6650+ * GNU General Public License for more details.
6651+ *
6652+ * You should have received a copy of the GNU General Public License
6653+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
6654+ */
6655+
6656+/*
6657+ * sub-routines for dentry cache
6658+ */
6659+
6660+#include "aufs.h"
6661+
6662+static void au_dpage_free(struct au_dpage *dpage)
6663+{
6664+ int i;
6665+ struct dentry **p;
6666+
6667+ p = dpage->dentries;
6668+ for (i = 0; i < dpage->ndentry; i++)
6669+ dput(*p++);
6670+ au_delayed_free_page((unsigned long)dpage->dentries);
6671+}
6672+
6673+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
6674+{
6675+ int err;
6676+ void *p;
6677+
6678+ err = -ENOMEM;
6679+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
6680+ if (unlikely(!dpages->dpages))
6681+ goto out;
6682+
6683+ p = (void *)__get_free_page(gfp);
6684+ if (unlikely(!p))
6685+ goto out_dpages;
6686+
6687+ dpages->dpages[0].ndentry = 0;
6688+ dpages->dpages[0].dentries = p;
6689+ dpages->ndpage = 1;
6690+ return 0; /* success */
6691+
6692+out_dpages:
6693+ au_delayed_kfree(dpages->dpages);
6694+out:
6695+ return err;
6696+}
6697+
6698+void au_dpages_free(struct au_dcsub_pages *dpages)
6699+{
6700+ int i;
6701+ struct au_dpage *p;
6702+
6703+ p = dpages->dpages;
6704+ for (i = 0; i < dpages->ndpage; i++)
6705+ au_dpage_free(p++);
6706+ au_delayed_kfree(dpages->dpages);
6707+}
6708+
6709+static int au_dpages_append(struct au_dcsub_pages *dpages,
6710+ struct dentry *dentry, gfp_t gfp)
6711+{
6712+ int err, sz;
6713+ struct au_dpage *dpage;
6714+ void *p;
6715+
6716+ dpage = dpages->dpages + dpages->ndpage - 1;
6717+ sz = PAGE_SIZE / sizeof(dentry);
6718+ if (unlikely(dpage->ndentry >= sz)) {
6719+ AuLabel(new dpage);
6720+ err = -ENOMEM;
6721+ sz = dpages->ndpage * sizeof(*dpages->dpages);
6722+ p = au_kzrealloc(dpages->dpages, sz,
6723+ sz + sizeof(*dpages->dpages), gfp,
6724+ /*may_shrink*/0);
6725+ if (unlikely(!p))
6726+ goto out;
6727+
6728+ dpages->dpages = p;
6729+ dpage = dpages->dpages + dpages->ndpage;
6730+ p = (void *)__get_free_page(gfp);
6731+ if (unlikely(!p))
6732+ goto out;
6733+
6734+ dpage->ndentry = 0;
6735+ dpage->dentries = p;
6736+ dpages->ndpage++;
6737+ }
6738+
6739+ AuDebugOn(au_dcount(dentry) <= 0);
6740+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
6741+ return 0; /* success */
6742+
6743+out:
6744+ return err;
6745+}
6746+
6747+/* todo: BAD approach */
6748+/* copied from linux/fs/dcache.c */
6749+enum d_walk_ret {
6750+ D_WALK_CONTINUE,
6751+ D_WALK_QUIT,
6752+ D_WALK_NORETRY,
6753+ D_WALK_SKIP,
6754+};
6755+
6756+extern void d_walk(struct dentry *parent, void *data,
6757+ enum d_walk_ret (*enter)(void *, struct dentry *),
6758+ void (*finish)(void *));
6759+
6760+struct ac_dpages_arg {
6761+ int err;
6762+ struct au_dcsub_pages *dpages;
6763+ struct super_block *sb;
6764+ au_dpages_test test;
6765+ void *arg;
6766+};
6767+
6768+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
6769+{
6770+ enum d_walk_ret ret;
6771+ struct ac_dpages_arg *arg = _arg;
6772+
6773+ ret = D_WALK_CONTINUE;
6774+ if (dentry->d_sb == arg->sb
6775+ && !IS_ROOT(dentry)
6776+ && au_dcount(dentry) > 0
6777+ && au_di(dentry)
6778+ && (!arg->test || arg->test(dentry, arg->arg))) {
6779+ arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
6780+ if (unlikely(arg->err))
6781+ ret = D_WALK_QUIT;
6782+ }
6783+
6784+ return ret;
6785+}
6786+
6787+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6788+ au_dpages_test test, void *arg)
6789+{
6790+ struct ac_dpages_arg args = {
6791+ .err = 0,
6792+ .dpages = dpages,
6793+ .sb = root->d_sb,
6794+ .test = test,
6795+ .arg = arg
6796+ };
6797+
6798+ d_walk(root, &args, au_call_dpages_append, NULL);
6799+
6800+ return args.err;
6801+}
6802+
6803+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6804+ int do_include, au_dpages_test test, void *arg)
6805+{
6806+ int err;
6807+
6808+ err = 0;
6809+ write_seqlock(&rename_lock);
6810+ spin_lock(&dentry->d_lock);
6811+ if (do_include
6812+ && au_dcount(dentry) > 0
6813+ && (!test || test(dentry, arg)))
6814+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
6815+ spin_unlock(&dentry->d_lock);
6816+ if (unlikely(err))
6817+ goto out;
6818+
6819+ /*
6820+ * RCU for vfsmount is unnecessary since this is a traverse in a single
6821+ * mount
6822+ */
6823+ while (!IS_ROOT(dentry)) {
6824+ dentry = dentry->d_parent; /* rename_lock is locked */
6825+ spin_lock(&dentry->d_lock);
6826+ if (au_dcount(dentry) > 0
6827+ && (!test || test(dentry, arg)))
6828+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
6829+ spin_unlock(&dentry->d_lock);
6830+ if (unlikely(err))
6831+ break;
6832+ }
6833+
6834+out:
6835+ write_sequnlock(&rename_lock);
6836+ return err;
6837+}
6838+
6839+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
6840+{
6841+ return au_di(dentry) && dentry->d_sb == arg;
6842+}
6843+
6844+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6845+ struct dentry *dentry, int do_include)
6846+{
6847+ return au_dcsub_pages_rev(dpages, dentry, do_include,
6848+ au_dcsub_dpages_aufs, dentry->d_sb);
6849+}
6850+
6851+int au_test_subdir(struct dentry *d1, struct dentry *d2)
6852+{
6853+ struct path path[2] = {
6854+ {
6855+ .dentry = d1
6856+ },
6857+ {
6858+ .dentry = d2
6859+ }
6860+ };
6861+
6862+ return path_is_under(path + 0, path + 1);
6863+}
6864diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
6865--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
6866+++ linux/fs/aufs/dcsub.h 2016-10-09 16:55:36.486034798 +0200
6867@@ -0,0 +1,136 @@
6868+/*
6869+ * Copyright (C) 2005-2016 Junjiro R. Okajima
6870+ *
6871+ * This program, aufs is free software; you can redistribute it and/or modify
6872+ * it under the terms of the GNU General Public License as published by
6873+ * the Free Software Foundation; either version 2 of the License, or
6874+ * (at your option) any later version.
6875+ *
6876+ * This program is distributed in the hope that it will be useful,
6877+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6878+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6879+ * GNU General Public License for more details.
6880+ *
6881+ * You should have received a copy of the GNU General Public License
6882+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
6883+ */
6884+
6885+/*
6886+ * sub-routines for dentry cache
6887+ */
6888+
6889+#ifndef __AUFS_DCSUB_H__
6890+#define __AUFS_DCSUB_H__
6891+
6892+#ifdef __KERNEL__
6893+
6894+#include <linux/dcache.h>
6895+#include <linux/fs.h>
6896+
6897+struct au_dpage {
6898+ int ndentry;
6899+ struct dentry **dentries;
6900+};
6901+
6902+struct au_dcsub_pages {
6903+ int ndpage;
6904+ struct au_dpage *dpages;
6905+};
6906+
6907+/* ---------------------------------------------------------------------- */
6908+
6909+/* dcsub.c */
6910+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
6911+void au_dpages_free(struct au_dcsub_pages *dpages);
6912+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
6913+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6914+ au_dpages_test test, void *arg);
6915+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6916+ int do_include, au_dpages_test test, void *arg);
6917+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6918+ struct dentry *dentry, int do_include);
6919+int au_test_subdir(struct dentry *d1, struct dentry *d2);
6920+
6921+/* ---------------------------------------------------------------------- */
6922+
6923+/*
6924+ * todo: in linux-3.13, several similar (but faster) helpers are added to
6925+ * include/linux/dcache.h. Try them (in the future).
6926+ */
6927+
6928+static inline int au_d_hashed_positive(struct dentry *d)
6929+{
6930+ int err;
6931+ struct inode *inode = d_inode(d);
6932+
6933+ err = 0;
6934+ if (unlikely(d_unhashed(d)
6935+ || d_is_negative(d)
6936+ || !inode->i_nlink))
6937+ err = -ENOENT;
6938+ return err;
6939+}
6940+
6941+static inline int au_d_linkable(struct dentry *d)
6942+{
6943+ int err;
6944+ struct inode *inode = d_inode(d);
6945+
6946+ err = au_d_hashed_positive(d);
6947+ if (err
6948+ && d_is_positive(d)
6949+ && (inode->i_state & I_LINKABLE))
6950+ err = 0;
6951+ return err;
6952+}
6953+
6954+static inline int au_d_alive(struct dentry *d)
6955+{
6956+ int err;
6957+ struct inode *inode;
6958+
6959+ err = 0;
6960+ if (!IS_ROOT(d))
6961+ err = au_d_hashed_positive(d);
6962+ else {
6963+ inode = d_inode(d);
6964+ if (unlikely(d_unlinked(d)
6965+ || d_is_negative(d)
6966+ || !inode->i_nlink))
6967+ err = -ENOENT;
6968+ }
6969+ return err;
6970+}
6971+
6972+static inline int au_alive_dir(struct dentry *d)
6973+{
6974+ int err;
6975+
6976+ err = au_d_alive(d);
6977+ if (unlikely(err || IS_DEADDIR(d_inode(d))))
6978+ err = -ENOENT;
6979+ return err;
6980+}
6981+
6982+static inline int au_qstreq(struct qstr *a, struct qstr *b)
6983+{
6984+ return a->len == b->len
6985+ && !memcmp(a->name, b->name, a->len);
6986+}
6987+
6988+/*
6989+ * by the commit
6990+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
6991+ * taking d_lock
6992+ * the type of d_lockref.count became int, but the inlined function d_count()
6993+ * still returns unsigned int.
6994+ * I don't know why. Maybe it is for the sake of all d_count() users?
6995+ * Anyway au_dcount() lives on.
6996+ */
6997+static inline int au_dcount(struct dentry *d)
6998+{
6999+ return (int)d_count(d);
7000+}
7001+
7002+#endif /* __KERNEL__ */
7003+#endif /* __AUFS_DCSUB_H__ */
7004diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
7005--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
7006+++ linux/fs/aufs/debug.c 2016-10-09 16:55:36.486034798 +0200
7007@@ -0,0 +1,440 @@
7008+/*
7009+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7010+ *
7011+ * This program, aufs is free software; you can redistribute it and/or modify
7012+ * it under the terms of the GNU General Public License as published by
7013+ * the Free Software Foundation; either version 2 of the License, or
7014+ * (at your option) any later version.
7015+ *
7016+ * This program is distributed in the hope that it will be useful,
7017+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7018+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7019+ * GNU General Public License for more details.
7020+ *
7021+ * You should have received a copy of the GNU General Public License
7022+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7023+ */
7024+
7025+/*
7026+ * debug print functions
7027+ */
7028+
7029+#include "aufs.h"
7030+
7031+/* Returns 0, or -errno. arg is in kp->arg. */
7032+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
7033+{
7034+ int err, n;
7035+
7036+ err = kstrtoint(val, 0, &n);
7037+ if (!err) {
7038+ if (n > 0)
7039+ au_debug_on();
7040+ else
7041+ au_debug_off();
7042+ }
7043+ return err;
7044+}
7045+
7046+/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
7047+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
7048+{
7049+ atomic_t *a;
7050+
7051+ a = kp->arg;
7052+ return sprintf(buffer, "%d", atomic_read(a));
7053+}
7054+
7055+static struct kernel_param_ops param_ops_atomic_t = {
7056+ .set = param_atomic_t_set,
7057+ .get = param_atomic_t_get
7058+ /* void (*free)(void *arg) */
7059+};
7060+
7061+atomic_t aufs_debug = ATOMIC_INIT(0);
7062+MODULE_PARM_DESC(debug, "debug print");
7063+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
7064+
7065+DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
7066+char *au_plevel = KERN_DEBUG;
7067+#define dpri(fmt, ...) do { \
7068+ if ((au_plevel \
7069+ && strcmp(au_plevel, KERN_DEBUG)) \
7070+ || au_debug_test()) \
7071+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
7072+} while (0)
7073+
7074+/* ---------------------------------------------------------------------- */
7075+
7076+void au_dpri_whlist(struct au_nhash *whlist)
7077+{
7078+ unsigned long ul, n;
7079+ struct hlist_head *head;
7080+ struct au_vdir_wh *pos;
7081+
7082+ n = whlist->nh_num;
7083+ head = whlist->nh_head;
7084+ for (ul = 0; ul < n; ul++) {
7085+ hlist_for_each_entry(pos, head, wh_hash)
7086+ dpri("b%d, %.*s, %d\n",
7087+ pos->wh_bindex,
7088+ pos->wh_str.len, pos->wh_str.name,
7089+ pos->wh_str.len);
7090+ head++;
7091+ }
7092+}
7093+
7094+void au_dpri_vdir(struct au_vdir *vdir)
7095+{
7096+ unsigned long ul;
7097+ union au_vdir_deblk_p p;
7098+ unsigned char *o;
7099+
7100+ if (!vdir || IS_ERR(vdir)) {
7101+ dpri("err %ld\n", PTR_ERR(vdir));
7102+ return;
7103+ }
7104+
7105+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
7106+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
7107+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
7108+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
7109+ p.deblk = vdir->vd_deblk[ul];
7110+ o = p.deblk;
7111+ dpri("[%lu]: %p\n", ul, o);
7112+ }
7113+}
7114+
7115+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
7116+ struct dentry *wh)
7117+{
7118+ char *n = NULL;
7119+ int l = 0;
7120+
7121+ if (!inode || IS_ERR(inode)) {
7122+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
7123+ return -1;
7124+ }
7125+
7126+ /* the type of i_blocks depends upon CONFIG_LBDAF */
7127+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
7128+ && sizeof(inode->i_blocks) != sizeof(u64));
7129+ if (wh) {
7130+ n = (void *)wh->d_name.name;
7131+ l = wh->d_name.len;
7132+ }
7133+
7134+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
7135+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
7136+ bindex, inode,
7137+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
7138+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
7139+ i_size_read(inode), (unsigned long long)inode->i_blocks,
7140+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
7141+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
7142+ inode->i_state, inode->i_flags, inode->i_version,
7143+ inode->i_generation,
7144+ l ? ", wh " : "", l, n);
7145+ return 0;
7146+}
7147+
7148+void au_dpri_inode(struct inode *inode)
7149+{
7150+ struct au_iinfo *iinfo;
7151+ struct au_hinode *hi;
7152+ aufs_bindex_t bindex;
7153+ int err, hn;
7154+
7155+ err = do_pri_inode(-1, inode, -1, NULL);
7156+ if (err || !au_test_aufs(inode->i_sb) || au_is_bad_inode(inode))
7157+ return;
7158+
7159+ iinfo = au_ii(inode);
7160+ dpri("i-1: btop %d, bbot %d, gen %d\n",
7161+ iinfo->ii_btop, iinfo->ii_bbot, au_iigen(inode, NULL));
7162+ if (iinfo->ii_btop < 0)
7163+ return;
7164+ hn = 0;
7165+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; bindex++) {
7166+ hi = au_hinode(iinfo, bindex);
7167+ hn = !!au_hn(hi);
7168+ do_pri_inode(bindex, hi->hi_inode, hn, hi->hi_whdentry);
7169+ }
7170+}
7171+
7172+void au_dpri_dalias(struct inode *inode)
7173+{
7174+ struct dentry *d;
7175+
7176+ spin_lock(&inode->i_lock);
7177+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
7178+ au_dpri_dentry(d);
7179+ spin_unlock(&inode->i_lock);
7180+}
7181+
7182+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
7183+{
7184+ struct dentry *wh = NULL;
7185+ int hn;
7186+ struct inode *inode;
7187+ struct au_iinfo *iinfo;
7188+ struct au_hinode *hi;
7189+
7190+ if (!dentry || IS_ERR(dentry)) {
7191+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
7192+ return -1;
7193+ }
7194+ /* do not call dget_parent() here */
7195+ /* note: access d_xxx without d_lock */
7196+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
7197+ bindex, dentry, dentry,
7198+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
7199+ au_dcount(dentry), dentry->d_flags,
7200+ d_unhashed(dentry) ? "un" : "");
7201+ hn = -1;
7202+ inode = NULL;
7203+ if (d_is_positive(dentry))
7204+ inode = d_inode(dentry);
7205+ if (inode
7206+ && au_test_aufs(dentry->d_sb)
7207+ && bindex >= 0
7208+ && !au_is_bad_inode(inode)) {
7209+ iinfo = au_ii(inode);
7210+ hi = au_hinode(iinfo, bindex);
7211+ hn = !!au_hn(hi);
7212+ wh = hi->hi_whdentry;
7213+ }
7214+ do_pri_inode(bindex, inode, hn, wh);
7215+ return 0;
7216+}
7217+
7218+void au_dpri_dentry(struct dentry *dentry)
7219+{
7220+ struct au_dinfo *dinfo;
7221+ aufs_bindex_t bindex;
7222+ int err;
7223+
7224+ err = do_pri_dentry(-1, dentry);
7225+ if (err || !au_test_aufs(dentry->d_sb))
7226+ return;
7227+
7228+ dinfo = au_di(dentry);
7229+ if (!dinfo)
7230+ return;
7231+ dpri("d-1: btop %d, bbot %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
7232+ dinfo->di_btop, dinfo->di_bbot,
7233+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
7234+ dinfo->di_tmpfile);
7235+ if (dinfo->di_btop < 0)
7236+ return;
7237+ for (bindex = dinfo->di_btop; bindex <= dinfo->di_bbot; bindex++)
7238+ do_pri_dentry(bindex, au_hdentry(dinfo, bindex)->hd_dentry);
7239+}
7240+
7241+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
7242+{
7243+ char a[32];
7244+
7245+ if (!file || IS_ERR(file)) {
7246+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
7247+ return -1;
7248+ }
7249+ a[0] = 0;
7250+ if (bindex < 0
7251+ && !IS_ERR_OR_NULL(file->f_path.dentry)
7252+ && au_test_aufs(file->f_path.dentry->d_sb)
7253+ && au_fi(file))
7254+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
7255+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
7256+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
7257+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
7258+ file->f_version, file->f_pos, a);
7259+ if (!IS_ERR_OR_NULL(file->f_path.dentry))
7260+ do_pri_dentry(bindex, file->f_path.dentry);
7261+ return 0;
7262+}
7263+
7264+void au_dpri_file(struct file *file)
7265+{
7266+ struct au_finfo *finfo;
7267+ struct au_fidir *fidir;
7268+ struct au_hfile *hfile;
7269+ aufs_bindex_t bindex;
7270+ int err;
7271+
7272+ err = do_pri_file(-1, file);
7273+ if (err
7274+ || IS_ERR_OR_NULL(file->f_path.dentry)
7275+ || !au_test_aufs(file->f_path.dentry->d_sb))
7276+ return;
7277+
7278+ finfo = au_fi(file);
7279+ if (!finfo)
7280+ return;
7281+ if (finfo->fi_btop < 0)
7282+ return;
7283+ fidir = finfo->fi_hdir;
7284+ if (!fidir)
7285+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
7286+ else
7287+ for (bindex = finfo->fi_btop;
7288+ bindex >= 0 && bindex <= fidir->fd_bbot;
7289+ bindex++) {
7290+ hfile = fidir->fd_hfile + bindex;
7291+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
7292+ }
7293+}
7294+
7295+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
7296+{
7297+ struct vfsmount *mnt;
7298+ struct super_block *sb;
7299+
7300+ if (!br || IS_ERR(br))
7301+ goto out;
7302+ mnt = au_br_mnt(br);
7303+ if (!mnt || IS_ERR(mnt))
7304+ goto out;
7305+ sb = mnt->mnt_sb;
7306+ if (!sb || IS_ERR(sb))
7307+ goto out;
7308+
7309+ dpri("s%d: {perm 0x%x, id %d, cnt %lld, wbr %p}, "
7310+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
7311+ "xino %d\n",
7312+ bindex, br->br_perm, br->br_id, au_br_count(br),
7313+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
7314+ sb->s_flags, sb->s_count,
7315+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
7316+ return 0;
7317+
7318+out:
7319+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
7320+ return -1;
7321+}
7322+
7323+void au_dpri_sb(struct super_block *sb)
7324+{
7325+ struct au_sbinfo *sbinfo;
7326+ aufs_bindex_t bindex;
7327+ int err;
7328+ /* to reduce stack size */
7329+ struct {
7330+ struct vfsmount mnt;
7331+ struct au_branch fake;
7332+ } *a;
7333+
7334+ /* this function can be called from magic sysrq */
7335+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
7336+ if (unlikely(!a)) {
7337+ dpri("no memory\n");
7338+ return;
7339+ }
7340+
7341+ a->mnt.mnt_sb = sb;
7342+ a->fake.br_path.mnt = &a->mnt;
7343+ au_br_count_init(&a->fake);
7344+ err = do_pri_br(-1, &a->fake);
7345+ au_br_count_fin(&a->fake);
7346+ au_delayed_kfree(a);
7347+ dpri("dev 0x%x\n", sb->s_dev);
7348+ if (err || !au_test_aufs(sb))
7349+ return;
7350+
7351+ sbinfo = au_sbi(sb);
7352+ if (!sbinfo)
7353+ return;
7354+ dpri("nw %d, gen %u, kobj %d\n",
7355+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
7356+ atomic_read(&sbinfo->si_kobj.kref.refcount));
7357+ for (bindex = 0; bindex <= sbinfo->si_bbot; bindex++)
7358+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
7359+}
7360+
7361+/* ---------------------------------------------------------------------- */
7362+
7363+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
7364+{ /* BUG() when, at some branch index, the lower inode of dentry's inode differs from the inode of its lower dentry; func/line identify the caller in the debug output */
7365+ struct inode *h_inode, *inode = d_inode(dentry);
7366+ struct dentry *h_dentry;
7367+ aufs_bindex_t bindex, bbot, bi;
7368+
7369+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
7370+ return;
7371+
7372+ bbot = au_dbbot(dentry); /* bbot = the smaller of au_dbbot(dentry) and au_ibbot(inode) */
7373+ bi = au_ibbot(inode);
7374+ if (bi < bbot)
7375+ bbot = bi;
7376+ bindex = au_dbtop(dentry); /* bindex = the larger of au_dbtop(dentry) and au_ibtop(inode) */
7377+ bi = au_ibtop(inode);
7378+ if (bi > bindex)
7379+ bindex = bi;
7380+
7381+ for (; bindex <= bbot; bindex++) {
7382+ h_dentry = au_h_dptr(dentry, bindex);
7383+ if (!h_dentry) /* indices without a lower dentry are not checked */
7384+ continue;
7385+ h_inode = au_h_iptr(inode, bindex);
7386+ if (unlikely(h_inode != d_inode(h_dentry))) {
7387+ au_debug_on(); /* enable debug output just for the dump below */
7388+ AuDbg("b%d, %s:%d\n", bindex, func, line);
7389+ AuDbgDentry(dentry);
7390+ AuDbgInode(inode);
7391+ au_debug_off();
7392+ BUG();
7393+ }
7394+ }
7395+}
7396+
7397+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
7398+{ /* AuDebugOn() every dentry collected by au_dcsub_pages_rev_aufs() for parent whose au_digen_test(..., sigen) is non-zero */
7399+ int err, i, j;
7400+ struct au_dcsub_pages dpages;
7401+ struct au_dpage *dpage;
7402+ struct dentry **dentries;
7403+
7404+ err = au_dpages_init(&dpages, GFP_NOFS);
7405+ AuDebugOn(err);
7406+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
7407+ AuDebugOn(err);
7408+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) { /* walk pages and entries from last to first */
7409+ dpage = dpages.dpages + i;
7410+ dentries = dpage->dentries;
7411+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
7412+ AuDebugOn(au_digen_test(dentries[j], sigen));
7413+ }
7414+ au_dpages_free(&dpages);
7415+}
7416+
7417+void au_dbg_verify_kthread(void)
7418+{ /* calls au_dbg_blocked() when au_wkq_test() is true; the WARN_ON below is intentionally disabled */
7419+ if (au_wkq_test()) {
7420+ au_dbg_blocked();
7421+ /*
7422+ * It may be recursive, but udba=notify between two aufs mounts,
7423+ * where a single ro branch is shared, is not a problem.
7424+ */
7425+ /* WARN_ON(1); */
7426+ }
7427+}
7428+
7429+/* ---------------------------------------------------------------------- */
7430+
7431+int __init au_debug_init(void)
7432+{ /* init-time sanity checks on two integer types; always returns 0 */
7433+ aufs_bindex_t bindex;
7434+ struct au_vdir_destr destr;
7435+
7436+ bindex = -1; /* aufs_bindex_t must keep -1 negative, i.e. be a signed type */
7437+ AuDebugOn(bindex >= 0);
7438+
7439+ destr.len = -1; /* after storing -1, len must not compare below NAME_MAX (fires if len is signed or too narrow) */
7440+ AuDebugOn(destr.len < NAME_MAX);
7441+
7442+#ifdef CONFIG_4KSTACKS
7443+ pr_warn("CONFIG_4KSTACKS is defined.\n");
7444+#endif
7445+
7446+ return 0;
7447+}
7448diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
7449--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
7450+++ linux/fs/aufs/debug.h 2016-10-09 16:55:36.486034798 +0200
7451@@ -0,0 +1,225 @@
7452+/*
7453+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7454+ *
7455+ * This program, aufs is free software; you can redistribute it and/or modify
7456+ * it under the terms of the GNU General Public License as published by
7457+ * the Free Software Foundation; either version 2 of the License, or
7458+ * (at your option) any later version.
7459+ *
7460+ * This program is distributed in the hope that it will be useful,
7461+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7462+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7463+ * GNU General Public License for more details.
7464+ *
7465+ * You should have received a copy of the GNU General Public License
7466+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7467+ */
7468+
7469+/*
7470+ * debug print functions
7471+ */
7472+
7473+#ifndef __AUFS_DEBUG_H__
7474+#define __AUFS_DEBUG_H__
7475+
7476+#ifdef __KERNEL__
7477+
7478+#include <linux/atomic.h>
7479+#include <linux/module.h>
7480+#include <linux/kallsyms.h>
7481+#include <linux/sysrq.h>
7482+
7483+#ifdef CONFIG_AUFS_DEBUG
7484+#define AuDebugOn(a) BUG_ON(a)
7485+
7486+/* module parameter: debug output is enabled while this counter is positive */
7487+extern atomic_t aufs_debug;
7488+static inline void au_debug_on(void)
7489+{
7490+ atomic_inc(&aufs_debug);
7491+}
7492+static inline void au_debug_off(void)
7493+{
7494+ atomic_dec_if_positive(&aufs_debug); /* decrement only while positive, so the counter is not driven below zero here */
7495+}
7496+
7497+static inline int au_debug_test(void)
7498+{
7499+ return atomic_read(&aufs_debug) > 0;
7500+}
7501+#else /* !CONFIG_AUFS_DEBUG: AuDebugOn() expands to nothing and the helpers are stubs */
7502+#define AuDebugOn(a) do {} while (0)
7503+AuStubVoid(au_debug_on, void)
7504+AuStubVoid(au_debug_off, void)
7505+AuStubInt0(au_debug_test, void)
7506+#endif /* CONFIG_AUFS_DEBUG */
7507+
7508+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
7509+
7510+/* ---------------------------------------------------------------------- */
7511+
7512+/* debug print: AuDbg() prints only while au_debug_test() is true; the Au*1() variants print only when their static counter is 0 */
7513+
7514+#define AuDbg(fmt, ...) do { \
7515+ if (au_debug_test()) \
7516+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
7517+} while (0)
7518+#define AuLabel(l) AuDbg(#l "\n")
7519+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
7520+#define AuWarn1(fmt, ...) do { \
7521+ static unsigned char _c; \
7522+ if (!_c++) \
7523+ pr_warn(fmt, ##__VA_ARGS__); \
7524+} while (0) /* NOTE(review): _c is an unsigned char, so it wraps to 0 and the message is printed again every 256th call */
7525+
7526+#define AuErr1(fmt, ...) do { \
7527+ static unsigned char _c; \
7528+ if (!_c++) \
7529+ pr_err(fmt, ##__VA_ARGS__); \
7530+} while (0)
7531+
7532+#define AuIOErr1(fmt, ...) do { \
7533+ static unsigned char _c; \
7534+ if (!_c++) \
7535+ AuIOErr(fmt, ##__VA_ARGS__); \
7536+} while (0)
7537+
7538+#define AuUnsupportMsg "This operation is not supported." \
7539+ " Please report this application to aufs-users ML."
7540+#define AuUnsupport(fmt, ...) do { \
7541+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
7542+ dump_stack(); \
7543+} while (0)
7544+
7545+#define AuTraceErr(e) do { \
7546+ if (unlikely((e) < 0)) \
7547+ AuDbg("err %d\n", (int)(e)); \
7548+} while (0)
7549+
7550+#define AuTraceErrPtr(p) do { \
7551+ if (IS_ERR(p)) \
7552+ AuDbg("err %ld\n", PTR_ERR(p)); \
7553+} while (0)
7554+
7555+/* dirty macro for debug print: expands to TWO arguments (length, name); use with "%.*s" and caution */
7556+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
7557+
7558+/* ---------------------------------------------------------------------- */
7559+
7560+struct dentry;
7561+#ifdef CONFIG_AUFS_DEBUG
7562+extern struct mutex au_dbg_mtx; /* taken by every AuDbg*() dump macro below around its output */
7563+extern char *au_plevel;
7564+struct au_nhash;
7565+void au_dpri_whlist(struct au_nhash *whlist);
7566+struct au_vdir;
7567+void au_dpri_vdir(struct au_vdir *vdir);
7568+struct inode;
7569+void au_dpri_inode(struct inode *inode);
7570+void au_dpri_dalias(struct inode *inode);
7571+void au_dpri_dentry(struct dentry *dentry);
7572+struct file;
7573+void au_dpri_file(struct file *filp);
7574+struct super_block;
7575+void au_dpri_sb(struct super_block *sb);
7576+
7577+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
7578+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
7579+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
7580+void au_dbg_verify_kthread(void);
7581+
7582+int __init au_debug_init(void);
7583+
7584+#define AuDbgWhlist(w) do { \
7585+ mutex_lock(&au_dbg_mtx); \
7586+ AuDbg(#w "\n"); \
7587+ au_dpri_whlist(w); \
7588+ mutex_unlock(&au_dbg_mtx); \
7589+} while (0)
7590+
7591+#define AuDbgVdir(v) do { \
7592+ mutex_lock(&au_dbg_mtx); \
7593+ AuDbg(#v "\n"); \
7594+ au_dpri_vdir(v); \
7595+ mutex_unlock(&au_dbg_mtx); \
7596+} while (0)
7597+
7598+#define AuDbgInode(i) do { \
7599+ mutex_lock(&au_dbg_mtx); \
7600+ AuDbg(#i "\n"); \
7601+ au_dpri_inode(i); \
7602+ mutex_unlock(&au_dbg_mtx); \
7603+} while (0)
7604+
7605+#define AuDbgDAlias(i) do { \
7606+ mutex_lock(&au_dbg_mtx); \
7607+ AuDbg(#i "\n"); \
7608+ au_dpri_dalias(i); \
7609+ mutex_unlock(&au_dbg_mtx); \
7610+} while (0)
7611+
7612+#define AuDbgDentry(d) do { \
7613+ mutex_lock(&au_dbg_mtx); \
7614+ AuDbg(#d "\n"); \
7615+ au_dpri_dentry(d); \
7616+ mutex_unlock(&au_dbg_mtx); \
7617+} while (0)
7618+
7619+#define AuDbgFile(f) do { \
7620+ mutex_lock(&au_dbg_mtx); \
7621+ AuDbg(#f "\n"); \
7622+ au_dpri_file(f); \
7623+ mutex_unlock(&au_dbg_mtx); \
7624+} while (0)
7625+
7626+#define AuDbgSb(sb) do { \
7627+ mutex_lock(&au_dbg_mtx); \
7628+ AuDbg(#sb "\n"); \
7629+ au_dpri_sb(sb); \
7630+ mutex_unlock(&au_dbg_mtx); \
7631+} while (0)
7632+
7633+#define AuDbgSym(addr) do { \
7634+ char sym[KSYM_SYMBOL_LEN]; \
7635+ sprint_symbol(sym, (unsigned long)addr); \
7636+ AuDbg("%s\n", sym); \
7637+} while (0)
7638+#else /* !CONFIG_AUFS_DEBUG: verifiers become stubs and every dump macro expands to an empty statement */
7639+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
7640+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
7641+AuStubVoid(au_dbg_verify_kthread, void)
7642+AuStubInt0(__init au_debug_init, void)
7643+
7644+#define AuDbgWhlist(w) do {} while (0)
7645+#define AuDbgVdir(v) do {} while (0)
7646+#define AuDbgInode(i) do {} while (0)
7647+#define AuDbgDAlias(i) do {} while (0)
7648+#define AuDbgDentry(d) do {} while (0)
7649+#define AuDbgFile(f) do {} while (0)
7650+#define AuDbgSb(sb) do {} while (0)
7651+#define AuDbgSym(addr) do {} while (0)
7652+#endif /* CONFIG_AUFS_DEBUG */
7653+
7654+/* ---------------------------------------------------------------------- */
7655+
7656+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
7657+int __init au_sysrq_init(void);
7658+void au_sysrq_fin(void);
7659+
7660+#ifdef CONFIG_HW_CONSOLE /* au_dbg_blocked() does real work only with both MAGIC_SYSRQ and HW_CONSOLE */
7661+#define au_dbg_blocked() do { \
7662+ WARN_ON(1); \
7663+ handle_sysrq('w'); \
7664+} while (0)
7665+#else
7666+AuStubVoid(au_dbg_blocked, void)
7667+#endif
7668+
7669+#else /* !CONFIG_AUFS_MAGIC_SYSRQ: all three entry points are stubs */
7670+AuStubInt0(__init au_sysrq_init, void)
7671+AuStubVoid(au_sysrq_fin, void)
7672+AuStubVoid(au_dbg_blocked, void)
7673+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
7674+
7675+#endif /* __KERNEL__ */
7676+#endif /* __AUFS_DEBUG_H__ */
7677diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
7678--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
7679+++ linux/fs/aufs/dentry.c 2016-10-09 16:55:38.889431135 +0200
7680@@ -0,0 +1,1130 @@
7681+/*
7682+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7683+ *
7684+ * This program, aufs is free software; you can redistribute it and/or modify
7685+ * it under the terms of the GNU General Public License as published by
7686+ * the Free Software Foundation; either version 2 of the License, or
7687+ * (at your option) any later version.
7688+ *
7689+ * This program is distributed in the hope that it will be useful,
7690+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7691+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7692+ * GNU General Public License for more details.
7693+ *
7694+ * You should have received a copy of the GNU General Public License
7695+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7696+ */
7697+
7698+/*
7699+ * lookup and dentry operations
7700+ */
7701+
7702+#include <linux/namei.h>
7703+#include "aufs.h"
7704+
7705+struct au_do_lookup_args {
7706+ unsigned int flags; /* lookup flags; au_do_lookup() tests ALLOW_NEG and IGNORE_PERM via au_ftest_lkup() */
7707+ mode_t type; /* S_IFMT bits a positive lower inode must match; 0 means not yet fixed, any type accepted */
7708+};
7709+
7710+/*
7711+ * looks up @dentry's name under @h_parent on branch @bindex and returns a positive/negative dentry, NULL or an error.
7712+ * NULL means whiteout-ed or not-found.
7713+ */
7714+static struct dentry*
7715+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
7716+ aufs_bindex_t bindex, struct qstr *wh_name,
7717+ struct au_do_lookup_args *args)
7718+{
7719+ struct dentry *h_dentry;
7720+ struct inode *h_inode;
7721+ struct au_branch *br;
7722+ int wh_found, opq;
7723+ unsigned char wh_able;
7724+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
7725+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
7726+ IGNORE_PERM);
7727+
7728+ wh_found = 0;
7729+ br = au_sbr(dentry->d_sb, bindex);
7730+ wh_able = !!au_br_whable(br->br_perm);
7731+ if (wh_able) /* the whiteout test is done only when au_br_whable() accepts the branch permission */
7732+ wh_found = au_wh_test(h_parent, wh_name, ignore_perm);
7733+ h_dentry = ERR_PTR(wh_found); /* this is the return value when wh_found is negative */
7734+ if (!wh_found)
7735+ goto real_lookup;
7736+ if (unlikely(wh_found < 0))
7737+ goto out;
7738+
7739+ /* We found a whiteout */
7740+ /* au_set_dbbot(dentry, bindex); */
7741+ au_set_dbwh(dentry, bindex);
7742+ if (!allow_neg)
7743+ return NULL; /* success */
7744+
7745+real_lookup: /* ignore_perm selects au_sio_lkup_one() instead of vfsub_lkup_one() */
7746+ if (!ignore_perm)
7747+ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
7748+ else
7749+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
7750+ if (IS_ERR(h_dentry)) {
7751+ if (PTR_ERR(h_dentry) == -ENAMETOOLONG /* a too-long name is reported as not-found (NULL) unless negatives are allowed */
7752+ && !allow_neg)
7753+ h_dentry = NULL;
7754+ goto out;
7755+ }
7756+
7757+ h_inode = d_inode(h_dentry);
7758+ if (d_is_negative(h_dentry)) {
7759+ if (!allow_neg)
7760+ goto out_neg;
7761+ } else if (wh_found /* a positive entry is rejected when a whiteout was found or its type differs from args->type */
7762+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
7763+ goto out_neg;
7764+
7765+ if (au_dbbot(dentry) <= bindex) /* record h_dentry at bindex and widen dentry's top/bottom range to include it */
7766+ au_set_dbbot(dentry, bindex);
7767+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
7768+ au_set_dbtop(dentry, bindex);
7769+ au_set_h_dptr(dentry, bindex, h_dentry);
7770+
7771+ if (!d_is_dir(h_dentry) /* the diropq test below runs only for a lower dir on a whiteout-able branch */
7772+ || !wh_able
7773+ || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
7774+ goto out; /* success */
7775+
7776+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
7777+ opq = au_diropq_test(h_dentry);
7778+ inode_unlock(h_inode);
7779+ if (opq > 0)
7780+ au_set_dbdiropq(dentry, bindex);
7781+ else if (unlikely(opq < 0)) { /* on error un-register the lower dentry and return the error */
7782+ au_set_h_dptr(dentry, bindex, NULL);
7783+ h_dentry = ERR_PTR(opq);
7784+ }
7785+ goto out;
7786+
7787+out_neg: /* drop the rejected lower dentry and report not-found */
7788+ dput(h_dentry);
7789+ h_dentry = NULL;
7790+out:
7791+ return h_dentry;
7792+}
7793+
7794+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
7795+{ /* returns -EPERM when the SHWH option is not set and name starts with AUFS_WH_PFX, otherwise 0 */
7796+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
7797+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
7798+ return -EPERM;
7799+ return 0;
7800+}
7801+
7802+/*
7803+ * looks up @dentry on the branches from @btop downward and returns the number of lower positive dentries,
7804+ * otherwise an error.
7805+ * can be called at unlinking with @type is zero.
7806+ */
7807+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
7808+ unsigned int flags)
7809+{
7810+ int npositive, err;
7811+ aufs_bindex_t bindex, btail, bdiropq;
7812+ unsigned char isdir, dirperm1;
7813+ struct qstr whname;
7814+ struct au_do_lookup_args args = {
7815+ .flags = flags
7816+ };
7817+ const struct qstr *name = &dentry->d_name;
7818+ struct dentry *parent;
7819+ struct super_block *sb;
7820+
7821+ sb = dentry->d_sb;
7822+ err = au_test_shwh(sb, name);
7823+ if (unlikely(err))
7824+ goto out;
7825+
7826+ err = au_wh_name_alloc(&whname, name); /* whname.name is released at out_parent */
7827+ if (unlikely(err))
7828+ goto out;
7829+
7830+ isdir = !!d_is_dir(dentry);
7831+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
7832+
7833+ npositive = 0;
7834+ parent = dget_parent(dentry);
7835+ btail = au_dbtaildir(parent);
7836+ for (bindex = btop; bindex <= btail; bindex++) {
7837+ struct dentry *h_parent, *h_dentry;
7838+ struct inode *h_inode, *h_dir;
7839+
7840+ h_dentry = au_h_dptr(dentry, bindex);
7841+ if (h_dentry) { /* a lower dentry is already set here: count it if positive and stop */
7842+ if (d_is_positive(h_dentry))
7843+ npositive++;
7844+ break;
7845+ }
7846+ h_parent = au_h_dptr(parent, bindex);
7847+ if (!h_parent || !d_is_dir(h_parent)) /* skip branches where the parent has no lower directory */
7848+ continue;
7849+
7850+ h_dir = d_inode(h_parent);
7851+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
7852+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
7853+ &args);
7854+ inode_unlock(h_dir);
7855+ err = PTR_ERR(h_dentry);
7856+ if (IS_ERR(h_dentry))
7857+ goto out_parent;
7858+ if (h_dentry) /* after the first lower dentry, negative ones are no longer accepted */
7859+ au_fclr_lkup(args.flags, ALLOW_NEG);
7860+ if (dirperm1)
7861+ au_fset_lkup(args.flags, IGNORE_PERM);
7862+
7863+ if (au_dbwh(dentry) == bindex) /* au_do_lookup() recorded a whiteout on this branch: stop */
7864+ break;
7865+ if (!h_dentry)
7866+ continue;
7867+ if (d_is_negative(h_dentry))
7868+ continue;
7869+ h_inode = d_inode(h_dentry);
7870+ npositive++;
7871+ if (!args.type) /* the first positive entry fixes the type later entries must match */
7872+ args.type = h_inode->i_mode & S_IFMT;
7873+ if (args.type != S_IFDIR) /* only directories continue to lower branches */
7874+ break;
7875+ else if (isdir) {
7876+ /* the type of lower may be different */
7877+ bdiropq = au_dbdiropq(dentry);
7878+ if (bdiropq >= 0 && bdiropq <= bindex)
7879+ break;
7880+ }
7881+ }
7882+
7883+ if (npositive) {
7884+ AuLabel(positive);
7885+ au_update_dbtop(dentry);
7886+ }
7887+ err = npositive;
7888+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE) /* unless udba=none, ending with no top index is reported as -EIO */
7889+ && au_dbtop(dentry) < 0)) {
7890+ err = -EIO;
7891+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
7892+ dentry, err);
7893+ }
7894+
7895+out_parent:
7896+ dput(parent);
7897+ au_delayed_kfree(whname.name);
7898+out:
7899+ return err;
7900+}
7901+
7902+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
7903+{ /* looks up name under parent: directly when au_test_h_perm_sio() returns 0, otherwise through au_wkq_wait() */
7904+ struct dentry *dentry;
7905+ int wkq_err;
7906+
7907+ if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
7908+ dentry = vfsub_lkup_one(name, parent);
7909+ else {
7910+ struct vfsub_lkup_one_args args = {
7911+ .errp = &dentry, /* presumably vfsub_call_lkup_one() stores its result through errp; verify against its definition */
7912+ .name = name,
7913+ .parent = parent
7914+ };
7915+
7916+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
7917+ if (unlikely(wkq_err)) /* a failure of the wait itself replaces the result */
7918+ dentry = ERR_PTR(wkq_err);
7919+ }
7920+
7921+ return dentry;
7922+}
7923+
7924+/*
7925+ * lookup @dentry on @bindex which should be negative; returns 0, the lookup error, or -EIO if it is positive.
7926+ */
7927+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
7928+{
7929+ int err;
7930+ struct dentry *parent, *h_parent, *h_dentry;
7931+ struct au_branch *br;
7932+
7933+ parent = dget_parent(dentry);
7934+ h_parent = au_h_dptr(parent, bindex);
7935+ br = au_sbr(dentry->d_sb, bindex);
7936+ if (wh) /* non-zero wh selects au_whtmp_lkup() instead of the plain name lookup */
7937+ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
7938+ else
7939+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
7940+ err = PTR_ERR(h_dentry);
7941+ if (IS_ERR(h_dentry))
7942+ goto out;
7943+ if (unlikely(d_is_positive(h_dentry))) {
7944+ err = -EIO;
7945+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
7946+ dput(h_dentry);
7947+ goto out;
7948+ }
7949+
7950+ err = 0;
7951+ if (bindex < au_dbtop(dentry)) /* widen dentry's top/bottom range to include bindex, then store the negative lower dentry */
7952+ au_set_dbtop(dentry, bindex);
7953+ if (au_dbbot(dentry) < bindex)
7954+ au_set_dbbot(dentry, bindex);
7955+ au_set_h_dptr(dentry, bindex, h_dentry);
7956+
7957+out:
7958+ dput(parent);
7959+ return err;
7960+}
7961+
7962+/* ---------------------------------------------------------------------- */
7963+
7964+/* subset of struct inode, saved by au_iattr_save() and compared by au_iattr_test() */
7965+struct au_iattr {
7966+ unsigned long i_ino;
7967+ /* unsigned int i_nlink; */
7968+ kuid_t i_uid;
7969+ kgid_t i_gid;
7970+ u64 i_version;
7971+/*
7972+ loff_t i_size;
7973+ blkcnt_t i_blocks;
7974+*/
7975+ umode_t i_mode;
7976+};
7977+
7978+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
7979+{ /* snapshot ino, uid, gid, version and the file-type bits of h_inode into ia */
7980+ ia->i_ino = h_inode->i_ino;
7981+ /* ia->i_nlink = h_inode->i_nlink; */
7982+ ia->i_uid = h_inode->i_uid;
7983+ ia->i_gid = h_inode->i_gid;
7984+ ia->i_version = h_inode->i_version;
7985+/*
7986+ ia->i_size = h_inode->i_size;
7987+ ia->i_blocks = h_inode->i_blocks;
7988+*/
7989+ ia->i_mode = (h_inode->i_mode & S_IFMT); /* only the S_IFMT bits are kept */
7990+}
7991+
7992+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
7993+{ /* returns non-zero when any saved field differs from the current value in h_inode */
7994+ return ia->i_ino != h_inode->i_ino
7995+ /* || ia->i_nlink != h_inode->i_nlink */
7996+ || !uid_eq(ia->i_uid, h_inode->i_uid)
7997+ || !gid_eq(ia->i_gid, h_inode->i_gid)
7998+ || ia->i_version != h_inode->i_version
7999+/*
8000+ || ia->i_size != h_inode->i_size
8001+ || ia->i_blocks != h_inode->i_blocks
8002+*/
8003+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
8004+}
8005+
8006+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
8007+ struct au_branch *br) /* NOTE(review): br is not used in this function */
8008+{ /* re-looks up h_dentry's name under h_parent; returns 0 if the result is the same dentry/inode with unchanged attributes, else au_busy_or_stale() or the lookup error */
8009+ int err;
8010+ struct au_iattr ia;
8011+ struct inode *h_inode;
8012+ struct dentry *h_d;
8013+ struct super_block *h_sb;
8014+
8015+ err = 0;
8016+ memset(&ia, -1, sizeof(ia));
8017+ h_sb = h_dentry->d_sb;
8018+ h_inode = NULL;
8019+ if (d_is_positive(h_dentry)) { /* snapshot the attributes before the re-lookup */
8020+ h_inode = d_inode(h_dentry);
8021+ au_iattr_save(&ia, h_inode);
8022+ } else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
8023+ /* nfs d_revalidate may return 0 for negative dentry */
8024+ /* fuse d_revalidate always return 0 for negative dentry */
8025+ goto out;
8026+
8027+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
8028+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
8029+ err = PTR_ERR(h_d);
8030+ if (IS_ERR(h_d))
8031+ goto out;
8032+
8033+ err = 0;
8034+ if (unlikely(h_d != h_dentry /* different dentry, different inode, or changed attributes */
8035+ || d_inode(h_d) != h_inode
8036+ || (h_inode && au_iattr_test(&ia, h_inode))))
8037+ err = au_busy_or_stale();
8038+ dput(h_d);
8039+
8040+out:
8041+ AuTraceErr(err);
8042+ return err;
8043+}
8044+
8045+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8046+ struct dentry *h_parent, struct au_branch *br)
8047+{ /* verifies h_dentry according to the udba mode; returns 0 when udba is AuOpt_UDBA_NONE or the check passes */
8048+ int err;
8049+
8050+ err = 0;
8051+ if (udba == AuOpt_UDBA_REVAL
8052+ && !au_test_fs_remote(h_dentry->d_sb)) {
8053+ IMustLock(h_dir);
8054+ err = (d_inode(h_dentry->d_parent) != h_dir); /* NOTE(review): this path yields 0 or 1, not a negative errno */
8055+ } else if (udba != AuOpt_UDBA_NONE)
8056+ err = au_h_verify_dentry(h_dentry, h_parent, br);
8057+
8058+ return err;
8059+}
8060+
8061+/* ---------------------------------------------------------------------- */
8062+
8063+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
8064+{ /* moves each lower dentry of dentry to the index au_br_index() reports for its hd_id, then recomputes di_bwh, di_bdiropq, di_btop and di_bbot; returns 0, or -EIO when no lower dentry is found in the final scan */
8065+ int err;
8066+ aufs_bindex_t new_bindex, bindex, bbot, bwh, bdiropq;
8067+ struct au_hdentry tmp, *p, *q;
8068+ struct au_dinfo *dinfo;
8069+ struct super_block *sb;
8070+
8071+ DiMustWriteLock(dentry);
8072+
8073+ sb = dentry->d_sb;
8074+ dinfo = au_di(dentry);
8075+ bbot = dinfo->di_bbot;
8076+ bwh = dinfo->di_bwh;
8077+ bdiropq = dinfo->di_bdiropq;
8078+ bindex = dinfo->di_btop;
8079+ p = au_hdentry(dinfo, bindex);
8080+ for (; bindex <= bbot; bindex++, p++) {
8081+ if (!p->hd_dentry)
8082+ continue;
8083+
8084+ new_bindex = au_br_index(sb, p->hd_id);
8085+ if (new_bindex == bindex) /* already in the right slot */
8086+ continue;
8087+
8088+ if (dinfo->di_bwh == bindex) /* whiteout and diropq indices follow the entry they referred to */
8089+ bwh = new_bindex;
8090+ if (dinfo->di_bdiropq == bindex)
8091+ bdiropq = new_bindex;
8092+ if (new_bindex < 0) { /* a negative new index drops this lower dentry */
8093+ au_hdput(p);
8094+ p->hd_dentry = NULL;
8095+ continue;
8096+ }
8097+
8098+ /* swap two lower dentries, and loop again */
8099+ q = au_hdentry(dinfo, new_bindex);
8100+ tmp = *q;
8101+ *q = *p;
8102+ *p = tmp;
8103+ if (tmp.hd_dentry) { /* the current slot now holds the displaced entry: process the same index again */
8104+ bindex--;
8105+ p--;
8106+ }
8107+ }
8108+
8109+ dinfo->di_bwh = -1; /* keep the moved whiteout index only if it is in range and au_sbr_whable() accepts the branch */
8110+ if (bwh >= 0 && bwh <= au_sbbot(sb) && au_sbr_whable(sb, bwh))
8111+ dinfo->di_bwh = bwh;
8112+
8113+ dinfo->di_bdiropq = -1; /* same rule for the diropq index */
8114+ if (bdiropq >= 0
8115+ && bdiropq <= au_sbbot(sb)
8116+ && au_sbr_whable(sb, bdiropq))
8117+ dinfo->di_bdiropq = bdiropq;
8118+
8119+ err = -EIO;
8120+ dinfo->di_btop = -1;
8121+ dinfo->di_bbot = -1;
8122+ bbot = au_dbbot(parent); /* the new range is searched within 0..au_dbbot(parent) */
8123+ bindex = 0;
8124+ p = au_hdentry(dinfo, bindex);
8125+ for (; bindex <= bbot; bindex++, p++) /* first used slot from the top becomes di_btop */
8126+ if (p->hd_dentry) {
8127+ dinfo->di_btop = bindex;
8128+ break;
8129+ }
8130+
8131+ if (dinfo->di_btop >= 0) {
8132+ bindex = bbot;
8133+ p = au_hdentry(dinfo, bindex);
8134+ for (; bindex >= 0; bindex--, p--) /* first used slot from the bottom becomes di_bbot */
8135+ if (p->hd_dentry) {
8136+ dinfo->di_bbot = bindex;
8137+ err = 0;
8138+ break;
8139+ }
8140+ }
8141+
8142+ return err;
8143+}
8144+
8145+static void au_do_hide(struct dentry *dentry)
8146+{ /* adjusts the link count of a positive dentry's inode, then unhashes the dentry with d_drop() */
8147+ struct inode *inode;
8148+
8149+ if (d_really_is_positive(dentry)) {
8150+ inode = d_inode(dentry);
8151+ if (!d_is_dir(dentry)) {
8152+ if (inode->i_nlink && !d_unhashed(dentry)) /* non-dir: drop one link, only if still hashed and nlink is non-zero */
8153+ drop_nlink(inode);
8154+ } else {
8155+ clear_nlink(inode); /* dir: nlink goes to zero and the inode is marked dead */
8156+ /* stop next lookup */
8157+ inode->i_flags |= S_DEAD;
8158+ }
8159+ smp_mb(); /* necessary? */
8160+ }
8161+ d_drop(dentry);
8162+}
8163+
8164+static int au_hide_children(struct dentry *parent)
8165+{ /* calls au_do_hide() on every dentry collected by au_dcsub_pages() for parent, except parent itself; returns 0 or the collection error */
8166+ int err, i, j, ndentry;
8167+ struct au_dcsub_pages dpages;
8168+ struct au_dpage *dpage;
8169+ struct dentry *dentry;
8170+
8171+ err = au_dpages_init(&dpages, GFP_NOFS);
8172+ if (unlikely(err))
8173+ goto out;
8174+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
8175+ if (unlikely(err))
8176+ goto out_dpages;
8177+
8178+ /* in reverse order */
8179+ for (i = dpages.ndpage - 1; i >= 0; i--) {
8180+ dpage = dpages.dpages + i;
8181+ ndentry = dpage->ndentry;
8182+ for (j = ndentry - 1; j >= 0; j--) {
8183+ dentry = dpage->dentries[j];
8184+ if (dentry != parent) /* the caller hides parent itself */
8185+ au_do_hide(dentry);
8186+ }
8187+ }
8188+
8189+out_dpages:
8190+ au_dpages_free(&dpages);
8191+out:
8192+ return err;
8193+}
8194+
8195+static void au_hide(struct dentry *dentry)
8196+{ /* hides dentry; for a directory its children are hidden first, and a failure there is only logged */
8197+ int err;
8198+
8199+ AuDbgDentry(dentry);
8200+ if (d_is_dir(dentry)) {
8201+ /* shrink_dcache_parent(dentry); */
8202+ err = au_hide_children(dentry);
8203+ if (unlikely(err))
8204+ AuIOErr("%pd, failed hiding children, ignored %d\n",
8205+ dentry, err);
8206+ }
8207+ au_do_hide(dentry);
8208+}
8209+
8210+/*
8211+ * By adding a dirty branch, a cached dentry may be affected in various ways.
8212+ *
8213+ * a dirty branch is added
8214+ * - on the top of layers
8215+ * - in the middle of layers
8216+ * - to the bottom of layers
8217+ *
8218+ * on the added branch there exists
8219+ * - a whiteout
8220+ * - a diropq
8221+ * - a same named entry
8222+ * + exist
8223+ * * negative --> positive
8224+ * * positive --> positive
8225+ * - type is unchanged
8226+ * - type is changed
8227+ * + doesn't exist
8228+ * * negative --> negative
8229+ * * positive --> negative (rejected by au_br_del() for non-dir case)
8230+ * - none
8231+ */
8232+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
8233+ struct au_dinfo *tmp)
8234+{ /* compares the top lower dentry of dinfo (original) with that of tmp (fresh lookup) and updates or hides dentry accordingly */
8235+ int err;
8236+ aufs_bindex_t bindex, bbot;
8237+ struct {
8238+ struct dentry *dentry;
8239+ struct inode *inode;
8240+ mode_t mode;
8241+ } orig_h, tmp_h = {
8242+ .dentry = NULL
8243+ };
8244+ struct au_hdentry *hd;
8245+ struct inode *inode, *h_inode;
8246+ struct dentry *h_dentry;
8247+
8248+ err = 0;
8249+ AuDebugOn(dinfo->di_btop < 0);
8250+ orig_h.mode = 0;
8251+ orig_h.dentry = au_hdentry(dinfo, dinfo->di_btop)->hd_dentry;
8252+ orig_h.inode = NULL;
8253+ if (d_is_positive(orig_h.dentry)) {
8254+ orig_h.inode = d_inode(orig_h.dentry);
8255+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
8256+ }
8257+ if (tmp->di_btop >= 0) { /* tmp_h stays all-zero when the fresh lookup found nothing */
8258+ tmp_h.dentry = au_hdentry(tmp, tmp->di_btop)->hd_dentry;
8259+ if (d_is_positive(tmp_h.dentry)) {
8260+ tmp_h.inode = d_inode(tmp_h.dentry);
8261+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
8262+ }
8263+ }
8264+
8265+ inode = NULL;
8266+ if (d_really_is_positive(dentry))
8267+ inode = d_inode(dentry);
8268+ if (!orig_h.inode) {
8269+ AuDbg("nagative originally\n");
8270+ if (inode) { /* aufs dentry is positive although its top lower dentry is negative: just hide it */
8271+ au_hide(dentry);
8272+ goto out;
8273+ }
8274+ AuDebugOn(inode);
8275+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
8276+ AuDebugOn(dinfo->di_bdiropq != -1);
8277+
8278+ if (!tmp_h.inode) {
8279+ AuDbg("negative --> negative\n");
8280+ /* should have only one negative lower */
8281+ if (tmp->di_btop >= 0
8282+ && tmp->di_btop < dinfo->di_btop) { /* the fresh negative is on an upper branch: switch to it */
8283+ AuDebugOn(tmp->di_btop != tmp->di_bbot);
8284+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
8285+ au_set_h_dptr(dentry, dinfo->di_btop, NULL);
8286+ au_di_cp(dinfo, tmp);
8287+ hd = au_hdentry(tmp, tmp->di_btop);
8288+ au_set_h_dptr(dentry, tmp->di_btop,
8289+ dget(hd->hd_dentry));
8290+ }
8291+ au_dbg_verify_dinode(dentry);
8292+ } else {
8293+ AuDbg("negative --> positive\n");
8294+ /*
8295+ * similar to the behaviour of creating with bypassing
8296+ * aufs.
8297+ * unhash it in order to force an error in the
8298+ * succeeding create operation.
8299+ * we should not set S_DEAD here.
8300+ */
8301+ d_drop(dentry);
8302+ /* au_di_swap(tmp, dinfo); */
8303+ au_dbg_verify_dinode(dentry);
8304+ }
8305+ } else {
8306+ AuDbg("positive originally\n");
8307+ /* inode may be NULL */
8308+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
8309+ if (!tmp_h.inode) {
8310+ AuDbg("positive --> negative\n");
8311+ /* or bypassing aufs */
8312+ au_hide(dentry);
8313+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_btop) /* adopt the fresh whiteout index when it is at or above the original top */
8314+ dinfo->di_bwh = tmp->di_bwh;
8315+ if (inode)
8316+ err = au_refresh_hinode_self(inode);
8317+ au_dbg_verify_dinode(dentry);
8318+ } else if (orig_h.mode == tmp_h.mode) {
8319+ AuDbg("positive --> positive, same type\n");
8320+ if (!S_ISDIR(orig_h.mode)
8321+ && dinfo->di_btop > tmp->di_btop) { /* a non-dir now exists on an upper branch */
8322+ /*
8323+ * similar to the behaviour of removing and
8324+ * creating.
8325+ */
8326+ au_hide(dentry);
8327+ if (inode)
8328+ err = au_refresh_hinode_self(inode);
8329+ au_dbg_verify_dinode(dentry);
8330+ } else {
8331+ /* fill empty slots */
8332+ if (dinfo->di_btop > tmp->di_btop)
8333+ dinfo->di_btop = tmp->di_btop;
8334+ if (dinfo->di_bbot < tmp->di_bbot)
8335+ dinfo->di_bbot = tmp->di_bbot;
8336+ dinfo->di_bwh = tmp->di_bwh;
8337+ dinfo->di_bdiropq = tmp->di_bdiropq;
8338+ bbot = dinfo->di_bbot;
8339+ bindex = tmp->di_btop;
8340+ hd = au_hdentry(tmp, bindex);
8341+ for (; bindex <= bbot; bindex++, hd++) {
8342+ if (au_h_dptr(dentry, bindex)) /* slots already filled are kept */
8343+ continue;
8344+ h_dentry = hd->hd_dentry;
8345+ if (!h_dentry)
8346+ continue;
8347+ AuDebugOn(d_is_negative(h_dentry));
8348+ h_inode = d_inode(h_dentry);
8349+ AuDebugOn(orig_h.mode
8350+ != (h_inode->i_mode
8351+ & S_IFMT));
8352+ au_set_h_dptr(dentry, bindex,
8353+ dget(h_dentry)); /* take an extra reference for the copied slot */
8354+ }
8355+ if (inode)
8356+ err = au_refresh_hinode(inode, dentry);
8357+ au_dbg_verify_dinode(dentry);
8358+ }
8359+ } else {
8360+ AuDbg("positive --> positive, different type\n");
8361+ /* similar to the behaviour of removing and creating */
8362+ au_hide(dentry);
8363+ if (inode)
8364+ err = au_refresh_hinode_self(inode);
8365+ au_dbg_verify_dinode(dentry);
8366+ }
8367+ }
8368+
8369+out:
8370+ return err;
8371+}
8372+
8373+void au_refresh_dop(struct dentry *dentry, int force_reval)
8374+{ /* switches dentry->d_op to aufs_dop (force_reval) or the super_block default, and sets/clears the revalidate flags to match */
8375+ const struct dentry_operations *dop
8376+ = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
8377+ static const unsigned int mask
8378+ = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
8379+
8380+ BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
8381+
8382+ if (dentry->d_op == dop) /* nothing to change */
8383+ return;
8384+
8385+ AuDbg("%pd\n", dentry);
8386+ spin_lock(&dentry->d_lock); /* d_flags and d_op are updated together under d_lock */
8387+ if (dop == &aufs_dop)
8388+ dentry->d_flags |= mask;
8389+ else
8390+ dentry->d_flags &= ~mask;
8391+ dentry->d_op = dop;
8392+ spin_unlock(&dentry->d_lock);
8393+}
8394+
8395+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
8396+{ /* refreshes dentry's lower dentries against the current branches; updates its generation on success and returns 0 or a negative error */
8397+ int err, ebrange, nbr;
8398+ unsigned int sigen;
8399+ struct au_dinfo *dinfo, *tmp;
8400+ struct super_block *sb;
8401+ struct inode *inode;
8402+
8403+ DiMustWriteLock(dentry);
8404+ AuDebugOn(IS_ROOT(dentry));
8405+ AuDebugOn(d_really_is_negative(parent));
8406+
8407+ sb = dentry->d_sb;
8408+ sigen = au_sigen(sb);
8409+ err = au_digen_test(parent, sigen); /* fails early when au_digen_test() rejects the parent for the current sigen */
8410+ if (unlikely(err))
8411+ goto out;
8412+
8413+ nbr = au_sbbot(sb) + 1;
8414+ dinfo = au_di(dentry);
8415+ err = au_di_realloc(dinfo, nbr, /*may_shrink*/0);
8416+ if (unlikely(err))
8417+ goto out;
8418+ ebrange = au_dbrange_test(dentry);
8419+ if (!ebrange)
8420+ ebrange = au_do_refresh_hdentry(dentry, parent);
8421+
8422+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) { /* unhashed or out-of-range dentries skip the re-lookup below */
8423+ AuDebugOn(au_dbtop(dentry) < 0 && au_dbbot(dentry) >= 0);
8424+ if (d_really_is_positive(dentry)) {
8425+ inode = d_inode(dentry);
8426+ err = au_refresh_hinode_self(inode);
8427+ }
8428+ au_dbg_verify_dinode(dentry);
8429+ if (!err)
8430+ goto out_dgen; /* success */
8431+ goto out;
8432+ }
8433+
8434+ /* temporary dinfo */
8435+ AuDbgDentry(dentry);
8436+ err = -ENOMEM;
8437+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
8438+ if (unlikely(!tmp))
8439+ goto out;
8440+ au_di_swap(tmp, dinfo); /* swapped before the lookup and swapped back right after it */
8441+ /* returns the number of positive dentries */
8442+ /*
8443+ * if current working dir is removed, it returns an error.
8444+ * but the dentry is legal.
8445+ */
8446+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
8447+ AuDbgDentry(dentry);
8448+ au_di_swap(tmp, dinfo);
8449+ if (err == -ENOENT)
8450+ err = 0;
8451+ if (err >= 0) {
8452+ /* compare/refresh by dinfo */
8453+ AuDbgDentry(dentry);
8454+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
8455+ au_dbg_verify_dinode(dentry);
8456+ AuTraceErr(err);
8457+ }
8458+ au_di_realloc(dinfo, nbr, /*may_shrink*/1); /* harmless if err */
8459+ au_rw_write_unlock(&tmp->di_rwsem);
8460+ au_di_free(tmp);
8461+ if (unlikely(err))
8462+ goto out;
8463+
8464+out_dgen:
8465+ au_update_digen(dentry);
8466+out:
8467+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) { /* failures on DCACHE_NFSFS_RENAMED dentries are not logged */
8468+ AuIOErr("failed refreshing %pd, %d\n", dentry, err);
8469+ AuDbgDentry(dentry);
8470+ }
8471+ AuTraceErr(err);
8472+ return err;
8473+}
8474+
8475+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
8476+ struct dentry *dentry, aufs_bindex_t bindex)
8477+{
8478+ int err, valid;
8479+
8480+ err = 0;
8481+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
8482+ goto out;
8483+
8484+ AuDbg("b%d\n", bindex);
8485+ /*
8486+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
8487+ * due to whiteout and branch permission.
8488+ */
8489+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
8490+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
8491+ /* it may return tri-state */
8492+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
8493+
8494+ if (unlikely(valid < 0))
8495+ err = valid;
8496+ else if (!valid)
8497+ err = -EINVAL;
8498+
8499+out:
8500+ AuTraceErr(err);
8501+ return err;
8502+}
8503+
8504+/* todo: remove this */
8505+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
8506+ unsigned int flags, int do_udba)
8507+{
8508+ int err;
8509+ umode_t mode, h_mode;
8510+ aufs_bindex_t bindex, btail, btop, ibs, ibe;
8511+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
8512+ struct inode *h_inode, *h_cached_inode;
8513+ struct dentry *h_dentry;
8514+ struct qstr *name, *h_name;
8515+
8516+ err = 0;
8517+ plus = 0;
8518+ mode = 0;
8519+ ibs = -1;
8520+ ibe = -1;
8521+ unhashed = !!d_unhashed(dentry);
8522+ is_root = !!IS_ROOT(dentry);
8523+ name = &dentry->d_name;
8524+ tmpfile = au_di(dentry)->di_tmpfile;
8525+
8526+ /*
8527+ * Theoretically, REVAL test should be unnecessary in case of
8528+ * {FS,I}NOTIFY.
8529+ * But {fs,i}notify doesn't fire some necessary events,
8530+ * IN_ATTRIB for atime/nlink/pageio
8531+ * Let's do REVAL test too.
8532+ */
8533+ if (do_udba && inode) {
8534+ mode = (inode->i_mode & S_IFMT);
8535+ plus = (inode->i_nlink > 0);
8536+ ibs = au_ibtop(inode);
8537+ ibe = au_ibbot(inode);
8538+ }
8539+
8540+ btop = au_dbtop(dentry);
8541+ btail = btop;
8542+ if (inode && S_ISDIR(inode->i_mode))
8543+ btail = au_dbtaildir(dentry);
8544+ for (bindex = btop; bindex <= btail; bindex++) {
8545+ h_dentry = au_h_dptr(dentry, bindex);
8546+ if (!h_dentry)
8547+ continue;
8548+
8549+ AuDbg("b%d, %pd\n", bindex, h_dentry);
8550+ h_nfs = !!au_test_nfs(h_dentry->d_sb);
8551+ spin_lock(&h_dentry->d_lock);
8552+ h_name = &h_dentry->d_name;
8553+ if (unlikely(do_udba
8554+ && !is_root
8555+ && ((!h_nfs
8556+ && (unhashed != !!d_unhashed(h_dentry)
8557+ || (!tmpfile
8558+ && !au_qstreq(name, h_name))
8559+ ))
8560+ || (h_nfs
8561+ && !(flags & LOOKUP_OPEN)
8562+ && (h_dentry->d_flags
8563+ & DCACHE_NFSFS_RENAMED)))
8564+ )) {
8565+ int h_unhashed;
8566+
8567+ h_unhashed = d_unhashed(h_dentry);
8568+ spin_unlock(&h_dentry->d_lock);
8569+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
8570+ unhashed, h_unhashed, dentry, h_dentry);
8571+ goto err;
8572+ }
8573+ spin_unlock(&h_dentry->d_lock);
8574+
8575+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
8576+ if (unlikely(err))
8577+ /* do not goto err, to keep the errno */
8578+ break;
8579+
8580+ /* todo: plink too? */
8581+ if (!do_udba)
8582+ continue;
8583+
8584+ /* UDBA tests */
8585+ if (unlikely(!!inode != d_is_positive(h_dentry)))
8586+ goto err;
8587+
8588+ h_inode = NULL;
8589+ if (d_is_positive(h_dentry))
8590+ h_inode = d_inode(h_dentry);
8591+ h_plus = plus;
8592+ h_mode = mode;
8593+ h_cached_inode = h_inode;
8594+ if (h_inode) {
8595+ h_mode = (h_inode->i_mode & S_IFMT);
8596+ h_plus = (h_inode->i_nlink > 0);
8597+ }
8598+ if (inode && ibs <= bindex && bindex <= ibe)
8599+ h_cached_inode = au_h_iptr(inode, bindex);
8600+
8601+ if (!h_nfs) {
8602+ if (unlikely(plus != h_plus && !tmpfile))
8603+ goto err;
8604+ } else {
8605+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
8606+ && !is_root
8607+ && !IS_ROOT(h_dentry)
8608+ && unhashed != d_unhashed(h_dentry)))
8609+ goto err;
8610+ }
8611+ if (unlikely(mode != h_mode
8612+ || h_cached_inode != h_inode))
8613+ goto err;
8614+ continue;
8615+
8616+err:
8617+ err = -EINVAL;
8618+ break;
8619+ }
8620+
8621+ AuTraceErr(err);
8622+ return err;
8623+}
8624+
8625+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
8626+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
8627+{
8628+ int err;
8629+ struct dentry *parent;
8630+
8631+ if (!au_digen_test(dentry, sigen))
8632+ return 0;
8633+
8634+ parent = dget_parent(dentry);
8635+ di_read_lock_parent(parent, AuLock_IR);
8636+ AuDebugOn(au_digen_test(parent, sigen));
8637+ au_dbg_verify_gen(parent, sigen);
8638+ err = au_refresh_dentry(dentry, parent);
8639+ di_read_unlock(parent, AuLock_IR);
8640+ dput(parent);
8641+ AuTraceErr(err);
8642+ return err;
8643+}
8644+
8645+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
8646+{
8647+ int err;
8648+ struct dentry *d, *parent;
8649+
8650+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
8651+ return simple_reval_dpath(dentry, sigen);
8652+
8653+ /* slow loop: each pass refreshes the topmost stale dentry on the path */
8654+ /* keep it simple and stupid, cf: au_cpup_dirs() */
8655+ err = 0;
8656+ parent = NULL;
8657+ while (au_digen_test(dentry, sigen)) {
8658+ d = dentry;
8659+ while (1) {
8660+ dput(parent); /* ref of the previous step/pass, NULL at first */
8661+ parent = dget_parent(d);
8662+ if (!au_digen_test(parent, sigen))
8663+ break;
8664+ d = parent;
8665+ }
8666+
8667+ if (d != dentry)
8668+ di_write_lock_child2(d);
8669+
8670+ /* someone might update our dentry while we were sleeping */
8671+ if (au_digen_test(d, sigen)) {
8672+ /*
8673+ * todo: consolidate with simple_reval_dpath(),
8674+ * do_refresh() and au_reval_for_attr().
8675+ */
8676+ di_read_lock_parent(parent, AuLock_IR);
8677+ err = au_refresh_dentry(d, parent);
8678+ di_read_unlock(parent, AuLock_IR);
8679+ }
8680+
8681+ if (d != dentry)
8682+ di_write_unlock(d);
8683+ if (unlikely(err))
8684+ break;
8685+ }
8686+ dput(parent); /* put the last ref once; a put per pass was repeated above */
8687+
8688+ return err;
8689+}
8690+
8691+/*
8692+ * returns 1 if valid, 0 if invalid (the dentry is dropped), or a negative errno.
8693+ */
8694+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
8695+{
8696+ int valid, err;
8697+ unsigned int sigen;
8698+ unsigned char do_udba;
8699+ struct super_block *sb;
8700+ struct inode *inode;
8701+
8702+ /* todo: support rcu-walk? */
8703+ if (flags & LOOKUP_RCU)
8704+ return -ECHILD;
8705+
8706+ valid = 0;
8707+ if (unlikely(!au_di(dentry)))
8708+ goto out;
8709+
8710+ valid = 1;
8711+ sb = dentry->d_sb;
8712+ /*
8713+ * todo: very ugly
8714+ * i_mutex of parent dir may be held,
8715+ * but we should not return 'invalid' due to busy.
8716+ */
8717+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
8718+ if (unlikely(err)) {
8719+ valid = err;
8720+ AuTraceErr(err);
8721+ goto out;
8722+ }
8723+ inode = NULL;
8724+ if (d_really_is_positive(dentry))
8725+ inode = d_inode(dentry);
8726+ if (unlikely(inode && au_is_bad_inode(inode))) {
8727+ err = -EINVAL;
8728+ AuTraceErr(err);
8729+ goto out_dgrade;
8730+ }
8731+ if (unlikely(au_dbrange_test(dentry))) {
8732+ err = -EINVAL;
8733+ AuTraceErr(err);
8734+ goto out_dgrade;
8735+ }
8736+
8737+ sigen = au_sigen(sb);
8738+ if (au_digen_test(dentry, sigen)) {
8739+ AuDebugOn(IS_ROOT(dentry));
8740+ err = au_reval_dpath(dentry, sigen);
8741+ if (unlikely(err)) {
8742+ AuTraceErr(err);
8743+ goto out_dgrade;
8744+ }
8745+ }
8746+ di_downgrade_lock(dentry, AuLock_IR);
8747+
8748+ err = -EINVAL; /* preset for the 'goto out_inval' paths below */
8749+ if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
8750+ && inode
8751+ && !(inode->i_state & I_LINKABLE) /* bit test, not a logical AND */
8752+ && (IS_DEADDIR(inode) || !inode->i_nlink)) {
8753+ AuTraceErr(err);
8754+ goto out_inval;
8755+ }
8756+
8757+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
8758+ if (do_udba && inode) {
8759+ aufs_bindex_t btop = au_ibtop(inode);
8760+ struct inode *h_inode;
8761+
8762+ if (btop >= 0) {
8763+ h_inode = au_h_iptr(inode, btop);
8764+ if (h_inode && au_test_higen(inode, h_inode)) {
8765+ AuTraceErr(err);
8766+ goto out_inval;
8767+ }
8768+ }
8769+ }
8770+
8771+ err = h_d_revalidate(dentry, inode, flags, do_udba);
8772+ if (unlikely(!err && do_udba && au_dbtop(dentry) < 0)) {
8773+ err = -EIO;
8774+ AuDbg("both of real entry and whiteout found, %p, err %d\n",
8775+ dentry, err);
8776+ }
8777+ goto out_inval;
8778+
8779+out_dgrade:
8780+ di_downgrade_lock(dentry, AuLock_IR);
8781+out_inval:
8782+ aufs_read_unlock(dentry, AuLock_IR);
8783+ AuTraceErr(err);
8784+ valid = !err; /* any error here means 'invalid' (0) */
8785+out:
8786+ if (!valid) {
8787+ AuDbg("%pd invalid, %d\n", dentry, valid);
8788+ d_drop(dentry);
8789+ }
8790+ return valid;
8791+}
8792+
8793+static void aufs_d_release(struct dentry *dentry)
8794+{
8795+ if (au_di(dentry)) {
8796+ au_di_fin(dentry);
8797+ au_hn_di_reinit(dentry);
8798+ }
8799+}
8800+
8801+const struct dentry_operations aufs_dop = {
8802+ .d_revalidate = aufs_d_revalidate,
8803+ .d_weak_revalidate = aufs_d_revalidate,
8804+ .d_release = aufs_d_release
8805+};
8806+
8807+/* aufs_dop without d_revalidate */
8808+const struct dentry_operations aufs_dop_noreval = {
8809+ .d_release = aufs_d_release
8810+};
8811diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
8812--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
8813+++ linux/fs/aufs/dentry.h 2016-10-09 16:55:38.889431135 +0200
8814@@ -0,0 +1,255 @@
8815+/*
8816+ * Copyright (C) 2005-2016 Junjiro R. Okajima
8817+ *
8818+ * This program, aufs is free software; you can redistribute it and/or modify
8819+ * it under the terms of the GNU General Public License as published by
8820+ * the Free Software Foundation; either version 2 of the License, or
8821+ * (at your option) any later version.
8822+ *
8823+ * This program is distributed in the hope that it will be useful,
8824+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8825+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8826+ * GNU General Public License for more details.
8827+ *
8828+ * You should have received a copy of the GNU General Public License
8829+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
8830+ */
8831+
8832+/*
8833+ * lookup and dentry operations
8834+ */
8835+
8836+#ifndef __AUFS_DENTRY_H__
8837+#define __AUFS_DENTRY_H__
8838+
8839+#ifdef __KERNEL__
8840+
8841+#include <linux/dcache.h>
8842+#include "rwsem.h"
8843+
8844+struct au_hdentry {
8845+ struct dentry *hd_dentry;
8846+ aufs_bindex_t hd_id;
8847+};
8848+
8849+struct au_dinfo {
8850+ atomic_t di_generation;
8851+
8852+ struct au_rwsem di_rwsem;
8853+ aufs_bindex_t di_btop, di_bbot, di_bwh, di_bdiropq;
8854+ unsigned char di_tmpfile; /* to allow the different name */
8855+ union {
8856+ struct au_hdentry *di_hdentry;
8857+ struct llist_node di_lnode; /* delayed free */
8858+ };
8859+} ____cacheline_aligned_in_smp;
8860+
8861+/* ---------------------------------------------------------------------- */
8862+
8863+/* flags for au_lkup_dentry() */
8864+#define AuLkup_ALLOW_NEG 1
8865+#define AuLkup_IGNORE_PERM (1 << 1)
8866+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
8867+#define au_fset_lkup(flags, name) \
8868+ do { (flags) |= AuLkup_##name; } while (0)
8869+#define au_fclr_lkup(flags, name) \
8870+ do { (flags) &= ~AuLkup_##name; } while (0)
8871+
8872+/* ---------------------------------------------------------------------- */
8873+
8874+/* dentry.c */
8875+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
8876+struct au_branch;
8877+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
8878+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8879+ struct dentry *h_parent, struct au_branch *br);
8880+
8881+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
8882+ unsigned int flags);
8883+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
8884+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
8885+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
8886+void au_refresh_dop(struct dentry *dentry, int force_reval);
8887+
8888+/* dinfo.c */
8889+void au_di_init_once(void *_di);
8890+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
8891+void au_di_free(struct au_dinfo *dinfo);
8892+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
8893+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
8894+int au_di_init(struct dentry *dentry);
8895+void au_di_fin(struct dentry *dentry);
8896+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink);
8897+
8898+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
8899+void di_read_unlock(struct dentry *d, int flags);
8900+void di_downgrade_lock(struct dentry *d, int flags);
8901+void di_write_lock(struct dentry *d, unsigned int lsc);
8902+void di_write_unlock(struct dentry *d);
8903+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
8904+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
8905+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
8906+
8907+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
8908+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
8909+aufs_bindex_t au_dbtail(struct dentry *dentry);
8910+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
8911+
8912+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
8913+ struct dentry *h_dentry);
8914+int au_digen_test(struct dentry *dentry, unsigned int sigen);
8915+int au_dbrange_test(struct dentry *dentry);
8916+void au_update_digen(struct dentry *dentry);
8917+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
8918+void au_update_dbtop(struct dentry *dentry);
8919+void au_update_dbbot(struct dentry *dentry);
8920+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
8921+
8922+/* ---------------------------------------------------------------------- */
8923+
8924+static inline struct au_dinfo *au_di(struct dentry *dentry)
8925+{
8926+ return dentry->d_fsdata;
8927+}
8928+
8929+/* ---------------------------------------------------------------------- */
8930+
8931+/* lock subclass for dinfo */
8932+enum {
8933+ AuLsc_DI_CHILD, /* child first */
8934+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
8935+ AuLsc_DI_CHILD3, /* copyup dirs */
8936+ AuLsc_DI_PARENT,
8937+ AuLsc_DI_PARENT2,
8938+ AuLsc_DI_PARENT3,
8939+ AuLsc_DI_TMP /* temp for replacing dinfo */
8940+};
8941+
8942+/*
8943+ * di_read_lock_child, di_write_lock_child,
8944+ * di_read_lock_child2, di_write_lock_child2,
8945+ * di_read_lock_child3, di_write_lock_child3,
8946+ * di_read_lock_parent, di_write_lock_parent,
8947+ * di_read_lock_parent2, di_write_lock_parent2,
8948+ * di_read_lock_parent3, di_write_lock_parent3,
8949+ */
8950+#define AuReadLockFunc(name, lsc) \
8951+static inline void di_read_lock_##name(struct dentry *d, int flags) \
8952+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
8953+
8954+#define AuWriteLockFunc(name, lsc) \
8955+static inline void di_write_lock_##name(struct dentry *d) \
8956+{ di_write_lock(d, AuLsc_DI_##lsc); }
8957+
8958+#define AuRWLockFuncs(name, lsc) \
8959+ AuReadLockFunc(name, lsc) \
8960+ AuWriteLockFunc(name, lsc)
8961+
8962+AuRWLockFuncs(child, CHILD);
8963+AuRWLockFuncs(child2, CHILD2);
8964+AuRWLockFuncs(child3, CHILD3);
8965+AuRWLockFuncs(parent, PARENT);
8966+AuRWLockFuncs(parent2, PARENT2);
8967+AuRWLockFuncs(parent3, PARENT3);
8968+
8969+#undef AuReadLockFunc
8970+#undef AuWriteLockFunc
8971+#undef AuRWLockFuncs
8972+
8973+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
8974+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
8975+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
8976+
8977+/* ---------------------------------------------------------------------- */
8978+
8979+/* todo: memory barrier? */
8980+static inline unsigned int au_digen(struct dentry *d)
8981+{
8982+ return atomic_read(&au_di(d)->di_generation);
8983+}
8984+
8985+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
8986+{
8987+ hdentry->hd_dentry = NULL;
8988+}
8989+
8990+static inline struct au_hdentry *au_hdentry(struct au_dinfo *di,
8991+ aufs_bindex_t bindex)
8992+{
8993+ return di->di_hdentry + bindex;
8994+}
8995+
8996+static inline void au_hdput(struct au_hdentry *hd)
8997+{
8998+ if (hd)
8999+ dput(hd->hd_dentry);
9000+}
9001+
9002+static inline aufs_bindex_t au_dbtop(struct dentry *dentry)
9003+{
9004+ DiMustAnyLock(dentry);
9005+ return au_di(dentry)->di_btop;
9006+}
9007+
9008+static inline aufs_bindex_t au_dbbot(struct dentry *dentry)
9009+{
9010+ DiMustAnyLock(dentry);
9011+ return au_di(dentry)->di_bbot;
9012+}
9013+
9014+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
9015+{
9016+ DiMustAnyLock(dentry);
9017+ return au_di(dentry)->di_bwh;
9018+}
9019+
9020+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
9021+{
9022+ DiMustAnyLock(dentry);
9023+ return au_di(dentry)->di_bdiropq;
9024+}
9025+
9026+/* todo: hard/soft set? */
9027+static inline void au_set_dbtop(struct dentry *dentry, aufs_bindex_t bindex)
9028+{
9029+ DiMustWriteLock(dentry);
9030+ au_di(dentry)->di_btop = bindex;
9031+}
9032+
9033+static inline void au_set_dbbot(struct dentry *dentry, aufs_bindex_t bindex)
9034+{
9035+ DiMustWriteLock(dentry);
9036+ au_di(dentry)->di_bbot = bindex;
9037+}
9038+
9039+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
9040+{
9041+ DiMustWriteLock(dentry);
9042+ /* dbwh can be outside of btop - bbot range */
9043+ au_di(dentry)->di_bwh = bindex;
9044+}
9045+
9046+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
9047+{
9048+ DiMustWriteLock(dentry);
9049+ au_di(dentry)->di_bdiropq = bindex;
9050+}
9051+
9052+/* ---------------------------------------------------------------------- */
9053+
9054+#ifdef CONFIG_AUFS_HNOTIFY
9055+static inline void au_digen_dec(struct dentry *d)
9056+{
9057+ atomic_dec(&au_di(d)->di_generation);
9058+}
9059+
9060+static inline void au_hn_di_reinit(struct dentry *dentry)
9061+{
9062+ dentry->d_fsdata = NULL;
9063+}
9064+#else
9065+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
9066+#endif /* CONFIG_AUFS_HNOTIFY */
9067+
9068+#endif /* __KERNEL__ */
9069+#endif /* __AUFS_DENTRY_H__ */
9070diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
9071--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
9072+++ linux/fs/aufs/dinfo.c 2016-10-09 16:55:38.889431135 +0200
9073@@ -0,0 +1,553 @@
9074+/*
9075+ * Copyright (C) 2005-2016 Junjiro R. Okajima
9076+ *
9077+ * This program, aufs is free software; you can redistribute it and/or modify
9078+ * it under the terms of the GNU General Public License as published by
9079+ * the Free Software Foundation; either version 2 of the License, or
9080+ * (at your option) any later version.
9081+ *
9082+ * This program is distributed in the hope that it will be useful,
9083+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9084+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9085+ * GNU General Public License for more details.
9086+ *
9087+ * You should have received a copy of the GNU General Public License
9088+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
9089+ */
9090+
9091+/*
9092+ * dentry private data
9093+ */
9094+
9095+#include "aufs.h"
9096+
9097+void au_di_init_once(void *_dinfo)
9098+{
9099+ struct au_dinfo *dinfo = _dinfo;
9100+
9101+ au_rw_init(&dinfo->di_rwsem);
9102+}
9103+
9104+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
9105+{
9106+ struct au_dinfo *dinfo;
9107+ int nbr, i;
9108+
9109+ dinfo = au_cache_alloc_dinfo();
9110+ if (unlikely(!dinfo))
9111+ goto out;
9112+
9113+ nbr = au_sbbot(sb) + 1;
9114+ if (nbr <= 0)
9115+ nbr = 1;
9116+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
9117+ if (dinfo->di_hdentry) {
9118+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
9119+ dinfo->di_btop = -1;
9120+ dinfo->di_bbot = -1;
9121+ dinfo->di_bwh = -1;
9122+ dinfo->di_bdiropq = -1;
9123+ dinfo->di_tmpfile = 0;
9124+ for (i = 0; i < nbr; i++)
9125+ dinfo->di_hdentry[i].hd_id = -1;
9126+ goto out;
9127+ }
9128+
9129+ au_cache_dfree_dinfo(dinfo);
9130+ dinfo = NULL;
9131+
9132+out:
9133+ return dinfo;
9134+}
9135+
9136+void au_di_free(struct au_dinfo *dinfo)
9137+{
9138+ struct au_hdentry *p;
9139+ aufs_bindex_t bbot, bindex;
9140+
9141+ /* dentry may not be revalidated; put the lower dentries only if btop is set */
9142+ bindex = dinfo->di_btop;
9143+ if (bindex >= 0) {
9144+ bbot = dinfo->di_bbot;
9145+ p = au_hdentry(dinfo, bindex);
9146+ while (bindex++ <= bbot) /* visits di_btop..di_bbot inclusive */
9147+ au_hdput(p++);
9148+ }
9149+ au_delayed_kfree(dinfo->di_hdentry); /* the hdentry array itself */
9150+ au_cache_dfree_dinfo(dinfo); /* then the dinfo object */
9151+}
9152+
9153+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
9154+{
9155+ struct au_hdentry *p;
9156+ aufs_bindex_t bi;
9157+
9158+ AuRwMustWriteLock(&a->di_rwsem);
9159+ AuRwMustWriteLock(&b->di_rwsem);
9160+
9161+#define DiSwap(v, name) \
9162+ do { \
9163+ v = a->di_##name; \
9164+ a->di_##name = b->di_##name; \
9165+ b->di_##name = v; \
9166+ } while (0)
9167+
9168+ DiSwap(p, hdentry);
9169+ DiSwap(bi, btop);
9170+ DiSwap(bi, bbot);
9171+ DiSwap(bi, bwh);
9172+ DiSwap(bi, bdiropq);
9173+ /* smp_mb(); */
9174+
9175+#undef DiSwap
9176+}
9177+
9178+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
9179+{
9180+ AuRwMustWriteLock(&dst->di_rwsem);
9181+ AuRwMustWriteLock(&src->di_rwsem);
9182+
9183+ dst->di_btop = src->di_btop;
9184+ dst->di_bbot = src->di_bbot;
9185+ dst->di_bwh = src->di_bwh;
9186+ dst->di_bdiropq = src->di_bdiropq;
9187+ /* smp_mb(); */
9188+}
9189+
9190+int au_di_init(struct dentry *dentry)
9191+{
9192+ int err;
9193+ struct super_block *sb;
9194+ struct au_dinfo *dinfo;
9195+
9196+ err = 0;
9197+ sb = dentry->d_sb;
9198+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
9199+ if (dinfo) {
9200+ atomic_set(&dinfo->di_generation, au_sigen(sb));
9201+ /* smp_mb(); */ /* atomic_set */
9202+ dentry->d_fsdata = dinfo;
9203+ } else
9204+ err = -ENOMEM;
9205+
9206+ return err;
9207+}
9208+
9209+void au_di_fin(struct dentry *dentry)
9210+{
9211+ struct au_dinfo *dinfo;
9212+
9213+ dinfo = au_di(dentry);
9214+ AuRwDestroy(&dinfo->di_rwsem);
9215+ au_di_free(dinfo);
9216+}
9217+
9218+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink)
9219+{
9220+ int err, sz;
9221+ struct au_hdentry *hdp;
9222+
9223+ AuRwMustWriteLock(&dinfo->di_rwsem);
9224+
9225+ err = -ENOMEM;
9226+ sz = sizeof(*hdp) * (dinfo->di_bbot + 1);
9227+ if (!sz)
9228+ sz = sizeof(*hdp);
9229+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS,
9230+ may_shrink);
9231+ if (hdp) {
9232+ dinfo->di_hdentry = hdp;
9233+ err = 0;
9234+ }
9235+
9236+ return err;
9237+}
9238+
9239+/* ---------------------------------------------------------------------- */
9240+
9241+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
9242+{
9243+ switch (lsc) {
9244+ case AuLsc_DI_CHILD:
9245+ ii_write_lock_child(inode);
9246+ break;
9247+ case AuLsc_DI_CHILD2:
9248+ ii_write_lock_child2(inode);
9249+ break;
9250+ case AuLsc_DI_CHILD3:
9251+ ii_write_lock_child3(inode);
9252+ break;
9253+ case AuLsc_DI_PARENT:
9254+ ii_write_lock_parent(inode);
9255+ break;
9256+ case AuLsc_DI_PARENT2:
9257+ ii_write_lock_parent2(inode);
9258+ break;
9259+ case AuLsc_DI_PARENT3:
9260+ ii_write_lock_parent3(inode);
9261+ break;
9262+ default:
9263+ BUG();
9264+ }
9265+}
9266+
9267+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
9268+{
9269+ switch (lsc) {
9270+ case AuLsc_DI_CHILD:
9271+ ii_read_lock_child(inode);
9272+ break;
9273+ case AuLsc_DI_CHILD2:
9274+ ii_read_lock_child2(inode);
9275+ break;
9276+ case AuLsc_DI_CHILD3:
9277+ ii_read_lock_child3(inode);
9278+ break;
9279+ case AuLsc_DI_PARENT:
9280+ ii_read_lock_parent(inode);
9281+ break;
9282+ case AuLsc_DI_PARENT2:
9283+ ii_read_lock_parent2(inode);
9284+ break;
9285+ case AuLsc_DI_PARENT3:
9286+ ii_read_lock_parent3(inode);
9287+ break;
9288+ default:
9289+ BUG();
9290+ }
9291+}
9292+
9293+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
9294+{
9295+ struct inode *inode;
9296+
9297+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
9298+ if (d_really_is_positive(d)) {
9299+ inode = d_inode(d);
9300+ if (au_ftest_lock(flags, IW))
9301+ do_ii_write_lock(inode, lsc);
9302+ else if (au_ftest_lock(flags, IR))
9303+ do_ii_read_lock(inode, lsc);
9304+ }
9305+}
9306+
9307+void di_read_unlock(struct dentry *d, int flags)
9308+{
9309+ struct inode *inode;
9310+
9311+ if (d_really_is_positive(d)) {
9312+ inode = d_inode(d);
9313+ if (au_ftest_lock(flags, IW)) {
9314+ au_dbg_verify_dinode(d);
9315+ ii_write_unlock(inode);
9316+ } else if (au_ftest_lock(flags, IR)) {
9317+ au_dbg_verify_dinode(d);
9318+ ii_read_unlock(inode);
9319+ }
9320+ }
9321+ au_rw_read_unlock(&au_di(d)->di_rwsem);
9322+}
9323+
9324+void di_downgrade_lock(struct dentry *d, int flags)
9325+{
9326+ if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
9327+ ii_downgrade_lock(d_inode(d));
9328+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
9329+}
9330+
9331+void di_write_lock(struct dentry *d, unsigned int lsc)
9332+{
9333+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
9334+ if (d_really_is_positive(d))
9335+ do_ii_write_lock(d_inode(d), lsc);
9336+}
9337+
9338+void di_write_unlock(struct dentry *d)
9339+{
9340+ au_dbg_verify_dinode(d);
9341+ if (d_really_is_positive(d))
9342+ ii_write_unlock(d_inode(d));
9343+ au_rw_write_unlock(&au_di(d)->di_rwsem);
9344+}
9345+
9346+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
9347+{
9348+ AuDebugOn(d1 == d2
9349+ || d_inode(d1) == d_inode(d2)
9350+ || d1->d_sb != d2->d_sb);
9351+
9352+ if (isdir && au_test_subdir(d1, d2)) {
9353+ di_write_lock_child(d1);
9354+ di_write_lock_child2(d2);
9355+ } else {
9356+ /* there should be no races */
9357+ di_write_lock_child(d2);
9358+ di_write_lock_child2(d1);
9359+ }
9360+}
9361+
9362+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
9363+{
9364+ AuDebugOn(d1 == d2
9365+ || d_inode(d1) == d_inode(d2)
9366+ || d1->d_sb != d2->d_sb);
9367+
9368+ if (isdir && au_test_subdir(d1, d2)) {
9369+ di_write_lock_parent(d1);
9370+ di_write_lock_parent2(d2);
9371+ } else {
9372+ /* there should be no races */
9373+ di_write_lock_parent(d2);
9374+ di_write_lock_parent2(d1);
9375+ }
9376+}
9377+
9378+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
9379+{
9380+ di_write_unlock(d1);
9381+ if (d_inode(d1) == d_inode(d2))
9382+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
9383+ else
9384+ di_write_unlock(d2);
9385+}
9386+
9387+/* ---------------------------------------------------------------------- */
9388+
9389+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
9390+{
9391+ struct dentry *d;
9392+
9393+ DiMustAnyLock(dentry);
9394+
9395+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
9396+ return NULL;
9397+ AuDebugOn(bindex < 0);
9398+ d = au_hdentry(au_di(dentry), bindex)->hd_dentry;
9399+ AuDebugOn(d && au_dcount(d) <= 0);
9400+ return d;
9401+}
9402+
9403+/*
9404+ * extended version of au_h_dptr().
9405+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
9406+ * error.
9407+ */
9408+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
9409+{
9410+ struct dentry *h_dentry;
9411+ struct inode *inode, *h_inode;
9412+
9413+ AuDebugOn(d_really_is_negative(dentry));
9414+
9415+ h_dentry = NULL;
9416+ if (au_dbtop(dentry) <= bindex
9417+ && bindex <= au_dbbot(dentry))
9418+ h_dentry = au_h_dptr(dentry, bindex);
9419+ if (h_dentry && !au_d_linkable(h_dentry)) {
9420+ dget(h_dentry);
9421+ goto out; /* success */
9422+ }
9423+
9424+ inode = d_inode(dentry);
9425+ AuDebugOn(bindex < au_ibtop(inode));
9426+ AuDebugOn(au_ibbot(inode) < bindex);
9427+ h_inode = au_h_iptr(inode, bindex);
9428+ h_dentry = d_find_alias(h_inode);
9429+ if (h_dentry) {
9430+ if (!IS_ERR(h_dentry)) {
9431+ if (!au_d_linkable(h_dentry))
9432+ goto out; /* success */
9433+ dput(h_dentry);
9434+ } else
9435+ goto out;
9436+ }
9437+
9438+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
9439+ h_dentry = au_plink_lkup(inode, bindex);
9440+ AuDebugOn(!h_dentry);
9441+ if (!IS_ERR(h_dentry)) {
9442+ if (!au_d_hashed_positive(h_dentry))
9443+ goto out; /* success */
9444+ dput(h_dentry);
9445+ h_dentry = NULL;
9446+ }
9447+ }
9448+
9449+out:
9450+ AuDbgDentry(h_dentry);
9451+ return h_dentry;
9452+}
9453+
9454+aufs_bindex_t au_dbtail(struct dentry *dentry)
9455+{
9456+ aufs_bindex_t bbot, bwh;
9457+
9458+ bbot = au_dbbot(dentry);
9459+ if (0 <= bbot) { /* di_bwh is consulted only when di_bbot is set */
9460+ bwh = au_dbwh(dentry);
9461+ if (!bwh) /* di_bwh == 0: the result is 0 too */
9462+ return bwh;
9463+ if (0 < bwh && bwh < bbot) /* di_bwh strictly inside (0, bbot) */
9464+ return bwh - 1;
9465+ }
9466+ return bbot; /* otherwise di_bbot as is, possibly negative */
9467+}
9468+
9469+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
9470+{
9471+ aufs_bindex_t bbot, bopq;
9472+
9473+ bbot = au_dbtail(dentry); /* start from the au_dbtail() result */
9474+ if (0 <= bbot) {
9475+ bopq = au_dbdiropq(dentry);
9476+ if (0 <= bopq && bopq < bbot) /* a set di_bdiropq below it wins */
9477+ bbot = bopq;
9478+ }
9479+ return bbot; /* min of the tail and a valid di_bdiropq, or negative */
9480+}
9481+
9482+/* ---------------------------------------------------------------------- */
9483+
9484+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9485+ struct dentry *h_dentry)
9486+{
9487+ struct au_dinfo *dinfo;
9488+ struct au_hdentry *hd;
9489+ struct au_branch *br;
9490+
9491+ DiMustWriteLock(dentry);
9492+
9493+ dinfo = au_di(dentry);
9494+ hd = au_hdentry(dinfo, bindex);
9495+ au_hdput(hd);
9496+ hd->hd_dentry = h_dentry;
9497+ if (h_dentry) {
9498+ br = au_sbr(dentry->d_sb, bindex);
9499+ hd->hd_id = br->br_id;
9500+ }
9501+}
9502+
9503+int au_dbrange_test(struct dentry *dentry)
9504+{
9505+ int err;
9506+ aufs_bindex_t btop, bbot;
9507+
9508+ err = 0;
9509+ btop = au_dbtop(dentry);
9510+ bbot = au_dbbot(dentry);
9511+ if (btop >= 0)
9512+ AuDebugOn(bbot < 0 && btop > bbot);
9513+ else {
9514+ err = -EIO;
9515+ AuDebugOn(bbot >= 0);
9516+ }
9517+
9518+ return err;
9519+}
9520+
9521+int au_digen_test(struct dentry *dentry, unsigned int sigen)
9522+{
9523+ int err;
9524+
9525+ err = 0;
9526+ if (unlikely(au_digen(dentry) != sigen
9527+ || au_iigen_test(d_inode(dentry), sigen)))
9528+ err = -EIO;
9529+
9530+ return err;
9531+}
9532+
9533+void au_update_digen(struct dentry *dentry)
9534+{
9535+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
9536+ /* smp_mb(); */ /* atomic_set */
9537+}
9538+
9539+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
9540+{
9541+ struct au_dinfo *dinfo;
9542+ struct dentry *h_d;
9543+ struct au_hdentry *hdp;
9544+ aufs_bindex_t bindex, bbot;
9545+
9546+ DiMustWriteLock(dentry);
9547+
9548+ dinfo = au_di(dentry);
9549+ if (!dinfo || dinfo->di_btop < 0)
9550+ return;
9551+
9552+ if (do_put_zero) {
9553+ bbot = dinfo->di_bbot;
9554+ bindex = dinfo->di_btop;
9555+ hdp = au_hdentry(dinfo, bindex);
9556+ for (; bindex <= bbot; bindex++, hdp++) {
9557+ h_d = hdp->hd_dentry;
9558+ if (h_d && d_is_negative(h_d))
9559+ au_set_h_dptr(dentry, bindex, NULL);
9560+ }
9561+ }
9562+
9563+ dinfo->di_btop = 0;
9564+ hdp = au_hdentry(dinfo, dinfo->di_btop);
9565+ for (; dinfo->di_btop <= dinfo->di_bbot; dinfo->di_btop++, hdp++)
9566+ if (hdp->hd_dentry)
9567+ break;
9568+ if (dinfo->di_btop > dinfo->di_bbot) {
9569+ dinfo->di_btop = -1;
9570+ dinfo->di_bbot = -1;
9571+ return;
9572+ }
9573+
9574+ hdp = au_hdentry(dinfo, dinfo->di_bbot);
9575+ for (; dinfo->di_bbot >= 0; dinfo->di_bbot--, hdp--)
9576+ if (hdp->hd_dentry)
9577+ break;
9578+ AuDebugOn(dinfo->di_btop > dinfo->di_bbot || dinfo->di_bbot < 0);
9579+}
9580+
9581+void au_update_dbtop(struct dentry *dentry)
9582+{
9583+ aufs_bindex_t bindex, bbot;
9584+ struct dentry *h_dentry;
9585+
9586+ bbot = au_dbbot(dentry);
9587+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
9588+ h_dentry = au_h_dptr(dentry, bindex);
9589+ if (!h_dentry)
9590+ continue;
9591+ if (d_is_positive(h_dentry)) {
9592+ au_set_dbtop(dentry, bindex);
9593+ return;
9594+ }
9595+ au_set_h_dptr(dentry, bindex, NULL);
9596+ }
9597+}
9598+
9599+void au_update_dbbot(struct dentry *dentry)
9600+{
9601+ aufs_bindex_t bindex, btop;
9602+ struct dentry *h_dentry;
9603+
9604+ btop = au_dbtop(dentry);
9605+ for (bindex = au_dbbot(dentry); bindex >= btop; bindex--) {
9606+ h_dentry = au_h_dptr(dentry, bindex);
9607+ if (!h_dentry)
9608+ continue;
9609+ if (d_is_positive(h_dentry)) {
9610+ au_set_dbbot(dentry, bindex);
9611+ return;
9612+ }
9613+ au_set_h_dptr(dentry, bindex, NULL);
9614+ }
9615+}
9616+
9617+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
9618+{
9619+ aufs_bindex_t bindex, bbot;
9620+
9621+ bbot = au_dbbot(dentry);
9622+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++)
9623+ if (au_h_dptr(dentry, bindex) == h_dentry)
9624+ return bindex;
9625+ return -1;
9626+}
9627diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
9628--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
9629+++ linux/fs/aufs/dir.c 2016-10-09 16:55:36.489368218 +0200
9630@@ -0,0 +1,762 @@
9631+/*
9632+ * Copyright (C) 2005-2016 Junjiro R. Okajima
9633+ *
9634+ * This program, aufs is free software; you can redistribute it and/or modify
9635+ * it under the terms of the GNU General Public License as published by
9636+ * the Free Software Foundation; either version 2 of the License, or
9637+ * (at your option) any later version.
9638+ *
9639+ * This program is distributed in the hope that it will be useful,
9640+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9641+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9642+ * GNU General Public License for more details.
9643+ *
9644+ * You should have received a copy of the GNU General Public License
9645+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
9646+ */
9647+
9648+/*
9649+ * directory operations
9650+ */
9651+
9652+#include <linux/fs_stack.h>
9653+#include "aufs.h"
9654+
9655+void au_add_nlink(struct inode *dir, struct inode *h_dir)
9656+{
9657+ unsigned int nlink;
9658+
9659+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9660+
9661+ nlink = dir->i_nlink;
9662+ nlink += h_dir->i_nlink - 2;
9663+ if (h_dir->i_nlink < 2)
9664+ nlink += 2;
9665+ smp_mb(); /* for i_nlink */
9666+ /* 0 can happen in revaliding */
9667+ set_nlink(dir, nlink);
9668+}
9669+
9670+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
9671+{
9672+ unsigned int nlink;
9673+
9674+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9675+
9676+ nlink = dir->i_nlink;
9677+ nlink -= h_dir->i_nlink - 2;
9678+ if (h_dir->i_nlink < 2)
9679+ nlink -= 2;
9680+ smp_mb(); /* for i_nlink */
9681+ /* nlink == 0 means the branch-fs is broken */
9682+ set_nlink(dir, nlink);
9683+}
9684+
9685+loff_t au_dir_size(struct file *file, struct dentry *dentry)
9686+{
9687+ loff_t sz;
9688+ aufs_bindex_t bindex, bbot;
9689+ struct file *h_file;
9690+ struct dentry *h_dentry;
9691+
9692+ sz = 0;
9693+ if (file) {
9694+ AuDebugOn(!d_is_dir(file->f_path.dentry));
9695+
9696+ bbot = au_fbbot_dir(file);
9697+ for (bindex = au_fbtop(file);
9698+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
9699+ bindex++) {
9700+ h_file = au_hf_dir(file, bindex);
9701+ if (h_file && file_inode(h_file))
9702+ sz += vfsub_f_size_read(h_file);
9703+ }
9704+ } else {
9705+ AuDebugOn(!dentry);
9706+ AuDebugOn(!d_is_dir(dentry));
9707+
9708+ bbot = au_dbtaildir(dentry);
9709+ for (bindex = au_dbtop(dentry);
9710+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
9711+ bindex++) {
9712+ h_dentry = au_h_dptr(dentry, bindex);
9713+ if (h_dentry && d_is_positive(h_dentry))
9714+ sz += i_size_read(d_inode(h_dentry));
9715+ }
9716+ }
9717+ if (sz < KMALLOC_MAX_SIZE)
9718+ sz = roundup_pow_of_two(sz);
9719+ if (sz > KMALLOC_MAX_SIZE)
9720+ sz = KMALLOC_MAX_SIZE;
9721+ else if (sz < NAME_MAX) {
9722+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
9723+ sz = AUFS_RDBLK_DEF;
9724+ }
9725+ return sz;
9726+}
9727+
/* argument handed from au_dir_ts() to the deferred au_do_dir_ts() */
struct au_dir_ts_arg {
	/* dget-ed by au_dir_ts(), dput-ted by au_do_dir_ts() */
	struct dentry *dentry;
	/* branch id (au_sbr_id()), converted back by au_br_index() */
	aufs_bindex_t brid;
};
9732+
/*
 * The deferred part of au_dir_ts(), handed to au_wkq_nowait().
 * @arg is a struct au_dir_ts_arg.  In every path this function drops the
 * dentry reference, calls au_nwt_done() and frees @arg.
 *
 * The times of the lower dir on the branch a->brid are stored by
 * au_dtime_store().  When the top branch is writable, and its lower dir is
 * still linked and has an older mtime than the stored one, the stored times
 * are applied to it by au_dtime_revert().
 */
static void au_do_dir_ts(void *arg)
{
	struct au_dir_ts_arg *a = arg;
	struct au_dtime dt;
	struct path h_path;
	struct inode *dir, *h_dir;
	struct super_block *sb;
	struct au_branch *br;
	struct au_hinode *hdir;
	int err;
	aufs_bindex_t btop, bindex;

	sb = a->dentry->d_sb;
	if (d_really_is_negative(a->dentry))
		goto out;
	/* no dir->i_mutex lock */
	aufs_read_lock(a->dentry, AuLock_DW); /* noflush */

	dir = d_inode(a->dentry);
	btop = au_ibtop(dir);
	/* the branch id is converted back to an index at this point */
	bindex = au_br_index(sb, a->brid);
	if (bindex < btop)
		goto out_unlock;

	/* step 1: remember the times on the source branch */
	br = au_sbr(sb, bindex);
	h_path.dentry = au_h_dptr(a->dentry, bindex);
	if (!h_path.dentry)
		goto out_unlock;
	h_path.mnt = au_br_mnt(br);
	au_dtime_store(&dt, a->dentry, &h_path);

	/* step 2: apply them to the top branch, if it is writable */
	br = au_sbr(sb, btop);
	if (!au_br_writable(br->br_perm))
		goto out_unlock;
	h_path.dentry = au_h_dptr(a->dentry, btop);
	h_path.mnt = au_br_mnt(br);
	err = vfsub_mnt_want_write(h_path.mnt);
	if (err)
		goto out_unlock;
	hdir = au_hi(dir, btop);
	au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
	h_dir = au_h_iptr(dir, btop);
	if (h_dir->i_nlink
	    && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
		/* retarget the stored times to the dir on the top branch */
		dt.dt_h_path = h_path;
		au_dtime_revert(&dt);
	}
	au_hn_inode_unlock(hdir);
	vfsub_mnt_drop_write(h_path.mnt);
	au_cpup_attr_timesizes(dir);

out_unlock:
	aufs_read_unlock(a->dentry, AuLock_DW);
out:
	dput(a->dentry);
	au_nwt_done(&au_sbi(sb)->si_nowait);
	au_delayed_kfree(arg);
}
9791+
/*
 * Update the times of the aufs directory @dir after a change on the branch
 * @bindex.
 * When @bindex is the top branch of @dir, au_cpup_attr_timesizes() is called
 * directly.  Otherwise, and only when the top branch is writable, the work
 * is handed to au_do_dir_ts() via au_wkq_nowait().
 * Failures (allocation, workqueue) are not reported to the caller.
 * IMustLock() is asserted on @dir.
 */
void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
{
	int perm, wkq_err;
	aufs_bindex_t btop;
	struct au_dir_ts_arg *arg;
	struct dentry *dentry;
	struct super_block *sb;

	IMustLock(dir);

	/* this reference is dropped at "out" */
	dentry = d_find_any_alias(dir);
	AuDebugOn(!dentry);
	sb = dentry->d_sb;
	btop = au_ibtop(dir);
	if (btop == bindex) {
		au_cpup_attr_timesizes(dir);
		goto out;
	}

	perm = au_sbr_perm(sb, btop);
	if (!au_br_writable(perm))
		goto out;

	arg = kmalloc(sizeof(*arg), GFP_NOFS);
	if (!arg)
		goto out;

	arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
	/* pass the branch id instead of the index */
	arg->brid = au_sbr_id(sb, bindex);
	wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
	if (unlikely(wkq_err)) {
		pr_err("wkq %d\n", wkq_err);
		/* au_do_dir_ts() will not run, undo the dget() above */
		dput(dentry);
		au_delayed_kfree(arg);
	}

out:
	dput(dentry);
}
9831+
9832+/* ---------------------------------------------------------------------- */
9833+
9834+static int reopen_dir(struct file *file)
9835+{
9836+ int err;
9837+ unsigned int flags;
9838+ aufs_bindex_t bindex, btail, btop;
9839+ struct dentry *dentry, *h_dentry;
9840+ struct file *h_file;
9841+
9842+ /* open all lower dirs */
9843+ dentry = file->f_path.dentry;
9844+ btop = au_dbtop(dentry);
9845+ for (bindex = au_fbtop(file); bindex < btop; bindex++)
9846+ au_set_h_fptr(file, bindex, NULL);
9847+ au_set_fbtop(file, btop);
9848+
9849+ btail = au_dbtaildir(dentry);
9850+ for (bindex = au_fbbot_dir(file); btail < bindex; bindex--)
9851+ au_set_h_fptr(file, bindex, NULL);
9852+ au_set_fbbot_dir(file, btail);
9853+
9854+ flags = vfsub_file_flags(file);
9855+ for (bindex = btop; bindex <= btail; bindex++) {
9856+ h_dentry = au_h_dptr(dentry, bindex);
9857+ if (!h_dentry)
9858+ continue;
9859+ h_file = au_hf_dir(file, bindex);
9860+ if (h_file)
9861+ continue;
9862+
9863+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
9864+ err = PTR_ERR(h_file);
9865+ if (IS_ERR(h_file))
9866+ goto out; /* close all? */
9867+ au_set_h_fptr(file, bindex, h_file);
9868+ }
9869+ au_update_figen(file);
9870+ /* todo: necessary? */
9871+ /* file->f_ra = h_file->f_ra; */
9872+ err = 0;
9873+
9874+out:
9875+ return err;
9876+}
9877+
9878+static int do_open_dir(struct file *file, int flags, struct file *h_file)
9879+{
9880+ int err;
9881+ aufs_bindex_t bindex, btail;
9882+ struct dentry *dentry, *h_dentry;
9883+ struct vfsmount *mnt;
9884+
9885+ FiMustWriteLock(file);
9886+ AuDebugOn(h_file);
9887+
9888+ err = 0;
9889+ mnt = file->f_path.mnt;
9890+ dentry = file->f_path.dentry;
9891+ file->f_version = d_inode(dentry)->i_version;
9892+ bindex = au_dbtop(dentry);
9893+ au_set_fbtop(file, bindex);
9894+ btail = au_dbtaildir(dentry);
9895+ au_set_fbbot_dir(file, btail);
9896+ for (; !err && bindex <= btail; bindex++) {
9897+ h_dentry = au_h_dptr(dentry, bindex);
9898+ if (!h_dentry)
9899+ continue;
9900+
9901+ err = vfsub_test_mntns(mnt, h_dentry->d_sb);
9902+ if (unlikely(err))
9903+ break;
9904+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
9905+ if (IS_ERR(h_file)) {
9906+ err = PTR_ERR(h_file);
9907+ break;
9908+ }
9909+ au_set_h_fptr(file, bindex, h_file);
9910+ }
9911+ au_update_figen(file);
9912+ /* todo: necessary? */
9913+ /* file->f_ra = h_file->f_ra; */
9914+ if (!err)
9915+ return 0; /* success */
9916+
9917+ /* close all */
9918+ for (bindex = au_fbtop(file); bindex <= btail; bindex++)
9919+ au_set_h_fptr(file, bindex, NULL);
9920+ au_set_fbtop(file, -1);
9921+ au_set_fbbot_dir(file, -1);
9922+
9923+ return err;
9924+}
9925+
9926+static int aufs_open_dir(struct inode *inode __maybe_unused,
9927+ struct file *file)
9928+{
9929+ int err;
9930+ struct super_block *sb;
9931+ struct au_fidir *fidir;
9932+
9933+ err = -ENOMEM;
9934+ sb = file->f_path.dentry->d_sb;
9935+ si_read_lock(sb, AuLock_FLUSH);
9936+ fidir = au_fidir_alloc(sb);
9937+ if (fidir) {
9938+ struct au_do_open_args args = {
9939+ .open = do_open_dir,
9940+ .fidir = fidir
9941+ };
9942+ err = au_do_open(file, &args);
9943+ if (unlikely(err))
9944+ au_delayed_kfree(fidir);
9945+ }
9946+ si_read_unlock(sb);
9947+ return err;
9948+}
9949+
/*
 * ->release for an aufs directory.
 * Removes the file from the si_files list of the superblock, frees the
 * cached vdir, puts all lower files, frees the au_fidir and finalizes the
 * finfo.  Always returns 0.
 */
static int aufs_release_dir(struct inode *inode __maybe_unused,
			    struct file *file)
{
	struct au_vdir *vdir_cache;
	struct au_finfo *finfo;
	struct au_fidir *fidir;
	struct au_hfile *hf;
	aufs_bindex_t bindex, bbot;
	int execed, delayed;

	/* set when running as a kernel thread or in interrupt context */
	delayed = (current->flags & PF_KTHREAD) || in_interrupt();
	finfo = au_fi(file);
	fidir = finfo->fi_hdir;
	if (fidir) {
		au_sphl_del(&finfo->fi_hlist,
			    &au_sbi(file->f_path.dentry->d_sb)->si_files);
		vdir_cache = fidir->fd_vdir_cache; /* lock-free */
		if (vdir_cache)
			au_vdir_free(vdir_cache, delayed);

		bindex = finfo->fi_btop;
		if (bindex >= 0) {
			execed = vfsub_file_execed(file);
			hf = fidir->fd_hfile + bindex;
			/*
			 * calls fput() instead of filp_close(),
			 * since no dnotify or lock for the lower file.
			 */
			bbot = fidir->fd_bbot;
			for (; bindex <= bbot; bindex++, hf++)
				if (hf->hf_file)
					au_hfput(hf, execed);
		}
		au_delayed_kfree(fidir);
		finfo->fi_hdir = NULL;
	}
	au_finfo_fin(file, delayed);
	return 0;
}
9989+
9990+/* ---------------------------------------------------------------------- */
9991+
9992+static int au_do_flush_dir(struct file *file, fl_owner_t id)
9993+{
9994+ int err;
9995+ aufs_bindex_t bindex, bbot;
9996+ struct file *h_file;
9997+
9998+ err = 0;
9999+ bbot = au_fbbot_dir(file);
10000+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
10001+ h_file = au_hf_dir(file, bindex);
10002+ if (h_file)
10003+ err = vfsub_flush(h_file, id);
10004+ }
10005+ return err;
10006+}
10007+
/* ->flush for an aufs directory, au_do_flush() with au_do_flush_dir() */
static int aufs_flush_dir(struct file *file, fl_owner_t id)
{
	return au_do_flush(file, id, au_do_flush_dir);
}
10012+
10013+/* ---------------------------------------------------------------------- */
10014+
10015+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
10016+{
10017+ int err;
10018+ aufs_bindex_t bbot, bindex;
10019+ struct inode *inode;
10020+ struct super_block *sb;
10021+
10022+ err = 0;
10023+ sb = dentry->d_sb;
10024+ inode = d_inode(dentry);
10025+ IMustLock(inode);
10026+ bbot = au_dbbot(dentry);
10027+ for (bindex = au_dbtop(dentry); !err && bindex <= bbot; bindex++) {
10028+ struct path h_path;
10029+
10030+ if (au_test_ro(sb, bindex, inode))
10031+ continue;
10032+ h_path.dentry = au_h_dptr(dentry, bindex);
10033+ if (!h_path.dentry)
10034+ continue;
10035+
10036+ h_path.mnt = au_sbr_mnt(sb, bindex);
10037+ err = vfsub_fsync(NULL, &h_path, datasync);
10038+ }
10039+
10040+ return err;
10041+}
10042+
10043+static int au_do_fsync_dir(struct file *file, int datasync)
10044+{
10045+ int err;
10046+ aufs_bindex_t bbot, bindex;
10047+ struct file *h_file;
10048+ struct super_block *sb;
10049+ struct inode *inode;
10050+
10051+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
10052+ if (unlikely(err))
10053+ goto out;
10054+
10055+ inode = file_inode(file);
10056+ sb = inode->i_sb;
10057+ bbot = au_fbbot_dir(file);
10058+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
10059+ h_file = au_hf_dir(file, bindex);
10060+ if (!h_file || au_test_ro(sb, bindex, inode))
10061+ continue;
10062+
10063+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
10064+ }
10065+
10066+out:
10067+ return err;
10068+}
10069+
/*
 * ->fsync for an aufs directory.
 * Syncs the lower directories and then refreshes the times and sizes of the
 * aufs inode by au_cpup_attr_timesizes().  @start and @end are not used.
 *
 * NOTE(review): this comment used to say "@file may be NULL", but @file is
 * dereferenced unconditionally in order to get the dentry, so a NULL @file
 * never reaches the "else" branch below.  Confirm, and drop either the NULL
 * handling or make the dereference conditional.
 */
static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	int err;
	struct dentry *dentry;
	struct inode *inode;
	struct super_block *sb;

	err = 0;
	dentry = file->f_path.dentry;
	inode = d_inode(dentry);
	inode_lock(inode);
	sb = dentry->d_sb;
	si_noflush_read_lock(sb);
	if (file)
		err = au_do_fsync_dir(file, datasync);
	else {
		di_write_lock_child(dentry);
		err = au_do_fsync_dir_no_file(dentry, datasync);
	}
	au_cpup_attr_timesizes(inode);
	/*
	 * NOTE(review): these unlocks run even when au_do_fsync_dir() failed
	 * in au_reval_and_lock_fdi().  aufs_iterate_shared() skips both
	 * unlocks after the same failure, which suggests the locks are not
	 * held then -- verify against au_reval_and_lock_fdi().
	 */
	di_write_unlock(dentry);
	if (file)
		fi_write_unlock(file);

	si_read_unlock(sb);
	inode_unlock(inode);
	return err;
}
10102+
10103+/* ---------------------------------------------------------------------- */
10104+
/*
 * ->iterate_shared for an aufs directory.
 * Revalidates @file, makes sure the virtual dir is built by au_vdir_init()
 * and emits its entries to @ctx by au_vdir_fill_de().
 * IMustLock() is asserted on the inode.
 * Returns 0 or a negative error.
 */
static int aufs_iterate_shared(struct file *file, struct dir_context *ctx)
{
	int err;
	struct dentry *dentry;
	struct inode *inode, *h_inode;
	struct super_block *sb;

	AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);

	dentry = file->f_path.dentry;
	inode = d_inode(dentry);
	IMustLock(inode);

	sb = dentry->d_sb;
	si_read_lock(sb, AuLock_FLUSH);
	err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
	/* after a failure only the si lock is released, see "out" */
	if (unlikely(err))
		goto out;
	err = au_alive_dir(dentry);
	if (!err)
		err = au_vdir_init(file);
	/* the downgrade is done before the error check */
	di_downgrade_lock(dentry, AuLock_IR);
	if (unlikely(err))
		goto out_unlock;

	h_inode = au_h_iptr(inode, au_ibtop(inode));
	if (!au_test_nfsd()) {
		err = au_vdir_fill_de(file, ctx);
		fsstack_copy_attr_atime(inode, h_inode);
	} else {
		/*
		 * nfsd filldir may call lookup_one_len(), vfs_getattr(),
		 * encode_fh() and others.
		 */
		/* hold h_inode, since di and si are unlocked while filling */
		atomic_inc(&h_inode->i_count);
		di_read_unlock(dentry, AuLock_IR);
		si_read_unlock(sb);
		err = au_vdir_fill_de(file, ctx);
		fsstack_copy_attr_atime(inode, h_inode);
		fi_write_unlock(file);
		iput(h_inode);

		AuTraceErr(err);
		return err;
	}

out_unlock:
	di_read_unlock(dentry, AuLock_IR);
	fi_write_unlock(file);
out:
	si_read_unlock(sb);
	return err;
}
10158+
10159+/* ---------------------------------------------------------------------- */
10160+
/*
 * flag bits for test_empty_arg.flags and their accessors.
 * The accessors paste @name to "AuTestEmpty_", so give the bare suffix
 * (WHONLY, CALLED or SHWH).
 */
#define AuTestEmpty_WHONLY	1
#define AuTestEmpty_CALLED	(1 << 1)
#define AuTestEmpty_SHWH	(1 << 2)
#define au_ftest_testempty(flags, name)	((flags) & AuTestEmpty_##name)
#define au_fset_testempty(flags, name) \
	do { (flags) |= AuTestEmpty_##name; } while (0)
#define au_fclr_testempty(flags, name) \
	do { (flags) &= ~AuTestEmpty_##name; } while (0)

/* without CONFIG_AUFS_SHWH the SHWH bit is 0, ie. test/set/clr do nothing */
#ifndef CONFIG_AUFS_SHWH
#undef AuTestEmpty_SHWH
#define AuTestEmpty_SHWH	0
#endif
10174+
/* the context for test_empty_cb(), which gets it back via container_of() */
struct test_empty_arg {
	struct dir_context ctx;
	/* the whiteouts found so far */
	struct au_nhash *whlist;
	/* AuTestEmpty_xxx */
	unsigned int flags;
	/* the result of the last test_empty_cb() call */
	int err;
	/* the branch which is being tested */
	aufs_bindex_t bindex;
};
10182+
10183+static int test_empty_cb(struct dir_context *ctx, const char *__name,
10184+ int namelen, loff_t offset __maybe_unused, u64 ino,
10185+ unsigned int d_type)
10186+{
10187+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
10188+ ctx);
10189+ char *name = (void *)__name;
10190+
10191+ arg->err = 0;
10192+ au_fset_testempty(arg->flags, CALLED);
10193+ /* smp_mb(); */
10194+ if (name[0] == '.'
10195+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
10196+ goto out; /* success */
10197+
10198+ if (namelen <= AUFS_WH_PFX_LEN
10199+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
10200+ if (au_ftest_testempty(arg->flags, WHONLY)
10201+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
10202+ arg->err = -ENOTEMPTY;
10203+ goto out;
10204+ }
10205+
10206+ name += AUFS_WH_PFX_LEN;
10207+ namelen -= AUFS_WH_PFX_LEN;
10208+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
10209+ arg->err = au_nhash_append_wh
10210+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
10211+ au_ftest_testempty(arg->flags, SHWH));
10212+
10213+out:
10214+ /* smp_mb(); */
10215+ AuTraceErr(arg->err);
10216+ return arg->err;
10217+}
10218+
10219+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10220+{
10221+ int err;
10222+ struct file *h_file;
10223+
10224+ h_file = au_h_open(dentry, arg->bindex,
10225+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
10226+ /*file*/NULL, /*force_wr*/0);
10227+ err = PTR_ERR(h_file);
10228+ if (IS_ERR(h_file))
10229+ goto out;
10230+
10231+ err = 0;
10232+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
10233+ && !file_inode(h_file)->i_nlink)
10234+ goto out_put;
10235+
10236+ do {
10237+ arg->err = 0;
10238+ au_fclr_testempty(arg->flags, CALLED);
10239+ /* smp_mb(); */
10240+ err = vfsub_iterate_dir(h_file, &arg->ctx);
10241+ if (err >= 0)
10242+ err = arg->err;
10243+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
10244+
10245+out_put:
10246+ fput(h_file);
10247+ au_sbr_put(dentry->d_sb, arg->bindex);
10248+out:
10249+ return err;
10250+}
10251+
/* the argument pack to run do_test_empty() via call_do_test_empty() */
struct do_test_empty_args {
	/* where the return value of do_test_empty() is stored */
	int *errp;
	struct dentry *dentry;
	struct test_empty_arg *arg;
};
10257+
10258+static void call_do_test_empty(void *args)
10259+{
10260+ struct do_test_empty_args *a = args;
10261+ *a->errp = do_test_empty(a->dentry, a->arg);
10262+}
10263+
10264+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10265+{
10266+ int err, wkq_err;
10267+ struct dentry *h_dentry;
10268+ struct inode *h_inode;
10269+
10270+ h_dentry = au_h_dptr(dentry, arg->bindex);
10271+ h_inode = d_inode(h_dentry);
10272+ /* todo: i_mode changes anytime? */
10273+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
10274+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
10275+ inode_unlock(h_inode);
10276+ if (!err)
10277+ err = do_test_empty(dentry, arg);
10278+ else {
10279+ struct do_test_empty_args args = {
10280+ .errp = &err,
10281+ .dentry = dentry,
10282+ .arg = arg
10283+ };
10284+ unsigned int flags = arg->flags;
10285+
10286+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
10287+ if (unlikely(wkq_err))
10288+ err = wkq_err;
10289+ arg->flags = flags;
10290+ }
10291+
10292+ return err;
10293+}
10294+
10295+int au_test_empty_lower(struct dentry *dentry)
10296+{
10297+ int err;
10298+ unsigned int rdhash;
10299+ aufs_bindex_t bindex, btop, btail;
10300+ struct au_nhash whlist;
10301+ struct test_empty_arg arg = {
10302+ .ctx = {
10303+ .actor = test_empty_cb
10304+ }
10305+ };
10306+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
10307+
10308+ SiMustAnyLock(dentry->d_sb);
10309+
10310+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
10311+ if (!rdhash)
10312+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
10313+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
10314+ if (unlikely(err))
10315+ goto out;
10316+
10317+ arg.flags = 0;
10318+ arg.whlist = &whlist;
10319+ btop = au_dbtop(dentry);
10320+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10321+ au_fset_testempty(arg.flags, SHWH);
10322+ test_empty = do_test_empty;
10323+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
10324+ test_empty = sio_test_empty;
10325+ arg.bindex = btop;
10326+ err = test_empty(dentry, &arg);
10327+ if (unlikely(err))
10328+ goto out_whlist;
10329+
10330+ au_fset_testempty(arg.flags, WHONLY);
10331+ btail = au_dbtaildir(dentry);
10332+ for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
10333+ struct dentry *h_dentry;
10334+
10335+ h_dentry = au_h_dptr(dentry, bindex);
10336+ if (h_dentry && d_is_positive(h_dentry)) {
10337+ arg.bindex = bindex;
10338+ err = test_empty(dentry, &arg);
10339+ }
10340+ }
10341+
10342+out_whlist:
10343+ au_nhash_wh_free(&whlist);
10344+out:
10345+ return err;
10346+}
10347+
10348+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
10349+{
10350+ int err;
10351+ struct test_empty_arg arg = {
10352+ .ctx = {
10353+ .actor = test_empty_cb
10354+ }
10355+ };
10356+ aufs_bindex_t bindex, btail;
10357+
10358+ err = 0;
10359+ arg.whlist = whlist;
10360+ arg.flags = AuTestEmpty_WHONLY;
10361+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10362+ au_fset_testempty(arg.flags, SHWH);
10363+ btail = au_dbtaildir(dentry);
10364+ for (bindex = au_dbtop(dentry); !err && bindex <= btail; bindex++) {
10365+ struct dentry *h_dentry;
10366+
10367+ h_dentry = au_h_dptr(dentry, bindex);
10368+ if (h_dentry && d_is_positive(h_dentry)) {
10369+ arg.bindex = bindex;
10370+ err = sio_test_empty(dentry, &arg);
10371+ }
10372+ }
10373+
10374+ return err;
10375+}
10376+
10377+/* ---------------------------------------------------------------------- */
10378+
/*
 * file operations for an aufs directory.
 * llseek and read are the generic ones, the ioctl handlers are declared
 * elsewhere, the rest is in this file.
 */
const struct file_operations aufs_dir_fop = {
	.owner		= THIS_MODULE,
	.llseek		= default_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= aufs_iterate_shared,
	.unlocked_ioctl	= aufs_ioctl_dir,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= aufs_compat_ioctl_dir,
#endif
	.open		= aufs_open_dir,
	.release	= aufs_release_dir,
	.flush		= aufs_flush_dir,
	.fsync		= aufs_fsync_dir
};
10393diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
10394--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
10395+++ linux/fs/aufs/dir.h 2016-10-09 16:55:36.489368218 +0200
10396@@ -0,0 +1,137 @@
10397+/*
10398+ * Copyright (C) 2005-2016 Junjiro R. Okajima
10399+ *
10400+ * This program, aufs is free software; you can redistribute it and/or modify
10401+ * it under the terms of the GNU General Public License as published by
10402+ * the Free Software Foundation; either version 2 of the License, or
10403+ * (at your option) any later version.
10404+ *
10405+ * This program is distributed in the hope that it will be useful,
10406+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10407+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10408+ * GNU General Public License for more details.
10409+ *
10410+ * You should have received a copy of the GNU General Public License
10411+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
10412+ */
10413+
10414+/*
10415+ * directory operations
10416+ */
10417+
10418+#ifndef __AUFS_DIR_H__
10419+#define __AUFS_DIR_H__
10420+
10421+#ifdef __KERNEL__
10422+
10423+#include <linux/fs.h>
10424+
10425+/* ---------------------------------------------------------------------- */
10426+
10427+/* need to be faster and smaller */
10428+
/* a hash table of names, eg. the whiteout list in dir.c */
struct au_nhash {
	/* presumably the number of elements in nh_head -- TODO confirm */
	unsigned int		nh_num;
	struct hlist_head	*nh_head;
};
10433+
/* a name with its length in front, the bytes of the name follow the header */
struct au_vdir_destr {
	unsigned char	len;
	unsigned char	name[0];
} __packed;
10438+
/* a hash list element which refers to an au_vdir_destr */
struct au_vdir_dehstr {
	struct hlist_node	hash;
	/* the two members share the storage */
	union {
		struct au_vdir_destr	*str;
		struct llist_node	lnode;		/* delayed free */
	};
} ____cacheline_aligned_in_smp;
10446+
/* a directory entry: inode number, type and name */
struct au_vdir_de {
	ino_t			de_ino;
	unsigned char		de_type;
	/* caution: packed */
	struct au_vdir_destr	de_str;
} __packed;
10453+
/*
 * a whiteout entry on a hash list.
 * The inode number and the type are members only with CONFIG_AUFS_SHWH.
 */
struct au_vdir_wh {
	struct hlist_node	wh_hash;
#ifdef CONFIG_AUFS_SHWH
	ino_t			wh_ino;
	aufs_bindex_t		wh_bindex;
	unsigned char		wh_type;
#else
	aufs_bindex_t		wh_bindex;
#endif
	/* caution: packed */
	struct au_vdir_destr	wh_str;
} __packed;
10466+
/* one position, seen either as a byte pointer or as an entry pointer */
union au_vdir_deblk_p {
	unsigned char		*deblk;
	struct au_vdir_de	*de;
};
10471+
/* the virtual directory, which au_vdir_init() and au_vdir_fill_de() work on */
struct au_vdir {
	/* presumably an array of vd_nblk blocks -- TODO confirm in vdir.c */
	unsigned char	**vd_deblk;
	unsigned long	vd_nblk;
	/* NOTE(review): looks like the last position, block index and pointer */
	struct {
		unsigned long		ul;
		union au_vdir_deblk_p	p;
	} vd_last;

	unsigned long	vd_version;
	unsigned int	vd_deblk_sz;
	/* the two members share the storage */
	union {
		unsigned long		vd_jiffy;
		struct llist_node	vd_lnode;	/* delayed free */
	};
} ____cacheline_aligned_in_smp;
10487+
10488+/* ---------------------------------------------------------------------- */
10489+
/* dir.c */
extern const struct file_operations aufs_dir_fop;
/* add/subtract the link count of a lower dir to/from the aufs dir */
void au_add_nlink(struct inode *dir, struct inode *h_dir);
void au_sub_nlink(struct inode *dir, struct inode *h_dir);
/* @file may be NULL, in which case @dentry is used */
loff_t au_dir_size(struct file *file, struct dentry *dentry);
/* note: the definition in dir.c names the second parameter bindex */
void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
/* both return 0 or a negative error */
int au_test_empty_lower(struct dentry *dentry);
int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
10498+
/* vdir.c */
unsigned int au_rdhash_est(loff_t sz);
int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
void au_nhash_wh_free(struct au_nhash *whlist);
int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
			    int limit);
int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
		       unsigned int d_type, aufs_bindex_t bindex,
		       unsigned char shwh);
/*
 * NOTE(review): the parameter is named atonce, while aufs_release_dir()
 * passes a variable named delayed -- confirm the meaning in vdir.c.
 */
void au_vdir_free(struct au_vdir *vdir, int atonce);
int au_vdir_init(struct file *file);
int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
10512+
/* ioctl.c */
long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);

/*
 * without CONFIG_AUFS_RDU, au_rdu_ioctl() and au_rdu_compat_ioctl() are
 * given by AuStub() with "return -EINVAL" -- presumably a stub function,
 * see the definition of AuStub.
 */
#ifdef CONFIG_AUFS_RDU
/* rdu.c */
long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
			 unsigned long arg);
#endif
#else
AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
       unsigned int cmd, unsigned long arg)
#ifdef CONFIG_COMPAT
AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
       unsigned int cmd, unsigned long arg)
#endif
#endif
10531+
10532+#endif /* __KERNEL__ */
10533+#endif /* __AUFS_DIR_H__ */
10534diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
10535--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
10536+++ linux/fs/aufs/dynop.c 2016-10-09 16:55:36.489368218 +0200
10537@@ -0,0 +1,371 @@
10538+/*
10539+ * Copyright (C) 2010-2016 Junjiro R. Okajima
10540+ *
10541+ * This program, aufs is free software; you can redistribute it and/or modify
10542+ * it under the terms of the GNU General Public License as published by
10543+ * the Free Software Foundation; either version 2 of the License, or
10544+ * (at your option) any later version.
10545+ *
10546+ * This program is distributed in the hope that it will be useful,
10547+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10548+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10549+ * GNU General Public License for more details.
10550+ *
10551+ * You should have received a copy of the GNU General Public License
10552+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
10553+ */
10554+
10555+/*
10556+ * dynamically customizable operations for regular files
10557+ */
10558+
10559+#include "aufs.h"
10560+
/* print the lower operations pointer of @key via AuDbgSym() */
#define DyPrSym(key)	AuDbgSym(key->dk_op.dy_hop)

/*
 * How large will these lists be?
 * Usually just a few elements, 20-30 at most for each, I guess.
 */
/* one list per type, indexed by dk_op.dy_type (see dy_free()) */
static struct au_sphlhead dynop[AuDyLast];
10568+
10569+static struct au_dykey *dy_gfind_get(struct au_sphlhead *sphl, const void *h_op)
10570+{
10571+ struct au_dykey *key, *tmp;
10572+ struct hlist_head *head;
10573+
10574+ key = NULL;
10575+ head = &sphl->head;
10576+ rcu_read_lock();
10577+ hlist_for_each_entry_rcu(tmp, head, dk_hnode)
10578+ if (tmp->dk_op.dy_hop == h_op) {
10579+ key = tmp;
10580+ kref_get(&key->dk_kref);
10581+ break;
10582+ }
10583+ rcu_read_unlock();
10584+
10585+ return key;
10586+}
10587+
/*
 * Register @key in the per-branch array br->br_dykey, unless a key for the
 * same lower operations is already there.
 * Returns the existing key, or NULL when @key was stored.
 * It is a BUG when the array has neither a match nor a free slot.
 */
static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
{
	struct au_dykey **k, *found;
	const void *h_op = key->dk_op.dy_hop;
	int i;

	found = NULL;
	k = br->br_dykey;
	/* the first pass, without the lock: stop at a match or a free slot */
	for (i = 0; i < AuBrDynOp; i++)
		if (k[i]) {
			if (k[i]->dk_op.dy_hop == h_op) {
				found = k[i];
				break;
			}
		} else
			break;
	if (!found) {
		/*
		 * go on from the same index with the lock held, since the slot
		 * may have been taken in the meantime.
		 */
		spin_lock(&br->br_dykey_lock);
		for (; i < AuBrDynOp; i++)
			if (k[i]) {
				if (k[i]->dk_op.dy_hop == h_op) {
					found = k[i];
					break;
				}
			} else {
				k[i] = key;
				break;
			}
		spin_unlock(&br->br_dykey_lock);
		BUG_ON(i == AuBrDynOp); /* expand the array */
	}

	return found;
}
10622+
10623+/* kref_get() if @key is already added */
10624+static struct au_dykey *dy_gadd(struct au_sphlhead *sphl, struct au_dykey *key)
10625+{
10626+ struct au_dykey *tmp, *found;
10627+ struct hlist_head *head;
10628+ const void *h_op = key->dk_op.dy_hop;
10629+
10630+ found = NULL;
10631+ head = &sphl->head;
10632+ spin_lock(&sphl->spin);
10633+ hlist_for_each_entry(tmp, head, dk_hnode)
10634+ if (tmp->dk_op.dy_hop == h_op) {
10635+ kref_get(&tmp->dk_kref);
10636+ found = tmp;
10637+ break;
10638+ }
10639+ if (!found)
10640+ hlist_add_head_rcu(&key->dk_hnode, head);
10641+ spin_unlock(&sphl->spin);
10642+
10643+ if (!found)
10644+ DyPrSym(key);
10645+ return found;
10646+}
10647+
10648+static void dy_free_rcu(struct rcu_head *rcu)
10649+{
10650+ struct au_dykey *key;
10651+
10652+ key = container_of(rcu, struct au_dykey, dk_rcu);
10653+ DyPrSym(key);
10654+ kfree(key); /* not delayed */
10655+}
10656+
10657+static void dy_free(struct kref *kref)
10658+{
10659+ struct au_dykey *key;
10660+ struct au_sphlhead *sphl;
10661+
10662+ key = container_of(kref, struct au_dykey, dk_kref);
10663+ sphl = dynop + key->dk_op.dy_type;
10664+ au_sphl_del_rcu(&key->dk_hnode, sphl);
10665+ call_rcu(&key->dk_rcu, dy_free_rcu);
10666+}
10667+
/* drop a reference to @key, dy_free() is called for the last one */
void au_dy_put(struct au_dykey *key)
{
	kref_put(&key->dk_kref, dy_free);
}
10672+
10673+/* ---------------------------------------------------------------------- */
10674+
/*
 * helpers to fill an operations table member by member.
 * DySet() and DySetForce() count the members they handle in a variable which
 * must be named "cnt" in the calling function when it is declared by
 * DyDbgDeclare(cnt).  DyDbgSize() compares the count with the number of
 * pointers in the operations struct.
 */
#define DyDbgSize(cnt, op)	AuDebugOn(cnt != sizeof(op)/sizeof(void *))

/* the counter exists only with CONFIG_AUFS_DEBUG */
#ifdef CONFIG_AUFS_DEBUG
#define DyDbgDeclare(cnt)	unsigned int cnt = 0
#define DyDbgInc(cnt)		do { cnt++; } while (0)
#else
#define DyDbgDeclare(cnt)	do {} while (0)
#define DyDbgInc(cnt)		do {} while (0)
#endif

/*
 * set dst.func to src.func only when the lower h_op has func.
 * When the lower one has it but src does not, only a debug message is
 * printed.
 */
#define DySet(func, dst, src, h_op, h_sb) do {				\
	DyDbgInc(cnt);							\
	if (h_op->func) {						\
		if (src.func)						\
			dst.func = src.func;				\
		else							\
			AuDbg("%s %s\n", au_sbtype(h_sb), #func);	\
	}								\
} while (0)

/* set dst.func to src.func regardless of the lower operations */
#define DySetForce(func, dst, src) do {		\
	AuDebugOn(!src.func);			\
	DyDbgInc(cnt);				\
	dst.func = src.func;			\
} while (0)

/* these two expect dyaop, h_aop and h_sb in the calling function */
#define DySetAop(func) \
	DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
#define DySetAopForce(func) \
	DySetForce(func, dyaop->da_op, aufs_aop)
10705+/* fill @key's a_ops copy: an aufs_aop handler is set only if @h_op has that op */
10706+static void dy_aop(struct au_dykey *key, const void *h_op,
10707+ struct super_block *h_sb __maybe_unused)
10708+{
10709+ struct au_dyaop *dyaop = (void *)key;
10710+ const struct address_space_operations *h_aop = h_op;
10711+ DyDbgDeclare(cnt);
10712+
10713+ AuDbg("%s\n", au_sbtype(h_sb));
10714+
10715+ DySetAop(writepage);
10716+ DySetAopForce(readpage); /* force */
10717+ DySetAop(writepages);
10718+ DySetAop(set_page_dirty);
10719+ DySetAop(readpages);
10720+ DySetAop(write_begin);
10721+ DySetAop(write_end);
10722+ DySetAop(bmap);
10723+ DySetAop(invalidatepage);
10724+ DySetAop(releasepage);
10725+ DySetAop(freepage);
10726+ /* this one will be changed according to an aufs mount option */
10727+ DySetAop(direct_IO);
10728+ DySetAop(migratepage);
10729+ DySetAop(isolate_page);
10730+ DySetAop(putback_page);
10731+ DySetAop(launder_page);
10732+ DySetAop(is_partially_uptodate);
10733+ DySetAop(is_dirty_writeback);
10734+ DySetAop(error_remove_page);
10735+ DySetAop(swap_activate);
10736+ DySetAop(swap_deactivate);
10737+
10738+ DyDbgSize(cnt, *h_aop); /* every member of the struct must be listed above */
10739+}
10740+
10741+/* ---------------------------------------------------------------------- */
10742+/* kref release function for a put which must never drop the last reference */
10743+static void dy_bug(struct kref *kref)
10744+{
10745+ BUG();
10746+}
10747+/* find or create the key for @op and record it in @br; ERR_PTR(-ENOMEM) on failure */
10748+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
10749+{
10750+ struct au_dykey *key, *old;
10751+ struct au_sphlhead *sphl;
10752+ struct op {
10753+ unsigned int sz;
10754+ void (*set)(struct au_dykey *key, const void *h_op,
10755+ struct super_block *h_sb __maybe_unused);
10756+ };
10757+ static const struct op a[] = {
10758+ [AuDy_AOP] = {
10759+ .sz = sizeof(struct au_dyaop),
10760+ .set = dy_aop
10761+ }
10762+ };
10763+ const struct op *p;
10764+
10765+ sphl = dynop + op->dy_type;
10766+ key = dy_gfind_get(sphl, op->dy_hop);
10767+ if (key)
10768+ goto out_add; /* success */
10769+
10770+ p = a + op->dy_type;
10771+ key = kzalloc(p->sz, GFP_NOFS);
10772+ if (unlikely(!key)) {
10773+ key = ERR_PTR(-ENOMEM);
10774+ goto out;
10775+ }
10776+
10777+ key->dk_op.dy_hop = op->dy_hop; /* dy_type stays 0 (AuDy_AOP) from kzalloc() */
10778+ kref_init(&key->dk_kref);
10779+ p->set(key, op->dy_hop, au_br_sb(br));
10780+ old = dy_gadd(sphl, key);
10781+ if (old) {
10782+ au_delayed_kfree(key); /* an equal key is already listed; use that one */
10783+ key = old;
10784+ }
10785+
10786+out_add:
10787+ old = dy_bradd(br, key); /* non-NULL if @br already holds this key */
10788+ if (old)
10789+ /* its ref-count should never be zero here */
10790+ kref_put(&key->dk_kref, dy_bug);
10791+out:
10792+ return key;
10793+}
10794+
10795+/* ---------------------------------------------------------------------- */
10796+/*
10797+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
10798+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
10799+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
10800+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
10801+ * See the aufs manual in detail.
10802+ */
10803+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
10804+{
10805+ if (!do_dx)
10806+ dyaop->da_op.direct_IO = NULL;
10807+ else
10808+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
10809+}
10810+/* dy_get() the au_dyaop for @h_aop in @br, then apply @do_dx to its direct_IO */
10811+static struct au_dyaop *dy_aget(struct au_branch *br,
10812+ const struct address_space_operations *h_aop,
10813+ int do_dx)
10814+{
10815+ struct au_dyaop *dyaop;
10816+ struct au_dynop op;
10817+
10818+ op.dy_type = AuDy_AOP;
10819+ op.dy_haop = h_aop;
10820+ dyaop = (void *)dy_get(&op, br); /* the key is the first member of au_dyaop */
10821+ if (IS_ERR(dyaop))
10822+ goto out;
10823+ dy_adx(dyaop, do_dx);
10824+
10825+out:
10826+ return dyaop;
10827+}
10828+/* make @inode use the dynop a_ops derived from @h_inode's; 0 or a negative errno */
10829+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10830+ struct inode *h_inode)
10831+{
10832+ int err, do_dx;
10833+ struct super_block *sb;
10834+ struct au_branch *br;
10835+ struct au_dyaop *dyaop;
10836+
10837+ AuDebugOn(!S_ISREG(h_inode->i_mode));
10838+ IiMustWriteLock(inode);
10839+
10840+ sb = inode->i_sb;
10841+ br = au_sbr(sb, bindex);
10842+ do_dx = !!au_opt_test(au_mntflags(sb), DIO); /* direct_IO follows the DIO option */
10843+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
10844+ err = PTR_ERR(dyaop);
10845+ if (IS_ERR(dyaop))
10846+ /* unnecessary to call dy_fput() */
10847+ goto out;
10848+
10849+ err = 0;
10850+ inode->i_mapping->a_ops = &dyaop->da_op;
10851+
10852+out:
10853+ return err;
10854+}
10855+
10856+/*
10857+ * Is it safe to replace a_ops while the inode/file is in operation?
10858+ * Yes, I hope so. Only regular files are refreshed; others return 0.
10859+ */
10860+int au_dy_irefresh(struct inode *inode)
10861+{
10862+ int err;
10863+ aufs_bindex_t btop;
10864+ struct inode *h_inode;
10865+
10866+ err = 0;
10867+ if (S_ISREG(inode->i_mode)) {
10868+ btop = au_ibtop(inode);
10869+ h_inode = au_h_iptr(inode, btop);
10870+ err = au_dy_iaop(inode, btop, h_inode);
10871+ }
10872+ return err;
10873+}
10874+/* apply @do_dx to the direct_IO of every key on the AuDy_AOP list */
10875+void au_dy_arefresh(int do_dx)
10876+{
10877+ struct au_sphlhead *sphl;
10878+ struct hlist_head *head;
10879+ struct au_dykey *key;
10880+
10881+ sphl = dynop + AuDy_AOP;
10882+ head = &sphl->head;
10883+ spin_lock(&sphl->spin);
10884+ hlist_for_each_entry(key, head, dk_hnode)
10885+ dy_adx((void *)key, do_dx);
10886+ spin_unlock(&sphl->spin);
10887+}
10888+
10889+/* ---------------------------------------------------------------------- */
10890+/* initialize all AuDyLast list heads of dynop[] */
10891+void __init au_dy_init(void)
10892+{
10893+ int i;
10894+
10895+ /* make sure that 'struct au_dykey *' can be any type */
10896+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
10897+
10898+ for (i = 0; i < AuDyLast; i++)
10899+ au_sphl_init(dynop + i);
10900+}
10901+/* WARN about every dynop[] list which is not empty */
10902+void au_dy_fin(void)
10903+{
10904+ int i;
10905+
10906+ for (i = 0; i < AuDyLast; i++)
10907+ WARN_ON(!hlist_empty(&dynop[i].head));
10908+}
10909diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
10910--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
10911+++ linux/fs/aufs/dynop.h 2016-10-09 16:55:36.489368218 +0200
10912@@ -0,0 +1,74 @@
10913+/*
10914+ * Copyright (C) 2010-2016 Junjiro R. Okajima
10915+ *
10916+ * This program, aufs is free software; you can redistribute it and/or modify
10917+ * it under the terms of the GNU General Public License as published by
10918+ * the Free Software Foundation; either version 2 of the License, or
10919+ * (at your option) any later version.
10920+ *
10921+ * This program is distributed in the hope that it will be useful,
10922+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10923+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10924+ * GNU General Public License for more details.
10925+ *
10926+ * You should have received a copy of the GNU General Public License
10927+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
10928+ */
10929+
10930+/*
10931+ * dynamically customizable operations (for regular files only)
10932+ */
10933+
10934+#ifndef __AUFS_DYNOP_H__
10935+#define __AUFS_DYNOP_H__
10936+
10937+#ifdef __KERNEL__
10938+
10939+#include <linux/fs.h>
10940+#include <linux/kref.h>
10941+/* dynop types, used as the index into dynop[]; AuDyLast is the number of them */
10942+enum {AuDy_AOP, AuDyLast};
10943+/* identifies one dynop: its type and the original operations it was built from */
10944+struct au_dynop {
10945+ int dy_type; /* one of AuDy_* */
10946+ union {
10947+ const void *dy_hop; /* type-agnostic view, compared as the lookup key */
10948+ const struct address_space_operations *dy_haop;
10949+ };
10950+};
10951+/* common head of every dynop object; kept on a dynop[] list and ref-counted */
10952+struct au_dykey {
10953+ union {
10954+ struct hlist_node dk_hnode; /* link on the dynop[] list */
10955+ struct rcu_head dk_rcu; /* shares storage; passed to call_rcu() in dy_free() */
10956+ };
10957+ struct au_dynop dk_op;
10958+
10959+ /*
10960+ * a reference is held while the key sits in a branch-local array,
10961+ * and it is put when the branch is removed.
10962+ */
10963+ struct kref dk_kref;
10964+};
10965+
10966+/* stop unioning since their sizes are very different from each other */
10967+struct au_dyaop {
10968+ struct au_dykey da_key; /* must stay at offset 0, see au_dy_init() */
10969+ struct address_space_operations da_op; /* not const */
10970+};
10971+
10972+/* ---------------------------------------------------------------------- */
10973+
10974+/* dynop.c */
10975+struct au_branch;
10976+void au_dy_put(struct au_dykey *key);
10977+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10978+ struct inode *h_inode);
10979+int au_dy_irefresh(struct inode *inode);
10980+void au_dy_arefresh(int do_dio);
10981+
10982+void __init au_dy_init(void);
10983+void au_dy_fin(void);
10984+
10985+#endif /* __KERNEL__ */
10986+#endif /* __AUFS_DYNOP_H__ */
10987diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
10988--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
10989+++ linux/fs/aufs/export.c 2016-12-17 12:28:17.595211562 +0100
10990@@ -0,0 +1,836 @@
10991+/*
10992+ * Copyright (C) 2005-2016 Junjiro R. Okajima
10993+ *
10994+ * This program, aufs is free software; you can redistribute it and/or modify
10995+ * it under the terms of the GNU General Public License as published by
10996+ * the Free Software Foundation; either version 2 of the License, or
10997+ * (at your option) any later version.
10998+ *
10999+ * This program is distributed in the hope that it will be useful,
11000+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11001+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11002+ * GNU General Public License for more details.
11003+ *
11004+ * You should have received a copy of the GNU General Public License
11005+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
11006+ */
11007+
11008+/*
11009+ * export via nfs
11010+ */
11011+
11012+#include <linux/exportfs.h>
11013+#include <linux/fs_struct.h>
11014+#include <linux/namei.h>
11015+#include <linux/nsproxy.h>
11016+#include <linux/random.h>
11017+#include <linux/writeback.h>
11018+#include "aufs.h"
11019+/* view an ino_t as one or two 32bit words of a file handle */
11020+union conv {
11021+#ifdef CONFIG_AUFS_INO_T_64
11022+ __u32 a[2];
11023+#else
11024+ __u32 a[1];
11025+#endif
11026+ ino_t ino;
11027+};
11028+/* rebuild an ino_t from the 32bit word(s) at @a; the inverse of encode_ino() */
11029+static ino_t decode_ino(__u32 *a)
11030+{
11031+ union conv u;
11032+
11033+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
11034+ u.a[0] = a[0];
11035+#ifdef CONFIG_AUFS_INO_T_64
11036+ u.a[1] = a[1];
11037+#endif
11038+ return u.ino;
11039+}
11040+/* store @ino into the 32bit word(s) at @a, in the in-memory layout of ino_t */
11041+static void encode_ino(__u32 *a, ino_t ino)
11042+{
11043+ union conv u;
11044+
11045+ u.ino = ino;
11046+ a[0] = u.a[0];
11047+#ifdef CONFIG_AUFS_INO_T_64
11048+ a[1] = u.a[1];
11049+#endif
11050+}
11051+
11052+/* NFS file handle: indices of its __u32 words; the lower fh begins at Fh_tail */
11053+enum {
11054+ Fh_br_id,
11055+ Fh_sigen,
11056+#ifdef CONFIG_AUFS_INO_T_64
11057+ /* support 64bit inode number */
11058+ Fh_ino1,
11059+ Fh_ino2,
11060+ Fh_dir_ino1,
11061+ Fh_dir_ino2,
11062+#else
11063+ Fh_ino1,
11064+ Fh_dir_ino1,
11065+#endif
11066+ Fh_igen,
11067+ Fh_h_type,
11068+ Fh_tail,
11069+
11070+ Fh_ino = Fh_ino1,
11071+ Fh_dir_ino = Fh_dir_ino1
11072+};
11073+/* return 1 when @dentry has DCACHE_DISCONNECTED set, otherwise 0 */
11074+static int au_test_anon(struct dentry *dentry)
11075+{
11076+ /* note: read d_flags without d_lock */
11077+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
11078+}
11079+/* return non-zero when current is a kernel thread whose comm is "nfsd" */
11080+int au_test_nfsd(void)
11081+{
11082+ int ret;
11083+ struct task_struct *tsk = current;
11084+ char comm[sizeof(tsk->comm)];
11085+
11086+ ret = 0;
11087+ if (tsk->flags & PF_KTHREAD) {
11088+ get_task_comm(comm, tsk);
11089+ ret = !strcmp(comm, "nfsd");
11090+ }
11091+
11092+ return ret;
11093+}
11094+
11095+/* ---------------------------------------------------------------------- */
11096+/* inode generation external table */
11097+/* write i_generation + 1 into @inode's slot of the xigen file (i_ino * 4 bytes) */
11098+void au_xigen_inc(struct inode *inode)
11099+{
11100+ loff_t pos;
11101+ ssize_t sz;
11102+ __u32 igen;
11103+ struct super_block *sb;
11104+ struct au_sbinfo *sbinfo;
11105+
11106+ sb = inode->i_sb;
11107+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
11108+
11109+ sbinfo = au_sbi(sb);
11110+ pos = inode->i_ino;
11111+ pos *= sizeof(igen);
11112+ igen = inode->i_generation + 1; /* inode->i_generation itself is not changed here */
11113+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
11114+ sizeof(igen), &pos);
11115+ if (sz == sizeof(igen))
11116+ return; /* success */
11117+
11118+ if (unlikely(sz >= 0)) /* NOTE(review): a negative sz is dropped silently */
11119+ AuIOErr("xigen error (%zd)\n", sz);
11120+}
11121+/* load @inode's i_generation from the xigen file, or assign and store a new one */
11122+int au_xigen_new(struct inode *inode)
11123+{
11124+ int err;
11125+ loff_t pos;
11126+ ssize_t sz;
11127+ struct super_block *sb;
11128+ struct au_sbinfo *sbinfo;
11129+ struct file *file;
11130+
11131+ err = 0;
11132+ /* todo: dirty, at mount time */
11133+ if (inode->i_ino == AUFS_ROOT_INO)
11134+ goto out;
11135+ sb = inode->i_sb;
11136+ SiMustAnyLock(sb);
11137+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11138+ goto out; /* without the xino option this is a successful no-op */
11139+
11140+ err = -EFBIG;
11141+ pos = inode->i_ino;
11142+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
11143+ AuIOErr1("too large i%lld\n", pos);
11144+ goto out;
11145+ }
11146+ pos *= sizeof(inode->i_generation); /* byte offset of this inode's slot */
11147+
11148+ err = 0;
11149+ sbinfo = au_sbi(sb);
11150+ file = sbinfo->si_xigen;
11151+ BUG_ON(!file);
11152+
11153+ if (vfsub_f_size_read(file)
11154+ < pos + sizeof(inode->i_generation)) {
11155+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
11156+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
11157+ sizeof(inode->i_generation), &pos);
11158+ } else
11159+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
11160+ sizeof(inode->i_generation), &pos);
11161+ if (sz == sizeof(inode->i_generation))
11162+ goto out; /* success */
11163+
11164+ err = sz;
11165+ if (unlikely(sz >= 0)) { /* a short transfer becomes -EIO */
11166+ err = -EIO;
11167+ AuIOErr("xigen error (%zd)\n", sz);
11168+ }
11169+
11170+out:
11171+ return err;
11172+}
11173+/* replace si_xigen by a file from au_xino_create2(); the previous one is fput() */
11174+int au_xigen_set(struct super_block *sb, struct file *base)
11175+{
11176+ int err;
11177+ struct au_sbinfo *sbinfo;
11178+ struct file *file;
11179+
11180+ SiMustWriteLock(sb);
11181+
11182+ sbinfo = au_sbi(sb);
11183+ file = au_xino_create2(base, sbinfo->si_xigen);
11184+ err = PTR_ERR(file);
11185+ if (IS_ERR(file))
11186+ goto out; /* si_xigen is left as it was */
11187+ err = 0;
11188+ if (sbinfo->si_xigen)
11189+ fput(sbinfo->si_xigen);
11190+ sbinfo->si_xigen = file;
11191+
11192+out:
11193+ return err;
11194+}
11195+/* fput() si_xigen, if any, and reset it to NULL */
11196+void au_xigen_clr(struct super_block *sb)
11197+{
11198+ struct au_sbinfo *sbinfo;
11199+
11200+ SiMustWriteLock(sb);
11201+
11202+ sbinfo = au_sbi(sb);
11203+ if (sbinfo->si_xigen) {
11204+ fput(sbinfo->si_xigen);
11205+ sbinfo->si_xigen = NULL;
11206+ }
11207+}
11208+
11209+/* ---------------------------------------------------------------------- */
11210+/* find a dentry of the cached inode @ino; NULL if none, ERR_PTR(-ESTALE) if bad */
11211+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
11212+ ino_t dir_ino)
11213+{
11214+ struct dentry *dentry, *d;
11215+ struct inode *inode;
11216+ unsigned int sigen;
11217+
11218+ dentry = NULL;
11219+ inode = ilookup(sb, ino);
11220+ if (!inode)
11221+ goto out;
11222+
11223+ dentry = ERR_PTR(-ESTALE);
11224+ sigen = au_sigen(sb);
11225+ if (unlikely(au_is_bad_inode(inode)
11226+ || IS_DEADDIR(inode)
11227+ || sigen != au_iigen(inode, NULL)))
11228+ goto out_iput;
11229+
11230+ dentry = NULL;
11231+ if (!dir_ino || S_ISDIR(inode->i_mode))
11232+ dentry = d_find_alias(inode);
11233+ else {
11234+ spin_lock(&inode->i_lock); /* pick the connected alias whose parent is @dir_ino */
11235+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
11236+ spin_lock(&d->d_lock);
11237+ if (!au_test_anon(d)
11238+ && d_inode(d->d_parent)->i_ino == dir_ino) {
11239+ dentry = dget_dlock(d);
11240+ spin_unlock(&d->d_lock);
11241+ break;
11242+ }
11243+ spin_unlock(&d->d_lock);
11244+ }
11245+ spin_unlock(&inode->i_lock);
11246+ }
11247+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
11248+ /* need to refresh */
11249+ dput(dentry);
11250+ dentry = NULL;
11251+ }
11252+
11253+out_iput:
11254+ iput(inode);
11255+out:
11256+ AuTraceErrPtr(dentry);
11257+ return dentry;
11258+}
11259+
11260+/* ---------------------------------------------------------------------- */
11261+
11262+/* todo: dirty? */
11263+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
11264+/* argument block of au_compare_mnt() */
11265+struct au_compare_mnt_args {
11266+ /* input */
11267+ struct super_block *sb;
11268+
11269+ /* output */
11270+ struct vfsmount *mnt;
11271+};
11272+/* iterate_mounts() callback: mntget() the mount whose sb is a->sb and return 1 */
11273+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
11274+{
11275+ struct au_compare_mnt_args *a = arg;
11276+
11277+ if (mnt->mnt_sb != a->sb)
11278+ return 0;
11279+ a->mnt = mntget(mnt);
11280+ return 1;
11281+}
11282+/* search the mounts from current's fs root for one of @sb; returned with mntget() */
11283+static struct vfsmount *au_mnt_get(struct super_block *sb)
11284+{
11285+ int err;
11286+ struct path root;
11287+ struct au_compare_mnt_args args = {
11288+ .sb = sb
11289+ };
11290+
11291+ get_fs_root(current->fs, &root);
11292+ rcu_read_lock();
11293+ err = iterate_mounts(au_compare_mnt, &args, root.mnt);
11294+ rcu_read_unlock();
11295+ path_put(&root);
11296+ AuDebugOn(!err); /* a miss is only caught by these debug checks */
11297+ AuDebugOn(!args.mnt);
11298+ return args.mnt;
11299+}
11300+/* state for si_nfsd_read_lock(): sigen and br_id come from the file handle */
11301+struct au_nfsd_si_lock {
11302+ unsigned int sigen;
11303+ aufs_bindex_t bindex, br_id; /* bindex is the output, -1 on failure */
11304+ unsigned char force_lock; /* keep the sb lock even when failing */
11305+};
11306+/* si_read_lock() and resolve br_id to bindex; -ESTALE unlocks unless force_lock */
11307+static int si_nfsd_read_lock(struct super_block *sb,
11308+ struct au_nfsd_si_lock *nsi_lock)
11309+{
11310+ int err;
11311+ aufs_bindex_t bindex;
11312+
11313+ si_read_lock(sb, AuLock_FLUSH);
11314+
11315+ /* branch id may be wrapped around */
11316+ err = 0;
11317+ bindex = au_br_index(sb, nsi_lock->br_id);
11318+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
11319+ goto out; /* success */
11320+
11321+ err = -ESTALE;
11322+ bindex = -1;
11323+ if (!nsi_lock->force_lock)
11324+ si_read_unlock(sb);
11325+
11326+out:
11327+ nsi_lock->bindex = bindex;
11328+ return err;
11329+}
11330+/* dir_context wrapper used to find the name of the entry whose inode is ino */
11331+struct find_name_by_ino {
11332+ struct dir_context ctx;
11333+ int called, found; /* called counts the actor calls, found is set on a hit */
11334+ ino_t ino; /* input: the inode number to search for */
11335+ char *name; /* output buffer, not NUL-terminated by the actor */
11336+ int namelen;
11337+};
11338+/* dir actor: on a matching @ino, copy @name into a->name and return 1 */
11339+static int
11340+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
11341+ loff_t offset, u64 ino, unsigned int d_type)
11342+{
11343+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
11344+ ctx);
11345+
11346+ a->called++;
11347+ if (a->ino != ino)
11348+ return 0;
11349+
11350+ memcpy(a->name, name, namelen); /* NOTE(review): relies on a->name holding namelen bytes */
11351+ a->namelen = namelen;
11352+ a->found = 1;
11353+ return 1;
11354+}
11355+/* read the dir @path to find the name of @ino, then look that name up in it */
11356+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
11357+ struct au_nfsd_si_lock *nsi_lock)
11358+{
11359+ struct dentry *dentry, *parent;
11360+ struct file *file;
11361+ struct inode *dir;
11362+ struct find_name_by_ino arg = {
11363+ .ctx = {
11364+ .actor = find_name_by_ino
11365+ }
11366+ };
11367+ int err;
11368+
11369+ parent = path->dentry;
11370+ if (nsi_lock)
11371+ si_read_unlock(parent->d_sb); /* re-acquired at the out label */
11372+ file = vfsub_dentry_open(path, au_dir_roflags);
11373+ dentry = (void *)file;
11374+ if (IS_ERR(file))
11375+ goto out;
11376+
11377+ dentry = ERR_PTR(-ENOMEM);
11378+ arg.name = (void *)__get_free_page(GFP_NOFS);
11379+ if (unlikely(!arg.name))
11380+ goto out_file;
11381+ arg.ino = ino;
11382+ arg.found = 0;
11383+ do {
11384+ arg.called = 0;
11385+ /* smp_mb(); */
11386+ err = vfsub_iterate_dir(file, &arg.ctx);
11387+ } while (!err && !arg.found && arg.called); /* until error, hit, or no more entries */
11388+ dentry = ERR_PTR(err);
11389+ if (unlikely(err))
11390+ goto out_name;
11391+ /* instead of ENOENT */
11392+ dentry = ERR_PTR(-ESTALE);
11393+ if (!arg.found)
11394+ goto out_name;
11395+
11396+ /* do not call vfsub_lkup_one() */
11397+ dir = d_inode(parent);
11398+ dentry = vfsub_lookup_one_len_unlocked(arg.name, parent, arg.namelen);
11399+ AuTraceErrPtr(dentry);
11400+ if (IS_ERR(dentry))
11401+ goto out_name;
11402+ AuDebugOn(au_test_anon(dentry));
11403+ if (unlikely(d_really_is_negative(dentry))) {
11404+ dput(dentry);
11405+ dentry = ERR_PTR(-ENOENT);
11406+ }
11407+
11408+out_name:
11409+ au_delayed_free_page((unsigned long)arg.name);
11410+out_file:
11411+ fput(file);
11412+out:
11413+ if (unlikely(nsi_lock
11414+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
11415+ if (!IS_ERR(dentry)) { /* the relock failed, so give the result up */
11416+ dput(dentry);
11417+ dentry = ERR_PTR(-ESTALE);
11418+ }
11419+ AuTraceErrPtr(dentry);
11420+ return dentry;
11421+}
11422+/* find @ino via its parent dir @dir_ino, which must be the root or a cached inode */
11423+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
11424+ ino_t dir_ino,
11425+ struct au_nfsd_si_lock *nsi_lock)
11426+{
11427+ struct dentry *dentry;
11428+ struct path path;
11429+
11430+ if (dir_ino != AUFS_ROOT_INO) {
11431+ path.dentry = decode_by_ino(sb, dir_ino, 0);
11432+ dentry = path.dentry;
11433+ if (!path.dentry || IS_ERR(path.dentry))
11434+ goto out; /* NULL (parent not cached) or an error is returned as is */
11435+ AuDebugOn(au_test_anon(path.dentry));
11436+ } else
11437+ path.dentry = dget(sb->s_root);
11438+
11439+ path.mnt = au_mnt_get(sb);
11440+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
11441+ path_put(&path); /* drops both the dentry and the mount taken above */
11442+
11443+out:
11444+ AuTraceErrPtr(dentry);
11445+ return dentry;
11446+}
11447+
11448+/* ---------------------------------------------------------------------- */
11449+/* acceptance callback for exportfs_decode_fh(): every dentry is accepted */
11450+static int h_acceptable(void *expv, struct dentry *dentry)
11451+{
11452+ return 1;
11453+}
11454+/* NOTE(review): appears to rebase @h_parent's branch path onto the aufs mount path */
11455+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
11456+ char *buf, int len, struct super_block *sb)
11457+{
11458+ char *p;
11459+ int n;
11460+ struct path path;
11461+
11462+ p = d_path(h_rootpath, buf, len);
11463+ if (IS_ERR(p))
11464+ goto out;
11465+ n = strlen(p); /* length of the branch root's path */
11466+
11467+ path.mnt = h_rootpath->mnt;
11468+ path.dentry = h_parent;
11469+ p = d_path(&path, buf, len);
11470+ if (IS_ERR(p))
11471+ goto out;
11472+ if (n != 1)
11473+ p += n; /* skip the branch root prefix */
11474+
11475+ path.mnt = au_mnt_get(sb);
11476+ path.dentry = sb->s_root;
11477+ p = d_path(&path, buf, len - strlen(p)); /* @buf is shortened by the kept suffix */
11478+ mntput(path.mnt);
11479+ if (IS_ERR(p))
11480+ goto out;
11481+ if (n != 1)
11482+ p[strlen(p)] = '/'; /* NOTE(review): presumably rejoins the suffix; confirm n == 1 */
11483+
11484+out:
11485+ AuTraceErrPtr(p);
11486+ return p;
11487+}
11488+/* decode the lower fh into the lower parent, build a path from it and find @ino */
11489+static
11490+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
11491+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
11492+{
11493+ struct dentry *dentry, *h_parent, *root;
11494+ struct super_block *h_sb;
11495+ char *pathname, *p;
11496+ struct vfsmount *h_mnt;
11497+ struct au_branch *br;
11498+ int err;
11499+ struct path path;
11500+
11501+ br = au_sbr(sb, nsi_lock->bindex);
11502+ h_mnt = au_br_mnt(br);
11503+ h_sb = h_mnt->mnt_sb;
11504+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
11505+ lockdep_off();
11506+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
11507+ fh_len - Fh_tail, fh[Fh_h_type],
11508+ h_acceptable, /*context*/NULL);
11509+ lockdep_on();
11510+ dentry = h_parent;
11511+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
11512+ AuWarn1("%s decode_fh failed, %ld\n",
11513+ au_sbtype(h_sb), PTR_ERR(h_parent));
11514+ goto out;
11515+ }
11516+ dentry = NULL;
11517+ if (unlikely(au_test_anon(h_parent))) {
11518+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
11519+ au_sbtype(h_sb));
11520+ goto out_h_parent;
11521+ }
11522+
11523+ dentry = ERR_PTR(-ENOMEM);
11524+ pathname = (void *)__get_free_page(GFP_NOFS);
11525+ if (unlikely(!pathname))
11526+ goto out_h_parent;
11527+
11528+ root = sb->s_root;
11529+ path.mnt = h_mnt;
11530+ di_read_lock_parent(root, !AuLock_IR);
11531+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
11532+ di_read_unlock(root, !AuLock_IR);
11533+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
11534+ dentry = (void *)p;
11535+ if (IS_ERR(p))
11536+ goto out_pathname;
11537+
11538+ si_read_unlock(sb); /* the sb lock is dropped for the lookup, retaken at out_relock */
11539+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
11540+ dentry = ERR_PTR(err);
11541+ if (unlikely(err))
11542+ goto out_relock;
11543+
11544+ dentry = ERR_PTR(-ENOENT);
11545+ AuDebugOn(au_test_anon(path.dentry));
11546+ if (unlikely(d_really_is_negative(path.dentry)))
11547+ goto out_path;
11548+
11549+ if (ino != d_inode(path.dentry)->i_ino)
11550+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
11551+ else
11552+ dentry = dget(path.dentry); /* the looked-up dir itself is the target */
11553+
11554+out_path:
11555+ path_put(&path);
11556+out_relock:
11557+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
11558+ if (!IS_ERR(dentry)) {
11559+ dput(dentry);
11560+ dentry = ERR_PTR(-ESTALE);
11561+ }
11562+out_pathname:
11563+ au_delayed_free_page((unsigned long)pathname);
11564+out_h_parent:
11565+ dput(h_parent);
11566+out:
11567+ AuTraceErrPtr(dentry);
11568+ return dentry;
11569+}
11570+
11571+/* ---------------------------------------------------------------------- */
11572+/* ->fh_to_dentry: try the cached inode, then the cached parent dir, then the path */
11573+static struct dentry *
11574+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
11575+ int fh_type)
11576+{
11577+ struct dentry *dentry;
11578+ __u32 *fh = fid->raw;
11579+ struct au_branch *br;
11580+ ino_t ino, dir_ino;
11581+ struct au_nfsd_si_lock nsi_lock = {
11582+ .force_lock = 0
11583+ };
11584+
11585+ dentry = ERR_PTR(-ESTALE);
11586+ /* it should never happen, but the file handle is unreliable */
11587+ if (unlikely(fh_len < Fh_tail))
11588+ goto out;
11589+ nsi_lock.sigen = fh[Fh_sigen];
11590+ nsi_lock.br_id = fh[Fh_br_id];
11591+
11592+ /* branch id may be wrapped around */
11593+ br = NULL;
11594+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
11595+ goto out;
11596+ nsi_lock.force_lock = 1; /* from here every relock keeps the sb lock held */
11597+
11598+ /* is this inode still cached? */
11599+ ino = decode_ino(fh + Fh_ino);
11600+ /* it should never happen */
11601+ if (unlikely(ino == AUFS_ROOT_INO))
11602+ goto out_unlock;
11603+
11604+ dir_ino = decode_ino(fh + Fh_dir_ino);
11605+ dentry = decode_by_ino(sb, ino, dir_ino);
11606+ if (IS_ERR(dentry))
11607+ goto out_unlock;
11608+ if (dentry)
11609+ goto accept;
11610+
11611+ /* is the parent dir cached? */
11612+ br = au_sbr(sb, nsi_lock.bindex);
11613+ au_br_get(br); /* put at out_unlock */
11614+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
11615+ if (IS_ERR(dentry))
11616+ goto out_unlock;
11617+ if (dentry)
11618+ goto accept;
11619+
11620+ /* lookup path */
11621+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
11622+ if (IS_ERR(dentry))
11623+ goto out_unlock;
11624+ if (unlikely(!dentry))
11625+ /* todo?: make it ESTALE */
11626+ goto out_unlock;
11627+
11628+accept:
11629+ if (!au_digen_test(dentry, au_sigen(sb))
11630+ && d_inode(dentry)->i_generation == fh[Fh_igen])
11631+ goto out_unlock; /* success */
11632+
11633+ dput(dentry);
11634+ dentry = ERR_PTR(-ESTALE);
11635+out_unlock:
11636+ if (br)
11637+ au_br_put(br);
11638+ si_read_unlock(sb);
11639+out:
11640+ AuTraceErrPtr(dentry);
11641+ return dentry;
11642+}
11643+/* disabled draft: its decode_by_path() call does not match the current signature */
11644+#if 0 /* reserved for future use */
11645+/* support subtreecheck option */
11646+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
11647+ int fh_len, int fh_type)
11648+{
11649+ struct dentry *parent;
11650+ __u32 *fh = fid->raw;
11651+ ino_t dir_ino;
11652+
11653+ dir_ino = decode_ino(fh + Fh_dir_ino);
11654+ parent = decode_by_ino(sb, dir_ino, 0);
11655+ if (IS_ERR(parent))
11656+ goto out;
11657+ if (!parent)
11658+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
11659+ dir_ino, fh, fh_len);
11660+
11661+out:
11662+ AuTraceErrPtr(parent);
11663+ return parent;
11664+}
11665+#endif
11666+
11667+/* ---------------------------------------------------------------------- */
11668+/* ->encode_fh: the aufs words up to Fh_tail, followed by the lower parent's fh */
11669+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
11670+ struct inode *dir)
11671+{
11672+ int err;
11673+ aufs_bindex_t bindex;
11674+ struct super_block *sb, *h_sb;
11675+ struct dentry *dentry, *parent, *h_parent;
11676+ struct inode *h_dir;
11677+ struct au_branch *br;
11678+
11679+ err = -ENOSPC;
11680+ if (unlikely(*max_len <= Fh_tail)) {
11681+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
11682+ goto out;
11683+ }
11684+
11685+ err = FILEID_ROOT;
11686+ if (inode->i_ino == AUFS_ROOT_INO) {
11687+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
11688+ goto out;
11689+ }
11690+
11691+ h_parent = NULL;
11692+ sb = inode->i_sb;
11693+ err = si_read_lock(sb, AuLock_FLUSH);
11694+ if (unlikely(err))
11695+ goto out;
11696+
11697+#ifdef CONFIG_AUFS_DEBUG
11698+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11699+ AuWarn1("NFS-exporting requires xino\n");
11700+#endif
11701+ err = -EIO;
11702+ parent = NULL;
11703+ ii_read_lock_child(inode);
11704+ bindex = au_ibtop(inode);
11705+ if (!dir) { /* no parent given: take the parent of any alias of @inode */
11706+ dentry = d_find_any_alias(inode);
11707+ if (unlikely(!dentry))
11708+ goto out_unlock;
11709+ AuDebugOn(au_test_anon(dentry));
11710+ parent = dget_parent(dentry);
11711+ dput(dentry);
11712+ if (unlikely(!parent))
11713+ goto out_unlock;
11714+ if (d_really_is_positive(parent))
11715+ dir = d_inode(parent);
11716+ } /* NOTE(review): dir is still NULL here if parent was negative — confirm */
11717+
11718+ ii_read_lock_parent(dir);
11719+ h_dir = au_h_iptr(dir, bindex);
11720+ ii_read_unlock(dir);
11721+ if (unlikely(!h_dir))
11722+ goto out_parent;
11723+ h_parent = d_find_any_alias(h_dir);
11724+ if (unlikely(!h_parent))
11725+ goto out_hparent;
11726+
11727+ err = -EPERM;
11728+ br = au_sbr(sb, bindex);
11729+ h_sb = au_br_sb(br);
11730+ if (unlikely(!h_sb->s_export_op)) {
11731+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
11732+ goto out_hparent;
11733+ }
11734+
11735+ fh[Fh_br_id] = br->br_id;
11736+ fh[Fh_sigen] = au_sigen(sb);
11737+ encode_ino(fh + Fh_ino, inode->i_ino);
11738+ encode_ino(fh + Fh_dir_ino, dir->i_ino);
11739+ fh[Fh_igen] = inode->i_generation;
11740+
11741+ *max_len -= Fh_tail; /* space left for the lower fh; the aufs part is added back below */
11742+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
11743+ max_len,
11744+ /*connectable or subtreecheck*/0);
11745+ err = fh[Fh_h_type];
11746+ *max_len += Fh_tail;
11747+ /* todo: macros? */
11748+ if (err != FILEID_INVALID)
11749+ err = 99; /* the fh type value returned for an aufs handle */
11750+ else
11751+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
11752+
11753+out_hparent:
11754+ dput(h_parent);
11755+out_parent:
11756+ dput(parent);
11757+out_unlock:
11758+ ii_read_unlock(inode);
11759+ si_read_unlock(sb);
11760+out:
11761+ if (unlikely(err < 0)) /* every negative errno is reported as FILEID_INVALID */
11762+ err = FILEID_INVALID;
11763+ return err;
11764+}
11765+
11766+/* ---------------------------------------------------------------------- */
11767+/* ->commit_metadata: use the lower fs's method if any, else sync_inode() it */
11768+static int aufs_commit_metadata(struct inode *inode)
11769+{
11770+ int err;
11771+ aufs_bindex_t bindex;
11772+ struct super_block *sb;
11773+ struct inode *h_inode;
11774+ int (*f)(struct inode *inode);
11775+
11776+ sb = inode->i_sb;
11777+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
11778+ ii_write_lock_child(inode);
11779+ bindex = au_ibtop(inode);
11780+ AuDebugOn(bindex < 0);
11781+ h_inode = au_h_iptr(inode, bindex);
11782+
11783+ f = h_inode->i_sb->s_export_op->commit_metadata; /* NOTE(review): assumes s_export_op is set */
11784+ if (f)
11785+ err = f(h_inode);
11786+ else {
11787+ struct writeback_control wbc = {
11788+ .sync_mode = WB_SYNC_ALL,
11789+ .nr_to_write = 0 /* metadata only */
11790+ };
11791+
11792+ err = sync_inode(h_inode, &wbc);
11793+ }
11794+
11795+ au_cpup_attr_timesizes(inode);
11796+ ii_write_unlock(inode);
11797+ si_read_unlock(sb);
11798+ return err;
11799+}
11800+
11801+/* ---------------------------------------------------------------------- */
11802+/* the export operations installed on an aufs super_block by au_export_init() */
11803+static struct export_operations aufs_export_op = {
11804+ .fh_to_dentry = aufs_fh_to_dentry,
11805+ /* .fh_to_parent = aufs_fh_to_parent, */
11806+ .encode_fh = aufs_encode_fh,
11807+ .commit_metadata = aufs_commit_metadata
11808+};
11809+/* install aufs_export_op on @sb, clear si_xigen, randomize si_xigen_next */
11810+void au_export_init(struct super_block *sb)
11811+{
11812+ struct au_sbinfo *sbinfo;
11813+ __u32 u;
11814+
11815+ BUILD_BUG_ON_MSG(IS_BUILTIN(CONFIG_AUFS_FS)
11816+ && IS_MODULE(CONFIG_EXPORTFS),
11817+ AUFS_NAME ": unsupported configuration "
11818+ "CONFIG_EXPORTFS=m and CONFIG_AUFS_FS=y");
11819+
11820+ sb->s_export_op = &aufs_export_op;
11821+ sbinfo = au_sbi(sb);
11822+ sbinfo->si_xigen = NULL;
11823+ get_random_bytes(&u, sizeof(u));
11824+ BUILD_BUG_ON(sizeof(u) != sizeof(int)); /* atomic_set() below takes an int */
11825+ atomic_set(&sbinfo->si_xigen_next, u);
11826+}
11827diff -urN /usr/share/empty/fs/aufs/fhsm.c linux/fs/aufs/fhsm.c
11828--- /usr/share/empty/fs/aufs/fhsm.c 1970-01-01 01:00:00.000000000 +0100
11829+++ linux/fs/aufs/fhsm.c 2016-10-09 16:55:36.489368218 +0200
11830@@ -0,0 +1,426 @@
11831+/*
11832+ * Copyright (C) 2011-2016 Junjiro R. Okajima
11833+ *
11834+ * This program, aufs is free software; you can redistribute it and/or modify
11835+ * it under the terms of the GNU General Public License as published by
11836+ * the Free Software Foundation; either version 2 of the License, or
11837+ * (at your option) any later version.
11838+ *
11839+ * This program is distributed in the hope that it will be useful,
11840+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11841+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11842+ * GNU General Public License for more details.
11843+ *
11844+ * You should have received a copy of the GNU General Public License
11845+ * along with this program; if not, write to the Free Software
11846+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11847+ */
11848+
11849+/*
11850+ * File-based Hierarchy Storage Management
11851+ */
11852+
11853+#include <linux/anon_inodes.h>
11854+#include <linux/poll.h>
11855+#include <linux/seq_file.h>
11856+#include <linux/statfs.h>
11857+#include "aufs.h"
11858+
11859+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
11860+{
11861+ struct au_sbinfo *sbinfo;
11862+ struct au_fhsm *fhsm;
11863+
11864+ SiMustAnyLock(sb);
11865+
11866+ sbinfo = au_sbi(sb);
11867+ fhsm = &sbinfo->si_fhsm;
11868+ AuDebugOn(!fhsm);
11869+ return fhsm->fhsm_bottom;
11870+}
11871+
11872+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
11873+{
11874+ struct au_sbinfo *sbinfo;
11875+ struct au_fhsm *fhsm;
11876+
11877+ SiMustWriteLock(sb);
11878+
11879+ sbinfo = au_sbi(sb);
11880+ fhsm = &sbinfo->si_fhsm;
11881+ AuDebugOn(!fhsm);
11882+ fhsm->fhsm_bottom = bindex;
11883+}
11884+
11885+/* ---------------------------------------------------------------------- */
11886+
11887+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
11888+{
11889+ struct au_br_fhsm *bf;
11890+
11891+ bf = br->br_fhsm;
11892+ MtxMustLock(&bf->bf_lock);
11893+
11894+ return !bf->bf_readable
11895+ || time_after(jiffies,
11896+ bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
11897+}
11898+
11899+/* ---------------------------------------------------------------------- */
11900+
11901+static void au_fhsm_notify(struct super_block *sb, int val)
11902+{
11903+ struct au_sbinfo *sbinfo;
11904+ struct au_fhsm *fhsm;
11905+
11906+ SiMustAnyLock(sb);
11907+
11908+ sbinfo = au_sbi(sb);
11909+ fhsm = &sbinfo->si_fhsm;
11910+ if (au_fhsm_pid(fhsm)
11911+ && atomic_read(&fhsm->fhsm_readable) != -1) {
11912+ atomic_set(&fhsm->fhsm_readable, val);
11913+ if (val)
11914+ wake_up(&fhsm->fhsm_wqh);
11915+ }
11916+}
11917+
11918+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
11919+ struct aufs_stfs *rstfs, int do_lock, int do_notify)
11920+{
11921+ int err;
11922+ struct au_branch *br;
11923+ struct au_br_fhsm *bf;
11924+
11925+ br = au_sbr(sb, bindex);
11926+ AuDebugOn(au_br_rdonly(br));
11927+ bf = br->br_fhsm;
11928+ AuDebugOn(!bf);
11929+
11930+ if (do_lock)
11931+ mutex_lock(&bf->bf_lock);
11932+ else
11933+ MtxMustLock(&bf->bf_lock); /* the caller took bf_lock */
11934+
11935+ /* refresh the cached statfs; sb->s_root for NFS is unreliable */
11936+ err = au_br_stfs(br, &bf->bf_stfs);
11937+ if (unlikely(err)) {
11938+ AuErr1("FHSM failed (%d), b%d, ignored.\n", err, bindex);
11939+ goto out;
11940+ }
11941+
11942+ bf->bf_jiffy = jiffies;
11943+ bf->bf_readable = 1;
11944+ if (do_notify)
11945+ au_fhsm_notify(sb, /*val*/1);
11946+ if (rstfs)
11947+ *rstfs = bf->bf_stfs;
11948+
11949+out:
11950+ if (do_lock)
11951+ mutex_unlock(&bf->bf_lock);
11952+ au_fhsm_notify(sb, /*val*/1);
11953+
11954+ return err;
11955+}
11956+
11957+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
11958+{
11959+ /* au_fhsm_stfs() reports its own failure, the result is ignored here */
11960+ struct au_sbinfo *sbinfo;
11961+ struct au_fhsm *fhsm;
11962+ struct au_branch *br;
11963+ struct au_br_fhsm *bf;
11964+
11965+ AuDbg("b%d, force %d\n", bindex, force);
11966+ SiMustAnyLock(sb);
11967+
11968+ sbinfo = au_sbi(sb);
11969+ fhsm = &sbinfo->si_fhsm;
11970+ if (!au_ftest_si(sbinfo, FHSM)
11971+ || fhsm->fhsm_bottom == bindex)
11972+ return;
11973+
11974+ br = au_sbr(sb, bindex);
11975+ bf = br->br_fhsm;
11976+ AuDebugOn(!bf);
11977+ mutex_lock(&bf->bf_lock);
11978+ if (force
11979+ || au_fhsm_pid(fhsm)
11980+ || au_fhsm_test_jiffy(sbinfo, br))
11981+ au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
11982+ /*do_notify*/1);
11983+ mutex_unlock(&bf->bf_lock);
11984+}
11985+
11986+void au_fhsm_wrote_all(struct super_block *sb, int force)
11987+{
11988+ aufs_bindex_t bindex, bbot;
11989+ struct au_branch *br;
11990+
11991+ /* exclude the bottom */
11992+ bbot = au_fhsm_bottom(sb);
11993+ for (bindex = 0; bindex < bbot; bindex++) {
11994+ br = au_sbr(sb, bindex);
11995+ if (au_br_fhsm(br->br_perm))
11996+ au_fhsm_wrote(sb, bindex, force);
11997+ }
11998+}
11999+
12000+/* ---------------------------------------------------------------------- */
12001+
12002+static unsigned int au_fhsm_poll(struct file *file,
12003+ struct poll_table_struct *wait)
12004+{
12005+ unsigned int mask;
12006+ struct au_sbinfo *sbinfo;
12007+ struct au_fhsm *fhsm;
12008+
12009+ mask = 0;
12010+ sbinfo = file->private_data;
12011+ fhsm = &sbinfo->si_fhsm;
12012+ poll_wait(file, &fhsm->fhsm_wqh, wait);
12013+ if (atomic_read(&fhsm->fhsm_readable))
12014+ mask = POLLIN /* | POLLRDNORM */;
12015+
12016+ AuTraceErr((int)mask);
12017+ return mask;
12018+}
12019+
12020+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
12021+ struct aufs_stfs *stfs, __s16 brid)
12022+{
12023+ int err;
12024+
12025+ err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
12026+ if (!err)
12027+ err = __put_user(brid, &stbr->brid);
12028+ if (unlikely(err))
12029+ err = -EFAULT;
12030+
12031+ return err;
12032+}
12033+
12034+static ssize_t au_fhsm_do_read(struct super_block *sb,
12035+ struct aufs_stbr __user *stbr, size_t count)
12036+{
12037+ ssize_t err;
12038+ int nstbr;
12039+ aufs_bindex_t bindex, bbot;
12040+ struct au_branch *br;
12041+ struct au_br_fhsm *bf;
12042+
12043+ /* except the bottom branch */
12044+ err = 0;
12045+ nstbr = 0;
12046+ bbot = au_fhsm_bottom(sb);
12047+ for (bindex = 0; !err && bindex < bbot; bindex++) {
12048+ br = au_sbr(sb, bindex);
12049+ if (!au_br_fhsm(br->br_perm))
12050+ continue;
12051+
12052+ bf = br->br_fhsm;
12053+ mutex_lock(&bf->bf_lock);
12054+ if (bf->bf_readable) {
12055+ err = -EFAULT;
12056+ if (count >= sizeof(*stbr))
12057+ err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
12058+ br->br_id);
12059+ if (!err) {
12060+ bf->bf_readable = 0;
12061+ count -= sizeof(*stbr);
12062+ nstbr++;
12063+ }
12064+ }
12065+ mutex_unlock(&bf->bf_lock);
12066+ }
12067+ if (!err)
12068+ err = sizeof(*stbr) * nstbr;
12069+
12070+ return err;
12071+}
12072+
12073+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
12074+ loff_t *pos)
12075+{
12076+ ssize_t err;
12077+ int readable;
12078+ aufs_bindex_t nfhsm, bindex, bbot;
12079+ struct au_sbinfo *sbinfo;
12080+ struct au_fhsm *fhsm;
12081+ struct au_branch *br;
12082+ struct super_block *sb;
12083+
12084+ err = 0;
12085+ sbinfo = file->private_data;
12086+ fhsm = &sbinfo->si_fhsm;
12087+need_data:
12088+ spin_lock_irq(&fhsm->fhsm_wqh.lock);
12089+ if (!atomic_read(&fhsm->fhsm_readable)) {
12090+ if (vfsub_file_flags(file) & O_NONBLOCK)
12091+ err = -EAGAIN;
12092+ else
12093+ err = wait_event_interruptible_locked_irq
12094+ (fhsm->fhsm_wqh,
12095+ atomic_read(&fhsm->fhsm_readable));
12096+ }
12097+ spin_unlock_irq(&fhsm->fhsm_wqh.lock);
12098+ if (unlikely(err))
12099+ goto out;
12100+
12101+ /* sb may already be dead */
12102+ au_rw_read_lock(&sbinfo->si_rwsem);
12103+ readable = atomic_read(&fhsm->fhsm_readable);
12104+ if (readable > 0) {
12105+ sb = sbinfo->si_sb;
12106+ AuDebugOn(!sb);
12107+ /* exclude the bottom branch */
12108+ nfhsm = 0;
12109+ bbot = au_fhsm_bottom(sb);
12110+ for (bindex = 0; bindex < bbot; bindex++) {
12111+ br = au_sbr(sb, bindex);
12112+ if (au_br_fhsm(br->br_perm))
12113+ nfhsm++;
12114+ }
12115+ err = -EMSGSIZE;
12116+ if (nfhsm * sizeof(struct aufs_stbr) <= count) {
12117+ atomic_set(&fhsm->fhsm_readable, 0);
12118+ err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
12119+ count);
12120+ }
12121+ }
12122+ au_rw_read_unlock(&sbinfo->si_rwsem);
12123+ if (!readable)
12124+ goto need_data;
12125+
12126+out:
12127+ return err;
12128+}
12129+
12130+static int au_fhsm_release(struct inode *inode, struct file *file)
12131+{
12132+ struct au_sbinfo *sbinfo;
12133+ struct au_fhsm *fhsm;
12134+
12135+ /* sb may already be dead */
12136+ sbinfo = file->private_data;
12137+ fhsm = &sbinfo->si_fhsm;
12138+ spin_lock(&fhsm->fhsm_spin);
12139+ fhsm->fhsm_pid = 0;
12140+ spin_unlock(&fhsm->fhsm_spin);
12141+ kobject_put(&sbinfo->si_kobj);
12142+
12143+ return 0;
12144+}
12145+
12146+static const struct file_operations au_fhsm_fops = {
12147+ .owner = THIS_MODULE,
12148+ .llseek = noop_llseek,
12149+ .read = au_fhsm_read,
12150+ .poll = au_fhsm_poll,
12151+ .release = au_fhsm_release
12152+};
12153+
12154+int au_fhsm_fd(struct super_block *sb, int oflags)
12155+{
12156+ int err, fd;
12157+ struct au_sbinfo *sbinfo;
12158+ struct au_fhsm *fhsm;
12159+
12160+ err = -EPERM;
12161+ if (unlikely(!capable(CAP_SYS_ADMIN)))
12162+ goto out;
12163+
12164+ err = -EINVAL;
12165+ if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
12166+ goto out;
12167+
12168+ err = 0;
12169+ sbinfo = au_sbi(sb);
12170+ fhsm = &sbinfo->si_fhsm;
12171+ spin_lock(&fhsm->fhsm_spin);
12172+ if (!fhsm->fhsm_pid)
12173+ fhsm->fhsm_pid = current->pid;
12174+ else
12175+ err = -EBUSY; /* fhsm_pid is already claimed */
12176+ spin_unlock(&fhsm->fhsm_spin);
12177+ if (unlikely(err))
12178+ goto out;
12179+
12180+ oflags |= O_RDONLY;
12181+ /* oflags |= FMODE_NONOTIFY; */
12182+ fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
12183+ err = fd;
12184+ if (unlikely(fd < 0))
12185+ goto out_pid;
12186+
12187+ /* succeed regardless of the 'fhsm' status */
12188+ kobject_get(&sbinfo->si_kobj); /* put in au_fhsm_release() */
12189+ si_noflush_read_lock(sb);
12190+ if (au_ftest_si(sbinfo, FHSM))
12191+ au_fhsm_wrote_all(sb, /*force*/0);
12192+ si_read_unlock(sb);
12193+ goto out; /* success, err holds the new fd */
12194+
12195+out_pid:
12196+ spin_lock(&fhsm->fhsm_spin);
12197+ fhsm->fhsm_pid = 0;
12198+ spin_unlock(&fhsm->fhsm_spin);
12199+out:
12200+ AuTraceErr(err);
12201+ return err;
12202+}
12203+
12204+/* ---------------------------------------------------------------------- */
12205+
12206+int au_fhsm_br_alloc(struct au_branch *br)
12207+{
12208+ int err;
12209+
12210+ err = 0;
12211+ br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
12212+ if (br->br_fhsm)
12213+ au_br_fhsm_init(br->br_fhsm);
12214+ else
12215+ err = -ENOMEM;
12216+
12217+ return err;
12218+}
12219+
12220+/* ---------------------------------------------------------------------- */
12221+
12222+void au_fhsm_fin(struct super_block *sb)
12223+{
12224+ au_fhsm_notify(sb, /*val*/-1);
12225+}
12226+
12227+void au_fhsm_init(struct au_sbinfo *sbinfo)
12228+{
12229+ struct au_fhsm *fhsm;
12230+
12231+ fhsm = &sbinfo->si_fhsm;
12232+ spin_lock_init(&fhsm->fhsm_spin);
12233+ init_waitqueue_head(&fhsm->fhsm_wqh);
12234+ atomic_set(&fhsm->fhsm_readable, 0);
12235+ fhsm->fhsm_expire
12236+ = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
12237+ fhsm->fhsm_bottom = -1;
12238+}
12239+
12240+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
12241+{
12242+ sbinfo->si_fhsm.fhsm_expire
12243+ = msecs_to_jiffies(sec * MSEC_PER_SEC);
12244+}
12245+
12246+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
12247+{
12248+ unsigned int u;
12249+
12250+ if (!au_ftest_si(sbinfo, FHSM))
12251+ return;
12252+
12253+ u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
12254+ if (u != AUFS_FHSM_CACHE_DEF_SEC)
12255+ seq_printf(seq, ",fhsm_sec=%u", u);
12256+}
12257diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
12258--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
12259+++ linux/fs/aufs/file.c 2016-10-09 16:55:38.889431135 +0200
12260@@ -0,0 +1,857 @@
12261+/*
12262+ * Copyright (C) 2005-2016 Junjiro R. Okajima
12263+ *
12264+ * This program, aufs is free software; you can redistribute it and/or modify
12265+ * it under the terms of the GNU General Public License as published by
12266+ * the Free Software Foundation; either version 2 of the License, or
12267+ * (at your option) any later version.
12268+ *
12269+ * This program is distributed in the hope that it will be useful,
12270+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12271+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12272+ * GNU General Public License for more details.
12273+ *
12274+ * You should have received a copy of the GNU General Public License
12275+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
12276+ */
12277+
12278+/*
12279+ * handling file/dir, and address_space operation
12280+ */
12281+
12282+#ifdef CONFIG_AUFS_DEBUG
12283+#include <linux/migrate.h>
12284+#endif
12285+#include <linux/pagemap.h>
12286+#include "aufs.h"
12287+
12288+/* strip the write-related open flags and force O_RDONLY | O_NOATIME */
12289+unsigned int au_file_roflags(unsigned int flags)
12290+{
12291+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
12292+ flags |= O_RDONLY | O_NOATIME;
12293+ return flags;
12294+}
12295+
12296+/* common functions to regular file and dir */
12297+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
12298+ struct file *file, int force_wr)
12299+{
12300+ struct file *h_file;
12301+ struct dentry *h_dentry;
12302+ struct inode *h_inode;
12303+ struct super_block *sb;
12304+ struct au_branch *br;
12305+ struct path h_path;
12306+ int err;
12307+
12308+ /* a race condition can happen between open and unlink/rmdir */
12309+ h_file = ERR_PTR(-ENOENT);
12310+ h_dentry = au_h_dptr(dentry, bindex);
12311+ if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
12312+ goto out;
12313+ h_inode = d_inode(h_dentry);
12314+ spin_lock(&h_dentry->d_lock);
12315+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
12316+ /* || !d_inode(dentry)->i_nlink */
12317+ ;
12318+ spin_unlock(&h_dentry->d_lock);
12319+ if (unlikely(err))
12320+ goto out;
12321+
12322+ sb = dentry->d_sb;
12323+ br = au_sbr(sb, bindex);
12324+ err = au_br_test_oflag(flags, br);
12325+ h_file = ERR_PTR(err);
12326+ if (unlikely(err))
12327+ goto out;
12328+
12329+ /* drop flags for writing */
12330+ if (au_test_ro(sb, bindex, d_inode(dentry))) {
12331+ if (force_wr && !(flags & O_WRONLY))
12332+ force_wr = 0;
12333+ flags = au_file_roflags(flags);
12334+ if (force_wr) {
12335+ h_file = ERR_PTR(-EROFS);
12336+ flags = au_file_roflags(flags);
12337+ if (unlikely(vfsub_native_ro(h_inode)
12338+ || IS_APPEND(h_inode)))
12339+ goto out;
12340+ flags &= ~O_ACCMODE;
12341+ flags |= O_WRONLY;
12342+ }
12343+ }
12344+ flags &= ~O_CREAT;
12345+ au_br_get(br);
12346+ h_path.dentry = h_dentry;
12347+ h_path.mnt = au_br_mnt(br);
12348+ h_file = vfsub_dentry_open(&h_path, flags);
12349+ if (IS_ERR(h_file))
12350+ goto out_br;
12351+
12352+ if (flags & __FMODE_EXEC) {
12353+ err = deny_write_access(h_file);
12354+ if (unlikely(err)) {
12355+ fput(h_file);
12356+ h_file = ERR_PTR(err);
12357+ goto out_br;
12358+ }
12359+ }
12360+ fsnotify_open(h_file);
12361+ goto out; /* success */
12362+
12363+out_br:
12364+ au_br_put(br);
12365+out:
12366+ return h_file;
12367+}
12368+
12369+static int au_cmoo(struct dentry *dentry)
12370+{
12371+ int err, cmoo;
12372+ unsigned int udba;
12373+ struct path h_path;
12374+ struct au_pin pin;
12375+ struct au_cp_generic cpg = {
12376+ .dentry = dentry,
12377+ .bdst = -1,
12378+ .bsrc = -1,
12379+ .len = -1,
12380+ .pin = &pin,
12381+ .flags = AuCpup_DTIME | AuCpup_HOPEN
12382+ };
12383+ struct inode *delegated;
12384+ struct super_block *sb;
12385+ struct au_sbinfo *sbinfo;
12386+ struct au_fhsm *fhsm;
12387+ pid_t pid;
12388+ struct au_branch *br;
12389+ struct dentry *parent;
12390+ struct au_hinode *hdir;
12391+
12392+ DiMustWriteLock(dentry);
12393+ IiMustWriteLock(d_inode(dentry));
12394+
12395+ err = 0;
12396+ if (IS_ROOT(dentry))
12397+ goto out;
12398+ cpg.bsrc = au_dbtop(dentry);
12399+ if (!cpg.bsrc)
12400+ goto out;
12401+
12402+ sb = dentry->d_sb;
12403+ sbinfo = au_sbi(sb);
12404+ fhsm = &sbinfo->si_fhsm;
12405+ pid = au_fhsm_pid(fhsm);
12406+ if (pid
12407+ && (current->pid == pid
12408+ || current->real_parent->pid == pid))
12409+ goto out;
12410+
12411+ br = au_sbr(sb, cpg.bsrc);
12412+ cmoo = au_br_cmoo(br->br_perm);
12413+ if (!cmoo)
12414+ goto out;
12415+ if (!d_is_reg(dentry))
12416+ cmoo &= AuBrAttr_COO_ALL;
12417+ if (!cmoo)
12418+ goto out;
12419+
12420+ parent = dget_parent(dentry);
12421+ di_write_lock_parent(parent);
12422+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
12423+ cpg.bdst = err;
12424+ if (unlikely(err < 0)) {
12425+ err = 0; /* there is no upper writable branch */
12426+ goto out_dgrade;
12427+ }
12428+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
12429+
12430+ /* do not respect the coo attrib for the target branch */
12431+ err = au_cpup_dirs(dentry, cpg.bdst);
12432+ if (unlikely(err))
12433+ goto out_dgrade;
12434+
12435+ di_downgrade_lock(parent, AuLock_IR);
12436+ udba = au_opt_udba(sb);
12437+ err = au_pin(&pin, dentry, cpg.bdst, udba,
12438+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12439+ if (unlikely(err))
12440+ goto out_parent;
12441+
12442+ err = au_sio_cpup_simple(&cpg);
12443+ au_unpin(&pin);
12444+ if (unlikely(err))
12445+ goto out_parent;
12446+ if (!(cmoo & AuBrWAttr_MOO))
12447+ goto out_parent; /* success */
12448+
12449+ err = au_pin(&pin, dentry, cpg.bsrc, udba,
12450+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12451+ if (unlikely(err))
12452+ goto out_parent;
12453+
12454+ h_path.mnt = au_br_mnt(br);
12455+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
12456+ hdir = au_hi(d_inode(parent), cpg.bsrc);
12457+ delegated = NULL;
12458+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
12459+ au_unpin(&pin);
12460+ /* todo: keep h_dentry or not? */
12461+ if (unlikely(err == -EWOULDBLOCK)) {
12462+ pr_warn("cannot retry for NFSv4 delegation"
12463+ " for an internal unlink\n");
12464+ iput(delegated);
12465+ }
12466+ if (unlikely(err)) {
12467+ pr_err("unlink %pd after coo failed (%d), ignored\n",
12468+ dentry, err);
12469+ err = 0;
12470+ }
12471+ goto out_parent; /* success */
12472+
12473+out_dgrade:
12474+ di_downgrade_lock(parent, AuLock_IR);
12475+out_parent:
12476+ di_read_unlock(parent, AuLock_IR);
12477+ dput(parent);
12478+out:
12479+ AuTraceErr(err);
12480+ return err;
12481+}
12482+
12483+int au_do_open(struct file *file, struct au_do_open_args *args)
12484+{
12485+ int err, no_lock = args->no_lock;
12486+ struct dentry *dentry;
12487+ struct au_finfo *finfo;
12488+
12489+ if (!no_lock)
12490+ err = au_finfo_init(file, args->fidir);
12491+ else {
12492+ lockdep_off();
12493+ err = au_finfo_init(file, args->fidir);
12494+ lockdep_on();
12495+ }
12496+ if (unlikely(err))
12497+ goto out;
12498+
12499+ dentry = file->f_path.dentry;
12500+ AuDebugOn(IS_ERR_OR_NULL(dentry));
12501+ if (!no_lock) {
12502+ di_write_lock_child(dentry);
12503+ err = au_cmoo(dentry);
12504+ di_downgrade_lock(dentry, AuLock_IR);
12505+ if (!err)
12506+ err = args->open(file, vfsub_file_flags(file), NULL);
12507+ di_read_unlock(dentry, AuLock_IR);
12508+ } else {
12509+ err = au_cmoo(dentry);
12510+ if (!err)
12511+ err = args->open(file, vfsub_file_flags(file),
12512+ args->h_file);
12513+ if (!err && au_fbtop(file) != au_dbtop(dentry))
12514+ /*
12515+ * cmoo happens after h_file was opened.
12516+ * need to refresh file later.
12517+ */
12518+ atomic_dec(&au_fi(file)->fi_generation);
12519+ }
12520+
12521+ finfo = au_fi(file);
12522+ if (!err) {
12523+ finfo->fi_file = file;
12524+ au_sphl_add(&finfo->fi_hlist,
12525+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
12526+ }
12527+ if (!no_lock)
12528+ fi_write_unlock(file);
12529+ else {
12530+ lockdep_off();
12531+ fi_write_unlock(file);
12532+ lockdep_on();
12533+ }
12534+ if (unlikely(err)) {
12535+ finfo->fi_hdir = NULL;
12536+ au_finfo_fin(file, /*atonce*/0);
12537+ }
12538+
12539+out:
12540+ return err;
12541+}
12542+
12543+int au_reopen_nondir(struct file *file)
12544+{
12545+ int err;
12546+ aufs_bindex_t btop;
12547+ struct dentry *dentry;
12548+ struct file *h_file, *h_file_tmp;
12549+
12550+ dentry = file->f_path.dentry;
12551+ btop = au_dbtop(dentry);
12552+ h_file_tmp = NULL;
12553+ if (au_fbtop(file) == btop) {
12554+ h_file = au_hf_top(file);
12555+ if (file->f_mode == h_file->f_mode)
12556+ return 0; /* success, nothing to reopen */
12557+ h_file_tmp = h_file;
12558+ get_file(h_file_tmp); /* kept to be restored if the open fails */
12559+ au_set_h_fptr(file, btop, NULL);
12560+ }
12561+ AuDebugOn(au_fi(file)->fi_hdir);
12562+ /*
12563+ * the following can happen:
12564+ * the file exists on both the rw and the ro branches
12565+ * open --> dbtop and fbtop are both 0
12566+ * prepend a branch as rw, "rw" becomes ro
12567+ * remove rw/file
12568+ * delete the top branch, "rw" becomes rw again
12569+ * --> dbtop is 1, fbtop is still 0
12570+ * write --> fbtop is 0 but dbtop is 1
12571+ */
12572+ /* AuDebugOn(au_fbtop(file) < btop); */
12573+
12574+ h_file = au_h_open(dentry, btop, vfsub_file_flags(file) & ~O_TRUNC,
12575+ file, /*force_wr*/0);
12576+ err = PTR_ERR(h_file);
12577+ if (IS_ERR(h_file)) {
12578+ if (h_file_tmp) {
12579+ au_sbr_get(dentry->d_sb, btop);
12580+ au_set_h_fptr(file, btop, h_file_tmp);
12581+ h_file_tmp = NULL; /* restored, skip the fput below */
12582+ }
12583+ goto out; /* todo: close all? */
12584+ }
12585+
12586+ err = 0;
12587+ au_set_fbtop(file, btop);
12588+ au_set_h_fptr(file, btop, h_file);
12589+ au_update_figen(file);
12590+ /* todo: necessary? */
12591+ /* file->f_ra = h_file->f_ra; */
12592+
12593+out:
12594+ if (h_file_tmp)
12595+ fput(h_file_tmp);
12596+ return err;
12597+}
12598+
12599+/* ---------------------------------------------------------------------- */
12600+
12601+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
12602+ struct dentry *hi_wh)
12603+{
12604+ int err;
12605+ aufs_bindex_t btop;
12606+ struct au_dinfo *dinfo;
12607+ struct dentry *h_dentry;
12608+ struct au_hdentry *hdp;
12609+
12610+ dinfo = au_di(file->f_path.dentry);
12611+ AuRwMustWriteLock(&dinfo->di_rwsem);
12612+
12613+ btop = dinfo->di_btop;
12614+ dinfo->di_btop = btgt;
12615+ hdp = au_hdentry(dinfo, btgt);
12616+ h_dentry = hdp->hd_dentry;
12617+ hdp->hd_dentry = hi_wh;
12618+ err = au_reopen_nondir(file);
12619+ hdp->hd_dentry = h_dentry;
12620+ dinfo->di_btop = btop;
12621+
12622+ return err;
12623+}
12624+
12625+static int au_ready_to_write_wh(struct file *file, loff_t len,
12626+ aufs_bindex_t bcpup, struct au_pin *pin)
12627+{
12628+ int err;
12629+ struct inode *inode, *h_inode;
12630+ struct dentry *h_dentry, *hi_wh;
12631+ struct au_cp_generic cpg = {
12632+ .dentry = file->f_path.dentry,
12633+ .bdst = bcpup,
12634+ .bsrc = -1,
12635+ .len = len,
12636+ .pin = pin
12637+ };
12638+
12639+ au_update_dbtop(cpg.dentry);
12640+ inode = d_inode(cpg.dentry);
12641+ h_inode = NULL;
12642+ if (au_dbtop(cpg.dentry) <= bcpup
12643+ && au_dbbot(cpg.dentry) >= bcpup) {
12644+ h_dentry = au_h_dptr(cpg.dentry, bcpup);
12645+ if (h_dentry && d_is_positive(h_dentry))
12646+ h_inode = d_inode(h_dentry);
12647+ }
12648+ hi_wh = au_hi_wh(inode, bcpup);
12649+ if (!hi_wh && !h_inode)
12650+ err = au_sio_cpup_wh(&cpg, file);
12651+ else
12652+ /* already copied-up after unlink */
12653+ err = au_reopen_wh(file, bcpup, hi_wh);
12654+
12655+ if (!err
12656+ && (inode->i_nlink > 1
12657+ || (inode->i_state & I_LINKABLE))
12658+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
12659+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
12660+
12661+ return err;
12662+}
12663+
12664+/*
12665+ * prepare the @file for writing.
12666+ */
12667+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
12668+{
12669+ int err;
12670+ aufs_bindex_t dbtop;
12671+ struct dentry *parent;
12672+ struct inode *inode;
12673+ struct super_block *sb;
12674+ struct file *h_file;
12675+ struct au_cp_generic cpg = {
12676+ .dentry = file->f_path.dentry,
12677+ .bdst = -1,
12678+ .bsrc = -1,
12679+ .len = len,
12680+ .pin = pin,
12681+ .flags = AuCpup_DTIME
12682+ };
12683+
12684+ sb = cpg.dentry->d_sb;
12685+ inode = d_inode(cpg.dentry);
12686+ cpg.bsrc = au_fbtop(file);
12687+ err = au_test_ro(sb, cpg.bsrc, inode);
12688+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
12689+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
12690+ /*flags*/0);
12691+ goto out;
12692+ }
12693+
12694+ /* need to cpup or reopen */
12695+ parent = dget_parent(cpg.dentry);
12696+ di_write_lock_parent(parent);
12697+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12698+ cpg.bdst = err;
12699+ if (unlikely(err < 0))
12700+ goto out_dgrade;
12701+ err = 0;
12702+
12703+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
12704+ err = au_cpup_dirs(cpg.dentry, cpg.bdst);
12705+ if (unlikely(err))
12706+ goto out_dgrade;
12707+ }
12708+
12709+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
12710+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12711+ if (unlikely(err))
12712+ goto out_dgrade;
12713+
12714+ dbtop = au_dbtop(cpg.dentry);
12715+ if (dbtop <= cpg.bdst)
12716+ cpg.bsrc = cpg.bdst;
12717+
12718+ if (dbtop <= cpg.bdst /* just reopen */
12719+ || !d_unhashed(cpg.dentry) /* copyup and reopen */
12720+ ) {
12721+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
12722+ if (IS_ERR(h_file))
12723+ err = PTR_ERR(h_file);
12724+ else {
12725+ di_downgrade_lock(parent, AuLock_IR);
12726+ if (dbtop > cpg.bdst)
12727+ err = au_sio_cpup_simple(&cpg);
12728+ if (!err)
12729+ err = au_reopen_nondir(file);
12730+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
12731+ }
12732+ } else { /* copyup as wh and reopen */
12733+ /*
12734+ * since writable hfsplus branch is not supported,
12735+ * h_open_pre/post() are unnecessary.
12736+ */
12737+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
12738+ di_downgrade_lock(parent, AuLock_IR);
12739+ }
12740+
12741+ if (!err) {
12742+ au_pin_set_parent_lflag(pin, /*lflag*/0);
12743+ goto out_dput; /* success */
12744+ }
12745+ au_unpin(pin);
12746+ goto out_unlock;
12747+
12748+out_dgrade:
12749+ di_downgrade_lock(parent, AuLock_IR);
12750+out_unlock:
12751+ di_read_unlock(parent, AuLock_IR);
12752+out_dput:
12753+ dput(parent);
12754+out:
12755+ return err;
12756+}
12757+
12758+/* ---------------------------------------------------------------------- */
12759+
12760+int au_do_flush(struct file *file, fl_owner_t id,
12761+ int (*flush)(struct file *file, fl_owner_t id))
12762+{
12763+ int err;
12764+ struct super_block *sb;
12765+ struct inode *inode;
12766+
12767+ inode = file_inode(file);
12768+ sb = inode->i_sb;
12769+ si_noflush_read_lock(sb);
12770+ fi_read_lock(file);
12771+ ii_read_lock_child(inode);
12772+
12773+ err = flush(file, id);
12774+ au_cpup_attr_timesizes(inode);
12775+
12776+ ii_read_unlock(inode);
12777+ fi_read_unlock(file);
12778+ si_read_unlock(sb);
12779+ return err;
12780+}
12781+
12782+/* ---------------------------------------------------------------------- */
12783+
12784+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
12785+{
12786+ int err;
12787+ struct au_pin pin;
12788+ struct au_finfo *finfo;
12789+ struct dentry *parent, *hi_wh;
12790+ struct inode *inode;
12791+ struct super_block *sb;
12792+ struct au_cp_generic cpg = {
12793+ .dentry = file->f_path.dentry,
12794+ .bdst = -1,
12795+ .bsrc = -1,
12796+ .len = -1,
12797+ .pin = &pin,
12798+ .flags = AuCpup_DTIME
12799+ };
12800+
12801+ FiMustWriteLock(file);
12802+
12803+ err = 0;
12804+ finfo = au_fi(file);
12805+ sb = cpg.dentry->d_sb;
12806+ inode = d_inode(cpg.dentry);
12807+ cpg.bdst = au_ibtop(inode);
12808+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
12809+ goto out;
12810+
12811+ parent = dget_parent(cpg.dentry);
12812+ if (au_test_ro(sb, cpg.bdst, inode)) {
12813+ di_read_lock_parent(parent, !AuLock_IR);
12814+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12815+ cpg.bdst = err;
12816+ di_read_unlock(parent, !AuLock_IR);
12817+ if (unlikely(err < 0))
12818+ goto out_parent;
12819+ err = 0;
12820+ }
12821+
12822+ di_read_lock_parent(parent, AuLock_IR);
12823+ hi_wh = au_hi_wh(inode, cpg.bdst);
12824+ if (!S_ISDIR(inode->i_mode)
12825+ && au_opt_test(au_mntflags(sb), PLINK)
12826+ && au_plink_test(inode)
12827+ && !d_unhashed(cpg.dentry)
12828+ && cpg.bdst < au_dbtop(cpg.dentry)) {
12829+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
12830+ if (unlikely(err))
12831+ goto out_unlock;
12832+
12833+ /* always superio. */
12834+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
12835+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12836+ if (!err) {
12837+ err = au_sio_cpup_simple(&cpg);
12838+ au_unpin(&pin);
12839+ }
12840+ } else if (hi_wh) {
12841+ /* already copied-up after unlink */
12842+ err = au_reopen_wh(file, cpg.bdst, hi_wh);
12843+ *need_reopen = 0;
12844+ }
12845+
12846+out_unlock:
12847+ di_read_unlock(parent, AuLock_IR);
12848+out_parent:
12849+ dput(parent);
12850+out:
12851+ return err;
12852+}
12853+
12854+static void au_do_refresh_dir(struct file *file)
12855+{
12856+ int execed;
12857+ aufs_bindex_t bindex, bbot, new_bindex, brid;
12858+ struct au_hfile *p, tmp, *q;
12859+ struct au_finfo *finfo;
12860+ struct super_block *sb;
12861+ struct au_fidir *fidir;
12862+
12863+ FiMustWriteLock(file);
12864+
12865+ sb = file->f_path.dentry->d_sb;
12866+ finfo = au_fi(file);
12867+ fidir = finfo->fi_hdir;
12868+ AuDebugOn(!fidir);
12869+ p = fidir->fd_hfile + finfo->fi_btop;
12870+ brid = p->hf_br->br_id;
12871+ bbot = fidir->fd_bbot;
12872+ for (bindex = finfo->fi_btop; bindex <= bbot; bindex++, p++) {
12873+ if (!p->hf_file)
12874+ continue;
12875+
12876+ new_bindex = au_br_index(sb, p->hf_br->br_id);
12877+ if (new_bindex == bindex)
12878+ continue;
12879+ if (new_bindex < 0) {
12880+ au_set_h_fptr(file, bindex, NULL);
12881+ continue;
12882+ }
12883+
12884+ /* swap the two lower file slots, and re-examine this index */
12885+ q = fidir->fd_hfile + new_bindex;
12886+ tmp = *q;
12887+ *q = *p;
12888+ *p = tmp;
12889+ if (tmp.hf_file) {
12890+ bindex--;
12891+ p--;
12892+ }
12893+ }
12894+
12895+ execed = vfsub_file_execed(file);
12896+ p = fidir->fd_hfile;
12897+ if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
12898+ bbot = au_sbbot(sb);
12899+ for (finfo->fi_btop = 0; finfo->fi_btop <= bbot;
12900+ finfo->fi_btop++, p++)
12901+ if (p->hf_file) {
12902+ if (file_inode(p->hf_file))
12903+ break;
12904+ au_hfput(p, execed);
12905+ }
12906+ } else {
12907+ bbot = au_br_index(sb, brid);
12908+ for (finfo->fi_btop = 0; finfo->fi_btop < bbot;
12909+ finfo->fi_btop++, p++)
12910+ if (p->hf_file)
12911+ au_hfput(p, execed);
12912+ bbot = au_sbbot(sb);
12913+ }
12914+
12915+ p = fidir->fd_hfile + bbot;
12916+ for (fidir->fd_bbot = bbot; fidir->fd_bbot >= finfo->fi_btop;
12917+ fidir->fd_bbot--, p--)
12918+ if (p->hf_file) {
12919+ if (file_inode(p->hf_file))
12920+ break;
12921+ au_hfput(p, execed);
12922+ }
12923+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
12924+}
12925+
12926+/*
12927+ * refresh the file after the branches have been manipulated.
12928+ */
12929+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
12930+{
12931+ int err, need_reopen, nbr;
12932+ aufs_bindex_t bbot, bindex;
12933+ struct dentry *dentry;
12934+ struct super_block *sb;
12935+ struct au_finfo *finfo;
12936+ struct au_hfile *hfile;
12937+
12938+ dentry = file->f_path.dentry;
12939+ sb = dentry->d_sb;
12940+ nbr = au_sbbot(sb) + 1;
12941+ finfo = au_fi(file);
12942+ if (!finfo->fi_hdir) {
12943+ hfile = &finfo->fi_htop;
12944+ AuDebugOn(!hfile->hf_file);
12945+ bindex = au_br_index(sb, hfile->hf_br->br_id);
12946+ AuDebugOn(bindex < 0);
12947+ if (bindex != finfo->fi_btop)
12948+ au_set_fbtop(file, bindex);
12949+ } else {
12950+ err = au_fidir_realloc(finfo, nbr, /*may_shrink*/0);
12951+ if (unlikely(err))
12952+ goto out;
12953+ au_do_refresh_dir(file);
12954+ }
12955+
12956+ err = 0;
12957+ need_reopen = 1;
12958+ if (!au_test_mmapped(file))
12959+ err = au_file_refresh_by_inode(file, &need_reopen);
12960+ if (finfo->fi_hdir)
12961+ /* harmless if err */
12962+ au_fidir_realloc(finfo, nbr, /*may_shrink*/1);
12963+ if (!err && need_reopen && !d_unlinked(dentry))
12964+ err = reopen(file);
12965+ if (!err) {
12966+ au_update_figen(file);
12967+ goto out; /* success */
12968+ }
12969+
12970+ /* error, close all lower files */
12971+ if (finfo->fi_hdir) {
12972+ bbot = au_fbbot_dir(file);
12973+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++)
12974+ au_set_h_fptr(file, bindex, NULL);
12975+ }
12976+
12977+out:
12978+ return err;
12979+}
12980+
12981+/* common function to regular file and dir */
/*
 * Write-lock the finfo of @file and the dinfo of its dentry, then refresh
 * the file through refresh_file() unless it is already up to date, i.e.
 * the superblock generation equals the file generation, the top indices
 * of the dentry and the inode agree, and the top index of the file equals
 * the one of the dentry.
 * When 0 is returned both locks are still held: as write locks if @wlock
 * is set, otherwise downgraded (the dinfo one with AuLock_IR).
 * When an error is returned both write locks have been released.
 */
12982+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12983+ int wlock)
12984+{
12985+ int err;
12986+ unsigned int sigen, figen;
12987+ aufs_bindex_t btop;
12988+ unsigned char pseudo_link;
12989+ struct dentry *dentry;
12990+ struct inode *inode;
12991+
12992+ err = 0;
12993+ dentry = file->f_path.dentry;
12994+ inode = d_inode(dentry);
12995+ sigen = au_sigen(dentry->d_sb);
12996+ fi_write_lock(file);
12997+ figen = au_figen(file);
12998+ di_write_lock_child(dentry);
12999+ btop = au_dbtop(dentry);
13000+ pseudo_link = (btop != au_ibtop(inode)); /* dentry and inode tops differ */
13001+ if (sigen == figen && !pseudo_link && au_fbtop(file) == btop) {
13002+ if (!wlock) {
13003+ di_downgrade_lock(dentry, AuLock_IR);
13004+ fi_downgrade_lock(file);
13005+ }
13006+ goto out; /* success */
13007+ }
13008+
13009+ AuDbg("sigen %d, figen %d\n", sigen, figen);
13010+ if (au_digen_test(dentry, sigen)) {
13011+ err = au_reval_dpath(dentry, sigen);
13012+ AuDebugOn(!err && au_digen_test(dentry, sigen));
13013+ }
13014+
13015+ if (!err)
13016+ err = refresh_file(file, reopen);
13017+ if (!err) {
13018+ if (!wlock) {
13019+ di_downgrade_lock(dentry, AuLock_IR);
13020+ fi_downgrade_lock(file);
13021+ }
13022+ } else {
13023+ di_write_unlock(dentry); /* failure: the caller gets no lock at all */
13024+ fi_write_unlock(file);
13025+ }
13026+
13027+out:
13028+ return err;
13029+}
13030+
13031+/* ---------------------------------------------------------------------- */
13032+
13033+/* cf. aufs_nopage() */
13034+/* for madvise(2) */
13035+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
13036+{
13037+ unlock_page(page);
13038+ return 0;
13039+}
13040+
13041+/* it will never be called, but necessary to support O_DIRECT */
13042+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
13043+{ BUG(); return 0; }
13044+
/*
 * Debug-only address_space methods.  They will never be called; every one
 * of them just invokes AuUnsupport() and returns a fixed value.
 */
#ifdef CONFIG_AUFS_DEBUG
static int aufs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	AuUnsupport();
	return 0;
}

static int aufs_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	AuUnsupport();
	return 0;
}

static int aufs_writepage(struct page *page, struct writeback_control *wbc)
{
	AuUnsupport();
	return 0;
}

static int aufs_set_page_dirty(struct page *page)
{
	AuUnsupport();
	return 0;
}

static void aufs_invalidatepage(struct page *page, unsigned int offset,
				unsigned int length)
{
	AuUnsupport();
}

static int aufs_releasepage(struct page *page, gfp_t gfp)
{
	AuUnsupport();
	return 0;
}

#if 0 /* called by memory compaction regardless file */
static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
			    struct page *page, enum migrate_mode mode)
{
	AuUnsupport();
	return 0;
}
#endif

static bool aufs_isolate_page(struct page *page, isolate_mode_t mode)
{
	AuUnsupport();
	return true;
}

static void aufs_putback_page(struct page *page)
{
	AuUnsupport();
}

static int aufs_launder_page(struct page *page)
{
	AuUnsupport();
	return 0;
}

static int aufs_is_partially_uptodate(struct page *page,
				      unsigned long from,
				      unsigned long count)
{
	AuUnsupport();
	return 0;
}

static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
				    bool *writeback)
{
	AuUnsupport();
}

static int aufs_error_remove_page(struct address_space *mapping,
				  struct page *page)
{
	AuUnsupport();
	return 0;
}

static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
			      sector_t *span)
{
	AuUnsupport();
	return 0;
}

static void aufs_swap_deactivate(struct file *file)
{
	AuUnsupport();
}
#endif /* CONFIG_AUFS_DEBUG */
13092+
/*
 * address_space operations of aufs.
 * Only ->readpage and ->direct_IO are always installed.  The remaining
 * methods are set under CONFIG_AUFS_DEBUG only, and all of them are the
 * stubs which call AuUnsupport().
 */
13093+const struct address_space_operations aufs_aop = {
13094+ .readpage = aufs_readpage,
13095+ .direct_IO = aufs_direct_IO,
13096+#ifdef CONFIG_AUFS_DEBUG
13097+ .writepage = aufs_writepage,
13098+ /* no writepages, because of writepage */
13099+ .set_page_dirty = aufs_set_page_dirty,
13100+ /* no readpages, because of readpage */
13101+ .write_begin = aufs_write_begin,
13102+ .write_end = aufs_write_end,
13103+ /* no bmap, no block device */
13104+ .invalidatepage = aufs_invalidatepage,
13105+ .releasepage = aufs_releasepage,
13106+ /* is fallback_migrate_page ok? */
13107+ /* .migratepage = aufs_migratepage, */
13108+ .isolate_page = aufs_isolate_page,
13109+ .putback_page = aufs_putback_page,
13110+ .launder_page = aufs_launder_page,
13111+ .is_partially_uptodate = aufs_is_partially_uptodate,
13112+ .is_dirty_writeback = aufs_is_dirty_writeback,
13113+ .error_remove_page = aufs_error_remove_page,
13114+ .swap_activate = aufs_swap_activate,
13115+ .swap_deactivate = aufs_swap_deactivate
13116+#endif /* CONFIG_AUFS_DEBUG */
13117+};
13118diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
13119--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
13120+++ linux/fs/aufs/file.h 2016-10-09 16:55:38.889431135 +0200
13121@@ -0,0 +1,294 @@
13122+/*
13123+ * Copyright (C) 2005-2016 Junjiro R. Okajima
13124+ *
13125+ * This program, aufs is free software; you can redistribute it and/or modify
13126+ * it under the terms of the GNU General Public License as published by
13127+ * the Free Software Foundation; either version 2 of the License, or
13128+ * (at your option) any later version.
13129+ *
13130+ * This program is distributed in the hope that it will be useful,
13131+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13132+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13133+ * GNU General Public License for more details.
13134+ *
13135+ * You should have received a copy of the GNU General Public License
13136+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
13137+ */
13138+
13139+/*
13140+ * file operations
13141+ */
13142+
13143+#ifndef __AUFS_FILE_H__
13144+#define __AUFS_FILE_H__
13145+
13146+#ifdef __KERNEL__
13147+
13148+#include <linux/file.h>
13149+#include <linux/fs.h>
13150+#include <linux/poll.h>
13151+#include "rwsem.h"
13152+
13153+struct au_branch;
/*
 * A lower file paired with a branch.  au_set_h_fptr() fills both members
 * and au_hfput() releases both (fput() and au_br_put()) and clears them.
 */
13154+struct au_hfile {
13155+ struct file *hf_file; /* NULL when the slot is unused */
13156+ struct au_branch *hf_br; /* set from au_sbr() for the slot's index */
13157+};
13158+
13159+struct au_vdir;
/*
 * Variable-sized array of lower files, hung off au_finfo.fi_hdir.
 * Its size in bytes is given by au_fidir_sz(fd_nent).
 */
13160+struct au_fidir {
13161+ aufs_bindex_t fd_bbot; /* bottom index, -1 right after au_fidir_alloc() */
13162+ aufs_bindex_t fd_nent; /* number of allocated fd_hfile[] entries */
13163+ struct au_vdir *fd_vdir_cache;
13164+ struct au_hfile fd_hfile[]; /* indexed by bindex, see au_hf_dir() */
13165+};
13166+
13167+static inline int au_fidir_sz(int nent)
13168+{
13169+ AuDebugOn(nent < 0);
13170+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
13171+}
13172+
/*
 * aufs private data of an opened file, stored in file->private_data
 * (see au_fi()).
 */
13173+struct au_finfo {
13174+ atomic_t fi_generation; /* read by au_figen(), set by au_update_figen() */
13175+
13176+ struct au_rwsem fi_rwsem; /* taken via fi_read_lock()/fi_write_lock() */
13177+ aufs_bindex_t fi_btop; /* -1 right after au_finfo_init() */
13178+
13179+ /* do not union them */
13180+ struct { /* for non-dir */
13181+ struct au_hfile fi_htop;
13182+ atomic_t fi_mmapped;
13183+ };
13184+ struct au_fidir *fi_hdir; /* for dir only */
13185+
13186+ struct hlist_node fi_hlist;
13187+ union {
13188+ struct file *fi_file; /* very ugly */
13189+ struct llist_node fi_lnode; /* delayed free */
13190+ };
13191+} ____cacheline_aligned_in_smp;
13192+
13193+/* ---------------------------------------------------------------------- */
13194+
13195+/* file.c */
13196+extern const struct address_space_operations aufs_aop;
13197+unsigned int au_file_roflags(unsigned int flags);
13198+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
13199+ struct file *file, int force_wr);
/*
 * Arguments of au_do_open().  aufs_open_nondir() sets only ->open (to
 * au_do_open_nondir()) and leaves the other members zero-initialized.
 */
13200+struct au_do_open_args {
13201+ int no_lock;
13202+ int (*open)(struct file *file, int flags,
13203+ struct file *h_file);
13204+ struct au_fidir *fidir;
13205+ struct file *h_file;
13206+};
13207+int au_do_open(struct file *file, struct au_do_open_args *args);
13208+int au_reopen_nondir(struct file *file);
13209+struct au_pin;
13210+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
13211+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
13212+ int wlock);
13213+int au_do_flush(struct file *file, fl_owner_t id,
13214+ int (*flush)(struct file *file, fl_owner_t id));
13215+
13216+/* poll.c */
13217+#ifdef CONFIG_AUFS_POLL
13218+unsigned int aufs_poll(struct file *file, poll_table *wait);
13219+#endif
13220+
13221+#ifdef CONFIG_AUFS_BR_HFSPLUS
13222+/* hfsplus.c */
13223+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
13224+ int force_wr);
13225+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
13226+ struct file *h_file);
13227+#else
13228+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
13229+ aufs_bindex_t bindex, int force_wr)
13230+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
13231+ struct file *h_file);
13232+#endif
13233+
13234+/* f_op.c */
13235+extern const struct file_operations aufs_file_fop;
13236+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
13237+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
13238+struct file *au_read_pre(struct file *file, int keep_fi);
13239+
13240+/* finfo.c */
13241+void au_hfput(struct au_hfile *hf, int execed);
13242+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
13243+ struct file *h_file);
13244+
13245+void au_update_figen(struct file *file);
13246+struct au_fidir *au_fidir_alloc(struct super_block *sb);
13247+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink);
13248+
13249+void au_fi_init_once(void *_fi);
13250+void au_finfo_fin(struct file *file, int atonce);
13251+int au_finfo_init(struct file *file, struct au_fidir *fidir);
13252+
13253+/* ioctl.c */
13254+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
13255+#ifdef CONFIG_COMPAT
13256+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
13257+ unsigned long arg);
13258+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
13259+ unsigned long arg);
13260+#endif
13261+
13262+/* ---------------------------------------------------------------------- */
13263+
13264+static inline struct au_finfo *au_fi(struct file *file)
13265+{
13266+ return file->private_data;
13267+}
13268+
13269+/* ---------------------------------------------------------------------- */
13270+
13271+/*
13272+ * fi_read_lock, fi_write_lock,
13273+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
13274+ */
13275+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
13276+
13277+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
13278+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
13279+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
13280+
13281+/* ---------------------------------------------------------------------- */
13282+
13283+/* todo: hard/soft set? */
13284+static inline aufs_bindex_t au_fbtop(struct file *file)
13285+{
13286+ FiMustAnyLock(file);
13287+ return au_fi(file)->fi_btop;
13288+}
13289+
13290+static inline aufs_bindex_t au_fbbot_dir(struct file *file)
13291+{
13292+ FiMustAnyLock(file);
13293+ AuDebugOn(!au_fi(file)->fi_hdir);
13294+ return au_fi(file)->fi_hdir->fd_bbot;
13295+}
13296+
13297+static inline struct au_vdir *au_fvdir_cache(struct file *file)
13298+{
13299+ FiMustAnyLock(file);
13300+ AuDebugOn(!au_fi(file)->fi_hdir);
13301+ return au_fi(file)->fi_hdir->fd_vdir_cache;
13302+}
13303+
13304+static inline void au_set_fbtop(struct file *file, aufs_bindex_t bindex)
13305+{
13306+ FiMustWriteLock(file);
13307+ au_fi(file)->fi_btop = bindex;
13308+}
13309+
13310+static inline void au_set_fbbot_dir(struct file *file, aufs_bindex_t bindex)
13311+{
13312+ FiMustWriteLock(file);
13313+ AuDebugOn(!au_fi(file)->fi_hdir);
13314+ au_fi(file)->fi_hdir->fd_bbot = bindex;
13315+}
13316+
13317+static inline void au_set_fvdir_cache(struct file *file,
13318+ struct au_vdir *vdir_cache)
13319+{
13320+ FiMustWriteLock(file);
13321+ AuDebugOn(!au_fi(file)->fi_hdir);
13322+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
13323+}
13324+
13325+static inline struct file *au_hf_top(struct file *file)
13326+{
13327+ FiMustAnyLock(file);
13328+ AuDebugOn(au_fi(file)->fi_hdir);
13329+ return au_fi(file)->fi_htop.hf_file;
13330+}
13331+
13332+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
13333+{
13334+ FiMustAnyLock(file);
13335+ AuDebugOn(!au_fi(file)->fi_hdir);
13336+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
13337+}
13338+
13339+/* todo: memory barrier? */
13340+static inline unsigned int au_figen(struct file *f)
13341+{
13342+ return atomic_read(&au_fi(f)->fi_generation);
13343+}
13344+
13345+static inline void au_set_mmapped(struct file *f)
13346+{
13347+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
13348+ return;
13349+ pr_warn("fi_mmapped wrapped around\n");
13350+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
13351+ ;
13352+}
13353+
13354+static inline void au_unset_mmapped(struct file *f)
13355+{
13356+ atomic_dec(&au_fi(f)->fi_mmapped);
13357+}
13358+
13359+static inline int au_test_mmapped(struct file *f)
13360+{
13361+ return atomic_read(&au_fi(f)->fi_mmapped);
13362+}
13363+
13364+/* customize vma->vm_file */
13365+
13366+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
13367+ struct file *file)
13368+{
13369+ struct file *f;
13370+
13371+ f = vma->vm_file;
13372+ get_file(file);
13373+ vma->vm_file = file;
13374+ fput(f);
13375+}
13376+
13377+#ifdef CONFIG_MMU
13378+#define AuDbgVmRegion(file, vma) do {} while (0)
13379+
13380+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13381+ struct file *file)
13382+{
13383+ au_do_vm_file_reset(vma, file);
13384+}
13385+#else
13386+#define AuDbgVmRegion(file, vma) \
13387+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
13388+
13389+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13390+ struct file *file)
13391+{
13392+ struct file *f;
13393+
13394+ au_do_vm_file_reset(vma, file);
13395+ f = vma->vm_region->vm_file;
13396+ get_file(file);
13397+ vma->vm_region->vm_file = file;
13398+ fput(f);
13399+}
13400+#endif /* CONFIG_MMU */
13401+
13402+/* handle vma->vm_prfile */
13403+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
13404+ struct file *file)
13405+{
13406+ get_file(file);
13407+ vma->vm_prfile = file;
13408+#ifndef CONFIG_MMU
13409+ get_file(file);
13410+ vma->vm_region->vm_prfile = file;
13411+#endif
13412+}
13413+
13414+#endif /* __KERNEL__ */
13415+#endif /* __AUFS_FILE_H__ */
13416diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
13417--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
13418+++ linux/fs/aufs/finfo.c 2016-10-09 16:55:38.889431135 +0200
13419@@ -0,0 +1,151 @@
13420+/*
13421+ * Copyright (C) 2005-2016 Junjiro R. Okajima
13422+ *
13423+ * This program, aufs is free software; you can redistribute it and/or modify
13424+ * it under the terms of the GNU General Public License as published by
13425+ * the Free Software Foundation; either version 2 of the License, or
13426+ * (at your option) any later version.
13427+ *
13428+ * This program is distributed in the hope that it will be useful,
13429+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13430+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13431+ * GNU General Public License for more details.
13432+ *
13433+ * You should have received a copy of the GNU General Public License
13434+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
13435+ */
13436+
13437+/*
13438+ * file private data
13439+ */
13440+
13441+#include "aufs.h"
13442+
13443+void au_hfput(struct au_hfile *hf, int execed)
13444+{
13445+ if (execed)
13446+ allow_write_access(hf->hf_file);
13447+ fput(hf->hf_file);
13448+ hf->hf_file = NULL;
13449+ au_br_put(hf->hf_br);
13450+ hf->hf_br = NULL;
13451+}
13452+
13453+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
13454+{
13455+ struct au_finfo *finfo = au_fi(file);
13456+ struct au_hfile *hf;
13457+ struct au_fidir *fidir;
13458+
13459+ fidir = finfo->fi_hdir;
13460+ if (!fidir) {
13461+ AuDebugOn(finfo->fi_btop != bindex);
13462+ hf = &finfo->fi_htop;
13463+ } else
13464+ hf = fidir->fd_hfile + bindex;
13465+
13466+ if (hf && hf->hf_file)
13467+ au_hfput(hf, vfsub_file_execed(file));
13468+ if (val) {
13469+ FiMustWriteLock(file);
13470+ AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
13471+ hf->hf_file = val;
13472+ hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
13473+ }
13474+}
13475+
13476+void au_update_figen(struct file *file)
13477+{
13478+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
13479+ /* smp_mb(); */ /* atomic_set */
13480+}
13481+
13482+/* ---------------------------------------------------------------------- */
13483+
13484+struct au_fidir *au_fidir_alloc(struct super_block *sb)
13485+{
13486+ struct au_fidir *fidir;
13487+ int nbr;
13488+
13489+ nbr = au_sbbot(sb) + 1;
13490+ if (nbr < 2)
13491+ nbr = 2; /* initial allocate for 2 branches */
13492+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
13493+ if (fidir) {
13494+ fidir->fd_bbot = -1;
13495+ fidir->fd_nent = nbr;
13496+ }
13497+
13498+ return fidir;
13499+}
13500+
13501+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink)
13502+{
13503+ int err;
13504+ struct au_fidir *fidir, *p;
13505+
13506+ AuRwMustWriteLock(&finfo->fi_rwsem);
13507+ fidir = finfo->fi_hdir;
13508+ AuDebugOn(!fidir);
13509+
13510+ err = -ENOMEM;
13511+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
13512+ GFP_NOFS, may_shrink);
13513+ if (p) {
13514+ p->fd_nent = nbr;
13515+ finfo->fi_hdir = p;
13516+ err = 0;
13517+ }
13518+
13519+ return err;
13520+}
13521+
13522+/* ---------------------------------------------------------------------- */
13523+
13524+void au_finfo_fin(struct file *file, int atonce)
13525+{
13526+ struct au_finfo *finfo;
13527+
13528+ au_nfiles_dec(file->f_path.dentry->d_sb);
13529+
13530+ finfo = au_fi(file);
13531+ AuDebugOn(finfo->fi_hdir);
13532+ AuRwDestroy(&finfo->fi_rwsem);
13533+ if (!atonce)
13534+ au_cache_dfree_finfo(finfo);
13535+ else
13536+ au_cache_free_finfo(finfo);
13537+}
13538+
13539+void au_fi_init_once(void *_finfo)
13540+{
13541+ struct au_finfo *finfo = _finfo;
13542+
13543+ au_rw_init(&finfo->fi_rwsem);
13544+}
13545+
13546+int au_finfo_init(struct file *file, struct au_fidir *fidir)
13547+{
13548+ int err;
13549+ struct au_finfo *finfo;
13550+ struct dentry *dentry;
13551+
13552+ err = -ENOMEM;
13553+ dentry = file->f_path.dentry;
13554+ finfo = au_cache_alloc_finfo();
13555+ if (unlikely(!finfo))
13556+ goto out;
13557+
13558+ err = 0;
13559+ au_nfiles_inc(dentry->d_sb);
13560+ au_rw_write_lock(&finfo->fi_rwsem);
13561+ finfo->fi_btop = -1;
13562+ finfo->fi_hdir = fidir;
13563+ atomic_set(&finfo->fi_generation, au_digen(dentry));
13564+ /* smp_mb(); */ /* atomic_set */
13565+
13566+ file->private_data = finfo;
13567+
13568+out:
13569+ return err;
13570+}
13571diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
13572--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
13573+++ linux/fs/aufs/f_op.c 2016-12-17 12:28:17.595211562 +0100
13574@@ -0,0 +1,723 @@
13575+/*
13576+ * Copyright (C) 2005-2016 Junjiro R. Okajima
13577+ *
13578+ * This program, aufs is free software; you can redistribute it and/or modify
13579+ * it under the terms of the GNU General Public License as published by
13580+ * the Free Software Foundation; either version 2 of the License, or
13581+ * (at your option) any later version.
13582+ *
13583+ * This program is distributed in the hope that it will be useful,
13584+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13585+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13586+ * GNU General Public License for more details.
13587+ *
13588+ * You should have received a copy of the GNU General Public License
13589+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
13590+ */
13591+
13592+/*
13593+ * file and vm operations
13594+ */
13595+
13596+#include <linux/aio.h>
13597+#include <linux/fs_stack.h>
13598+#include <linux/mman.h>
13599+#include <linux/security.h>
13600+#include "aufs.h"
13601+
13602+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
13603+{
13604+ int err;
13605+ aufs_bindex_t bindex;
13606+ struct dentry *dentry, *h_dentry;
13607+ struct au_finfo *finfo;
13608+ struct inode *h_inode;
13609+
13610+ FiMustWriteLock(file);
13611+
13612+ err = 0;
13613+ dentry = file->f_path.dentry;
13614+ AuDebugOn(IS_ERR_OR_NULL(dentry));
13615+ finfo = au_fi(file);
13616+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
13617+ atomic_set(&finfo->fi_mmapped, 0);
13618+ bindex = au_dbtop(dentry);
13619+ if (!h_file) {
13620+ h_dentry = au_h_dptr(dentry, bindex);
13621+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13622+ if (unlikely(err))
13623+ goto out;
13624+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
13625+ } else {
13626+ h_dentry = h_file->f_path.dentry;
13627+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13628+ if (unlikely(err))
13629+ goto out;
13630+ get_file(h_file);
13631+ }
13632+ if (IS_ERR(h_file))
13633+ err = PTR_ERR(h_file);
13634+ else {
13635+ if ((flags & __O_TMPFILE)
13636+ && !(flags & O_EXCL)) {
13637+ h_inode = file_inode(h_file);
13638+ spin_lock(&h_inode->i_lock);
13639+ h_inode->i_state |= I_LINKABLE;
13640+ spin_unlock(&h_inode->i_lock);
13641+ }
13642+ au_set_fbtop(file, bindex);
13643+ au_set_h_fptr(file, bindex, h_file);
13644+ au_update_figen(file);
13645+ /* todo: necessary? */
13646+ /* file->f_ra = h_file->f_ra; */
13647+ }
13648+
13649+out:
13650+ return err;
13651+}
13652+
13653+static int aufs_open_nondir(struct inode *inode __maybe_unused,
13654+ struct file *file)
13655+{
13656+ int err;
13657+ struct super_block *sb;
13658+ struct au_do_open_args args = {
13659+ .open = au_do_open_nondir
13660+ };
13661+
13662+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
13663+ file, vfsub_file_flags(file), file->f_mode);
13664+
13665+ sb = file->f_path.dentry->d_sb;
13666+ si_read_lock(sb, AuLock_FLUSH);
13667+ err = au_do_open(file, &args);
13668+ si_read_unlock(sb);
13669+ return err;
13670+}
13671+
13672+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
13673+{
13674+ struct au_finfo *finfo;
13675+ aufs_bindex_t bindex;
13676+ int delayed;
13677+
13678+ finfo = au_fi(file);
13679+ au_sphl_del(&finfo->fi_hlist,
13680+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
13681+ bindex = finfo->fi_btop;
13682+ if (bindex >= 0)
13683+ au_set_h_fptr(file, bindex, NULL);
13684+
13685+ delayed = (current->flags & PF_KTHREAD) || in_interrupt();
13686+ au_finfo_fin(file, delayed);
13687+ return 0;
13688+}
13689+
13690+/* ---------------------------------------------------------------------- */
13691+
13692+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
13693+{
13694+ int err;
13695+ struct file *h_file;
13696+
13697+ err = 0;
13698+ h_file = au_hf_top(file);
13699+ if (h_file)
13700+ err = vfsub_flush(h_file, id);
13701+ return err;
13702+}
13703+
13704+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
13705+{
13706+ return au_do_flush(file, id, au_do_flush_nondir);
13707+}
13708+
13709+/* ---------------------------------------------------------------------- */
13710+/*
13711+ * read and write functions acquire [fdi]_rwsem once, but release them before
13712+ * mmap_sem. This is to prevent a race condition with mmap(2).
13713+ * Releasing these aufs-rwsem should be safe: no branch-management (by keeping
13714+ * si_rwsem) and no harmful copy-up should happen. Actually copy-up may happen in
13715+ * read functions after [fdi]_rwsem are released, but it should be harmless.
13716+ */
13717+
13718+/* Callers should call au_read_post() or fput() in the end */
13719+struct file *au_read_pre(struct file *file, int keep_fi)
13720+{
13721+ struct file *h_file;
13722+ int err;
13723+
13724+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
13725+ if (!err) {
13726+ di_read_unlock(file->f_path.dentry, AuLock_IR);
13727+ h_file = au_hf_top(file);
13728+ get_file(h_file);
13729+ if (!keep_fi)
13730+ fi_read_unlock(file);
13731+ } else
13732+ h_file = ERR_PTR(err);
13733+
13734+ return h_file;
13735+}
13736+
/*
 * Finish a read: copy atime from the lower inode to @inode and drop the
 * reference to @h_file.
 */
static void au_read_post(struct inode *inode, struct file *h_file)
{
	struct inode *h_inode = file_inode(h_file);

	/* update without lock, I don't think it a problem */
	fsstack_copy_attr_atime(inode, h_inode);
	fput(h_file);
}
13743+
/* state captured by au_write_pre() and consumed by au_write_post() */
13744+struct au_write_pre {
13745+ blkcnt_t blks; /* i_blocks of the lower inode before the write */
13746+ aufs_bindex_t btop; /* au_fbtop() of the file before the write */
13747+};
13748+
13749+/*
13750+ * return with iinfo is write-locked
13751+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
13752+ * end
 *
 * Revalidates @file with the write locks kept, optionally (@do_ready) calls
 * au_ready_to_write(), and returns the lower file with an extra reference
 * taken by get_file(), or ERR_PTR() on failure.
 * When @wpre is given, the top index of the file and i_blocks of the lower
 * inode are recorded in it.
 * The dinfo and finfo locks taken here are released before returning.
13753+ */
13754+static struct file *au_write_pre(struct file *file, int do_ready,
13755+ struct au_write_pre *wpre)
13756+{
13757+ struct file *h_file;
13758+ struct dentry *dentry;
13759+ int err;
13760+ struct au_pin pin;
13761+
13762+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
13763+ h_file = ERR_PTR(err);
13764+ if (unlikely(err))
13765+ goto out; /* both locks were already released by the callee */
13766+
13767+ dentry = file->f_path.dentry;
13768+ if (do_ready) {
13769+ err = au_ready_to_write(file, -1, &pin);
13770+ if (unlikely(err)) {
13771+ h_file = ERR_PTR(err);
13772+ di_write_unlock(dentry);
13773+ goto out_fi;
13774+ }
13775+ }
13776+
13777+ di_downgrade_lock(dentry, /*flags*/0);
13778+ if (wpre)
13779+ wpre->btop = au_fbtop(file);
13780+ h_file = au_hf_top(file);
13781+ get_file(h_file); /* the reference handed to the caller */
13782+ if (wpre)
13783+ wpre->blks = file_inode(h_file)->i_blocks;
13784+ if (do_ready)
13785+ au_unpin(&pin); /* pin was set up by au_ready_to_write() */
13786+ di_read_unlock(dentry, /*flags*/0);
13787+
13788+out_fi:
13789+ fi_write_unlock(file);
13790+out:
13791+ return h_file;
13792+}
13793+
13794+static void au_write_post(struct inode *inode, struct file *h_file,
13795+ struct au_write_pre *wpre, ssize_t written)
13796+{
13797+ struct inode *h_inode;
13798+
13799+ au_cpup_attr_timesizes(inode);
13800+ AuDebugOn(au_ibtop(inode) != wpre->btop);
13801+ h_inode = file_inode(h_file);
13802+ inode->i_mode = h_inode->i_mode;
13803+ ii_write_unlock(inode);
13804+ fput(h_file);
13805+
13806+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
13807+ if (written > 0)
13808+ au_fhsm_wrote(inode->i_sb, wpre->btop,
13809+ /*force*/h_inode->i_blocks > wpre->blks);
13810+}
13811+
13812+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
13813+ loff_t *ppos)
13814+{
13815+ ssize_t err;
13816+ struct inode *inode;
13817+ struct file *h_file;
13818+ struct super_block *sb;
13819+
13820+ inode = file_inode(file);
13821+ sb = inode->i_sb;
13822+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13823+
13824+ h_file = au_read_pre(file, /*keep_fi*/0);
13825+ err = PTR_ERR(h_file);
13826+ if (IS_ERR(h_file))
13827+ goto out;
13828+
13829+ /* filedata may be obsoleted by concurrent copyup, but no problem */
13830+ err = vfsub_read_u(h_file, buf, count, ppos);
13831+ /* todo: necessary? */
13832+ /* file->f_ra = h_file->f_ra; */
13833+ au_read_post(inode, h_file);
13834+
13835+out:
13836+ si_read_unlock(sb);
13837+ return err;
13838+}
13839+
13840+/*
13841+ * todo: very ugly
13842+ * it locks both of i_mutex and si_rwsem for read in safe.
13843+ * if the plink maintenance mode continues forever (that is the problem),
13844+ * may loop forever.
13845+ */
13846+static void au_mtx_and_read_lock(struct inode *inode)
13847+{
13848+ int err;
13849+ struct super_block *sb = inode->i_sb;
13850+
13851+ while (1) {
13852+ inode_lock(inode);
13853+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
13854+ if (!err)
13855+ break;
13856+ inode_unlock(inode);
13857+ si_read_lock(sb, AuLock_NOPLMW);
13858+ si_read_unlock(sb);
13859+ }
13860+}
13861+
13862+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
13863+ size_t count, loff_t *ppos)
13864+{
13865+ ssize_t err;
13866+ struct au_write_pre wpre;
13867+ struct inode *inode;
13868+ struct file *h_file;
13869+ char __user *buf = (char __user *)ubuf;
13870+
13871+ inode = file_inode(file);
13872+ au_mtx_and_read_lock(inode);
13873+
13874+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13875+ err = PTR_ERR(h_file);
13876+ if (IS_ERR(h_file))
13877+ goto out;
13878+
13879+ err = vfsub_write_u(h_file, buf, count, ppos);
13880+ au_write_post(inode, h_file, &wpre, err);
13881+
13882+out:
13883+ si_read_unlock(inode->i_sb);
13884+ inode_unlock(inode);
13885+ return err;
13886+}
13887+
13888+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
13889+ struct iov_iter *iov_iter)
13890+{
13891+ ssize_t err;
13892+ struct file *file;
13893+ ssize_t (*iter)(struct kiocb *, struct iov_iter *);
13894+
13895+ err = security_file_permission(h_file, rw);
13896+ if (unlikely(err))
13897+ goto out;
13898+
13899+ err = -ENOSYS;
13900+ iter = NULL;
13901+ if (rw == MAY_READ)
13902+ iter = h_file->f_op->read_iter;
13903+ else if (rw == MAY_WRITE)
13904+ iter = h_file->f_op->write_iter;
13905+
13906+ file = kio->ki_filp;
13907+ kio->ki_filp = h_file;
13908+ if (iter) {
13909+ lockdep_off();
13910+ err = iter(kio, iov_iter);
13911+ lockdep_on();
13912+ } else
13913+ /* currently there is no such fs */
13914+ WARN_ON_ONCE(1);
13915+ kio->ki_filp = file;
13916+
13917+out:
13918+ return err;
13919+}
13920+
13921+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
13922+{
13923+ ssize_t err;
13924+ struct file *file, *h_file;
13925+ struct inode *inode;
13926+ struct super_block *sb;
13927+
13928+ file = kio->ki_filp;
13929+ inode = file_inode(file);
13930+ sb = inode->i_sb;
13931+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13932+
13933+ h_file = au_read_pre(file, /*keep_fi*/1);
13934+ err = PTR_ERR(h_file);
13935+ if (IS_ERR(h_file))
13936+ goto out;
13937+
13938+ if (au_test_loopback_kthread()) {
13939+ au_warn_loopback(h_file->f_path.dentry->d_sb);
13940+ if (file->f_mapping != h_file->f_mapping) {
13941+ file->f_mapping = h_file->f_mapping;
13942+ smp_mb(); /* unnecessary? */
13943+ }
13944+ }
13945+ fi_read_unlock(file);
13946+
13947+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
13948+ /* todo: necessary? */
13949+ /* file->f_ra = h_file->f_ra; */
13950+ au_read_post(inode, h_file);
13951+
13952+out:
13953+ si_read_unlock(sb);
13954+ return err;
13955+}
13956+
13957+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
13958+{
13959+ ssize_t err;
13960+ struct au_write_pre wpre;
13961+ struct inode *inode;
13962+ struct file *file, *h_file;
13963+
13964+ file = kio->ki_filp;
13965+ inode = file_inode(file);
13966+ au_mtx_and_read_lock(inode);
13967+
13968+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13969+ err = PTR_ERR(h_file);
13970+ if (IS_ERR(h_file))
13971+ goto out;
13972+
13973+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
13974+ au_write_post(inode, h_file, &wpre, err);
13975+
13976+out:
13977+ si_read_unlock(inode->i_sb);
13978+ inode_unlock(inode);
13979+ return err;
13980+}
13981+
13982+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
13983+ struct pipe_inode_info *pipe, size_t len,
13984+ unsigned int flags)
13985+{
13986+ ssize_t err;
13987+ struct file *h_file;
13988+ struct inode *inode;
13989+ struct super_block *sb;
13990+
13991+ inode = file_inode(file);
13992+ sb = inode->i_sb;
13993+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13994+
13995+ h_file = au_read_pre(file, /*keep_fi*/0);
13996+ err = PTR_ERR(h_file);
13997+ if (IS_ERR(h_file))
13998+ goto out;
13999+
14000+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
14001+ /* todo: necessary? */
14002+ /* file->f_ra = h_file->f_ra; */
14003+ au_read_post(inode, h_file);
14004+
14005+out:
14006+ si_read_unlock(sb);
14007+ return err;
14008+}
14009+
14010+static ssize_t
14011+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
14012+ size_t len, unsigned int flags)
14013+{
14014+ ssize_t err;
14015+ struct au_write_pre wpre;
14016+ struct inode *inode;
14017+ struct file *h_file;
14018+
14019+ inode = file_inode(file);
14020+ au_mtx_and_read_lock(inode);
14021+
14022+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14023+ err = PTR_ERR(h_file);
14024+ if (IS_ERR(h_file))
14025+ goto out;
14026+
14027+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
14028+ au_write_post(inode, h_file, &wpre, err);
14029+
14030+out:
14031+ si_read_unlock(inode->i_sb);
14032+ inode_unlock(inode);
14033+ return err;
14034+}
14035+
14036+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
14037+ loff_t len)
14038+{
14039+ long err;
14040+ struct au_write_pre wpre;
14041+ struct inode *inode;
14042+ struct file *h_file;
14043+
14044+ inode = file_inode(file);
14045+ au_mtx_and_read_lock(inode);
14046+
14047+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14048+ err = PTR_ERR(h_file);
14049+ if (IS_ERR(h_file))
14050+ goto out;
14051+
14052+ lockdep_off();
14053+ err = vfs_fallocate(h_file, mode, offset, len);
14054+ lockdep_on();
14055+ au_write_post(inode, h_file, &wpre, /*written*/1);
14056+
14057+out:
14058+ si_read_unlock(inode->i_sb);
14059+ inode_unlock(inode);
14060+ return err;
14061+}
14062+
14063+/* ---------------------------------------------------------------------- */
14064+
14065+/*
14066+ * The locking order around current->mmap_sem.
14067+ * - in most and regular cases
14068+ * file I/O syscall -- aufs_read() or something
14069+ * -- si_rwsem for read -- mmap_sem
14070+ * (Note that [fdi]i_rwsem are released before mmap_sem).
14071+ * - in mmap case
14072+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
14073+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
14074+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in
14075+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
14076+ * It means that when aufs acquires si_rwsem for write, the process should never
14077+ * acquire mmap_sem.
14078+ *
14079+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
14080+ * problem either since no directory can be mmap-ed.
14081+ * A similar scenario applies to aufs_readlink() too.
14082+ */
14083+
14084+#if 0 /* stop calling security_file_mmap() */
14085+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
14086+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
14087+
14088+static unsigned long au_arch_prot_conv(unsigned long flags)
14089+{
14090+ /* currently ppc64 only */
14091+#ifdef CONFIG_PPC64
14092+ /* cf. linux/arch/powerpc/include/asm/mman.h */
14093+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
14094+ return AuConv_VM_PROT(flags, SAO);
14095+#else
14096+ AuDebugOn(arch_calc_vm_prot_bits(-1));
14097+ return 0;
14098+#endif
14099+}
14100+
14101+static unsigned long au_prot_conv(unsigned long flags)
14102+{
14103+ return AuConv_VM_PROT(flags, READ)
14104+ | AuConv_VM_PROT(flags, WRITE)
14105+ | AuConv_VM_PROT(flags, EXEC)
14106+ | au_arch_prot_conv(flags);
14107+}
14108+
14109+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
14110+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
14111+
14112+static unsigned long au_flag_conv(unsigned long flags)
14113+{
14114+ return AuConv_VM_MAP(flags, GROWSDOWN)
14115+ | AuConv_VM_MAP(flags, DENYWRITE)
14116+ | AuConv_VM_MAP(flags, LOCKED);
14117+}
14118+#endif
14119+
14120+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
14121+{
14122+ int err;
14123+ const unsigned char wlock
14124+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
14125+ struct super_block *sb;
14126+ struct file *h_file;
14127+ struct inode *inode;
14128+
14129+ AuDbgVmRegion(file, vma);
14130+
14131+ inode = file_inode(file);
14132+ sb = inode->i_sb;
14133+ lockdep_off();
14134+ si_read_lock(sb, AuLock_NOPLMW);
14135+
14136+ h_file = au_write_pre(file, wlock, /*wpre*/NULL);
14137+ lockdep_on();
14138+ err = PTR_ERR(h_file);
14139+ if (IS_ERR(h_file))
14140+ goto out;
14141+
14142+ err = 0;
14143+ au_set_mmapped(file);
14144+ au_vm_file_reset(vma, h_file);
14145+ /*
14146+ * we cannot call security_mmap_file() here since it may acquire
14147+ * mmap_sem or i_mutex.
14148+ *
14149+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
14150+ * au_flag_conv(vma->vm_flags));
14151+ */
14152+ if (!err)
14153+ err = h_file->f_op->mmap(h_file, vma);
14154+ if (!err) {
14155+ au_vm_prfile_set(vma, file);
14156+ fsstack_copy_attr_atime(inode, file_inode(h_file));
14157+ goto out_fput; /* success */
14158+ }
14159+ au_unset_mmapped(file);
14160+ au_vm_file_reset(vma, file);
14161+
14162+out_fput:
14163+ lockdep_off();
14164+ ii_write_unlock(inode);
14165+ lockdep_on();
14166+ fput(h_file);
14167+out:
14168+ lockdep_off();
14169+ si_read_unlock(sb);
14170+ lockdep_on();
14171+ AuTraceErr(err);
14172+ return err;
14173+}
14174+
14175+/* ---------------------------------------------------------------------- */
14176+
14177+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
14178+ int datasync)
14179+{
14180+ int err;
14181+ struct au_write_pre wpre;
14182+ struct inode *inode;
14183+ struct file *h_file;
14184+
14185+ err = 0; /* -EBADF; */ /* posix? */
14186+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
14187+ goto out;
14188+
14189+ inode = file_inode(file);
14190+ au_mtx_and_read_lock(inode);
14191+
14192+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14193+ err = PTR_ERR(h_file);
14194+ if (IS_ERR(h_file))
14195+ goto out_unlock;
14196+
14197+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
14198+ au_write_post(inode, h_file, &wpre, /*written*/0);
14199+
14200+out_unlock:
14201+ si_read_unlock(inode->i_sb);
14202+ inode_unlock(inode);
14203+out:
14204+ return err;
14205+}
14206+
14207+static int aufs_fasync(int fd, struct file *file, int flag)
14208+{
14209+ int err;
14210+ struct file *h_file;
14211+ struct super_block *sb;
14212+
14213+ sb = file->f_path.dentry->d_sb;
14214+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
14215+
14216+ h_file = au_read_pre(file, /*keep_fi*/0);
14217+ err = PTR_ERR(h_file);
14218+ if (IS_ERR(h_file))
14219+ goto out;
14220+
14221+ if (h_file->f_op->fasync)
14222+ err = h_file->f_op->fasync(fd, h_file, flag);
14223+ fput(h_file); /* instead of au_read_post() */
14224+
14225+out:
14226+ si_read_unlock(sb);
14227+ return err;
14228+}
14229+
14230+static int aufs_setfl(struct file *file, unsigned long arg)
14231+{
14232+ int err;
14233+ struct file *h_file;
14234+ struct super_block *sb;
14235+
14236+ sb = file->f_path.dentry->d_sb;
14237+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
14238+
14239+ h_file = au_read_pre(file, /*keep_fi*/0);
14240+ err = PTR_ERR(h_file);
14241+ if (IS_ERR(h_file))
14242+ goto out;
14243+
14244+ arg |= vfsub_file_flags(file) & FASYNC; /* stop calling h_file->fasync */
14245+ err = setfl(/*unused fd*/-1, h_file, arg);
14246+ fput(h_file); /* instead of au_read_post() */
14247+
14248+out:
14249+ si_read_unlock(sb);
14250+ return err;
14251+}
14252+
14253+/* ---------------------------------------------------------------------- */
14254+
14255+/* no one supports this operation, currently */
14256+#if 0
14257+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
14258+ size_t len, loff_t *pos, int more)
14259+{
14260+}
14261+#endif
14262+
14263+/* ---------------------------------------------------------------------- */
14264+
14265+const struct file_operations aufs_file_fop = {
14266+ .owner = THIS_MODULE,
14267+
14268+ .llseek = default_llseek,
14269+
14270+ .read = aufs_read,
14271+ .write = aufs_write,
14272+ .read_iter = aufs_read_iter,
14273+ .write_iter = aufs_write_iter,
14274+
14275+#ifdef CONFIG_AUFS_POLL
14276+ .poll = aufs_poll,
14277+#endif
14278+ .unlocked_ioctl = aufs_ioctl_nondir,
14279+#ifdef CONFIG_COMPAT
14280+ .compat_ioctl = aufs_compat_ioctl_nondir,
14281+#endif
14282+ .mmap = aufs_mmap,
14283+ .open = aufs_open_nondir,
14284+ .flush = aufs_flush_nondir,
14285+ .release = aufs_release_nondir,
14286+ .fsync = aufs_fsync_nondir,
14287+ .fasync = aufs_fasync,
14288+ /* .sendpage = aufs_sendpage, */
14289+ .setfl = aufs_setfl,
14290+ .splice_write = aufs_splice_write,
14291+ .splice_read = aufs_splice_read,
14292+#if 0
14293+ .aio_splice_write = aufs_aio_splice_write,
14294+ .aio_splice_read = aufs_aio_splice_read,
14295+#endif
14296+ .fallocate = aufs_fallocate
14297+};
14298diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
14299--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
14300+++ linux/fs/aufs/fstype.h 2016-10-09 16:55:36.492701639 +0200
14301@@ -0,0 +1,400 @@
14302+/*
14303+ * Copyright (C) 2005-2016 Junjiro R. Okajima
14304+ *
14305+ * This program, aufs is free software; you can redistribute it and/or modify
14306+ * it under the terms of the GNU General Public License as published by
14307+ * the Free Software Foundation; either version 2 of the License, or
14308+ * (at your option) any later version.
14309+ *
14310+ * This program is distributed in the hope that it will be useful,
14311+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14312+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14313+ * GNU General Public License for more details.
14314+ *
14315+ * You should have received a copy of the GNU General Public License
14316+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
14317+ */
14318+
14319+/*
14320+ * judging filesystem type
14321+ */
14322+
14323+#ifndef __AUFS_FSTYPE_H__
14324+#define __AUFS_FSTYPE_H__
14325+
14326+#ifdef __KERNEL__
14327+
14328+#include <linux/fs.h>
14329+#include <linux/magic.h>
14330+#include <linux/nfs_fs.h>
14331+#include <linux/romfs_fs.h>
14332+
14333+static inline int au_test_aufs(struct super_block *sb)
14334+{
14335+ return sb->s_magic == AUFS_SUPER_MAGIC;
14336+}
14337+
14338+static inline const char *au_sbtype(struct super_block *sb)
14339+{
14340+ return sb->s_type->name;
14341+}
14342+
14343+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
14344+{
14345+#if IS_ENABLED(CONFIG_ISO9660_FS)
14346+ return sb->s_magic == ISOFS_SUPER_MAGIC;
14347+#else
14348+ return 0;
14349+#endif
14350+}
14351+
14352+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
14353+{
14354+#if IS_ENABLED(CONFIG_ROMFS_FS)
14355+ return sb->s_magic == ROMFS_MAGIC;
14356+#else
14357+ return 0;
14358+#endif
14359+}
14360+
14361+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
14362+{
14363+#if IS_ENABLED(CONFIG_CRAMFS)
14364+ return sb->s_magic == CRAMFS_MAGIC;
14365+#endif
14366+ return 0;
14367+}
14368+
14369+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
14370+{
14371+#if IS_ENABLED(CONFIG_NFS_FS)
14372+ return sb->s_magic == NFS_SUPER_MAGIC;
14373+#else
14374+ return 0;
14375+#endif
14376+}
14377+
14378+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
14379+{
14380+#if IS_ENABLED(CONFIG_FUSE_FS)
14381+ return sb->s_magic == FUSE_SUPER_MAGIC;
14382+#else
14383+ return 0;
14384+#endif
14385+}
14386+
14387+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
14388+{
14389+#if IS_ENABLED(CONFIG_XFS_FS)
14390+ return sb->s_magic == XFS_SB_MAGIC;
14391+#else
14392+ return 0;
14393+#endif
14394+}
14395+
14396+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
14397+{
14398+#ifdef CONFIG_TMPFS
14399+ return sb->s_magic == TMPFS_MAGIC;
14400+#else
14401+ return 0;
14402+#endif
14403+}
14404+
14405+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
14406+{
14407+#if IS_ENABLED(CONFIG_ECRYPT_FS)
14408+ return !strcmp(au_sbtype(sb), "ecryptfs");
14409+#else
14410+ return 0;
14411+#endif
14412+}
14413+
14414+static inline int au_test_ramfs(struct super_block *sb)
14415+{
14416+ return sb->s_magic == RAMFS_MAGIC;
14417+}
14418+
14419+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
14420+{
14421+#if IS_ENABLED(CONFIG_UBIFS_FS)
14422+ return sb->s_magic == UBIFS_SUPER_MAGIC;
14423+#else
14424+ return 0;
14425+#endif
14426+}
14427+
14428+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
14429+{
14430+#ifdef CONFIG_PROC_FS
14431+ return sb->s_magic == PROC_SUPER_MAGIC;
14432+#else
14433+ return 0;
14434+#endif
14435+}
14436+
14437+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
14438+{
14439+#ifdef CONFIG_SYSFS
14440+ return sb->s_magic == SYSFS_MAGIC;
14441+#else
14442+ return 0;
14443+#endif
14444+}
14445+
14446+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
14447+{
14448+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
14449+ return sb->s_magic == CONFIGFS_MAGIC;
14450+#else
14451+ return 0;
14452+#endif
14453+}
14454+
14455+static inline int au_test_minix(struct super_block *sb __maybe_unused)
14456+{
14457+#if IS_ENABLED(CONFIG_MINIX_FS)
14458+ return sb->s_magic == MINIX3_SUPER_MAGIC
14459+ || sb->s_magic == MINIX2_SUPER_MAGIC
14460+ || sb->s_magic == MINIX2_SUPER_MAGIC2
14461+ || sb->s_magic == MINIX_SUPER_MAGIC
14462+ || sb->s_magic == MINIX_SUPER_MAGIC2;
14463+#else
14464+ return 0;
14465+#endif
14466+}
14467+
14468+static inline int au_test_fat(struct super_block *sb __maybe_unused)
14469+{
14470+#if IS_ENABLED(CONFIG_FAT_FS)
14471+ return sb->s_magic == MSDOS_SUPER_MAGIC;
14472+#else
14473+ return 0;
14474+#endif
14475+}
14476+
14477+static inline int au_test_msdos(struct super_block *sb)
14478+{
14479+ return au_test_fat(sb);
14480+}
14481+
14482+static inline int au_test_vfat(struct super_block *sb)
14483+{
14484+ return au_test_fat(sb);
14485+}
14486+
14487+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
14488+{
14489+#ifdef CONFIG_SECURITYFS
14490+ return sb->s_magic == SECURITYFS_MAGIC;
14491+#else
14492+ return 0;
14493+#endif
14494+}
14495+
14496+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
14497+{
14498+#if IS_ENABLED(CONFIG_SQUASHFS)
14499+ return sb->s_magic == SQUASHFS_MAGIC;
14500+#else
14501+ return 0;
14502+#endif
14503+}
14504+
14505+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
14506+{
14507+#if IS_ENABLED(CONFIG_BTRFS_FS)
14508+ return sb->s_magic == BTRFS_SUPER_MAGIC;
14509+#else
14510+ return 0;
14511+#endif
14512+}
14513+
14514+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
14515+{
14516+#if IS_ENABLED(CONFIG_XENFS)
14517+ return sb->s_magic == XENFS_SUPER_MAGIC;
14518+#else
14519+ return 0;
14520+#endif
14521+}
14522+
14523+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
14524+{
14525+#ifdef CONFIG_DEBUG_FS
14526+ return sb->s_magic == DEBUGFS_MAGIC;
14527+#else
14528+ return 0;
14529+#endif
14530+}
14531+
14532+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
14533+{
14534+#if IS_ENABLED(CONFIG_NILFS)
14535+ return sb->s_magic == NILFS_SUPER_MAGIC;
14536+#else
14537+ return 0;
14538+#endif
14539+}
14540+
14541+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
14542+{
14543+#if IS_ENABLED(CONFIG_HFSPLUS_FS)
14544+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
14545+#else
14546+ return 0;
14547+#endif
14548+}
14549+
14550+/* ---------------------------------------------------------------------- */
14551+/*
14552+ * they can't be an aufs branch.
14553+ */
14554+static inline int au_test_fs_unsuppoted(struct super_block *sb)
14555+{
14556+ return
14557+#ifndef CONFIG_AUFS_BR_RAMFS
14558+ au_test_ramfs(sb) ||
14559+#endif
14560+ au_test_procfs(sb)
14561+ || au_test_sysfs(sb)
14562+ || au_test_configfs(sb)
14563+ || au_test_debugfs(sb)
14564+ || au_test_securityfs(sb)
14565+ || au_test_xenfs(sb)
14566+ || au_test_ecryptfs(sb)
14567+ /* || !strcmp(au_sbtype(sb), "unionfs") */
14568+ || au_test_aufs(sb); /* will be supported in next version */
14569+}
14570+
14571+static inline int au_test_fs_remote(struct super_block *sb)
14572+{
14573+ return !au_test_tmpfs(sb)
14574+#ifdef CONFIG_AUFS_BR_RAMFS
14575+ && !au_test_ramfs(sb)
14576+#endif
14577+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
14578+}
14579+
14580+/* ---------------------------------------------------------------------- */
14581+
14582+/*
14583+ * Note: these functions (below) are created after reading ->getattr() in all
14584+ * filesystems under linux/fs. it means we have to do so in every update...
14585+ */
14586+
14587+/*
14588+ * some filesystems require getattr to refresh the inode attributes before
14589+ * referencing.
14590+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
14591+ * and leave the work for d_revalidate()
14592+ */
14593+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
14594+{
14595+ return au_test_nfs(sb)
14596+ || au_test_fuse(sb)
14597+ /* || au_test_btrfs(sb) */ /* untested */
14598+ ;
14599+}
14600+
14601+/*
14602+ * filesystems which don't maintain i_size or i_blocks.
14603+ */
14604+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
14605+{
14606+ return au_test_xfs(sb)
14607+ || au_test_btrfs(sb)
14608+ || au_test_ubifs(sb)
14609+ || au_test_hfsplus(sb) /* maintained, but incorrect */
14610+ /* || au_test_minix(sb) */ /* untested */
14611+ ;
14612+}
14613+
14614+/*
14615+ * filesystems which don't store the correct value in some of their inode
14616+ * attributes.
14617+ */
14618+static inline int au_test_fs_bad_iattr(struct super_block *sb)
14619+{
14620+ return au_test_fs_bad_iattr_size(sb)
14621+ || au_test_fat(sb)
14622+ || au_test_msdos(sb)
14623+ || au_test_vfat(sb);
14624+}
14625+
14626+/* they don't check i_nlink in link(2) */
14627+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
14628+{
14629+ return au_test_tmpfs(sb)
14630+#ifdef CONFIG_AUFS_BR_RAMFS
14631+ || au_test_ramfs(sb)
14632+#endif
14633+ || au_test_ubifs(sb)
14634+ || au_test_hfsplus(sb);
14635+}
14636+
14637+/*
14638+ * filesystems which set S_NOATIME and S_NOCMTIME.
14639+ */
14640+static inline int au_test_fs_notime(struct super_block *sb)
14641+{
14642+ return au_test_nfs(sb)
14643+ || au_test_fuse(sb)
14644+ || au_test_ubifs(sb)
14645+ ;
14646+}
14647+
14648+/* temporary support for i#1 in cramfs */
14649+static inline int au_test_fs_unique_ino(struct inode *inode)
14650+{
14651+ if (au_test_cramfs(inode->i_sb))
14652+ return inode->i_ino != 1;
14653+ return 1;
14654+}
14655+
14656+/* ---------------------------------------------------------------------- */
14657+
14658+/*
14659+ * the filesystem where the xino files are placed must support i/o after unlink
14660+ * and maintain i_size and i_blocks.
14661+ */
14662+static inline int au_test_fs_bad_xino(struct super_block *sb)
14663+{
14664+ return au_test_fs_remote(sb)
14665+ || au_test_fs_bad_iattr_size(sb)
14666+ /* don't want unnecessary work for xino */
14667+ || au_test_aufs(sb)
14668+ || au_test_ecryptfs(sb)
14669+ || au_test_nilfs(sb);
14670+}
14671+
14672+static inline int au_test_fs_trunc_xino(struct super_block *sb)
14673+{
14674+ return au_test_tmpfs(sb)
14675+ || au_test_ramfs(sb);
14676+}
14677+
14678+/*
14679+ * test if the @sb is real-readonly.
14680+ */
14681+static inline int au_test_fs_rr(struct super_block *sb)
14682+{
14683+ return au_test_squashfs(sb)
14684+ || au_test_iso9660(sb)
14685+ || au_test_cramfs(sb)
14686+ || au_test_romfs(sb);
14687+}
14688+
14689+/*
14690+ * test if the @inode is nfs with 'noacl' option
14691+ * NFS always sets MS_POSIXACL regardless of its mount option 'noacl'.
14692+ */
14693+static inline int au_test_nfs_noacl(struct inode *inode)
14694+{
14695+ return au_test_nfs(inode->i_sb)
14696+ /* && IS_POSIXACL(inode) */
14697+ && !nfs_server_capable(inode, NFS_CAP_ACLS);
14698+}
14699+
14700+#endif /* __KERNEL__ */
14701+#endif /* __AUFS_FSTYPE_H__ */
14702diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
14703--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
14704+++ linux/fs/aufs/hfsnotify.c 2016-10-09 16:55:36.492701639 +0200
14705@@ -0,0 +1,287 @@
14706+/*
14707+ * Copyright (C) 2005-2016 Junjiro R. Okajima
14708+ *
14709+ * This program, aufs is free software; you can redistribute it and/or modify
14710+ * it under the terms of the GNU General Public License as published by
14711+ * the Free Software Foundation; either version 2 of the License, or
14712+ * (at your option) any later version.
14713+ *
14714+ * This program is distributed in the hope that it will be useful,
14715+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14716+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14717+ * GNU General Public License for more details.
14718+ *
14719+ * You should have received a copy of the GNU General Public License
14720+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
14721+ */
14722+
14723+/*
14724+ * fsnotify for the lower directories
14725+ */
14726+
14727+#include "aufs.h"
14728+
14729+/* FS_IN_IGNORED is unnecessary */
14730+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
14731+ | FS_CREATE | FS_EVENT_ON_CHILD);
14732+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
14733+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
14734+
14735+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
14736+{
14737+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
14738+ hn_mark);
14739+ /* AuDbg("here\n"); */
14740+ au_cache_dfree_hnotify(hn);
14741+ smp_mb__before_atomic();
14742+ if (atomic64_dec_and_test(&au_hfsn_ifree))
14743+ wake_up(&au_hfsn_wq);
14744+}
14745+
14746+static int au_hfsn_alloc(struct au_hinode *hinode)
14747+{
14748+ int err;
14749+ struct au_hnotify *hn;
14750+ struct super_block *sb;
14751+ struct au_branch *br;
14752+ struct fsnotify_mark *mark;
14753+ aufs_bindex_t bindex;
14754+
14755+ hn = hinode->hi_notify;
14756+ sb = hn->hn_aufs_inode->i_sb;
14757+ bindex = au_br_index(sb, hinode->hi_id);
14758+ br = au_sbr(sb, bindex);
14759+ AuDebugOn(!br->br_hfsn);
14760+
14761+ mark = &hn->hn_mark;
14762+ fsnotify_init_mark(mark, au_hfsn_free_mark);
14763+ mark->mask = AuHfsnMask;
14764+ /*
14765+ * by udba rename or rmdir, aufs assigns a new inode to the known
14766+ * h_inode, so specify 1 to allow dups.
14767+ */
14768+ lockdep_off();
14769+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
14770+ /*mnt*/NULL, /*allow_dups*/1);
14771+ lockdep_on();
14772+
14773+ return err;
14774+}
14775+
14776+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
14777+{
14778+ struct fsnotify_mark *mark;
14779+ unsigned long long ull;
14780+ struct fsnotify_group *group;
14781+
14782+ ull = atomic64_inc_return(&au_hfsn_ifree);
14783+ BUG_ON(!ull);
14784+
14785+ mark = &hn->hn_mark;
14786+ spin_lock(&mark->lock);
14787+ group = mark->group;
14788+ fsnotify_get_group(group);
14789+ spin_unlock(&mark->lock);
14790+ lockdep_off();
14791+ fsnotify_destroy_mark(mark, group);
14792+ fsnotify_put_mark(mark);
14793+ fsnotify_put_group(group);
14794+ lockdep_on();
14795+
14796+ /* free hn by myself */
14797+ return 0;
14798+}
14799+
14800+/* ---------------------------------------------------------------------- */
14801+
14802+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
14803+{
14804+ struct fsnotify_mark *mark;
14805+
14806+ mark = &hinode->hi_notify->hn_mark;
14807+ spin_lock(&mark->lock);
14808+ if (do_set) {
14809+ AuDebugOn(mark->mask & AuHfsnMask);
14810+ mark->mask |= AuHfsnMask;
14811+ } else {
14812+ AuDebugOn(!(mark->mask & AuHfsnMask));
14813+ mark->mask &= ~AuHfsnMask;
14814+ }
14815+ spin_unlock(&mark->lock);
14816+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
14817+}
14818+
14819+/* ---------------------------------------------------------------------- */
14820+
14821+/* #define AuDbgHnotify */
14822+#ifdef AuDbgHnotify
14823+static char *au_hfsn_name(u32 mask)
14824+{
14825+#ifdef CONFIG_AUFS_DEBUG
14826+#define test_ret(flag) \
14827+ do { \
14828+ if (mask & flag) \
14829+ return #flag; \
14830+ } while (0)
14831+ test_ret(FS_ACCESS);
14832+ test_ret(FS_MODIFY);
14833+ test_ret(FS_ATTRIB);
14834+ test_ret(FS_CLOSE_WRITE);
14835+ test_ret(FS_CLOSE_NOWRITE);
14836+ test_ret(FS_OPEN);
14837+ test_ret(FS_MOVED_FROM);
14838+ test_ret(FS_MOVED_TO);
14839+ test_ret(FS_CREATE);
14840+ test_ret(FS_DELETE);
14841+ test_ret(FS_DELETE_SELF);
14842+ test_ret(FS_MOVE_SELF);
14843+ test_ret(FS_UNMOUNT);
14844+ test_ret(FS_Q_OVERFLOW);
14845+ test_ret(FS_IN_IGNORED);
14846+ test_ret(FS_ISDIR);
14847+ test_ret(FS_IN_ONESHOT);
14848+ test_ret(FS_EVENT_ON_CHILD);
14849+ return "";
14850+#undef test_ret
14851+#else
14852+ return "??";
14853+#endif
14854+}
14855+#endif
14856+
14857+/* ---------------------------------------------------------------------- */
14858+
14859+static void au_hfsn_free_group(struct fsnotify_group *group)
14860+{
14861+ struct au_br_hfsnotify *hfsn = group->private;
14862+
14863+ /* AuDbg("here\n"); */
14864+ au_delayed_kfree(hfsn);
14865+}
14866+
14867+static int au_hfsn_handle_event(struct fsnotify_group *group,
14868+ struct inode *inode,
14869+ struct fsnotify_mark *inode_mark,
14870+ struct fsnotify_mark *vfsmount_mark,
14871+ u32 mask, void *data, int data_type,
14872+ const unsigned char *file_name, u32 cookie)
14873+{
14874+ int err;
14875+ struct au_hnotify *hnotify;
14876+ struct inode *h_dir, *h_inode;
14877+ struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
14878+
14879+ AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
14880+
14881+ err = 0;
14882+ /* if FS_UNMOUNT happens, there must be another bug */
14883+ AuDebugOn(mask & FS_UNMOUNT);
14884+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
14885+ goto out;
14886+
14887+ h_dir = inode;
14888+ h_inode = NULL;
14889+#ifdef AuDbgHnotify
14890+ au_debug_on();
14891+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
14892+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
14893+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
14894+ h_dir->i_ino, mask, au_hfsn_name(mask),
14895+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
14896+ /* WARN_ON(1); */
14897+ }
14898+ au_debug_off();
14899+#endif
14900+
14901+ AuDebugOn(!inode_mark);
14902+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
14903+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
14904+
14905+out:
14906+ return err;
14907+}
14908+
14909+static struct fsnotify_ops au_hfsn_ops = {
14910+ .handle_event = au_hfsn_handle_event,
14911+ .free_group_priv = au_hfsn_free_group
14912+};
14913+
14914+/* ---------------------------------------------------------------------- */
14915+
14916+static void au_hfsn_fin_br(struct au_branch *br)
14917+{
14918+ struct au_br_hfsnotify *hfsn;
14919+
14920+ hfsn = br->br_hfsn;
14921+ if (hfsn) {
14922+ lockdep_off();
14923+ fsnotify_put_group(hfsn->hfsn_group);
14924+ lockdep_on();
14925+ }
14926+}
14927+
14928+static int au_hfsn_init_br(struct au_branch *br, int perm)
14929+{
14930+ int err;
14931+ struct fsnotify_group *group;
14932+ struct au_br_hfsnotify *hfsn;
14933+
14934+ err = 0;
14935+ br->br_hfsn = NULL;
14936+ if (!au_br_hnotifyable(perm))
14937+ goto out;
14938+
14939+ err = -ENOMEM;
14940+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
14941+ if (unlikely(!hfsn))
14942+ goto out;
14943+
14944+ err = 0;
14945+ group = fsnotify_alloc_group(&au_hfsn_ops);
14946+ if (IS_ERR(group)) {
14947+ err = PTR_ERR(group);
14948+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
14949+ goto out_hfsn;
14950+ }
14951+
14952+ group->private = hfsn;
14953+ hfsn->hfsn_group = group;
14954+ br->br_hfsn = hfsn;
14955+ goto out; /* success */
14956+
14957+out_hfsn:
14958+ au_delayed_kfree(hfsn);
14959+out:
14960+ return err;
14961+}
14962+
14963+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
14964+{
14965+ int err;
14966+
14967+ err = 0;
14968+ if (!br->br_hfsn)
14969+ err = au_hfsn_init_br(br, perm);
14970+
14971+ return err;
14972+}
14973+
14974+/* ---------------------------------------------------------------------- */
14975+
14976+static void au_hfsn_fin(void)
14977+{
14978+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
14979+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
14980+}
14981+
14982+const struct au_hnotify_op au_hnotify_op = {
14983+ .ctl = au_hfsn_ctl,
14984+ .alloc = au_hfsn_alloc,
14985+ .free = au_hfsn_free,
14986+
14987+ .fin = au_hfsn_fin,
14988+
14989+ .reset_br = au_hfsn_reset_br,
14990+ .fin_br = au_hfsn_fin_br,
14991+ .init_br = au_hfsn_init_br
14992+};
14993diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
14994--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
14995+++ linux/fs/aufs/hfsplus.c 2016-10-09 16:55:36.492701639 +0200
14996@@ -0,0 +1,56 @@
14997+/*
14998+ * Copyright (C) 2010-2016 Junjiro R. Okajima
14999+ *
15000+ * This program, aufs is free software; you can redistribute it and/or modify
15001+ * it under the terms of the GNU General Public License as published by
15002+ * the Free Software Foundation; either version 2 of the License, or
15003+ * (at your option) any later version.
15004+ *
15005+ * This program is distributed in the hope that it will be useful,
15006+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15007+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15008+ * GNU General Public License for more details.
15009+ *
15010+ * You should have received a copy of the GNU General Public License
15011+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
15012+ */
15013+
15014+/*
15015+ * special support for filesystems which acquire an inode mutex
15016+ * at the final close of a file, e.g. hfsplus.
15017+ *
15018+ * This trick is very simple and stupid, just to open the file before the
15019+ * really necessary open to tell hfsplus that this is not the final closing.
15020+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
15021+ * and au_h_open_post() after releasing it.
15022+ */
15023+
15024+#include "aufs.h"
15025+
15026+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
15027+ int force_wr)
15028+{
15029+ struct file *h_file;
15030+ struct dentry *h_dentry;
15031+
15032+ h_dentry = au_h_dptr(dentry, bindex);
15033+ AuDebugOn(!h_dentry);
15034+ AuDebugOn(d_is_negative(h_dentry));
15035+
15036+ h_file = NULL;
15037+ if (au_test_hfsplus(h_dentry->d_sb)
15038+ && d_is_reg(h_dentry))
15039+ h_file = au_h_open(dentry, bindex,
15040+ O_RDONLY | O_NOATIME | O_LARGEFILE,
15041+ /*file*/NULL, force_wr);
15042+ return h_file;
15043+}
15044+
15045+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
15046+ struct file *h_file)
15047+{
15048+ if (h_file) {
15049+ fput(h_file);
15050+ au_sbr_put(dentry->d_sb, bindex);
15051+ }
15052+}
15053diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
15054--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
15055+++ linux/fs/aufs/hnotify.c 2016-10-09 16:55:36.492701639 +0200
15056@@ -0,0 +1,723 @@
15057+/*
15058+ * Copyright (C) 2005-2016 Junjiro R. Okajima
15059+ *
15060+ * This program, aufs is free software; you can redistribute it and/or modify
15061+ * it under the terms of the GNU General Public License as published by
15062+ * the Free Software Foundation; either version 2 of the License, or
15063+ * (at your option) any later version.
15064+ *
15065+ * This program is distributed in the hope that it will be useful,
15066+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15067+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15068+ * GNU General Public License for more details.
15069+ *
15070+ * You should have received a copy of the GNU General Public License
15071+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
15072+ */
15073+
15074+/*
15075+ * abstraction to notify the direct changes on lower directories
15076+ */
15077+
15078+#include "aufs.h"
15079+
15080+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
15081+{
15082+ int err;
15083+ struct au_hnotify *hn;
15084+
15085+ err = -ENOMEM;
15086+ hn = au_cache_alloc_hnotify();
15087+ if (hn) {
15088+ hn->hn_aufs_inode = inode;
15089+ hinode->hi_notify = hn;
15090+ err = au_hnotify_op.alloc(hinode);
15091+ AuTraceErr(err);
15092+ if (unlikely(err)) {
15093+ hinode->hi_notify = NULL;
15094+ au_cache_dfree_hnotify(hn);
15095+ /*
15096+ * The upper dir was removed by udba, but the same named
15097+ * dir is left. In this case, aufs assigns a new inode
15098+ * number and sets the monitor again.
15099+ * For the lower dir, the old monitor is still left.
15100+ */
15101+ if (err == -EEXIST)
15102+ err = 0;
15103+ }
15104+ }
15105+
15106+ AuTraceErr(err);
15107+ return err;
15108+}
15109+
15110+void au_hn_free(struct au_hinode *hinode)
15111+{
15112+ struct au_hnotify *hn;
15113+
15114+ hn = hinode->hi_notify;
15115+ if (hn) {
15116+ hinode->hi_notify = NULL;
15117+ if (au_hnotify_op.free(hinode, hn))
15118+ au_cache_dfree_hnotify(hn);
15119+ }
15120+}
15121+
15122+/* ---------------------------------------------------------------------- */
15123+
15124+void au_hn_ctl(struct au_hinode *hinode, int do_set)
15125+{
15126+ if (hinode->hi_notify)
15127+ au_hnotify_op.ctl(hinode, do_set);
15128+}
15129+
15130+void au_hn_reset(struct inode *inode, unsigned int flags)
15131+{
15132+ aufs_bindex_t bindex, bbot;
15133+ struct inode *hi;
15134+ struct dentry *iwhdentry;
15135+
15136+ bbot = au_ibbot(inode);
15137+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
15138+ hi = au_h_iptr(inode, bindex);
15139+ if (!hi)
15140+ continue;
15141+
15142+ /* inode_lock_nested(hi, AuLsc_I_CHILD); */
15143+ iwhdentry = au_hi_wh(inode, bindex);
15144+ if (iwhdentry)
15145+ dget(iwhdentry);
15146+ au_igrab(hi);
15147+ au_set_h_iptr(inode, bindex, NULL, 0);
15148+ au_set_h_iptr(inode, bindex, au_igrab(hi),
15149+ flags & ~AuHi_XINO);
15150+ iput(hi);
15151+ dput(iwhdentry);
15152+ /* inode_unlock(hi); */
15153+ }
15154+}
15155+
15156+/* ---------------------------------------------------------------------- */
15157+
15158+static int hn_xino(struct inode *inode, struct inode *h_inode)
15159+{
15160+ int err;
15161+ aufs_bindex_t bindex, bbot, bfound, btop;
15162+ struct inode *h_i;
15163+
15164+ err = 0;
15165+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
15166+ pr_warn("branch root dir was changed\n");
15167+ goto out;
15168+ }
15169+
15170+ bfound = -1;
15171+ bbot = au_ibbot(inode);
15172+ btop = au_ibtop(inode);
15173+#if 0 /* reserved for future use */
15174+ if (bindex == bbot) {
15175+ /* keep this ino in rename case */
15176+ goto out;
15177+ }
15178+#endif
15179+ for (bindex = btop; bindex <= bbot; bindex++)
15180+ if (au_h_iptr(inode, bindex) == h_inode) {
15181+ bfound = bindex;
15182+ break;
15183+ }
15184+ if (bfound < 0)
15185+ goto out;
15186+
15187+ for (bindex = btop; bindex <= bbot; bindex++) {
15188+ h_i = au_h_iptr(inode, bindex);
15189+ if (!h_i)
15190+ continue;
15191+
15192+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
15193+ /* ignore this error */
15194+ /* bad action? */
15195+ }
15196+
15197+ /* children inode number will be broken */
15198+
15199+out:
15200+ AuTraceErr(err);
15201+ return err;
15202+}
15203+
15204+static int hn_gen_tree(struct dentry *dentry)
15205+{
15206+ int err, i, j, ndentry;
15207+ struct au_dcsub_pages dpages;
15208+ struct au_dpage *dpage;
15209+ struct dentry **dentries;
15210+
15211+ err = au_dpages_init(&dpages, GFP_NOFS);
15212+ if (unlikely(err))
15213+ goto out;
15214+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
15215+ if (unlikely(err))
15216+ goto out_dpages;
15217+
15218+ for (i = 0; i < dpages.ndpage; i++) {
15219+ dpage = dpages.dpages + i;
15220+ dentries = dpage->dentries;
15221+ ndentry = dpage->ndentry;
15222+ for (j = 0; j < ndentry; j++) {
15223+ struct dentry *d;
15224+
15225+ d = dentries[j];
15226+ if (IS_ROOT(d))
15227+ continue;
15228+
15229+ au_digen_dec(d);
15230+ if (d_really_is_positive(d))
15231+ /* todo: reset children xino?
15232+ cached children only? */
15233+ au_iigen_dec(d_inode(d));
15234+ }
15235+ }
15236+
15237+out_dpages:
15238+ au_dpages_free(&dpages);
15239+
15240+#if 0
15241+ /* discard children */
15242+ dentry_unhash(dentry);
15243+ dput(dentry);
15244+#endif
15245+out:
15246+ return err;
15247+}
15248+
15249+/*
15250+ * return 0 if processed.
15251+ */
15252+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
15253+ const unsigned int isdir)
15254+{
15255+ int err;
15256+ struct dentry *d;
15257+ struct qstr *dname;
15258+
15259+ err = 1;
15260+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
15261+ pr_warn("branch root dir was changed\n");
15262+ err = 0;
15263+ goto out;
15264+ }
15265+
15266+ if (!isdir) {
15267+ AuDebugOn(!name);
15268+ au_iigen_dec(inode);
15269+ spin_lock(&inode->i_lock);
15270+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
15271+ spin_lock(&d->d_lock);
15272+ dname = &d->d_name;
15273+ if (dname->len != nlen
15274+ && memcmp(dname->name, name, nlen)) {
15275+ spin_unlock(&d->d_lock);
15276+ continue;
15277+ }
15278+ err = 0;
15279+ au_digen_dec(d);
15280+ spin_unlock(&d->d_lock);
15281+ break;
15282+ }
15283+ spin_unlock(&inode->i_lock);
15284+ } else {
15285+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
15286+ d = d_find_any_alias(inode);
15287+ if (!d) {
15288+ au_iigen_dec(inode);
15289+ goto out;
15290+ }
15291+
15292+ spin_lock(&d->d_lock);
15293+ dname = &d->d_name;
15294+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
15295+ spin_unlock(&d->d_lock);
15296+ err = hn_gen_tree(d);
15297+ spin_lock(&d->d_lock);
15298+ }
15299+ spin_unlock(&d->d_lock);
15300+ dput(d);
15301+ }
15302+
15303+out:
15304+ AuTraceErr(err);
15305+ return err;
15306+}
15307+
15308+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
15309+{
15310+ int err;
15311+
15312+ if (IS_ROOT(dentry)) {
15313+ pr_warn("branch root dir was changed\n");
15314+ return 0;
15315+ }
15316+
15317+ err = 0;
15318+ if (!isdir) {
15319+ au_digen_dec(dentry);
15320+ if (d_really_is_positive(dentry))
15321+ au_iigen_dec(d_inode(dentry));
15322+ } else {
15323+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
15324+ if (d_really_is_positive(dentry))
15325+ err = hn_gen_tree(dentry);
15326+ }
15327+
15328+ AuTraceErr(err);
15329+ return err;
15330+}
15331+
15332+/* ---------------------------------------------------------------------- */
15333+
15334+/* hnotify job flags */
15335+#define AuHnJob_XINO0 1
15336+#define AuHnJob_GEN (1 << 1)
15337+#define AuHnJob_DIRENT (1 << 2)
15338+#define AuHnJob_ISDIR (1 << 3)
15339+#define AuHnJob_TRYXINO0 (1 << 4)
15340+#define AuHnJob_MNTPNT (1 << 5)
15341+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
15342+#define au_fset_hnjob(flags, name) \
15343+ do { (flags) |= AuHnJob_##name; } while (0)
15344+#define au_fclr_hnjob(flags, name) \
15345+ do { (flags) &= ~AuHnJob_##name; } while (0)
15346+
15347+enum {
15348+ AuHn_CHILD,
15349+ AuHn_PARENT,
15350+ AuHnLast
15351+};
15352+
15353+struct au_hnotify_args {
15354+ struct inode *h_dir, *dir, *h_child_inode;
15355+ u32 mask;
15356+ unsigned int flags[AuHnLast];
15357+ unsigned int h_child_nlen;
15358+ char h_child_name[];
15359+};
15360+
15361+struct hn_job_args {
15362+ unsigned int flags;
15363+ struct inode *inode, *h_inode, *dir, *h_dir;
15364+ struct dentry *dentry;
15365+ char *h_name;
15366+ int h_nlen;
15367+};
15368+
15369+static int hn_job(struct hn_job_args *a)
15370+{
15371+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
15372+ int e;
15373+
15374+ /* reset xino */
15375+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
15376+ hn_xino(a->inode, a->h_inode); /* ignore this error */
15377+
15378+ if (au_ftest_hnjob(a->flags, TRYXINO0)
15379+ && a->inode
15380+ && a->h_inode) {
15381+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
15382+ if (!a->h_inode->i_nlink
15383+ && !(a->h_inode->i_state & I_LINKABLE))
15384+ hn_xino(a->inode, a->h_inode); /* ignore this error */
15385+ inode_unlock(a->h_inode);
15386+ }
15387+
15388+ /* make the generation obsolete */
15389+ if (au_ftest_hnjob(a->flags, GEN)) {
15390+ e = -1;
15391+ if (a->inode)
15392+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
15393+ isdir);
15394+ if (e && a->dentry)
15395+ hn_gen_by_name(a->dentry, isdir);
15396+ /* ignore this error */
15397+ }
15398+
15399+ /* make dir entries obsolete */
15400+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
15401+ struct au_vdir *vdir;
15402+
15403+ vdir = au_ivdir(a->inode);
15404+ if (vdir)
15405+ vdir->vd_jiffy = 0;
15406+ /* IMustLock(a->inode); */
15407+ /* a->inode->i_version++; */
15408+ }
15409+
15410+ /* can do nothing but warn */
15411+ if (au_ftest_hnjob(a->flags, MNTPNT)
15412+ && a->dentry
15413+ && d_mountpoint(a->dentry))
15414+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
15415+
15416+ return 0;
15417+}
15418+
15419+/* ---------------------------------------------------------------------- */
15420+
15421+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
15422+ struct inode *dir)
15423+{
15424+ struct dentry *dentry, *d, *parent;
15425+ struct qstr *dname;
15426+
15427+ parent = d_find_any_alias(dir);
15428+ if (!parent)
15429+ return NULL;
15430+
15431+ dentry = NULL;
15432+ spin_lock(&parent->d_lock);
15433+ list_for_each_entry(d, &parent->d_subdirs, d_child) {
15434+ /* AuDbg("%pd\n", d); */
15435+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
15436+ dname = &d->d_name;
15437+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
15438+ goto cont_unlock;
15439+ if (au_di(d))
15440+ au_digen_dec(d);
15441+ else
15442+ goto cont_unlock;
15443+ if (au_dcount(d) > 0) {
15444+ dentry = dget_dlock(d);
15445+ spin_unlock(&d->d_lock);
15446+ break;
15447+ }
15448+
15449+cont_unlock:
15450+ spin_unlock(&d->d_lock);
15451+ }
15452+ spin_unlock(&parent->d_lock);
15453+ dput(parent);
15454+
15455+ if (dentry)
15456+ di_write_lock_child(dentry);
15457+
15458+ return dentry;
15459+}
15460+
15461+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
15462+ aufs_bindex_t bindex, ino_t h_ino)
15463+{
15464+ struct inode *inode;
15465+ ino_t ino;
15466+ int err;
15467+
15468+ inode = NULL;
15469+ err = au_xino_read(sb, bindex, h_ino, &ino);
15470+ if (!err && ino)
15471+ inode = ilookup(sb, ino);
15472+ if (!inode)
15473+ goto out;
15474+
15475+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
15476+ pr_warn("wrong root branch\n");
15477+ iput(inode);
15478+ inode = NULL;
15479+ goto out;
15480+ }
15481+
15482+ ii_write_lock_child(inode);
15483+
15484+out:
15485+ return inode;
15486+}
15487+
15488+static void au_hn_bh(void *_args)
15489+{
15490+ struct au_hnotify_args *a = _args;
15491+ struct super_block *sb;
15492+ aufs_bindex_t bindex, bbot, bfound;
15493+ unsigned char xino, try_iput;
15494+ int err;
15495+ struct inode *inode;
15496+ ino_t h_ino;
15497+ struct hn_job_args args;
15498+ struct dentry *dentry;
15499+ struct au_sbinfo *sbinfo;
15500+
15501+ AuDebugOn(!_args);
15502+ AuDebugOn(!a->h_dir);
15503+ AuDebugOn(!a->dir);
15504+ AuDebugOn(!a->mask);
15505+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
15506+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
15507+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
15508+
15509+ inode = NULL;
15510+ dentry = NULL;
15511+ /*
15512+ * do not lock a->dir->i_mutex here
15513+ * because of d_revalidate() may cause a deadlock.
15514+ */
15515+ sb = a->dir->i_sb;
15516+ AuDebugOn(!sb);
15517+ sbinfo = au_sbi(sb);
15518+ AuDebugOn(!sbinfo);
15519+ si_write_lock(sb, AuLock_NOPLMW);
15520+
15521+ ii_read_lock_parent(a->dir);
15522+ bfound = -1;
15523+ bbot = au_ibbot(a->dir);
15524+ for (bindex = au_ibtop(a->dir); bindex <= bbot; bindex++)
15525+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
15526+ bfound = bindex;
15527+ break;
15528+ }
15529+ ii_read_unlock(a->dir);
15530+ if (unlikely(bfound < 0))
15531+ goto out;
15532+
15533+ xino = !!au_opt_test(au_mntflags(sb), XINO);
15534+ h_ino = 0;
15535+ if (a->h_child_inode)
15536+ h_ino = a->h_child_inode->i_ino;
15537+
15538+ if (a->h_child_nlen
15539+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
15540+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
15541+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
15542+ a->dir);
15543+ try_iput = 0;
15544+ if (dentry && d_really_is_positive(dentry))
15545+ inode = d_inode(dentry);
15546+ if (xino && !inode && h_ino
15547+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
15548+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
15549+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
15550+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
15551+ try_iput = 1;
15552+ }
15553+
15554+ args.flags = a->flags[AuHn_CHILD];
15555+ args.dentry = dentry;
15556+ args.inode = inode;
15557+ args.h_inode = a->h_child_inode;
15558+ args.dir = a->dir;
15559+ args.h_dir = a->h_dir;
15560+ args.h_name = a->h_child_name;
15561+ args.h_nlen = a->h_child_nlen;
15562+ err = hn_job(&args);
15563+ if (dentry) {
15564+ if (au_di(dentry))
15565+ di_write_unlock(dentry);
15566+ dput(dentry);
15567+ }
15568+ if (inode && try_iput) {
15569+ ii_write_unlock(inode);
15570+ iput(inode);
15571+ }
15572+
15573+ ii_write_lock_parent(a->dir);
15574+ args.flags = a->flags[AuHn_PARENT];
15575+ args.dentry = NULL;
15576+ args.inode = a->dir;
15577+ args.h_inode = a->h_dir;
15578+ args.dir = NULL;
15579+ args.h_dir = NULL;
15580+ args.h_name = NULL;
15581+ args.h_nlen = 0;
15582+ err = hn_job(&args);
15583+ ii_write_unlock(a->dir);
15584+
15585+out:
15586+ iput(a->h_child_inode);
15587+ iput(a->h_dir);
15588+ iput(a->dir);
15589+ si_write_unlock(sb);
15590+ au_nwt_done(&sbinfo->si_nowait);
15591+ au_delayed_kfree(a);
15592+}
15593+
15594+/* ---------------------------------------------------------------------- */
15595+
15596+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
15597+ struct qstr *h_child_qstr, struct inode *h_child_inode)
15598+{
15599+ int err, len;
15600+ unsigned int flags[AuHnLast], f;
15601+ unsigned char isdir, isroot, wh;
15602+ struct inode *dir;
15603+ struct au_hnotify_args *args;
15604+ char *p, *h_child_name;
15605+
15606+ err = 0;
15607+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
15608+ dir = igrab(hnotify->hn_aufs_inode);
15609+ if (!dir)
15610+ goto out;
15611+
15612+ isroot = (dir->i_ino == AUFS_ROOT_INO);
15613+ wh = 0;
15614+ h_child_name = (void *)h_child_qstr->name;
15615+ len = h_child_qstr->len;
15616+ if (h_child_name) {
15617+ if (len > AUFS_WH_PFX_LEN
15618+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
15619+ h_child_name += AUFS_WH_PFX_LEN;
15620+ len -= AUFS_WH_PFX_LEN;
15621+ wh = 1;
15622+ }
15623+ }
15624+
15625+ isdir = 0;
15626+ if (h_child_inode)
15627+ isdir = !!S_ISDIR(h_child_inode->i_mode);
15628+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
15629+ flags[AuHn_CHILD] = 0;
15630+ if (isdir)
15631+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
15632+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
15633+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
15634+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
15635+ case FS_MOVED_FROM:
15636+ case FS_MOVED_TO:
15637+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
15638+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15639+ /*FALLTHROUGH*/
15640+ case FS_CREATE:
15641+ AuDebugOn(!h_child_name);
15642+ break;
15643+
15644+ case FS_DELETE:
15645+ /*
15646+ * aufs will never be able to get this child inode.
15647+ * revalidation should be done in d_revalidate()
15648+ * by checking i_nlink, i_generation or d_unhashed().
15649+ */
15650+ AuDebugOn(!h_child_name);
15651+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
15652+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15653+ break;
15654+
15655+ default:
15656+ AuDebugOn(1);
15657+ }
15658+
15659+ if (wh)
15660+ h_child_inode = NULL;
15661+
15662+ err = -ENOMEM;
15663+ /* iput() and kfree() are done by au_hn_bh(), or below if queueing fails */
15664+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
15665+ if (unlikely(!args)) {
15666+ AuErr1("no memory\n");
15667+ iput(dir);
15668+ goto out;
15669+ }
15670+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
15671+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
15672+ args->mask = mask;
15673+ args->dir = dir;
15674+ args->h_dir = igrab(h_dir);
15675+ if (h_child_inode)
15676+ h_child_inode = igrab(h_child_inode); /* can be NULL */
15677+ args->h_child_inode = h_child_inode;
15678+ args->h_child_nlen = len;
15679+ if (len) {
15680+ p = (void *)args;
15681+ p += sizeof(*args);
15682+ memcpy(p, h_child_name, len);
15683+ p[len] = 0;
15684+ }
15685+
15686+ /* NFS fires the event for silly-renamed one from kworker */
15687+ f = 0;
15688+ if (!dir->i_nlink
15689+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
15690+ f = AuWkq_NEST;
15691+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
15692+ if (unlikely(err)) {
15693+ pr_err("wkq %d\n", err);
15694+ iput(args->h_child_inode);
15695+ iput(args->h_dir);
15696+ iput(args->dir);
15697+ au_delayed_kfree(args);
15698+ }
15699+
15700+out:
15701+ return err;
15702+}
15703+
15704+/* ---------------------------------------------------------------------- */
15705+
15706+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
15707+{
15708+ int err;
15709+
15710+ AuDebugOn(!(udba & AuOptMask_UDBA));
15711+
15712+ err = 0;
15713+ if (au_hnotify_op.reset_br)
15714+ err = au_hnotify_op.reset_br(udba, br, perm);
15715+
15716+ return err;
15717+}
15718+
15719+int au_hnotify_init_br(struct au_branch *br, int perm)
15720+{
15721+ int err;
15722+
15723+ err = 0;
15724+ if (au_hnotify_op.init_br)
15725+ err = au_hnotify_op.init_br(br, perm);
15726+
15727+ return err;
15728+}
15729+
15730+void au_hnotify_fin_br(struct au_branch *br)
15731+{
15732+ if (au_hnotify_op.fin_br)
15733+ au_hnotify_op.fin_br(br);
15734+}
15735+
15736+static void au_hn_destroy_cache(void)
15737+{
15738+ struct au_cache *cp;
15739+
15740+ flush_delayed_work(&au_dfree.dwork);
15741+ cp = au_dfree.cache + AuCache_HNOTIFY;
15742+ AuDebugOn(!llist_empty(&cp->llist));
15743+ kmem_cache_destroy(cp->cache);
15744+ cp->cache = NULL;
15745+}
15746+
15747+AU_CACHE_DFREE_FUNC(hnotify, HNOTIFY, hn_lnode);
15748+
15749+int __init au_hnotify_init(void)
15750+{
15751+ int err;
15752+ struct au_cache *cp;
15753+
15754+ err = -ENOMEM;
15755+ cp = au_dfree.cache + AuCache_HNOTIFY;
15756+ cp->cache = AuCache(au_hnotify);
15757+ if (cp->cache) {
15758+ err = 0;
15759+ if (au_hnotify_op.init)
15760+ err = au_hnotify_op.init();
15761+ if (unlikely(err))
15762+ au_hn_destroy_cache();
15763+ }
15764+ AuTraceErr(err);
15765+ return err;
15766+}
15767+
15768+void au_hnotify_fin(void)
15769+{
15770+ struct au_cache *cp;
15771+
15772+ if (au_hnotify_op.fin)
15773+ au_hnotify_op.fin();
15774+
15775+ /* cf. au_cache_fin() */
15776+ cp = au_dfree.cache + AuCache_HNOTIFY;
15777+ if (cp->cache)
15778+ au_hn_destroy_cache();
15779+}
15780diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
15781--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
15782+++ linux/fs/aufs/iinfo.c 2016-10-09 16:55:38.889431135 +0200
15783@@ -0,0 +1,285 @@
15784+/*
15785+ * Copyright (C) 2005-2016 Junjiro R. Okajima
15786+ *
15787+ * This program, aufs is free software; you can redistribute it and/or modify
15788+ * it under the terms of the GNU General Public License as published by
15789+ * the Free Software Foundation; either version 2 of the License, or
15790+ * (at your option) any later version.
15791+ *
15792+ * This program is distributed in the hope that it will be useful,
15793+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15794+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15795+ * GNU General Public License for more details.
15796+ *
15797+ * You should have received a copy of the GNU General Public License
15798+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
15799+ */
15800+
15801+/*
15802+ * inode private data
15803+ */
15804+
15805+#include "aufs.h"
15806+
15807+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
15808+{
15809+ struct inode *h_inode;
15810+ struct au_hinode *hinode;
15811+
15812+ IiMustAnyLock(inode);
15813+
15814+ hinode = au_hinode(au_ii(inode), bindex);
15815+ h_inode = hinode->hi_inode;
15816+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15817+ return h_inode;
15818+}
15819+
15820+/* todo: hard/soft set? */
15821+void au_hiput(struct au_hinode *hinode)
15822+{
15823+ au_hn_free(hinode);
15824+ dput(hinode->hi_whdentry);
15825+ iput(hinode->hi_inode);
15826+}
15827+
15828+unsigned int au_hi_flags(struct inode *inode, int isdir)
15829+{
15830+ unsigned int flags;
15831+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
15832+
15833+ flags = 0;
15834+ if (au_opt_test(mnt_flags, XINO))
15835+ au_fset_hi(flags, XINO);
15836+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
15837+ au_fset_hi(flags, HNOTIFY);
15838+ return flags;
15839+}
15840+
15841+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15842+ struct inode *h_inode, unsigned int flags)
15843+{
15844+ struct au_hinode *hinode;
15845+ struct inode *hi;
15846+ struct au_iinfo *iinfo = au_ii(inode);
15847+
15848+ IiMustWriteLock(inode);
15849+
15850+ hinode = au_hinode(iinfo, bindex);
15851+ hi = hinode->hi_inode;
15852+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15853+
15854+ if (hi)
15855+ au_hiput(hinode);
15856+ hinode->hi_inode = h_inode;
15857+ if (h_inode) {
15858+ int err;
15859+ struct super_block *sb = inode->i_sb;
15860+ struct au_branch *br;
15861+
15862+ AuDebugOn(inode->i_mode
15863+ && (h_inode->i_mode & S_IFMT)
15864+ != (inode->i_mode & S_IFMT));
15865+ if (bindex == iinfo->ii_btop)
15866+ au_cpup_igen(inode, h_inode);
15867+ br = au_sbr(sb, bindex);
15868+ hinode->hi_id = br->br_id;
15869+ if (au_ftest_hi(flags, XINO)) {
15870+ err = au_xino_write(sb, bindex, h_inode->i_ino,
15871+ inode->i_ino);
15872+ if (unlikely(err))
15873+ AuIOErr1("failed au_xino_write() %d\n", err);
15874+ }
15875+
15876+ if (au_ftest_hi(flags, HNOTIFY)
15877+ && au_br_hnotifyable(br->br_perm)) {
15878+ err = au_hn_alloc(hinode, inode);
15879+ if (unlikely(err))
15880+ AuIOErr1("au_hn_alloc() %d\n", err);
15881+ }
15882+ }
15883+}
15884+
15885+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15886+ struct dentry *h_wh)
15887+{
15888+ struct au_hinode *hinode;
15889+
15890+ IiMustWriteLock(inode);
15891+
15892+ hinode = au_hinode(au_ii(inode), bindex);
15893+ AuDebugOn(hinode->hi_whdentry);
15894+ hinode->hi_whdentry = h_wh;
15895+}
15896+
15897+void au_update_iigen(struct inode *inode, int half)
15898+{
15899+ struct au_iinfo *iinfo;
15900+ struct au_iigen *iigen;
15901+ unsigned int sigen;
15902+
15903+ sigen = au_sigen(inode->i_sb);
15904+ iinfo = au_ii(inode);
15905+ iigen = &iinfo->ii_generation;
15906+ spin_lock(&iigen->ig_spin);
15907+ iigen->ig_generation = sigen;
15908+ if (half)
15909+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
15910+ else
15911+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
15912+ spin_unlock(&iigen->ig_spin);
15913+}
15914+
15915+/* it may be called at remount time, too */
15916+void au_update_ibrange(struct inode *inode, int do_put_zero)
15917+{
15918+ struct au_iinfo *iinfo;
15919+ aufs_bindex_t bindex, bbot;
15920+
15921+ AuDebugOn(au_is_bad_inode(inode));
15922+ IiMustWriteLock(inode);
15923+
15924+ iinfo = au_ii(inode);
15925+ if (do_put_zero && iinfo->ii_btop >= 0) {
15926+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
15927+ bindex++) {
15928+ struct inode *h_i;
15929+
15930+ h_i = au_hinode(iinfo, bindex)->hi_inode;
15931+ if (h_i
15932+ && !h_i->i_nlink
15933+ && !(h_i->i_state & I_LINKABLE))
15934+ au_set_h_iptr(inode, bindex, NULL, 0);
15935+ }
15936+ }
15937+
15938+ iinfo->ii_btop = -1;
15939+ iinfo->ii_bbot = -1;
15940+ bbot = au_sbbot(inode->i_sb);
15941+ for (bindex = 0; bindex <= bbot; bindex++)
15942+ if (au_hinode(iinfo, bindex)->hi_inode) {
15943+ iinfo->ii_btop = bindex;
15944+ break;
15945+ }
15946+ if (iinfo->ii_btop >= 0)
15947+ for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--)
15948+ if (au_hinode(iinfo, bindex)->hi_inode) {
15949+ iinfo->ii_bbot = bindex;
15950+ break;
15951+ }
15952+ AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot);
15953+}
15954+
15955+/* ---------------------------------------------------------------------- */
15956+
15957+void au_icntnr_init_once(void *_c)
15958+{
15959+ struct au_icntnr *c = _c;
15960+ struct au_iinfo *iinfo = &c->iinfo;
15961+
15962+ spin_lock_init(&iinfo->ii_generation.ig_spin);
15963+ au_rw_init(&iinfo->ii_rwsem);
15964+ inode_init_once(&c->vfs_inode);
15965+}
15966+
15967+void au_hinode_init(struct au_hinode *hinode)
15968+{
15969+ hinode->hi_inode = NULL;
15970+ hinode->hi_id = -1;
15971+ au_hn_init(hinode);
15972+ hinode->hi_whdentry = NULL;
15973+}
15974+
15975+int au_iinfo_init(struct inode *inode)
15976+{
15977+ struct au_iinfo *iinfo;
15978+ struct super_block *sb;
15979+ struct au_hinode *hi;
15980+ int nbr, i;
15981+
15982+ sb = inode->i_sb;
15983+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
15984+ nbr = au_sbbot(sb) + 1;
15985+ if (unlikely(nbr <= 0))
15986+ nbr = 1;
15987+ hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
15988+ if (hi) {
15989+ au_ninodes_inc(sb);
15990+
15991+ iinfo->ii_hinode = hi;
15992+ for (i = 0; i < nbr; i++, hi++)
15993+ au_hinode_init(hi);
15994+
15995+ iinfo->ii_generation.ig_generation = au_sigen(sb);
15996+ iinfo->ii_btop = -1;
15997+ iinfo->ii_bbot = -1;
15998+ iinfo->ii_vdir = NULL;
15999+ return 0;
16000+ }
16001+ return -ENOMEM;
16002+}
16003+
16004+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink)
16005+{
16006+ int err, i;
16007+ struct au_hinode *hip;
16008+
16009+ AuRwMustWriteLock(&iinfo->ii_rwsem);
16010+
16011+ err = -ENOMEM;
16012+ hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS,
16013+ may_shrink);
16014+ if (hip) {
16015+ iinfo->ii_hinode = hip;
16016+ i = iinfo->ii_bbot + 1;
16017+ hip += i;
16018+ for (; i < nbr; i++, hip++)
16019+ au_hinode_init(hip);
16020+ err = 0;
16021+ }
16022+
16023+ return err;
16024+}
16025+
16026+void au_iinfo_fin(struct inode *inode)
16027+{
16028+ struct au_iinfo *iinfo;
16029+ struct au_hinode *hi;
16030+ struct super_block *sb;
16031+ aufs_bindex_t bindex, bbot;
16032+ const unsigned char unlinked = !inode->i_nlink;
16033+
16034+ AuDebugOn(au_is_bad_inode(inode));
16035+
16036+ sb = inode->i_sb;
16037+ au_ninodes_dec(sb);
16038+ if (si_pid_test(sb))
16039+ au_xino_delete_inode(inode, unlinked);
16040+ else {
16041+ /*
16042+ * it is safe to hide the dependency between sbinfo and
16043+ * sb->s_umount.
16044+ */
16045+ lockdep_off();
16046+ si_noflush_read_lock(sb);
16047+ au_xino_delete_inode(inode, unlinked);
16048+ si_read_unlock(sb);
16049+ lockdep_on();
16050+ }
16051+
16052+ iinfo = au_ii(inode);
16053+ if (iinfo->ii_vdir)
16054+ au_vdir_free(iinfo->ii_vdir, /*atonce*/0);
16055+
16056+ bindex = iinfo->ii_btop;
16057+ if (bindex >= 0) {
16058+ hi = au_hinode(iinfo, bindex);
16059+ bbot = iinfo->ii_bbot;
16060+ while (bindex++ <= bbot) {
16061+ if (hi->hi_inode)
16062+ au_hiput(hi);
16063+ hi++;
16064+ }
16065+ }
16066+ au_delayed_kfree(iinfo->ii_hinode);
16067+ AuRwDestroy(&iinfo->ii_rwsem);
16068+}
16069diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
16070--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
16071+++ linux/fs/aufs/inode.c 2016-10-09 16:55:38.889431135 +0200
16072@@ -0,0 +1,519 @@
16073+/*
16074+ * Copyright (C) 2005-2016 Junjiro R. Okajima
16075+ *
16076+ * This program, aufs is free software; you can redistribute it and/or modify
16077+ * it under the terms of the GNU General Public License as published by
16078+ * the Free Software Foundation; either version 2 of the License, or
16079+ * (at your option) any later version.
16080+ *
16081+ * This program is distributed in the hope that it will be useful,
16082+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16083+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16084+ * GNU General Public License for more details.
16085+ *
16086+ * You should have received a copy of the GNU General Public License
16087+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16088+ */
16089+
16090+/*
16091+ * inode functions
16092+ */
16093+
16094+#include "aufs.h"
16095+
16096+struct inode *au_igrab(struct inode *inode)
16097+{
16098+ if (inode) {
16099+ AuDebugOn(!atomic_read(&inode->i_count));
16100+ ihold(inode);
16101+ }
16102+ return inode;
16103+}
16104+
16105+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
16106+{
16107+ au_cpup_attr_all(inode, /*force*/0);
16108+ au_update_iigen(inode, /*half*/1);
16109+ if (do_version)
16110+ inode->i_version++;
16111+}
16112+/* move each lower inode of @inode to the index au_br_index() gives for its hi_id */
16113+static int au_ii_refresh(struct inode *inode, int *update)
16114+{
16115+ int err, e, nbr;
16116+ umode_t type;
16117+ aufs_bindex_t bindex, new_bindex;
16118+ struct super_block *sb;
16119+ struct au_iinfo *iinfo;
16120+ struct au_hinode *p, *q, tmp;
16121+
16122+ AuDebugOn(au_is_bad_inode(inode));
16123+ IiMustWriteLock(inode);
16124+
16125+ *update = 0; /* set below only when a lower inode is dropped */
16126+ sb = inode->i_sb;
16127+ nbr = au_sbbot(sb) + 1;
16128+ type = inode->i_mode & S_IFMT;
16129+ iinfo = au_ii(inode);
16130+ err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
16131+ if (unlikely(err))
16132+ goto out;
16133+
16134+ AuDebugOn(iinfo->ii_btop < 0);
16135+ p = au_hinode(iinfo, iinfo->ii_btop);
16136+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
16137+ bindex++, p++) {
16138+ if (!p->hi_inode)
16139+ continue;
16140+
16141+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
16142+ new_bindex = au_br_index(sb, p->hi_id);
16143+ if (new_bindex == bindex)
16144+ continue;
16145+
16146+ if (new_bindex < 0) { /* no index found for hi_id: drop this lower inode */
16147+ *update = 1;
16148+ au_hiput(p);
16149+ p->hi_inode = NULL;
16150+ continue;
16151+ }
16152+
16153+ if (new_bindex < iinfo->ii_btop)
16154+ iinfo->ii_btop = new_bindex;
16155+ if (iinfo->ii_bbot < new_bindex)
16156+ iinfo->ii_bbot = new_bindex;
16157+ /* swap the two lower inodes, and loop again */
16158+ q = au_hinode(iinfo, new_bindex);
16159+ tmp = *q;
16160+ *q = *p;
16161+ *p = tmp;
16162+ if (tmp.hi_inode) { /* the slot swapped in is occupied: re-examine this index */
16163+ bindex--;
16164+ p--;
16165+ }
16166+ }
16167+ au_update_ibrange(inode, /*do_put_zero*/0);
16168+ au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
16169+ e = au_dy_irefresh(inode);
16170+ if (unlikely(e && !err))
16171+ err = e;
16172+
16173+out:
16174+ AuTraceErr(err);
16175+ return err;
16176+}
16177+/* point inode->i_op at the entry for the file type in the chosen iop array */
16178+void au_refresh_iop(struct inode *inode, int force_getattr)
16179+{
16180+ int type;
16181+ struct au_sbinfo *sbi = au_sbi(inode->i_sb);
16182+ const struct inode_operations *iop
16183+ = force_getattr ? aufs_iop : sbi->si_iop_array; /* base of the array indexed by type */
16184+
16185+ if (inode->i_op == iop)
16186+ return;
16187+
16188+ switch (inode->i_mode & S_IFMT) {
16189+ case S_IFDIR:
16190+ type = AuIop_DIR;
16191+ break;
16192+ case S_IFLNK:
16193+ type = AuIop_SYMLINK;
16194+ break;
16195+ default:
16196+ type = AuIop_OTHER;
16197+ break;
16198+ }
16199+
16200+ inode->i_op = iop + type;
16201+ /* unnecessary smp_wmb() */
16202+}
16203+/* refresh @inode by au_ii_refresh() alone; on success refresh its attributes too */
16204+int au_refresh_hinode_self(struct inode *inode)
16205+{
16206+ int err, update;
16207+
16208+ err = au_ii_refresh(inode, &update);
16209+ if (!err) /* i_version is bumped only for a dir that lost a lower inode */
16210+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
16211+
16212+ AuTraceErr(err);
16213+ return err;
16214+}
16215+/* run au_ii_refresh(), then set the positive lower inodes of @dentry into @inode */
16216+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
16217+{
16218+ int err, e, update;
16219+ unsigned int flags;
16220+ umode_t mode;
16221+ aufs_bindex_t bindex, bbot;
16222+ unsigned char isdir;
16223+ struct au_hinode *p;
16224+ struct au_iinfo *iinfo;
16225+
16226+ err = au_ii_refresh(inode, &update);
16227+ if (unlikely(err))
16228+ goto out;
16229+
16230+ update = 0; /* from here on, set only when a lower inode is newly set below */
16231+ iinfo = au_ii(inode);
16232+ p = au_hinode(iinfo, iinfo->ii_btop);
16233+ mode = (inode->i_mode & S_IFMT);
16234+ isdir = S_ISDIR(mode);
16235+ flags = au_hi_flags(inode, isdir);
16236+ bbot = au_dbbot(dentry);
16237+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
16238+ struct inode *h_i, *h_inode;
16239+ struct dentry *h_d;
16240+
16241+ h_d = au_h_dptr(dentry, bindex);
16242+ if (!h_d || d_is_negative(h_d))
16243+ continue;
16244+
16245+ h_inode = d_inode(h_d);
16246+ AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
16247+ if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
16248+ h_i = au_h_iptr(inode, bindex);
16249+ if (h_i) {
16250+ if (h_i == h_inode)
16251+ continue;
16252+ err = -EIO; /* a different lower inode is already set at this index */
16253+ break;
16254+ }
16255+ }
16256+ if (bindex < iinfo->ii_btop)
16257+ iinfo->ii_btop = bindex;
16258+ if (iinfo->ii_bbot < bindex)
16259+ iinfo->ii_bbot = bindex;
16260+ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
16261+ update = 1;
16262+ }
16263+ au_update_ibrange(inode, /*do_put_zero*/0);
16264+ e = au_dy_irefresh(inode);
16265+ if (unlikely(e && !err))
16266+ err = e;
16267+ if (!err)
16268+ au_refresh_hinode_attr(inode, update && isdir);
16269+
16270+out:
16271+ AuTraceErr(err);
16272+ return err;
16273+}
16274+/* set i_op/i_fop by file type, then set the lower inodes of @dentry into @inode */
16275+static int set_inode(struct inode *inode, struct dentry *dentry)
16276+{
16277+ int err;
16278+ unsigned int flags;
16279+ umode_t mode;
16280+ aufs_bindex_t bindex, btop, btail;
16281+ unsigned char isdir;
16282+ struct dentry *h_dentry;
16283+ struct inode *h_inode;
16284+ struct au_iinfo *iinfo;
16285+ struct inode_operations *iop;
16286+
16287+ IiMustWriteLock(inode);
16288+
16289+ err = 0;
16290+ isdir = 0;
16291+ iop = au_sbi(inode->i_sb)->si_iop_array;
16292+ btop = au_dbtop(dentry);
16293+ h_dentry = au_h_dptr(dentry, btop);
16294+ h_inode = d_inode(h_dentry);
16295+ mode = h_inode->i_mode; /* the file type is taken from the top lower inode */
16296+ switch (mode & S_IFMT) {
16297+ case S_IFREG:
16298+ btail = au_dbtail(dentry);
16299+ inode->i_op = iop + AuIop_OTHER;
16300+ inode->i_fop = &aufs_file_fop;
16301+ err = au_dy_iaop(inode, btop, h_inode);
16302+ if (unlikely(err))
16303+ goto out;
16304+ break;
16305+ case S_IFDIR:
16306+ isdir = 1;
16307+ btail = au_dbtaildir(dentry);
16308+ inode->i_op = iop + AuIop_DIR;
16309+ inode->i_fop = &aufs_dir_fop;
16310+ break;
16311+ case S_IFLNK:
16312+ btail = au_dbtail(dentry);
16313+ inode->i_op = iop + AuIop_SYMLINK;
16314+ break;
16315+ case S_IFBLK:
16316+ case S_IFCHR:
16317+ case S_IFIFO:
16318+ case S_IFSOCK:
16319+ btail = au_dbtail(dentry);
16320+ inode->i_op = iop + AuIop_OTHER;
16321+ init_special_inode(inode, mode, h_inode->i_rdev);
16322+ break;
16323+ default:
16324+ AuIOErr("Unknown file type 0%o\n", mode);
16325+ err = -EIO;
16326+ goto out;
16327+ }
16328+
16329+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
16330+ flags = au_hi_flags(inode, isdir);
16331+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
16332+ && au_ftest_hi(flags, HNOTIFY)
16333+ && dentry->d_name.len > AUFS_WH_PFX_LEN
16334+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
16335+ au_fclr_hi(flags, HNOTIFY);
16336+ iinfo = au_ii(inode);
16337+ iinfo->ii_btop = btop;
16338+ iinfo->ii_bbot = btail;
16339+ for (bindex = btop; bindex <= btail; bindex++) {
16340+ h_dentry = au_h_dptr(dentry, bindex);
16341+ if (h_dentry)
16342+ au_set_h_iptr(inode, bindex,
16343+ au_igrab(d_inode(h_dentry)), flags);
16344+ }
16345+ au_cpup_attr_all(inode, /*force*/1);
16346+ /*
16347+ * to force calling aufs_get_acl() every time,
16348+ * do not call cache_no_acl() for aufs inode.
16349+ */
16350+
16351+out:
16352+ return err;
16353+}
16354+
16355+/*
16356+ * returns with iinfo write_locked only when zero is returned
16357+ * minus: errno
16358+ * zero: success, a lower inode matched the inode of the top lower dentry
16359+ * plus: no error, but unmatched
16360+ */
16361+static int reval_inode(struct inode *inode, struct dentry *dentry)
16362+{
16363+ int err;
16364+ unsigned int gen, igflags;
16365+ aufs_bindex_t bindex, bbot;
16366+ struct inode *h_inode, *h_dinode;
16367+ struct dentry *h_dentry;
16368+
16369+ /*
16370+ * before this function, if aufs holds any iinfo lock, it must be only
16371+ * one, that of the parent dir.
16372+ * this case can happen by UDBA and an obsoleted inode number.
16373+ */
16374+ err = -EIO;
16375+ if (unlikely(inode->i_ino == parent_ino(dentry)))
16376+ goto out;
16377+
16378+ err = 1; /* stays positive when no lower inode matches below */
16379+ ii_write_lock_new_child(inode);
16380+ h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
16381+ h_dinode = d_inode(h_dentry);
16382+ bbot = au_ibbot(inode);
16383+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
16384+ h_inode = au_h_iptr(inode, bindex);
16385+ if (!h_inode || h_inode != h_dinode)
16386+ continue;
16387+
16388+ err = 0;
16389+ gen = au_iigen(inode, &igflags);
16390+ if (gen == au_digen(dentry)
16391+ && !au_ig_ftest(igflags, HALF_REFRESHED))
16392+ break;
16393+
16394+ /* fully refresh inode using dentry */
16395+ err = au_refresh_hinode(inode, dentry);
16396+ if (!err)
16397+ au_update_iigen(inode, /*half*/0);
16398+ break;
16399+ }
16400+
16401+ if (unlikely(err)) /* both the errno and the unmatched case unlock here */
16402+ ii_write_unlock(inode);
16403+out:
16404+ return err;
16405+}
16406+/* read the inode number stored for @h_ino on @bindex; assign and store a new one if zero */
16407+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16408+ unsigned int d_type, ino_t *ino)
16409+{
16410+ int err;
16411+ struct mutex *mtx;
16412+
16413+ /* serialize non-dir entries to keep a hardlinked inode number from racing */
16414+ mtx = NULL;
16415+ if (d_type != DT_DIR) {
16416+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
16417+ mutex_lock(mtx);
16418+ }
16419+ err = au_xino_read(sb, bindex, h_ino, ino);
16420+ if (unlikely(err))
16421+ goto out;
16422+
16423+ if (!*ino) {
16424+ err = -EIO; /* returned when au_xino_new_ino() gives zero */
16425+ *ino = au_xino_new_ino(sb);
16426+ if (unlikely(!*ino))
16427+ goto out;
16428+ err = au_xino_write(sb, bindex, h_ino, *ino);
16429+ if (unlikely(err))
16430+ goto out;
16431+ }
16432+
16433+out:
16434+ if (mtx)
16435+ mutex_unlock(mtx);
16436+ return err;
16437+}
16438+
16439+/* get the inode for @dentry; successful returns with iinfo write_locked */
16440+/* todo: return with unlocked? */
16441+struct inode *au_new_inode(struct dentry *dentry, int must_new)
16442+{
16443+ struct inode *inode, *h_inode;
16444+ struct dentry *h_dentry;
16445+ struct super_block *sb;
16446+ struct mutex *mtx;
16447+ ino_t h_ino, ino;
16448+ int err;
16449+ aufs_bindex_t btop;
16450+
16451+ sb = dentry->d_sb;
16452+ btop = au_dbtop(dentry);
16453+ h_dentry = au_h_dptr(dentry, btop);
16454+ h_inode = d_inode(h_dentry);
16455+ h_ino = h_inode->i_ino;
16456+
16457+ /*
16458+ * stop 'race'-ing between hardlinks under different
16459+ * parents.
16460+ */
16461+ mtx = NULL;
16462+ if (!d_is_dir(h_dentry))
16463+ mtx = &au_sbr(sb, btop)->br_xino.xi_nondir_mtx;
16464+
16465+new_ino:
16466+ if (mtx)
16467+ mutex_lock(mtx);
16468+ err = au_xino_read(sb, btop, h_ino, &ino);
16469+ inode = ERR_PTR(err);
16470+ if (unlikely(err))
16471+ goto out;
16472+
16473+ if (!ino) {
16474+ ino = au_xino_new_ino(sb);
16475+ if (unlikely(!ino)) {
16476+ inode = ERR_PTR(-EIO);
16477+ goto out;
16478+ }
16479+ }
16480+
16481+ AuDbg("i%lu\n", (unsigned long)ino);
16482+ inode = au_iget_locked(sb, ino);
16483+ err = PTR_ERR(inode);
16484+ if (IS_ERR(inode))
16485+ goto out;
16486+
16487+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
16488+ if (inode->i_state & I_NEW) {
16489+ ii_write_lock_new_child(inode);
16490+ err = set_inode(inode, dentry);
16491+ if (!err) {
16492+ unlock_new_inode(inode);
16493+ goto out; /* success */
16494+ }
16495+
16496+ /*
16497+ * iget_failed() calls iput(), but we need to call
16498+ * ii_write_unlock() after iget_failed(). so dirty hack for
16499+ * i_count.
16500+ */
16501+ atomic_inc(&inode->i_count);
16502+ iget_failed(inode);
16503+ ii_write_unlock(inode);
16504+ au_xino_write(sb, btop, h_ino, /*ino*/0);
16505+ /* ignore this error */
16506+ goto out_iput;
16507+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
16508+ /*
16509+ * horrible race condition between lookup, readdir and copyup
16510+ * (or something).
16511+ */
16512+ if (mtx)
16513+ mutex_unlock(mtx);
16514+ err = reval_inode(inode, dentry);
16515+ if (unlikely(err < 0)) {
16516+ mtx = NULL; /* already unlocked above: skip the unlock at out */
16517+ goto out_iput;
16518+ }
16519+
16520+ if (!err) {
16521+ mtx = NULL; /* already unlocked above: skip the unlock at out */
16522+ goto out; /* success */
16523+ } else if (mtx)
16524+ mutex_lock(mtx);
16525+ }
16526+
16527+ if (unlikely(au_test_fs_unique_ino(h_inode)))
16528+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
16529+ " b%d, %s, %pd, hi%lu, i%lu.\n",
16530+ btop, au_sbtype(h_dentry->d_sb), dentry,
16531+ (unsigned long)h_ino, (unsigned long)ino);
16532+ ino = 0;
16533+ err = au_xino_write(sb, btop, h_ino, /*ino*/0);
16534+ if (!err) {
16535+ iput(inode);
16536+ if (mtx)
16537+ mutex_unlock(mtx);
16538+ goto new_ino; /* retry after zero was written to xino for h_ino */
16539+ }
16540+
16541+out_iput:
16542+ iput(inode);
16543+ inode = ERR_PTR(err);
16544+out:
16545+ if (mtx)
16546+ mutex_unlock(mtx);
16547+ return inode;
16548+}
16549+
16550+/* ---------------------------------------------------------------------- */
16551+/* au_br_rdonly() of branch @bindex, or -EROFS when the lower inode there is immutable */
16552+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16553+ struct inode *inode)
16554+{
16555+ int err;
16556+ struct inode *hi;
16557+
16558+ err = au_br_rdonly(au_sbr(sb, bindex));
16559+
16560+ /* pseudo-link after flushed may happen out of bounds */
16561+ if (!err
16562+ && inode
16563+ && au_ibtop(inode) <= bindex
16564+ && bindex <= au_ibbot(inode)) {
16565+ /*
16566+ * permission check is unnecessary since vfsub routine
16567+ * will be called later
16568+ */
16569+ hi = au_h_iptr(inode, bindex);
16570+ if (hi)
16571+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
16572+ }
16573+
16574+ return err;
16575+}
16576+/* fsuid root always passes; anyone else is checked by inode_permission() */
16577+int au_test_h_perm(struct inode *h_inode, int mask)
16578+{
16579+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
16580+ return 0;
16581+ return inode_permission(h_inode, mask);
16582+}
16583+/* au_test_h_perm() with MAY_READ added when writing to a dir on NFS */
16584+int au_test_h_perm_sio(struct inode *h_inode, int mask)
16585+{
16586+ if (au_test_nfs(h_inode->i_sb)
16587+ && (mask & MAY_WRITE)
16588+ && S_ISDIR(h_inode->i_mode))
16589+ mask |= MAY_READ; /* force permission check */
16590+ return au_test_h_perm(h_inode, mask);
16591+}
16592diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
16593--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
16594+++ linux/fs/aufs/inode.h 2016-12-17 12:28:17.595211562 +0100
16595@@ -0,0 +1,691 @@
16596+/*
16597+ * Copyright (C) 2005-2016 Junjiro R. Okajima
16598+ *
16599+ * This program, aufs is free software; you can redistribute it and/or modify
16600+ * it under the terms of the GNU General Public License as published by
16601+ * the Free Software Foundation; either version 2 of the License, or
16602+ * (at your option) any later version.
16603+ *
16604+ * This program is distributed in the hope that it will be useful,
16605+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16606+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16607+ * GNU General Public License for more details.
16608+ *
16609+ * You should have received a copy of the GNU General Public License
16610+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16611+ */
16612+
16613+/*
16614+ * inode operations
16615+ */
16616+
16617+#ifndef __AUFS_INODE_H__
16618+#define __AUFS_INODE_H__
16619+
16620+#ifdef __KERNEL__
16621+
16622+#include <linux/fsnotify.h>
16623+#include "rwsem.h"
16624+
16625+struct vfsmount;
16626+/* notify data pointed to by au_hinode.hi_notify; empty without CONFIG_AUFS_HNOTIFY */
16627+struct au_hnotify {
16628+#ifdef CONFIG_AUFS_HNOTIFY
16629+#ifdef CONFIG_AUFS_HFSNOTIFY
16630+ /* never use fsnotify_add_vfsmount_mark() */
16631+ struct fsnotify_mark hn_mark;
16632+#endif
16633+ union {
16634+ struct inode *hn_aufs_inode; /* no get/put */
16635+ struct llist_node hn_lnode; /* delayed free */
16636+ };
16637+#endif
16638+} ____cacheline_aligned_in_smp;
16639+/* one element of au_iinfo.ii_hinode[]: a lower inode and what belongs to it */
16640+struct au_hinode {
16641+ struct inode *hi_inode;
16642+ aufs_bindex_t hi_id; /* given to au_br_index() to find the current index */
16643+#ifdef CONFIG_AUFS_HNOTIFY
16644+ struct au_hnotify *hi_notify;
16645+#endif
16646+
16647+ /* reference to the copied-up whiteout with get/put */
16648+ struct dentry *hi_whdentry;
16649+};
16650+
16651+/* ig_flags: bits of au_iigen.ig_flags, with test/set/clear helpers */
16652+#define AuIG_HALF_REFRESHED 1
16653+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
16654+#define au_ig_fset(flags, name) \
16655+ do { (flags) |= AuIG_##name; } while (0)
16656+#define au_ig_fclr(flags, name) \
16657+ do { (flags) &= ~AuIG_##name; } while (0)
16658+/* inode generation and its flags; au_iigen() reads both under ig_spin */
16659+struct au_iigen {
16660+ spinlock_t ig_spin;
16661+ __u32 ig_generation, ig_flags;
16662+};
16663+/* aufs private part of an inode, embedded in struct au_icntnr */
16664+struct au_vdir;
16665+struct au_iinfo {
16666+ struct au_iigen ii_generation;
16667+ struct super_block *ii_hsb1; /* no get/put */
16668+
16669+ struct au_rwsem ii_rwsem;
16670+ aufs_bindex_t ii_btop, ii_bbot; /* first and last index scanned in ii_hinode[] */
16671+ __u32 ii_higen; /* compared with h_inode->i_generation in au_test_higen() */
16672+ struct au_hinode *ii_hinode; /* array, see au_hinode() */
16673+ struct au_vdir *ii_vdir;
16674+};
16675+/* container of a vfs inode and its au_iinfo; au_ii() maps back by container_of() */
16676+struct au_icntnr {
16677+ struct au_iinfo iinfo;
16678+ struct inode vfs_inode;
16679+ union {
16680+ struct hlist_node plink;
16681+ struct llist_node lnode; /* delayed free */
16682+ };
16683+} ____cacheline_aligned_in_smp;
16684+
16685+/* au_pin flags: bits of au_pin.flags, with test/set/clear helpers */
16686+#define AuPin_DI_LOCKED 1
16687+#define AuPin_MNT_WRITE (1 << 1)
16688+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
16689+#define au_fset_pin(flags, name) \
16690+ do { (flags) |= AuPin_##name; } while (0)
16691+#define au_fclr_pin(flags, name) \
16692+ do { (flags) &= ~AuPin_##name; } while (0)
16693+/* state handed to the au_pin*() functions, grouped as input/output/temporary */
16694+struct au_pin {
16695+ /* input */
16696+ struct dentry *dentry;
16697+ unsigned int udba;
16698+ unsigned char lsc_di, lsc_hi, flags; /* flags: AuPin_* bits */
16699+ aufs_bindex_t bindex;
16700+
16701+ /* output */
16702+ struct dentry *parent;
16703+ struct au_hinode *hdir;
16704+ struct vfsmount *h_mnt;
16705+
16706+ /* temporary unlock/relock for copyup */
16707+ struct dentry *h_dentry, *h_parent;
16708+ struct au_branch *br;
16709+ struct task_struct *task;
16710+};
16711+
16712+void au_pin_hdir_unlock(struct au_pin *p);
16713+int au_pin_hdir_lock(struct au_pin *p);
16714+int au_pin_hdir_relock(struct au_pin *p);
16715+void au_pin_hdir_acquire_nest(struct au_pin *p);
16716+void au_pin_hdir_release(struct au_pin *p);
16717+
16718+/* ---------------------------------------------------------------------- */
16719+/* get the au_iinfo of @inode via its au_icntnr container; BUG on a bad inode */
16720+static inline struct au_iinfo *au_ii(struct inode *inode)
16721+{
16722+ BUG_ON(is_bad_inode(inode));
16723+ return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
16724+}
16725+
16726+/* ---------------------------------------------------------------------- */
16727+
16728+/* inode.c */
16729+struct inode *au_igrab(struct inode *inode);
16730+void au_refresh_iop(struct inode *inode, int force_getattr);
16731+int au_refresh_hinode_self(struct inode *inode);
16732+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
16733+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16734+ unsigned int d_type, ino_t *ino);
16735+struct inode *au_new_inode(struct dentry *dentry, int must_new);
16736+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16737+ struct inode *inode);
16738+int au_test_h_perm(struct inode *h_inode, int mask);
16739+int au_test_h_perm_sio(struct inode *h_inode, int mask);
16740+/* au_ino() under CONFIG_AUFS_SHWH; otherwise returns 0 and leaves *ino untouched */
16741+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
16742+ ino_t h_ino, unsigned int d_type, ino_t *ino)
16743+{
16744+#ifdef CONFIG_AUFS_SHWH
16745+ return au_ino(sb, bindex, h_ino, d_type, ino);
16746+#else
16747+ return 0;
16748+#endif
16749+}
16750+
16751+/* i_op.c */
16752+enum { /* index of an entry in the inode_operations arrays declared below */
16753+ AuIop_SYMLINK,
16754+ AuIop_DIR,
16755+ AuIop_OTHER,
16756+ AuIop_Last /* number of entries, used as the array size */
16757+};
16758+extern struct inode_operations aufs_iop[AuIop_Last],
16759+ aufs_iop_nogetattr[AuIop_Last];
16760+
16761+/* au_wr_dir flags, with test/set/clear helpers */
16762+#define AuWrDir_ADD_ENTRY 1
16763+#define AuWrDir_ISDIR (1 << 1)
16764+#define AuWrDir_TMPFILE (1 << 2)
16765+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
16766+#define au_fset_wrdir(flags, name) \
16767+ do { (flags) |= AuWrDir_##name; } while (0)
16768+#define au_fclr_wrdir(flags, name) \
16769+ do { (flags) &= ~AuWrDir_##name; } while (0)
16770+
16771+struct au_wr_dir_args {
16772+ aufs_bindex_t force_btgt;
16773+ unsigned char flags; /* presumably AuWrDir_* bits; confirm in i_op.c */
16774+};
16775+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16776+ struct au_wr_dir_args *args);
16777+
16778+struct dentry *au_pinned_h_parent(struct au_pin *pin);
16779+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
16780+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16781+ unsigned int udba, unsigned char flags);
16782+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16783+ unsigned int udba, unsigned char flags) __must_check;
16784+int au_do_pin(struct au_pin *pin) __must_check;
16785+void au_unpin(struct au_pin *pin);
16786+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
16787+/* AuIcpup flags (presumably for au_icpup_args.flags), with test/set/clear helpers */
16788+#define AuIcpup_DID_CPUP 1
16789+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
16790+#define au_fset_icpup(flags, name) \
16791+ do { (flags) |= AuIcpup_##name; } while (0)
16792+#define au_fclr_icpup(flags, name) \
16793+ do { (flags) &= ~AuIcpup_##name; } while (0)
16794+
16795+struct au_icpup_args {
16796+ unsigned char flags;
16797+ unsigned char pin_flags;
16798+ aufs_bindex_t btgt;
16799+ unsigned int udba;
16800+ struct au_pin pin;
16801+ struct path h_path;
16802+ struct inode *h_inode;
16803+};
16804+
16805+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16806+ struct au_icpup_args *a);
16807+
16808+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path);
16809+
16810+/* i_op_add.c */
16811+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
16812+ struct dentry *h_parent, int isdir);
16813+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
16814+ dev_t dev);
16815+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
16816+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
16817+ bool want_excl);
16818+struct vfsub_aopen_args;
16819+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
16820+ struct vfsub_aopen_args *args);
16821+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
16822+int aufs_link(struct dentry *src_dentry, struct inode *dir,
16823+ struct dentry *dentry);
16824+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
16825+
16826+/* i_op_del.c */
16827+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
16828+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16829+ struct dentry *h_parent, int isdir);
16830+int aufs_unlink(struct inode *dir, struct dentry *dentry);
16831+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
16832+
16833+/* i_op_ren.c */
16834+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
16835+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
16836+ struct inode *dir, struct dentry *dentry,
16837+ unsigned int flags);
16838+
16839+/* iinfo.c */
16840+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
16841+void au_hiput(struct au_hinode *hinode);
16842+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
16843+ struct dentry *h_wh);
16844+unsigned int au_hi_flags(struct inode *inode, int isdir);
16845+
16846+/* hinode flags: the value au_hi_flags() returns and au_set_h_iptr() takes */
16847+#define AuHi_XINO 1
16848+#define AuHi_HNOTIFY (1 << 1)
16849+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
16850+#define au_fset_hi(flags, name) \
16851+ do { (flags) |= AuHi_##name; } while (0)
16852+#define au_fclr_hi(flags, name) \
16853+ do { (flags) &= ~AuHi_##name; } while (0)
16854+
16855+#ifndef CONFIG_AUFS_HNOTIFY
16856+#undef AuHi_HNOTIFY
16857+#define AuHi_HNOTIFY 0 /* makes au_ftest_hi(flags, HNOTIFY) always zero */
16858+#endif
16859+
16860+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
16861+ struct inode *h_inode, unsigned int flags);
16862+
16863+void au_update_iigen(struct inode *inode, int half);
16864+void au_update_ibrange(struct inode *inode, int do_put_zero);
16865+
16866+void au_icntnr_init_once(void *_c);
16867+void au_hinode_init(struct au_hinode *hinode);
16868+int au_iinfo_init(struct inode *inode);
16869+void au_iinfo_fin(struct inode *inode);
16870+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink);
16871+
16872+#ifdef CONFIG_PROC_FS
16873+/* plink.c */
16874+int au_plink_maint(struct super_block *sb, int flags);
16875+struct au_sbinfo;
16876+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
16877+int au_plink_maint_enter(struct super_block *sb);
16878+#ifdef CONFIG_AUFS_DEBUG
16879+void au_plink_list(struct super_block *sb);
16880+#else
16881+AuStubVoid(au_plink_list, struct super_block *sb)
16882+#endif
16883+int au_plink_test(struct inode *inode);
16884+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
16885+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
16886+ struct dentry *h_dentry);
16887+void au_plink_put(struct super_block *sb, int verbose);
16888+void au_plink_clean(struct super_block *sb, int verbose);
16889+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
16890+#else
16891+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
16892+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
16893+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
16894+AuStubVoid(au_plink_list, struct super_block *sb);
16895+AuStubInt0(au_plink_test, struct inode *inode);
16896+AuStub(struct dentry *, au_plink_lkup, return NULL,
16897+ struct inode *inode, aufs_bindex_t bindex);
16898+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
16899+ struct dentry *h_dentry);
16900+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
16901+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
16902+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
16903+#endif /* CONFIG_PROC_FS */
16904+
16905+#ifdef CONFIG_AUFS_XATTR
16906+/* xattr.c */
16907+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
16908+ unsigned int verbose);
16909+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
16910+void au_xattr_init(struct super_block *sb);
16911+#else
16912+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
16913+ int ignore_flags, unsigned int verbose);
16914+AuStubVoid(au_xattr_init, struct super_block *sb);
16915+#endif
16916+
16917+#ifdef CONFIG_FS_POSIX_ACL
16918+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
16919+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
16920+#endif
16921+
16922+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
16923+enum { /* values for au_sxattr.type */
16924+ AU_XATTR_SET,
16925+ AU_ACL_SET
16926+};
16927+
16928+struct au_sxattr {
16929+ int type; /* presumably selects which member of u is valid; confirm in xattr.c */
16930+ union {
16931+ struct {
16932+ const char *name;
16933+ const void *value;
16934+ size_t size;
16935+ int flags;
16936+ } set;
16937+ struct {
16938+ struct posix_acl *acl;
16939+ int type;
16940+ } acl_set;
16941+ } u;
16942+};
16943+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
16944+ struct au_sxattr *arg);
16945+#endif
16946+
16947+/* ---------------------------------------------------------------------- */
16948+
16949+/* lock subclass for iinfo, passed to au_rw_{read,write}_lock_nested() */
16950+enum {
16951+ AuLsc_II_CHILD, /* child first */
16952+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
16953+ AuLsc_II_CHILD3, /* copyup dirs */
16954+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
16955+ AuLsc_II_PARENT2,
16956+ AuLsc_II_PARENT3, /* copyup dirs */
16957+ AuLsc_II_NEW_CHILD
16958+};
16959+
16960+/*
16961+ * ii_read_lock_child, ii_write_lock_child,
16962+ * ii_read_lock_child2, ii_write_lock_child2,
16963+ * ii_read_lock_child3, ii_write_lock_child3,
16964+ * ii_read_lock_parent, ii_write_lock_parent,
16965+ * ii_read_lock_parent2, ii_write_lock_parent2,
16966+ * ii_read_lock_parent3, ii_write_lock_parent3,
16967+ * ii_read_lock_new_child, ii_write_lock_new_child,
16968+ */
16969+#define AuReadLockFunc(name, lsc) \
16970+static inline void ii_read_lock_##name(struct inode *i) \
16971+{ \
16972+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16973+}
16974+
16975+#define AuWriteLockFunc(name, lsc) \
16976+static inline void ii_write_lock_##name(struct inode *i) \
16977+{ \
16978+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16979+}
16980+
16981+#define AuRWLockFuncs(name, lsc) \
16982+ AuReadLockFunc(name, lsc) \
16983+ AuWriteLockFunc(name, lsc)
16984+
16985+AuRWLockFuncs(child, CHILD); /* each line defines one read and one write lock helper */
16986+AuRWLockFuncs(child2, CHILD2);
16987+AuRWLockFuncs(child3, CHILD3);
16988+AuRWLockFuncs(parent, PARENT);
16989+AuRWLockFuncs(parent2, PARENT2);
16990+AuRWLockFuncs(parent3, PARENT3);
16991+AuRWLockFuncs(new_child, NEW_CHILD);
16992+
16993+#undef AuReadLockFunc /* the generators are not needed after this point */
16994+#undef AuWriteLockFunc
16995+#undef AuRWLockFuncs
16996+
16997+/*
16998+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock (all on ii_rwsem)
16999+ */
17000+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
17001+
17002+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
17003+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
17004+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
17005+
17006+/* ---------------------------------------------------------------------- */
17007+/* zero i_mode of the embedded vfs inode, only under CONFIG_AUFS_DEBUG */
17008+static inline void au_icntnr_init(struct au_icntnr *c)
17009+{
17010+#ifdef CONFIG_AUFS_DEBUG
17011+ c->vfs_inode.i_mode = 0;
17012+#endif
17013+}
17014+/* return ig_generation, also storing ig_flags to @igflags if non-NULL, under ig_spin */
17015+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
17016+{
17017+ unsigned int gen;
17018+ struct au_iinfo *iinfo;
17019+ struct au_iigen *iigen;
17020+
17021+ iinfo = au_ii(inode);
17022+ iigen = &iinfo->ii_generation;
17023+ spin_lock(&iigen->ig_spin);
17024+ if (igflags)
17025+ *igflags = iigen->ig_flags;
17026+ gen = iigen->ig_generation;
17027+ spin_unlock(&iigen->ig_spin);
17028+
17029+ return gen;
17030+}
17031+
17032+/* tiny test for inode number: non-zero unless both h_inode's sb and generation match */
17033+/* tmpfs generation is too rough */
17034+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
17035+{
17036+ struct au_iinfo *iinfo;
17037+
17038+ iinfo = au_ii(inode);
17039+ AuRwMustAnyLock(&iinfo->ii_rwsem);
17040+ return !(iinfo->ii_hsb1 == h_inode->i_sb
17041+ && iinfo->ii_higen == h_inode->i_generation);
17042+}
17043+/* decrement the generation of @inode under ig_spin */
17044+static inline void au_iigen_dec(struct inode *inode)
17045+{
17046+ struct au_iinfo *iinfo;
17047+ struct au_iigen *iigen;
17048+
17049+ iinfo = au_ii(inode);
17050+ iigen = &iinfo->ii_generation;
17051+ spin_lock(&iigen->ig_spin);
17052+ iigen->ig_generation--;
17053+ spin_unlock(&iigen->ig_spin);
17054+}
17055+/* -EIO when @inode is non-NULL and its generation differs from @sigen, otherwise 0 */
17056+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
17057+{
17058+ int err;
17059+
17060+ err = 0;
17061+ if (unlikely(inode && au_iigen(inode, NULL) != sigen))
17062+ err = -EIO;
17063+
17064+ return err;
17065+}
17066+
17067+/* ---------------------------------------------------------------------- */
17068+/* address of element @bindex of ii_hinode[]; neither range nor NULL is checked here */
17069+static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo,
17070+ aufs_bindex_t bindex)
17071+{
17072+ return iinfo->ii_hinode + bindex;
17073+}
17074+/* true for a bad inode, or for an inode whose ii_hinode array is NULL */
17075+static inline int au_is_bad_inode(struct inode *inode)
17076+{
17077+ return !!(is_bad_inode(inode) || !au_hinode(au_ii(inode), 0));
17078+}
17079+/* hi_id stored at index @bindex of @inode; the iinfo lock must be held */
17080+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
17081+ aufs_bindex_t bindex)
17082+{
17083+ IiMustAnyLock(inode);
17084+ return au_hinode(au_ii(inode), bindex)->hi_id;
17085+}
17086+/* read ii_btop of @inode; the iinfo lock must be held */
17087+static inline aufs_bindex_t au_ibtop(struct inode *inode)
17088+{
17089+ IiMustAnyLock(inode);
17090+ return au_ii(inode)->ii_btop;
17091+}
17092+/* read ii_bbot of @inode; the iinfo lock must be held */
17093+static inline aufs_bindex_t au_ibbot(struct inode *inode)
17094+{
17095+ IiMustAnyLock(inode);
17096+ return au_ii(inode)->ii_bbot;
17097+}
17098+/* read ii_vdir of @inode; the iinfo lock must be held */
17099+static inline struct au_vdir *au_ivdir(struct inode *inode)
17100+{
17101+ IiMustAnyLock(inode);
17102+ return au_ii(inode)->ii_vdir;
17103+}
17104+/* hi_whdentry stored at index @bindex of @inode; the iinfo lock must be held */
17105+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
17106+{
17107+ IiMustAnyLock(inode);
17108+ return au_hinode(au_ii(inode), bindex)->hi_whdentry;
17109+}
17110+/* store @bindex to ii_btop of @inode; the iinfo write lock must be held */
17111+static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex)
17112+{
17113+ IiMustWriteLock(inode);
17114+ au_ii(inode)->ii_btop = bindex;
17115+}
17116+/* store @bindex to ii_bbot of @inode; the iinfo write lock must be held */
17117+static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex)
17118+{
17119+ IiMustWriteLock(inode);
17120+ au_ii(inode)->ii_bbot = bindex;
17121+}
17122+/* store @vdir to ii_vdir of @inode; the iinfo write lock must be held */
17123+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
17124+{
17125+ IiMustWriteLock(inode);
17126+ au_ii(inode)->ii_vdir = vdir;
17127+}
17128+/* au_hinode() on the iinfo of @inode; the iinfo lock must be held */
17129+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
17130+{
17131+ IiMustAnyLock(inode);
17132+ return au_hinode(au_ii(inode), bindex);
17133+}
17134+
17135+/* ---------------------------------------------------------------------- */
17136+/* pin->parent, or NULL when @pin is NULL */
17137+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
17138+{
17139+ if (pin)
17140+ return pin->parent;
17141+ return NULL;
17142+}
17143+/* lower inode of pin->hdir, or NULL when @pin or its hdir is NULL */
17144+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
17145+{
17146+ if (pin && pin->hdir)
17147+ return pin->hdir->hi_inode;
17148+ return NULL;
17149+}
17150+/* pin->hdir, or NULL when @pin is NULL */
17151+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
17152+{
17153+ if (pin)
17154+ return pin->hdir;
17155+ return NULL;
17156+}
17157+/* store @dentry to pin->dentry; does nothing when @pin is NULL */
17158+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
17159+{
17160+ if (pin)
17161+ pin->dentry = dentry;
17162+}
17163+/* set (non-zero @lflag) or clear AuPin_DI_LOCKED in pin->flags; NULL @pin is ignored */
17164+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
17165+ unsigned char lflag)
17166+{
17167+ if (pin) {
17168+ if (lflag)
17169+ au_fset_pin(pin->flags, DI_LOCKED);
17170+ else
17171+ au_fclr_pin(pin->flags, DI_LOCKED);
17172+ }
17173+}
17174+/* compiled out: would replace pin->parent, putting the old one and getting the new one */
17175+#if 0 /* reserved */
17176+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
17177+{
17178+ if (pin) {
17179+ dput(pin->parent);
17180+ pin->parent = dget(parent);
17181+ }
17182+}
17183+#endif
17184+
17185+/* ---------------------------------------------------------------------- */
17186+
17187+struct au_branch;
17188+#ifdef CONFIG_AUFS_HNOTIFY
17189+struct au_hnotify_op {
17190+ void (*ctl)(struct au_hinode *hinode, int do_set);
17191+ int (*alloc)(struct au_hinode *hinode);
17192+
17193+ /*
17194+ * if it returns true, the caller should free hinode->hi_notify,
17195+ * otherwise ->free() frees it.
17196+ */
17197+ int (*free)(struct au_hinode *hinode,
17198+ struct au_hnotify *hn) __must_check;
17199+
17200+ void (*fin)(void);
17201+ int (*init)(void);
17202+
17203+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
17204+ void (*fin_br)(struct au_branch *br);
17205+ int (*init_br)(struct au_branch *br, int perm);
17206+};
17207+
17208+/* hnotify.c */
17209+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
17210+void au_hn_free(struct au_hinode *hinode);
17211+void au_hn_ctl(struct au_hinode *hinode, int do_set);
17212+void au_hn_reset(struct inode *inode, unsigned int flags);
17213+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
17214+ struct qstr *h_child_qstr, struct inode *h_child_inode);
17215+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
17216+int au_hnotify_init_br(struct au_branch *br, int perm);
17217+void au_hnotify_fin_br(struct au_branch *br);
17218+int __init au_hnotify_init(void);
17219+void au_hnotify_fin(void);
17220+
17221+/* hfsnotify.c */
17222+extern const struct au_hnotify_op au_hnotify_op;
17223+
17224+static inline
17225+void au_hn_init(struct au_hinode *hinode)
17226+{
17227+ hinode->hi_notify = NULL;
17228+}
17229+
17230+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
17231+{
17232+ return hinode->hi_notify;
17233+}
17234+
17235+#else
17236+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
17237+ struct au_hinode *hinode __maybe_unused,
17238+ struct inode *inode __maybe_unused)
17239+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
17240+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
17241+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
17242+ int do_set __maybe_unused)
17243+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
17244+ unsigned int flags __maybe_unused)
17245+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
17246+ struct au_branch *br __maybe_unused,
17247+ int perm __maybe_unused)
17248+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
17249+ int perm __maybe_unused)
17250+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
17251+AuStubInt0(__init au_hnotify_init, void)
17252+AuStubVoid(au_hnotify_fin, void)
17253+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
17254+#endif /* CONFIG_AUFS_HNOTIFY */
17255+
17256+static inline void au_hn_suspend(struct au_hinode *hdir)
17257+{
17258+ au_hn_ctl(hdir, /*do_set*/0);
17259+}
17260+
17261+static inline void au_hn_resume(struct au_hinode *hdir)
17262+{
17263+ au_hn_ctl(hdir, /*do_set*/1);
17264+}
17265+
17266+static inline void au_hn_inode_lock(struct au_hinode *hdir)
17267+{
17268+ inode_lock(hdir->hi_inode);
17269+ au_hn_suspend(hdir);
17270+}
17271+
17272+static inline void au_hn_inode_lock_nested(struct au_hinode *hdir,
17273+ unsigned int sc __maybe_unused)
17274+{
17275+ inode_lock_nested(hdir->hi_inode, sc);
17276+ au_hn_suspend(hdir);
17277+}
17278+
17279+static inline void au_hn_inode_unlock(struct au_hinode *hdir)
17280+{
17281+ au_hn_resume(hdir);
17282+ inode_unlock(hdir->hi_inode);
17283+}
17284+
17285+#endif /* __KERNEL__ */
17286+#endif /* __AUFS_INODE_H__ */
17287diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
17288--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
17289+++ linux/fs/aufs/ioctl.c 2016-10-09 16:55:36.492701639 +0200
17290@@ -0,0 +1,219 @@
17291+/*
17292+ * Copyright (C) 2005-2016 Junjiro R. Okajima
17293+ *
17294+ * This program, aufs is free software; you can redistribute it and/or modify
17295+ * it under the terms of the GNU General Public License as published by
17296+ * the Free Software Foundation; either version 2 of the License, or
17297+ * (at your option) any later version.
17298+ *
17299+ * This program is distributed in the hope that it will be useful,
17300+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17301+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17302+ * GNU General Public License for more details.
17303+ *
17304+ * You should have received a copy of the GNU General Public License
17305+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17306+ */
17307+
17308+/*
17309+ * ioctl
17310+ * plink-management and readdir in userspace.
17311+ * assist the pathconf(3) wrapper library.
17312+ * move-down
17313+ * File-based Hierarchical Storage Management.
17314+ */
17315+
17316+#include <linux/compat.h>
17317+#include <linux/file.h>
17318+#include "aufs.h"
17319+
17320+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
17321+{
17322+ int err, fd;
17323+ aufs_bindex_t wbi, bindex, bbot;
17324+ struct file *h_file;
17325+ struct super_block *sb;
17326+ struct dentry *root;
17327+ struct au_branch *br;
17328+ struct aufs_wbr_fd wbrfd = {
17329+ .oflags = au_dir_roflags,
17330+ .brid = -1
17331+ };
17332+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
17333+ | O_NOATIME | O_CLOEXEC;
17334+
17335+ AuDebugOn(wbrfd.oflags & ~valid);
17336+
17337+ if (arg) {
17338+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
17339+ if (unlikely(err)) {
17340+ err = -EFAULT;
17341+ goto out;
17342+ }
17343+
17344+ err = -EINVAL;
17345+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
17346+ wbrfd.oflags |= au_dir_roflags;
17347+ AuDbg("0%o\n", wbrfd.oflags);
17348+ if (unlikely(wbrfd.oflags & ~valid))
17349+ goto out;
17350+ }
17351+
17352+ fd = get_unused_fd_flags(0);
17353+ err = fd;
17354+ if (unlikely(fd < 0))
17355+ goto out;
17356+
17357+ h_file = ERR_PTR(-EINVAL);
17358+ wbi = 0;
17359+ br = NULL;
17360+ sb = path->dentry->d_sb;
17361+ root = sb->s_root;
17362+ aufs_read_lock(root, AuLock_IR);
17363+ bbot = au_sbbot(sb);
17364+ if (wbrfd.brid >= 0) {
17365+ wbi = au_br_index(sb, wbrfd.brid);
17366+ if (unlikely(wbi < 0 || wbi > bbot))
17367+ goto out_unlock;
17368+ }
17369+
17370+ h_file = ERR_PTR(-ENOENT);
17371+ br = au_sbr(sb, wbi);
17372+ if (!au_br_writable(br->br_perm)) {
17373+ if (arg)
17374+ goto out_unlock;
17375+
17376+ bindex = wbi + 1;
17377+ wbi = -1;
17378+ for (; bindex <= bbot; bindex++) {
17379+ br = au_sbr(sb, bindex);
17380+ if (au_br_writable(br->br_perm)) {
17381+ wbi = bindex;
17382+ br = au_sbr(sb, wbi);
17383+ break;
17384+ }
17385+ }
17386+ }
17387+ AuDbg("wbi %d\n", wbi);
17388+ if (wbi >= 0)
17389+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
17390+ /*force_wr*/0);
17391+
17392+out_unlock:
17393+ aufs_read_unlock(root, AuLock_IR);
17394+ err = PTR_ERR(h_file);
17395+ if (IS_ERR(h_file))
17396+ goto out_fd;
17397+
17398+ au_br_put(br); /* cf. au_h_open() */
17399+ fd_install(fd, h_file);
17400+ err = fd;
17401+ goto out; /* success */
17402+
17403+out_fd:
17404+ put_unused_fd(fd);
17405+out:
17406+ AuTraceErr(err);
17407+ return err;
17408+}
17409+
17410+/* ---------------------------------------------------------------------- */
17411+
17412+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
17413+{
17414+ long err;
17415+ struct dentry *dentry;
17416+
17417+ switch (cmd) {
17418+ case AUFS_CTL_RDU:
17419+ case AUFS_CTL_RDU_INO:
17420+ err = au_rdu_ioctl(file, cmd, arg);
17421+ break;
17422+
17423+ case AUFS_CTL_WBR_FD:
17424+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
17425+ break;
17426+
17427+ case AUFS_CTL_IBUSY:
17428+ err = au_ibusy_ioctl(file, arg);
17429+ break;
17430+
17431+ case AUFS_CTL_BRINFO:
17432+ err = au_brinfo_ioctl(file, arg);
17433+ break;
17434+
17435+ case AUFS_CTL_FHSM_FD:
17436+ dentry = file->f_path.dentry;
17437+ if (IS_ROOT(dentry))
17438+ err = au_fhsm_fd(dentry->d_sb, arg);
17439+ else
17440+ err = -ENOTTY;
17441+ break;
17442+
17443+ default:
17444+ /* do not call the lower */
17445+ AuDbg("0x%x\n", cmd);
17446+ err = -ENOTTY;
17447+ }
17448+
17449+ AuTraceErr(err);
17450+ return err;
17451+}
17452+
17453+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
17454+{
17455+ long err;
17456+
17457+ switch (cmd) {
17458+ case AUFS_CTL_MVDOWN:
17459+ err = au_mvdown(file->f_path.dentry, (void __user *)arg);
17460+ break;
17461+
17462+ case AUFS_CTL_WBR_FD:
17463+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
17464+ break;
17465+
17466+ default:
17467+ /* do not call the lower */
17468+ AuDbg("0x%x\n", cmd);
17469+ err = -ENOTTY;
17470+ }
17471+
17472+ AuTraceErr(err);
17473+ return err;
17474+}
17475+
17476+#ifdef CONFIG_COMPAT
17477+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
17478+ unsigned long arg)
17479+{
17480+ long err;
17481+
17482+ switch (cmd) {
17483+ case AUFS_CTL_RDU:
17484+ case AUFS_CTL_RDU_INO:
17485+ err = au_rdu_compat_ioctl(file, cmd, arg);
17486+ break;
17487+
17488+ case AUFS_CTL_IBUSY:
17489+ err = au_ibusy_compat_ioctl(file, arg);
17490+ break;
17491+
17492+ case AUFS_CTL_BRINFO:
17493+ err = au_brinfo_compat_ioctl(file, arg);
17494+ break;
17495+
17496+ default:
17497+ err = aufs_ioctl_dir(file, cmd, arg);
17498+ }
17499+
17500+ AuTraceErr(err);
17501+ return err;
17502+}
17503+
17504+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
17505+ unsigned long arg)
17506+{
17507+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
17508+}
17509+#endif
17510diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
17511--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
17512+++ linux/fs/aufs/i_op_add.c 2016-12-17 12:28:17.595211562 +0100
17513@@ -0,0 +1,928 @@
17514+/*
17515+ * Copyright (C) 2005-2016 Junjiro R. Okajima
17516+ *
17517+ * This program, aufs is free software; you can redistribute it and/or modify
17518+ * it under the terms of the GNU General Public License as published by
17519+ * the Free Software Foundation; either version 2 of the License, or
17520+ * (at your option) any later version.
17521+ *
17522+ * This program is distributed in the hope that it will be useful,
17523+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17524+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17525+ * GNU General Public License for more details.
17526+ *
17527+ * You should have received a copy of the GNU General Public License
17528+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17529+ */
17530+
17531+/*
17532+ * inode operations (add entry)
17533+ */
17534+
17535+#include "aufs.h"
17536+
17537+/*
17538+ * final procedure of adding a new entry, except link(2).
17539+ * remove whiteout, instantiate, copyup the parent dir's times and size
17540+ * and update version.
17541+ * if it failed, re-create the removed whiteout.
17542+ */
17543+static int epilog(struct inode *dir, aufs_bindex_t bindex,
17544+ struct dentry *wh_dentry, struct dentry *dentry)
17545+{
17546+ int err, rerr;
17547+ aufs_bindex_t bwh;
17548+ struct path h_path;
17549+ struct super_block *sb;
17550+ struct inode *inode, *h_dir;
17551+ struct dentry *wh;
17552+
17553+ bwh = -1;
17554+ sb = dir->i_sb;
17555+ if (wh_dentry) {
17556+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
17557+ IMustLock(h_dir);
17558+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
17559+ bwh = au_dbwh(dentry);
17560+ h_path.dentry = wh_dentry;
17561+ h_path.mnt = au_sbr_mnt(sb, bindex);
17562+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
17563+ dentry);
17564+ if (unlikely(err))
17565+ goto out;
17566+ }
17567+
17568+ inode = au_new_inode(dentry, /*must_new*/1);
17569+ if (!IS_ERR(inode)) {
17570+ d_instantiate(dentry, inode);
17571+ dir = d_inode(dentry->d_parent); /* dir inode is locked */
17572+ IMustLock(dir);
17573+ au_dir_ts(dir, bindex);
17574+ dir->i_version++;
17575+ au_fhsm_wrote(sb, bindex, /*force*/0);
17576+ return 0; /* success */
17577+ }
17578+
17579+ err = PTR_ERR(inode);
17580+ if (!wh_dentry)
17581+ goto out;
17582+
17583+ /* revert */
17584+ /* dir inode is locked */
17585+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
17586+ rerr = PTR_ERR(wh);
17587+ if (IS_ERR(wh)) {
17588+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
17589+ dentry, err, rerr);
17590+ err = -EIO;
17591+ } else
17592+ dput(wh);
17593+
17594+out:
17595+ return err;
17596+}
17597+
17598+static int au_d_may_add(struct dentry *dentry)
17599+{
17600+ int err;
17601+
17602+ err = 0;
17603+ if (unlikely(d_unhashed(dentry)))
17604+ err = -ENOENT;
17605+ if (unlikely(d_really_is_positive(dentry)))
17606+ err = -EEXIST;
17607+ return err;
17608+}
17609+
17610+/*
17611+ * simple tests for the adding inode operations.
17612+ * following the checks in vfs, plus the parent-child relationship.
17613+ */
17614+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
17615+ struct dentry *h_parent, int isdir)
17616+{
17617+ int err;
17618+ umode_t h_mode;
17619+ struct dentry *h_dentry;
17620+ struct inode *h_inode;
17621+
17622+ err = -ENAMETOOLONG;
17623+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17624+ goto out;
17625+
17626+ h_dentry = au_h_dptr(dentry, bindex);
17627+ if (d_really_is_negative(dentry)) {
17628+ err = -EEXIST;
17629+ if (unlikely(d_is_positive(h_dentry)))
17630+ goto out;
17631+ } else {
17632+ /* rename(2) case */
17633+ err = -EIO;
17634+ if (unlikely(d_is_negative(h_dentry)))
17635+ goto out;
17636+ h_inode = d_inode(h_dentry);
17637+ if (unlikely(!h_inode->i_nlink))
17638+ goto out;
17639+
17640+ h_mode = h_inode->i_mode;
17641+ if (!isdir) {
17642+ err = -EISDIR;
17643+ if (unlikely(S_ISDIR(h_mode)))
17644+ goto out;
17645+ } else if (unlikely(!S_ISDIR(h_mode))) {
17646+ err = -ENOTDIR;
17647+ goto out;
17648+ }
17649+ }
17650+
17651+ err = 0;
17652+ /* expected parent dir is locked */
17653+ if (unlikely(h_parent != h_dentry->d_parent))
17654+ err = -EIO;
17655+
17656+out:
17657+ AuTraceErr(err);
17658+ return err;
17659+}
17660+
17661+/*
17662+ * initial procedure of adding a new entry.
17663+ * prepare writable branch and the parent dir, lock it,
17664+ * and lookup whiteout for the new entry.
17665+ */
17666+static struct dentry*
17667+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
17668+ struct dentry *src_dentry, struct au_pin *pin,
17669+ struct au_wr_dir_args *wr_dir_args)
17670+{
17671+ struct dentry *wh_dentry, *h_parent;
17672+ struct super_block *sb;
17673+ struct au_branch *br;
17674+ int err;
17675+ unsigned int udba;
17676+ aufs_bindex_t bcpup;
17677+
17678+ AuDbg("%pd\n", dentry);
17679+
17680+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
17681+ bcpup = err;
17682+ wh_dentry = ERR_PTR(err);
17683+ if (unlikely(err < 0))
17684+ goto out;
17685+
17686+ sb = dentry->d_sb;
17687+ udba = au_opt_udba(sb);
17688+ err = au_pin(pin, dentry, bcpup, udba,
17689+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17690+ wh_dentry = ERR_PTR(err);
17691+ if (unlikely(err))
17692+ goto out;
17693+
17694+ h_parent = au_pinned_h_parent(pin);
17695+ if (udba != AuOpt_UDBA_NONE
17696+ && au_dbtop(dentry) == bcpup)
17697+ err = au_may_add(dentry, bcpup, h_parent,
17698+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
17699+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17700+ err = -ENAMETOOLONG;
17701+ wh_dentry = ERR_PTR(err);
17702+ if (unlikely(err))
17703+ goto out_unpin;
17704+
17705+ br = au_sbr(sb, bcpup);
17706+ if (dt) {
17707+ struct path tmp = {
17708+ .dentry = h_parent,
17709+ .mnt = au_br_mnt(br)
17710+ };
17711+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
17712+ }
17713+
17714+ wh_dentry = NULL;
17715+ if (bcpup != au_dbwh(dentry))
17716+ goto out; /* success */
17717+
17718+ /*
17719+ * ENAMETOOLONG here means that if we allowed creating such a name, it
17720+ * could not be removed in the future. So we don't allow such a
17721+ * name here and we don't handle ENAMETOOLONG differently here.
17722+ */
17723+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
17724+
17725+out_unpin:
17726+ if (IS_ERR(wh_dentry))
17727+ au_unpin(pin);
17728+out:
17729+ return wh_dentry;
17730+}
17731+
17732+/* ---------------------------------------------------------------------- */
17733+
17734+enum { Mknod, Symlink, Creat };
17735+struct simple_arg {
17736+ int type;
17737+ union {
17738+ struct {
17739+ umode_t mode;
17740+ bool want_excl;
17741+ bool try_aopen;
17742+ struct vfsub_aopen_args *aopen;
17743+ } c;
17744+ struct {
17745+ const char *symname;
17746+ } s;
17747+ struct {
17748+ umode_t mode;
17749+ dev_t dev;
17750+ } m;
17751+ } u;
17752+};
17753+
17754+static int add_simple(struct inode *dir, struct dentry *dentry,
17755+ struct simple_arg *arg)
17756+{
17757+ int err, rerr;
17758+ aufs_bindex_t btop;
17759+ unsigned char created;
17760+ const unsigned char try_aopen
17761+ = (arg->type == Creat && arg->u.c.try_aopen);
17762+ struct dentry *wh_dentry, *parent;
17763+ struct inode *h_dir;
17764+ struct super_block *sb;
17765+ struct au_branch *br;
17766+ /* to reduce stack size */
17767+ struct {
17768+ struct au_dtime dt;
17769+ struct au_pin pin;
17770+ struct path h_path;
17771+ struct au_wr_dir_args wr_dir_args;
17772+ } *a;
17773+
17774+ AuDbg("%pd\n", dentry);
17775+ IMustLock(dir);
17776+
17777+ err = -ENOMEM;
17778+ a = kmalloc(sizeof(*a), GFP_NOFS);
17779+ if (unlikely(!a))
17780+ goto out;
17781+ a->wr_dir_args.force_btgt = -1;
17782+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
17783+
17784+ parent = dentry->d_parent; /* dir inode is locked */
17785+ if (!try_aopen) {
17786+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17787+ if (unlikely(err))
17788+ goto out_free;
17789+ }
17790+ err = au_d_may_add(dentry);
17791+ if (unlikely(err))
17792+ goto out_unlock;
17793+ if (!try_aopen)
17794+ di_write_lock_parent(parent);
17795+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
17796+ &a->pin, &a->wr_dir_args);
17797+ err = PTR_ERR(wh_dentry);
17798+ if (IS_ERR(wh_dentry))
17799+ goto out_parent;
17800+
17801+ btop = au_dbtop(dentry);
17802+ sb = dentry->d_sb;
17803+ br = au_sbr(sb, btop);
17804+ a->h_path.dentry = au_h_dptr(dentry, btop);
17805+ a->h_path.mnt = au_br_mnt(br);
17806+ h_dir = au_pinned_h_dir(&a->pin);
17807+ switch (arg->type) {
17808+ case Creat:
17809+ err = 0;
17810+ if (!try_aopen || !h_dir->i_op->atomic_open)
17811+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
17812+ arg->u.c.want_excl);
17813+ else
17814+ err = vfsub_atomic_open(h_dir, a->h_path.dentry,
17815+ arg->u.c.aopen, br);
17816+ break;
17817+ case Symlink:
17818+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
17819+ break;
17820+ case Mknod:
17821+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
17822+ arg->u.m.dev);
17823+ break;
17824+ default:
17825+ BUG();
17826+ }
17827+ created = !err;
17828+ if (!err)
17829+ err = epilog(dir, btop, wh_dentry, dentry);
17830+
17831+ /* revert */
17832+ if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
17833+ /* no delegation since it is just created */
17834+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
17835+ /*force*/0);
17836+ if (rerr) {
17837+ AuIOErr("%pd revert failure(%d, %d)\n",
17838+ dentry, err, rerr);
17839+ err = -EIO;
17840+ }
17841+ au_dtime_revert(&a->dt);
17842+ }
17843+
17844+ if (!err && try_aopen && !h_dir->i_op->atomic_open)
17845+ *arg->u.c.aopen->opened |= FILE_CREATED;
17846+
17847+ au_unpin(&a->pin);
17848+ dput(wh_dentry);
17849+
17850+out_parent:
17851+ if (!try_aopen)
17852+ di_write_unlock(parent);
17853+out_unlock:
17854+ if (unlikely(err)) {
17855+ au_update_dbtop(dentry);
17856+ d_drop(dentry);
17857+ }
17858+ if (!try_aopen)
17859+ aufs_read_unlock(dentry, AuLock_DW);
17860+out_free:
17861+ au_delayed_kfree(a);
17862+out:
17863+ return err;
17864+}
17865+
17866+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
17867+ dev_t dev)
17868+{
17869+ struct simple_arg arg = {
17870+ .type = Mknod,
17871+ .u.m = {
17872+ .mode = mode,
17873+ .dev = dev
17874+ }
17875+ };
17876+ return add_simple(dir, dentry, &arg);
17877+}
17878+
17879+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
17880+{
17881+ struct simple_arg arg = {
17882+ .type = Symlink,
17883+ .u.s.symname = symname
17884+ };
17885+ return add_simple(dir, dentry, &arg);
17886+}
17887+
17888+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
17889+ bool want_excl)
17890+{
17891+ struct simple_arg arg = {
17892+ .type = Creat,
17893+ .u.c = {
17894+ .mode = mode,
17895+ .want_excl = want_excl
17896+ }
17897+ };
17898+ return add_simple(dir, dentry, &arg);
17899+}
17900+
17901+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
17902+ struct vfsub_aopen_args *aopen_args)
17903+{
17904+ struct simple_arg arg = {
17905+ .type = Creat,
17906+ .u.c = {
17907+ .mode = aopen_args->create_mode,
17908+ .want_excl = aopen_args->open_flag & O_EXCL,
17909+ .try_aopen = true,
17910+ .aopen = aopen_args
17911+ }
17912+ };
17913+ return add_simple(dir, dentry, &arg);
17914+}
17915+
17916+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
17917+{
17918+ int err;
17919+ aufs_bindex_t bindex;
17920+ struct super_block *sb;
17921+ struct dentry *parent, *h_parent, *h_dentry;
17922+ struct inode *h_dir, *inode;
17923+ struct vfsmount *h_mnt;
17924+ struct au_wr_dir_args wr_dir_args = {
17925+ .force_btgt = -1,
17926+ .flags = AuWrDir_TMPFILE
17927+ };
17928+
17929+ /* copy-up may happen */
17930+ inode_lock(dir);
17931+
17932+ sb = dir->i_sb;
17933+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
17934+ if (unlikely(err))
17935+ goto out;
17936+
17937+ err = au_di_init(dentry);
17938+ if (unlikely(err))
17939+ goto out_si;
17940+
17941+ err = -EBUSY;
17942+ parent = d_find_any_alias(dir);
17943+ AuDebugOn(!parent);
17944+ di_write_lock_parent(parent);
17945+ if (unlikely(d_inode(parent) != dir))
17946+ goto out_parent;
17947+
17948+ err = au_digen_test(parent, au_sigen(sb));
17949+ if (unlikely(err))
17950+ goto out_parent;
17951+
17952+ bindex = au_dbtop(parent);
17953+ au_set_dbtop(dentry, bindex);
17954+ au_set_dbbot(dentry, bindex);
17955+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
17956+ bindex = err;
17957+ if (unlikely(err < 0))
17958+ goto out_parent;
17959+
17960+ err = -EOPNOTSUPP;
17961+ h_dir = au_h_iptr(dir, bindex);
17962+ if (unlikely(!h_dir->i_op->tmpfile))
17963+ goto out_parent;
17964+
17965+ h_mnt = au_sbr_mnt(sb, bindex);
17966+ err = vfsub_mnt_want_write(h_mnt);
17967+ if (unlikely(err))
17968+ goto out_parent;
17969+
17970+ h_parent = au_h_dptr(parent, bindex);
17971+ err = inode_permission(d_inode(h_parent), MAY_WRITE | MAY_EXEC);
17972+ if (unlikely(err))
17973+ goto out_mnt;
17974+
17975+ err = -ENOMEM;
17976+ h_dentry = d_alloc(h_parent, &dentry->d_name);
17977+ if (unlikely(!h_dentry))
17978+ goto out_mnt;
17979+
17980+ err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode);
17981+ if (unlikely(err))
17982+ goto out_dentry;
17983+
17984+ au_set_dbtop(dentry, bindex);
17985+ au_set_dbbot(dentry, bindex);
17986+ au_set_h_dptr(dentry, bindex, dget(h_dentry));
17987+ inode = au_new_inode(dentry, /*must_new*/1);
17988+ if (IS_ERR(inode)) {
17989+ err = PTR_ERR(inode);
17990+ au_set_h_dptr(dentry, bindex, NULL);
17991+ au_set_dbtop(dentry, -1);
17992+ au_set_dbbot(dentry, -1);
17993+ } else {
17994+ if (!inode->i_nlink)
17995+ set_nlink(inode, 1);
17996+ d_tmpfile(dentry, inode);
17997+ au_di(dentry)->di_tmpfile = 1;
17998+
17999+ /* update without i_mutex */
18000+ if (au_ibtop(dir) == au_dbtop(dentry))
18001+ au_cpup_attr_timesizes(dir);
18002+ }
18003+
18004+out_dentry:
18005+ dput(h_dentry);
18006+out_mnt:
18007+ vfsub_mnt_drop_write(h_mnt);
18008+out_parent:
18009+ di_write_unlock(parent);
18010+ dput(parent);
18011+ di_write_unlock(dentry);
18012+ if (unlikely(err)) {
18013+ au_di_fin(dentry);
18014+ dentry->d_fsdata = NULL;
18015+ }
18016+out_si:
18017+ si_read_unlock(sb);
18018+out:
18019+ inode_unlock(dir);
18020+ return err;
18021+}
18022+
18023+/* ---------------------------------------------------------------------- */
18024+
18025+struct au_link_args {
18026+ aufs_bindex_t bdst, bsrc;
18027+ struct au_pin pin;
18028+ struct path h_path;
18029+ struct dentry *src_parent, *parent;
18030+};
18031+
18032+static int au_cpup_before_link(struct dentry *src_dentry,
18033+ struct au_link_args *a)
18034+{
18035+ int err;
18036+ struct dentry *h_src_dentry;
18037+ struct au_cp_generic cpg = {
18038+ .dentry = src_dentry,
18039+ .bdst = a->bdst,
18040+ .bsrc = a->bsrc,
18041+ .len = -1,
18042+ .pin = &a->pin,
18043+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
18044+ };
18045+
18046+ di_read_lock_parent(a->src_parent, AuLock_IR);
18047+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
18048+ if (unlikely(err))
18049+ goto out;
18050+
18051+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
18052+ err = au_pin(&a->pin, src_dentry, a->bdst,
18053+ au_opt_udba(src_dentry->d_sb),
18054+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
18055+ if (unlikely(err))
18056+ goto out;
18057+
18058+ err = au_sio_cpup_simple(&cpg);
18059+ au_unpin(&a->pin);
18060+
18061+out:
18062+ di_read_unlock(a->src_parent, AuLock_IR);
18063+ return err;
18064+}
18065+
18066+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
18067+ struct au_link_args *a)
18068+{
18069+ int err;
18070+ unsigned char plink;
18071+ aufs_bindex_t bbot;
18072+ struct dentry *h_src_dentry;
18073+ struct inode *h_inode, *inode, *delegated;
18074+ struct super_block *sb;
18075+ struct file *h_file;
18076+
18077+ plink = 0;
18078+ h_inode = NULL;
18079+ sb = src_dentry->d_sb;
18080+ inode = d_inode(src_dentry);
18081+ if (au_ibtop(inode) <= a->bdst)
18082+ h_inode = au_h_iptr(inode, a->bdst);
18083+ if (!h_inode || !h_inode->i_nlink) {
18084+ /* copyup src_dentry as the name of dentry. */
18085+ bbot = au_dbbot(dentry);
18086+ if (bbot < a->bsrc)
18087+ au_set_dbbot(dentry, a->bsrc);
18088+ au_set_h_dptr(dentry, a->bsrc,
18089+ dget(au_h_dptr(src_dentry, a->bsrc)));
18090+ dget(a->h_path.dentry);
18091+ au_set_h_dptr(dentry, a->bdst, NULL);
18092+ AuDbg("temporary d_inode...\n");
18093+ spin_lock(&dentry->d_lock);
18094+ dentry->d_inode = d_inode(src_dentry); /* tmp */
18095+ spin_unlock(&dentry->d_lock);
18096+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
18097+ if (IS_ERR(h_file))
18098+ err = PTR_ERR(h_file);
18099+ else {
18100+ struct au_cp_generic cpg = {
18101+ .dentry = dentry,
18102+ .bdst = a->bdst,
18103+ .bsrc = -1,
18104+ .len = -1,
18105+ .pin = &a->pin,
18106+ .flags = AuCpup_KEEPLINO
18107+ };
18108+ err = au_sio_cpup_simple(&cpg);
18109+ au_h_open_post(dentry, a->bsrc, h_file);
18110+ if (!err) {
18111+ dput(a->h_path.dentry);
18112+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
18113+ } else
18114+ au_set_h_dptr(dentry, a->bdst,
18115+ a->h_path.dentry);
18116+ }
18117+ spin_lock(&dentry->d_lock);
18118+ dentry->d_inode = NULL; /* restore */
18119+ spin_unlock(&dentry->d_lock);
18120+ AuDbg("temporary d_inode...done\n");
18121+ au_set_h_dptr(dentry, a->bsrc, NULL);
18122+ au_set_dbbot(dentry, bbot);
18123+ } else {
18124+ /* the inode of src_dentry already exists on a.bdst branch */
18125+ h_src_dentry = d_find_alias(h_inode);
18126+ if (!h_src_dentry && au_plink_test(inode)) {
18127+ plink = 1;
18128+ h_src_dentry = au_plink_lkup(inode, a->bdst);
18129+ err = PTR_ERR(h_src_dentry);
18130+ if (IS_ERR(h_src_dentry))
18131+ goto out;
18132+
18133+ if (unlikely(d_is_negative(h_src_dentry))) {
18134+ dput(h_src_dentry);
18135+ h_src_dentry = NULL;
18136+ }
18137+
18138+ }
18139+ if (h_src_dentry) {
18140+ delegated = NULL;
18141+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
18142+ &a->h_path, &delegated);
18143+ if (unlikely(err == -EWOULDBLOCK)) {
18144+ pr_warn("cannot retry for NFSv4 delegation"
18145+ " for an internal link\n");
18146+ iput(delegated);
18147+ }
18148+ dput(h_src_dentry);
18149+ } else {
18150+ AuIOErr("no dentry found for hi%lu on b%d\n",
18151+ h_inode->i_ino, a->bdst);
18152+ err = -EIO;
18153+ }
18154+ }
18155+
18156+ if (!err && !plink)
18157+ au_plink_append(inode, a->bdst, a->h_path.dentry);
18158+
18159+out:
18160+ AuTraceErr(err);
18161+ return err;
18162+}
18163+
18164+int aufs_link(struct dentry *src_dentry, struct inode *dir,
18165+ struct dentry *dentry)
18166+{
18167+ int err, rerr;
18168+ struct au_dtime dt;
18169+ struct au_link_args *a;
18170+ struct dentry *wh_dentry, *h_src_dentry;
18171+ struct inode *inode, *delegated;
18172+ struct super_block *sb;
18173+ struct au_wr_dir_args wr_dir_args = {
18174+ /* .force_btgt = -1, */
18175+ .flags = AuWrDir_ADD_ENTRY
18176+ };
18177+
18178+ IMustLock(dir);
18179+ inode = d_inode(src_dentry);
18180+ IMustLock(inode);
18181+
18182+ err = -ENOMEM;
18183+ a = kzalloc(sizeof(*a), GFP_NOFS);
18184+ if (unlikely(!a))
18185+ goto out;
18186+
18187+ a->parent = dentry->d_parent; /* dir inode is locked */
18188+ err = aufs_read_and_write_lock2(dentry, src_dentry,
18189+ AuLock_NOPLM | AuLock_GEN);
18190+ if (unlikely(err))
18191+ goto out_kfree;
18192+ err = au_d_linkable(src_dentry);
18193+ if (unlikely(err))
18194+ goto out_unlock;
18195+ err = au_d_may_add(dentry);
18196+ if (unlikely(err))
18197+ goto out_unlock;
18198+
18199+ a->src_parent = dget_parent(src_dentry);
18200+ wr_dir_args.force_btgt = au_ibtop(inode);
18201+
18202+ di_write_lock_parent(a->parent);
18203+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
18204+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
18205+ &wr_dir_args);
18206+ err = PTR_ERR(wh_dentry);
18207+ if (IS_ERR(wh_dentry))
18208+ goto out_parent;
18209+
18210+ err = 0;
18211+ sb = dentry->d_sb;
18212+ a->bdst = au_dbtop(dentry);
18213+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
18214+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
18215+ a->bsrc = au_ibtop(inode);
18216+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
18217+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
18218+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
18219+ if (!h_src_dentry) {
18220+ a->bsrc = au_dbtop(src_dentry);
18221+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
18222+ AuDebugOn(!h_src_dentry);
18223+ } else if (IS_ERR(h_src_dentry)) {
18224+ err = PTR_ERR(h_src_dentry);
18225+ goto out_parent;
18226+ }
18227+
18228+ /*
18229+ * aufs doesn't touch the credential so
18230+ * security_dentry_create_files_as() is unnecessary.
18231+ */
18232+ if (au_opt_test(au_mntflags(sb), PLINK)) {
18233+ if (a->bdst < a->bsrc
18234+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
18235+ err = au_cpup_or_link(src_dentry, dentry, a);
18236+ else {
18237+ delegated = NULL;
18238+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
18239+ &a->h_path, &delegated);
18240+ if (unlikely(err == -EWOULDBLOCK)) {
18241+ pr_warn("cannot retry for NFSv4 delegation"
18242+ " for an internal link\n");
18243+ iput(delegated);
18244+ }
18245+ }
18246+ dput(h_src_dentry);
18247+ } else {
18248+ /*
18249+ * copyup src_dentry to the branch we process,
18250+ * and then link(2) to it.
18251+ */
18252+ dput(h_src_dentry);
18253+ if (a->bdst < a->bsrc
18254+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
18255+ au_unpin(&a->pin);
18256+ di_write_unlock(a->parent);
18257+ err = au_cpup_before_link(src_dentry, a);
18258+ di_write_lock_parent(a->parent);
18259+ if (!err)
18260+ err = au_pin(&a->pin, dentry, a->bdst,
18261+ au_opt_udba(sb),
18262+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
18263+ if (unlikely(err))
18264+ goto out_wh;
18265+ }
18266+ if (!err) {
18267+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
18268+ err = -ENOENT;
18269+ if (h_src_dentry && d_is_positive(h_src_dentry)) {
18270+ delegated = NULL;
18271+ err = vfsub_link(h_src_dentry,
18272+ au_pinned_h_dir(&a->pin),
18273+ &a->h_path, &delegated);
18274+ if (unlikely(err == -EWOULDBLOCK)) {
18275+ pr_warn("cannot retry"
18276+ " for NFSv4 delegation"
18277+ " for an internal link\n");
18278+ iput(delegated);
18279+ }
18280+ }
18281+ }
18282+ }
18283+ if (unlikely(err))
18284+ goto out_unpin;
18285+
18286+ if (wh_dentry) {
18287+ a->h_path.dentry = wh_dentry;
18288+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
18289+ dentry);
18290+ if (unlikely(err))
18291+ goto out_revert;
18292+ }
18293+
18294+ au_dir_ts(dir, a->bdst);
18295+ dir->i_version++;
18296+ inc_nlink(inode);
18297+ inode->i_ctime = dir->i_ctime;
18298+ d_instantiate(dentry, au_igrab(inode));
18299+ if (d_unhashed(a->h_path.dentry))
18300+ /* some filesystem calls d_drop() */
18301+ d_drop(dentry);
18302+ /* some filesystems consume an inode even for a hardlink */
18303+ au_fhsm_wrote(sb, a->bdst, /*force*/0);
18304+ goto out_unpin; /* success */
18305+
18306+out_revert:
18307+ /* no delegation since it is just created */
18308+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
18309+ /*delegated*/NULL, /*force*/0);
18310+ if (unlikely(rerr)) {
18311+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
18312+ err = -EIO;
18313+ }
18314+ au_dtime_revert(&dt);
18315+out_unpin:
18316+ au_unpin(&a->pin);
18317+out_wh:
18318+ dput(wh_dentry);
18319+out_parent:
18320+ di_write_unlock(a->parent);
18321+ dput(a->src_parent);
18322+out_unlock:
18323+ if (unlikely(err)) {
18324+ au_update_dbtop(dentry);
18325+ d_drop(dentry);
18326+ }
18327+ aufs_read_and_write_unlock2(dentry, src_dentry);
18328+out_kfree:
18329+ au_delayed_kfree(a);
18330+out:
18331+ AuTraceErr(err);
18332+ return err;
18333+}
18334+/* create a dir on the top branch of 'dentry', make it opaque when required; reverts on failure */
18335+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
18336+{
18337+ int err, rerr;
18338+ aufs_bindex_t bindex;
18339+ unsigned char diropq;
18340+ struct path h_path;
18341+ struct dentry *wh_dentry, *parent, *opq_dentry;
18342+ struct inode *h_inode;
18343+ struct super_block *sb;
18344+ struct {
18345+ struct au_pin pin;
18346+ struct au_dtime dt;
18347+ } *a; /* heap-allocated to reduce the stack usage */
18348+ struct au_wr_dir_args wr_dir_args = {
18349+ .force_btgt = -1,
18350+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
18351+ };
18352+
18353+ IMustLock(dir);
18354+
18355+ err = -ENOMEM;
18356+ a = kmalloc(sizeof(*a), GFP_NOFS);
18357+ if (unlikely(!a))
18358+ goto out;
18359+
18360+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
18361+ if (unlikely(err))
18362+ goto out_free;
18363+ err = au_d_may_add(dentry);
18364+ if (unlikely(err))
18365+ goto out_unlock;
18366+
18367+ parent = dentry->d_parent; /* dir inode is locked */
18368+ di_write_lock_parent(parent);
18369+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
18370+ &a->pin, &wr_dir_args);
18371+ err = PTR_ERR(wh_dentry);
18372+ if (IS_ERR(wh_dentry))
18373+ goto out_parent;
18374+
18375+ sb = dentry->d_sb;
18376+ bindex = au_dbtop(dentry);
18377+ h_path.dentry = au_h_dptr(dentry, bindex);
18378+ h_path.mnt = au_sbr_mnt(sb, bindex);
18379+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
18380+ if (unlikely(err))
18381+ goto out_unpin;
18382+
18383+ /* make the dir opaque when wh_dentry is set or ALWAYS_DIROPQ is on */
18384+ diropq = 0;
18385+ h_inode = d_inode(h_path.dentry);
18386+ if (wh_dentry
18387+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
18388+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
18389+ opq_dentry = au_diropq_create(dentry, bindex);
18390+ inode_unlock(h_inode);
18391+ err = PTR_ERR(opq_dentry);
18392+ if (IS_ERR(opq_dentry))
18393+ goto out_dir;
18394+ dput(opq_dentry);
18395+ diropq = 1; /* remembered so that the revert path can undo it */
18396+ }
18397+
18398+ err = epilog(dir, bindex, wh_dentry, dentry);
18399+ if (!err) {
18400+ inc_nlink(dir);
18401+ goto out_unpin; /* success */
18402+ }
18403+
18404+ /* epilog() failed: revert the opaque mark first, then the dir itself */
18405+ if (diropq) {
18406+ AuLabel(revert opq);
18407+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
18408+ rerr = au_diropq_remove(dentry, bindex);
18409+ inode_unlock(h_inode);
18410+ if (rerr) {
18411+ AuIOErr("%pd reverting diropq failed(%d, %d)\n",
18412+ dentry, err, rerr);
18413+ err = -EIO;
18414+ }
18415+ }
18416+
18417+out_dir:
18418+ AuLabel(revert dir);
18419+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path); /* remove the dir created above */
18420+ if (rerr) {
18421+ AuIOErr("%pd reverting dir failed(%d, %d)\n",
18422+ dentry, err, rerr);
18423+ err = -EIO;
18424+ }
18425+ au_dtime_revert(&a->dt);
18426+out_unpin:
18427+ au_unpin(&a->pin);
18428+ dput(wh_dentry);
18429+out_parent:
18430+ di_write_unlock(parent);
18431+out_unlock:
18432+ if (unlikely(err)) { /* on any failure, refresh dbtop and unhash the dentry */
18433+ au_update_dbtop(dentry);
18434+ d_drop(dentry);
18435+ }
18436+ aufs_read_unlock(dentry, AuLock_DW);
18437+out_free:
18438+ au_delayed_kfree(a);
18439+out:
18440+ return err;
18441+}
18442diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
18443--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
18444+++ linux/fs/aufs/i_op.c 2016-12-17 12:28:17.595211562 +0100
18445@@ -0,0 +1,1444 @@
18446+/*
18447+ * Copyright (C) 2005-2016 Junjiro R. Okajima
18448+ *
18449+ * This program, aufs is free software; you can redistribute it and/or modify
18450+ * it under the terms of the GNU General Public License as published by
18451+ * the Free Software Foundation; either version 2 of the License, or
18452+ * (at your option) any later version.
18453+ *
18454+ * This program is distributed in the hope that it will be useful,
18455+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18456+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18457+ * GNU General Public License for more details.
18458+ *
18459+ * You should have received a copy of the GNU General Public License
18460+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
18461+ */
18462+
18463+/*
18464+ * inode operations (except add/del/rename)
18465+ */
18466+
18467+#include <linux/device_cgroup.h>
18468+#include <linux/fs_stack.h>
18469+#include <linux/namei.h>
18470+#include <linux/security.h>
18471+#include "aufs.h"
18472+/* test 'mask' against the lower inode 'h_inode'; returns 0 when allowed, otherwise an error */
18473+static int h_permission(struct inode *h_inode, int mask,
18474+ struct path *h_path, int brperm)
18475+{
18476+ int err;
18477+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
18478+
18479+ err = -EPERM; /* writing to an immutable inode is refused */
18480+ if (write_mask && IS_IMMUTABLE(h_inode))
18481+ goto out;
18482+
18483+ err = -EACCES; /* exec of a regular file needs an x bit and a path that is not noexec */
18484+ if (((mask & MAY_EXEC)
18485+ && S_ISREG(h_inode->i_mode)
18486+ && (path_noexec(h_path)
18487+ || !(h_inode->i_mode & S_IXUGO))))
18488+ goto out;
18489+
18490+ /*
18491+ * - skip the lower fs test in the case of write to ro branch.
18492+ * - nfs dir permission write check is optimized, but a policy for
18493+ * link/rename requires a real check.
18494+ * - nfs always sets MS_POSIXACL regardless of its mount option 'noacl'.
18495+ * in this case, generic_permission() returns -EOPNOTSUPP.
18496+ */
18497+ if ((write_mask && !au_br_writable(brperm))
18498+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
18499+ && write_mask && !(mask & MAY_READ))
18500+ || !h_inode->i_op->permission) {
18501+ /* AuLabel(generic_permission); */
18502+ /* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
18503+ err = generic_permission(h_inode, mask);
18504+ if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
18505+ err = h_inode->i_op->permission(h_inode, mask);
18506+ AuTraceErr(err);
18507+ } else {
18508+ /* AuLabel(h_inode->permission); */
18509+ err = h_inode->i_op->permission(h_inode, mask);
18510+ AuTraceErr(err);
18511+ }
18512+
18513+ if (!err) /* the device cgroup and the LSM are consulted only after the fs allowed it */
18514+ err = devcgroup_inode_permission(h_inode, mask);
18515+ if (!err)
18516+ err = security_inode_permission(h_inode, mask);
18517+
18518+#if 0
18519+ if (!err) {
18520+ /* todo: do we need to call ima_path_check()? */
18521+ struct path h_path = {
18522+ .dentry =
18523+ .mnt = h_mnt
18524+ };
18525+ err = ima_path_check(&h_path,
18526+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
18527+ IMA_COUNT_LEAVE);
18528+ }
18529+#endif
18530+
18531+out:
18532+ return err;
18533+}
18534+/* check 'mask' on the lower inode(s) behind the aufs inode; -ECHILD for MAY_NOT_BLOCK callers */
18535+static int aufs_permission(struct inode *inode, int mask)
18536+{
18537+ int err;
18538+ aufs_bindex_t bindex, bbot;
18539+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
18540+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
18541+ struct inode *h_inode;
18542+ struct super_block *sb;
18543+ struct au_branch *br;
18544+
18545+ /* todo: support rcu-walk? */
18546+ if (mask & MAY_NOT_BLOCK)
18547+ return -ECHILD;
18548+
18549+ sb = inode->i_sb;
18550+ si_read_lock(sb, AuLock_FLUSH);
18551+ ii_read_lock_child(inode);
18552+#if 0
18553+ err = au_iigen_test(inode, au_sigen(sb));
18554+ if (unlikely(err))
18555+ goto out;
18556+#endif
18557+
18558+ if (!isdir
18559+ || write_mask
18560+ || au_opt_test(au_mntflags(sb), DIRPERM1)) { /* only the top lower inode is tested */
18561+ err = au_busy_or_stale(); /* returned when the top lower inode is missing or its type differs */
18562+ h_inode = au_h_iptr(inode, au_ibtop(inode));
18563+ if (unlikely(!h_inode
18564+ || (h_inode->i_mode & S_IFMT)
18565+ != (inode->i_mode & S_IFMT)))
18566+ goto out;
18567+
18568+ err = 0;
18569+ bindex = au_ibtop(inode);
18570+ br = au_sbr(sb, bindex);
18571+ err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
18572+ if (write_mask
18573+ && !err
18574+ && !special_file(h_inode->i_mode)) {
18575+ /* -EROFS unless a non-readonly branch exists at bindex or above it */
18576+ err = -EROFS;
18577+ for (; bindex >= 0; bindex--)
18578+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
18579+ err = 0;
18580+ break;
18581+ }
18582+ }
18583+ goto out;
18584+ }
18585+
18586+ /* non-write to dir: test every lower dir from top to bottom, stop at the first error */
18587+ err = 0;
18588+ bbot = au_ibbot(inode);
18589+ for (bindex = au_ibtop(inode); !err && bindex <= bbot; bindex++) {
18590+ h_inode = au_h_iptr(inode, bindex);
18591+ if (h_inode) {
18592+ err = au_busy_or_stale(); /* returned when a lower inode is not a dir */
18593+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
18594+ break;
18595+
18596+ br = au_sbr(sb, bindex);
18597+ err = h_permission(h_inode, mask, &br->br_path,
18598+ br->br_perm);
18599+ }
18600+ }
18601+
18602+out:
18603+ ii_read_unlock(inode);
18604+ si_read_unlock(sb);
18605+ return err;
18606+}
18607+
18608+/* ---------------------------------------------------------------------- */
18609+/* look 'dentry' up on the branches and splice the aufs inode (NULL when negative) into it */
18610+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
18611+ unsigned int flags)
18612+{
18613+ struct dentry *ret, *parent;
18614+ struct inode *inode;
18615+ struct super_block *sb;
18616+ int err, npositive;
18617+
18618+ IMustLock(dir);
18619+
18620+ /* todo: support rcu-walk? */
18621+ ret = ERR_PTR(-ECHILD);
18622+ if (flags & LOOKUP_RCU)
18623+ goto out;
18624+
18625+ ret = ERR_PTR(-ENAMETOOLONG);
18626+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
18627+ goto out;
18628+
18629+ sb = dir->i_sb;
18630+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18631+ ret = ERR_PTR(err);
18632+ if (unlikely(err))
18633+ goto out;
18634+
18635+ err = au_di_init(dentry);
18636+ ret = ERR_PTR(err);
18637+ if (unlikely(err))
18638+ goto out_si;
18639+
18640+ inode = NULL;
18641+ npositive = 0; /* suppress a warning */
18642+ parent = dentry->d_parent; /* dir inode is locked */
18643+ di_read_lock_parent(parent, AuLock_IR);
18644+ err = au_alive_dir(parent);
18645+ if (!err)
18646+ err = au_digen_test(parent, au_sigen(sb));
18647+ if (!err) {
18648+ /* regardless of LOOKUP_CREATE, always ALLOW_NEG */
18649+ npositive = au_lkup_dentry(dentry, au_dbtop(parent),
18650+ AuLkup_ALLOW_NEG);
18651+ err = npositive;
18652+ }
18653+ di_read_unlock(parent, AuLock_IR);
18654+ ret = ERR_PTR(err);
18655+ if (unlikely(err < 0))
18656+ goto out_unlock;
18657+
18658+ if (npositive) { /* an aufs inode is needed only when au_lkup_dentry() returned non-zero */
18659+ inode = au_new_inode(dentry, /*must_new*/0);
18660+ if (IS_ERR(inode)) {
18661+ ret = (void *)inode;
18662+ inode = NULL;
18663+ goto out_unlock;
18664+ }
18665+ }
18666+
18667+ if (inode)
18668+ atomic_inc(&inode->i_count); /* extra reference, dropped by iput() below */
18669+ ret = d_splice_alias(inode, dentry);
18670+#if 0
18671+ if (unlikely(d_need_lookup(dentry))) {
18672+ spin_lock(&dentry->d_lock);
18673+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
18674+ spin_unlock(&dentry->d_lock);
18675+ } else
18676+#endif
18677+ if (inode) {
18678+ if (!IS_ERR(ret)) {
18679+ iput(inode);
18680+ if (ret && ret != dentry) /* another dentry was returned instead of ours */
18681+ ii_write_unlock(inode);
18682+ } else {
18683+ ii_write_unlock(inode);
18684+ iput(inode);
18685+ inode = NULL;
18686+ }
18687+ }
18688+
18689+out_unlock:
18690+ di_write_unlock(dentry);
18691+out_si:
18692+ si_read_unlock(sb);
18693+out:
18694+ return ret;
18695+}
18696+
18697+/* ---------------------------------------------------------------------- */
18698+/* pairs an aufs file with the lower file opened by aufs_atomic_open(); linked into si_aopen */
18699+struct aopen_node {
18700+ struct hlist_node hlist;
18701+ struct file *file, *h_file;
18702+};
18703+/* finish_open() callback: open 'file', handing over the lower file registered for it in si_aopen, if any */
18704+static int au_do_aopen(struct inode *inode, struct file *file)
18705+{
18706+ struct au_sphlhead *aopen;
18707+ struct aopen_node *node;
18708+ struct au_do_open_args args = {
18709+ .no_lock = 1,
18710+ .open = au_do_open_nondir
18711+ };
18712+
18713+ aopen = &au_sbi(inode->i_sb)->si_aopen;
18714+ spin_lock(&aopen->spin);
18715+ hlist_for_each_entry(node, &aopen->head, hlist)
18716+ if (node->file == file) {
18717+ args.h_file = node->h_file; /* may be NULL, see the disabled check below */
18718+ break;
18719+ }
18720+ spin_unlock(&aopen->spin);
18721+ /* AuDebugOn(!args.h_file); */
18722+
18723+ return au_do_open(file, &args);
18724+}
18725+/* lookup plus create/open in one step; uses finish_no_open() unless a new file has to be created */
18726+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
18727+ struct file *file, unsigned int open_flag,
18728+ umode_t create_mode, int *opened)
18729+{
18730+ int err, h_opened = *opened;
18731+ unsigned int lkup_flags;
18732+ struct dentry *parent, *d;
18733+ struct au_sphlhead *aopen;
18734+ struct vfsub_aopen_args args = {
18735+ .open_flag = open_flag,
18736+ .create_mode = create_mode,
18737+ .opened = &h_opened
18738+ };
18739+ struct aopen_node aopen_node = {
18740+ .file = file
18741+ };
18742+
18743+ IMustLock(dir);
18744+ AuDbg("open_flag 0%o\n", open_flag);
18745+ AuDbgDentry(dentry);
18746+
18747+ err = 0;
18748+ if (!au_di(dentry)) { /* no aufs dentry info yet: do the lookup here */
18749+ lkup_flags = LOOKUP_OPEN;
18750+ if (open_flag & O_CREAT)
18751+ lkup_flags |= LOOKUP_CREATE;
18752+ d = aufs_lookup(dir, dentry, lkup_flags);
18753+ if (IS_ERR(d)) {
18754+ err = PTR_ERR(d);
18755+ AuTraceErr(err);
18756+ goto out;
18757+ } else if (d) {
18758+ /*
18759+ * obsoleted dentry found.
18760+ * another error will be returned later.
18761+ */
18762+ d_drop(d);
18763+ AuDbgDentry(d);
18764+ dput(d);
18765+ }
18766+ AuDbgDentry(dentry);
18767+ }
18768+
18769+ if (d_is_positive(dentry)
18770+ || d_unhashed(dentry)
18771+ || d_unlinked(dentry)
18772+ || !(open_flag & O_CREAT))
18773+ goto out_no_open; /* nothing to create here */
18774+
18775+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
18776+ if (unlikely(err))
18777+ goto out;
18778+
18779+ parent = dentry->d_parent; /* dir is locked */
18780+ di_write_lock_parent(parent);
18781+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
18782+ if (unlikely(err))
18783+ goto out_unlock;
18784+
18785+ AuDbgDentry(dentry);
18786+ if (d_is_positive(dentry))
18787+ goto out_unlock;
18788+
18789+ args.file = get_empty_filp(); /* becomes the lower file if the lower fs opens it */
18790+ err = PTR_ERR(args.file);
18791+ if (IS_ERR(args.file))
18792+ goto out_unlock;
18793+
18794+ args.file->f_flags = file->f_flags;
18795+ err = au_aopen_or_create(dir, dentry, &args);
18796+ AuTraceErr(err);
18797+ AuDbgFile(args.file);
18798+ if (unlikely(err < 0)) {
18799+ if (h_opened & FILE_OPENED)
18800+ fput(args.file);
18801+ else
18802+ put_filp(args.file);
18803+ goto out_unlock;
18804+ }
18805+
18806+ /* some filesystems don't set FILE_CREATED even on success? */
18807+ *opened |= FILE_CREATED;
18808+ if (h_opened & FILE_OPENED)
18809+ aopen_node.h_file = args.file;
18810+ else {
18811+ put_filp(args.file);
18812+ args.file = NULL;
18813+ }
18814+ aopen = &au_sbi(dir->i_sb)->si_aopen;
18815+ au_sphl_add(&aopen_node.hlist, aopen); /* listed only while finish_open() runs au_do_aopen() */
18816+ err = finish_open(file, dentry, au_do_aopen, opened);
18817+ au_sphl_del(&aopen_node.hlist, aopen);
18818+ AuTraceErr(err);
18819+ AuDbgFile(file);
18820+ if (aopen_node.h_file)
18821+ fput(aopen_node.h_file);
18822+
18823+out_unlock:
18824+ di_write_unlock(parent);
18825+ aufs_read_unlock(dentry, AuLock_DW);
18826+ AuDbgDentry(dentry);
18827+ if (unlikely(err < 0))
18828+ goto out;
18829+out_no_open:
18830+ if (err >= 0 && !(*opened & FILE_CREATED)) {
18831+ AuLabel(out_no_open);
18832+ dget(dentry); /* extra reference taken before handing the dentry to finish_no_open() */
18833+ err = finish_no_open(file, dentry);
18834+ }
18835+out:
18836+ AuDbg("%pd%s%s\n", dentry,
18837+ (*opened & FILE_CREATED) ? " created" : "",
18838+ (*opened & FILE_OPENED) ? " opened" : "");
18839+ AuTraceErr(err);
18840+ return err;
18841+}
18842+
18843+
18844+/* ---------------------------------------------------------------------- */
18845+/* make 'parent' exist on branch 'bcpup' and, for add_entry, look up the negative lower dentry; returns bcpup or an error */
18846+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
18847+ const unsigned char add_entry, aufs_bindex_t bcpup,
18848+ aufs_bindex_t btop)
18849+{
18850+ int err;
18851+ struct dentry *h_parent;
18852+ struct inode *h_dir;
18853+
18854+ if (add_entry)
18855+ IMustLock(d_inode(parent));
18856+ else
18857+ di_write_lock_parent(parent); /* released before returning, see below */
18858+
18859+ err = 0;
18860+ if (!au_h_dptr(parent, bcpup)) { /* parent has no lower dentry on bcpup yet */
18861+ if (btop > bcpup)
18862+ err = au_cpup_dirs(dentry, bcpup);
18863+ else if (btop < bcpup)
18864+ err = au_cpdown_dirs(dentry, bcpup);
18865+ else
18866+ BUG();
18867+ }
18868+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
18869+ h_parent = au_h_dptr(parent, bcpup);
18870+ h_dir = d_inode(h_parent);
18871+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
18872+ err = au_lkup_neg(dentry, bcpup, /*wh*/0);
18873+ /* todo: no unlock here */
18874+ inode_unlock(h_dir);
18875+
18876+ AuDbg("bcpup %d\n", bcpup);
18877+ if (!err) {
18878+ if (d_really_is_negative(dentry))
18879+ au_set_h_dptr(dentry, btop, NULL); /* forget the lower dentry on the old top branch */
18880+ au_update_dbrange(dentry, /*do_put_zero*/0);
18881+ }
18882+ }
18883+
18884+ if (!add_entry)
18885+ di_write_unlock(parent);
18886+ if (!err)
18887+ err = bcpup; /* success */
18888+
18889+ AuTraceErr(err);
18890+ return err;
18891+}
18892+
18893+/*
18894+ * decide the branch and the parent dir where a new entry will be created.
18895+ * returns the chosen branch index, or a negative error.
18896+ * copies the parent dir up (or down) to that branch if needed.
18897+ */
18898+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
18899+ struct au_wr_dir_args *args)
18900+{
18901+ int err;
18902+ unsigned int flags;
18903+ aufs_bindex_t bcpup, btop, src_btop;
18904+ const unsigned char add_entry
18905+ = au_ftest_wrdir(args->flags, ADD_ENTRY)
18906+ | au_ftest_wrdir(args->flags, TMPFILE);
18907+ struct super_block *sb;
18908+ struct dentry *parent;
18909+ struct au_sbinfo *sbinfo;
18910+
18911+ sb = dentry->d_sb;
18912+ sbinfo = au_sbi(sb);
18913+ parent = dget_parent(dentry);
18914+ btop = au_dbtop(dentry);
18915+ bcpup = btop;
18916+ if (args->force_btgt < 0) { /* the caller did not force a target branch */
18917+ if (src_dentry) {
18918+ src_btop = au_dbtop(src_dentry);
18919+ if (src_btop < btop)
18920+ bcpup = src_btop;
18921+ } else if (add_entry) {
18922+ flags = 0;
18923+ if (au_ftest_wrdir(args->flags, ISDIR))
18924+ au_fset_wbr(flags, DIR);
18925+ err = AuWbrCreate(sbinfo, dentry, flags);
18926+ bcpup = err;
18927+ }
18928+
18929+ if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) { /* no usable branch yet: fall back to AuWbrCopyup() */
18930+ if (add_entry)
18931+ err = AuWbrCopyup(sbinfo, dentry);
18932+ else {
18933+ if (!IS_ROOT(dentry)) {
18934+ di_read_lock_parent(parent, !AuLock_IR);
18935+ err = AuWbrCopyup(sbinfo, dentry);
18936+ di_read_unlock(parent, !AuLock_IR);
18937+ } else
18938+ err = AuWbrCopyup(sbinfo, dentry);
18939+ }
18940+ bcpup = err;
18941+ if (unlikely(err < 0))
18942+ goto out;
18943+ }
18944+ } else {
18945+ bcpup = args->force_btgt;
18946+ AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
18947+ }
18948+
18949+ AuDbg("btop %d, bcpup %d\n", btop, bcpup);
18950+ err = bcpup;
18951+ if (bcpup == btop)
18952+ goto out; /* success, nothing to copy */
18953+
18954+ /* copyup the new parent into the branch we process */
18955+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, btop);
18956+ if (err >= 0) {
18957+ if (d_really_is_negative(dentry)) { /* a negative dentry now lives on bcpup only */
18958+ au_set_h_dptr(dentry, btop, NULL);
18959+ au_set_dbtop(dentry, bcpup);
18960+ au_set_dbbot(dentry, bcpup);
18961+ }
18962+ AuDebugOn(add_entry
18963+ && !au_ftest_wrdir(args->flags, TMPFILE)
18964+ && !au_h_dptr(dentry, bcpup));
18965+ }
18966+
18967+out:
18968+ dput(parent);
18969+ return err;
18970+}
18971+
18972+/* ---------------------------------------------------------------------- */
18973+/* unlock the pinned lower parent dir inode; a no-op when none is pinned */
18974+void au_pin_hdir_unlock(struct au_pin *p)
18975+{
18976+ if (p->hdir)
18977+ au_hn_inode_unlock(p->hdir);
18978+}
18979+/* lock the pinned lower parent dir, then verify it still matches h_parent (and h_dentry, if set) */
18980+int au_pin_hdir_lock(struct au_pin *p)
18981+{
18982+ int err;
18983+
18984+ err = 0;
18985+ if (!p->hdir)
18986+ goto out;
18987+
18988+ /* even if an error happens later, keep this lock */
18989+ au_hn_inode_lock_nested(p->hdir, p->lsc_hi);
18990+
18991+ err = -EBUSY; /* the locked inode is no longer the inode of h_parent */
18992+ if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
18993+ goto out;
18994+
18995+ err = 0;
18996+ if (p->h_dentry)
18997+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
18998+ p->h_parent, p->br);
18999+
19000+out:
19001+ return err;
19002+}
19003+/* re-take the lower parent dir lock; non-zero if that fails or h_dentry/h_parent has no link left */
19004+int au_pin_hdir_relock(struct au_pin *p)
19005+{
19006+ int err, i;
19007+ struct inode *h_i;
19008+ struct dentry *h_d[] = {
19009+ p->h_dentry,
19010+ p->h_parent
19011+ };
19012+
19013+ err = au_pin_hdir_lock(p);
19014+ if (unlikely(err))
19015+ goto out;
19016+
19017+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
19018+ if (!h_d[i])
19019+ continue;
19020+ if (d_is_positive(h_d[i])) {
19021+ h_i = d_inode(h_d[i]);
19022+ err = !h_i->i_nlink; /* note: 1, not a negative errno, when i_nlink is zero */
19023+ }
19024+ }
19025+
19026+out:
19027+ return err;
19028+}
19029+/* store 'task' as the owner of the lower dir's i_rwsem; compiled in only for the configs tested below */
19030+static void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
19031+{
19032+#if !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) && defined(CONFIG_RWSEM_SPIN_ON_OWNER)
19033+ p->hdir->hi_inode->i_rwsem.owner = task;
19034+#endif
19035+}
19036+/* annotate the lower dir's i_rwsem as acquired by the current task and make it the owner */
19037+void au_pin_hdir_acquire_nest(struct au_pin *p)
19038+{
19039+ if (p->hdir) {
19040+ rwsem_acquire_nest(&p->hdir->hi_inode->i_rwsem.dep_map,
19041+ p->lsc_hi, 0, NULL, _RET_IP_);
19042+ au_pin_hdir_set_owner(p, current);
19043+ }
19044+}
19045+/* set the i_rwsem owner to p->task and release the lock annotation held by the caller */
19046+void au_pin_hdir_release(struct au_pin *p)
19047+{
19048+ if (p->hdir) {
19049+ au_pin_hdir_set_owner(p, p->task);
19050+ rwsem_release(&p->hdir->hi_inode->i_rwsem.dep_map, 1, _RET_IP_);
19051+ }
19052+}
19053+/* lower dentry of the pinned parent on the pinned branch, or NULL when nothing is pinned */
19054+struct dentry *au_pinned_h_parent(struct au_pin *pin)
19055+{
19056+ if (pin && pin->parent)
19057+ return au_h_dptr(pin->parent, pin->bindex);
19058+ return NULL;
19059+}
19060+/* undo au_do_pin(): unlock the lower dir, drop mnt write access, and put the references taken */
19061+void au_unpin(struct au_pin *p)
19062+{
19063+ if (p->hdir)
19064+ au_pin_hdir_unlock(p);
19065+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
19066+ vfsub_mnt_drop_write(p->h_mnt);
19067+ if (!p->hdir) /* no parent was pinned (e.g. the IS_ROOT path of au_do_pin()) */
19068+ return;
19069+
19070+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19071+ di_read_unlock(p->parent, AuLock_IR);
19072+ iput(p->hdir->hi_inode);
19073+ dput(p->parent);
19074+ p->parent = NULL;
19075+ p->hdir = NULL;
19076+ p->h_mnt = NULL;
19077+ /* do not clear p->task */
19078+}
19079+/* pin the parent of p->dentry on branch p->bindex: lock its lower dir, optionally get mnt write access */
19080+int au_do_pin(struct au_pin *p)
19081+{
19082+ int err;
19083+ struct super_block *sb;
19084+ struct inode *h_dir;
19085+
19086+ err = 0;
19087+ sb = p->dentry->d_sb;
19088+ p->br = au_sbr(sb, p->bindex);
19089+ if (IS_ROOT(p->dentry)) { /* no parent is pinned for the root; only mnt write access may be taken */
19090+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
19091+ p->h_mnt = au_br_mnt(p->br);
19092+ err = vfsub_mnt_want_write(p->h_mnt);
19093+ if (unlikely(err)) {
19094+ au_fclr_pin(p->flags, MNT_WRITE);
19095+ goto out_err;
19096+ }
19097+ }
19098+ goto out;
19099+ }
19100+
19101+ p->h_dentry = NULL;
19102+ if (p->bindex <= au_dbbot(p->dentry))
19103+ p->h_dentry = au_h_dptr(p->dentry, p->bindex);
19104+
19105+ p->parent = dget_parent(p->dentry);
19106+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19107+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
19108+
19109+ h_dir = NULL;
19110+ p->h_parent = au_h_dptr(p->parent, p->bindex);
19111+ p->hdir = au_hi(d_inode(p->parent), p->bindex);
19112+ if (p->hdir)
19113+ h_dir = p->hdir->hi_inode;
19114+
19115+ /*
19116+ * udba case, or
19117+ * if DI_LOCKED is not set, then p->parent may be different
19118+ * and h_parent can be NULL.
19119+ */
19120+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
19121+ err = -EBUSY;
19122+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19123+ di_read_unlock(p->parent, AuLock_IR);
19124+ dput(p->parent);
19125+ p->parent = NULL;
19126+ goto out_err;
19127+ }
19128+
19129+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
19130+ p->h_mnt = au_br_mnt(p->br);
19131+ err = vfsub_mnt_want_write(p->h_mnt);
19132+ if (unlikely(err)) {
19133+ au_fclr_pin(p->flags, MNT_WRITE);
19134+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19135+ di_read_unlock(p->parent, AuLock_IR);
19136+ dput(p->parent);
19137+ p->parent = NULL;
19138+ goto out_err;
19139+ }
19140+ }
19141+
19142+ au_igrab(h_dir); /* paired with the iput() in au_unpin() */
19143+ err = au_pin_hdir_lock(p);
19144+ if (!err)
19145+ goto out; /* success */
19146+
19147+ au_unpin(p);
19148+
19149+out_err:
19150+ pr_err("err %d\n", err);
19151+ err = au_busy_or_stale(); /* the logged error is replaced by this value */
19152+out:
19153+ return err;
19154+}
19155+/* fill 'p' from the arguments, clear the fields that au_do_pin() sets, and record the current task */
19156+void au_pin_init(struct au_pin *p, struct dentry *dentry,
19157+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
19158+ unsigned int udba, unsigned char flags)
19159+{
19160+ p->dentry = dentry;
19161+ p->udba = udba;
19162+ p->lsc_di = lsc_di;
19163+ p->lsc_hi = lsc_hi;
19164+ p->flags = flags;
19165+ p->bindex = bindex;
19166+
19167+ p->parent = NULL;
19168+ p->hdir = NULL;
19169+ p->h_mnt = NULL;
19170+
19171+ p->h_dentry = NULL;
19172+ p->h_parent = NULL;
19173+ p->br = NULL;
19174+ p->task = current;
19175+}
19176+/* au_pin_init() with the AuLsc_DI_PARENT/AuLsc_I_PARENT2 lock subclasses, followed by au_do_pin() */
19177+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
19178+ unsigned int udba, unsigned char flags)
19179+{
19180+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
19181+ udba, flags);
19182+ return au_do_pin(pin);
19183+}
19184+
19185+/* ---------------------------------------------------------------------- */
19186+
19187+/*
19188+ * ->setattr() and ->getattr() are called in various cases.
19189+ * chmod, stat: dentry is revalidated.
19190+ * fchmod, fstat: neither the file nor the dentry is revalidated, and
19191+ * they may be unhashed.
19192+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
19193+ */
19194+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
19195+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
19196+{
19197+ int err;
19198+ struct dentry *parent;
19199+
19200+ err = 0;
19201+ if (au_digen_test(dentry, sigen)) { /* refresh only when the test against sigen is non-zero */
19202+ parent = dget_parent(dentry);
19203+ di_read_lock_parent(parent, AuLock_IR);
19204+ err = au_refresh_dentry(dentry, parent);
19205+ di_read_unlock(parent, AuLock_IR);
19206+ dput(parent);
19207+ }
19208+
19209+ AuTraceErr(err);
19210+ return err;
19211+}
19212+/* pick the target branch, pin the parent and copy the inode up if needed; on success a->h_inode is locked */
19213+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
19214+ struct au_icpup_args *a)
19215+{
19216+ int err;
19217+ loff_t sz;
19218+ aufs_bindex_t btop, ibtop;
19219+ struct dentry *hi_wh, *parent;
19220+ struct inode *inode;
19221+ struct au_wr_dir_args wr_dir_args = {
19222+ .force_btgt = -1,
19223+ .flags = 0
19224+ };
19225+
19226+ if (d_is_dir(dentry))
19227+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
19228+ /* plink or hi_wh() case */
19229+ btop = au_dbtop(dentry);
19230+ inode = d_inode(dentry);
19231+ ibtop = au_ibtop(inode);
19232+ if (btop != ibtop && !au_test_ro(inode->i_sb, ibtop, inode))
19233+ wr_dir_args.force_btgt = ibtop;
19234+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
19235+ if (unlikely(err < 0))
19236+ goto out;
19237+ a->btgt = err;
19238+ if (err != btop) /* the target differs from the current top branch */
19239+ au_fset_icpup(a->flags, DID_CPUP);
19240+
19241+ err = 0;
19242+ a->pin_flags = AuPin_MNT_WRITE;
19243+ parent = NULL;
19244+ if (!IS_ROOT(dentry)) {
19245+ au_fset_pin(a->pin_flags, DI_LOCKED);
19246+ parent = dget_parent(dentry);
19247+ di_write_lock_parent(parent);
19248+ }
19249+
19250+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
19251+ if (unlikely(err))
19252+ goto out_parent;
19253+
19254+ sz = -1; /* passed as the copy-up length; stays -1 unless the file is being shrunk */
19255+ a->h_path.dentry = au_h_dptr(dentry, btop);
19256+ a->h_inode = d_inode(a->h_path.dentry);
19257+ if (ia && (ia->ia_valid & ATTR_SIZE)) {
19258+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
19259+ if (ia->ia_size < i_size_read(a->h_inode))
19260+ sz = ia->ia_size;
19261+ inode_unlock(a->h_inode);
19262+ }
19263+
19264+ hi_wh = NULL;
19265+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
19266+ hi_wh = au_hi_wh(inode, a->btgt);
19267+ if (!hi_wh) {
19268+ struct au_cp_generic cpg = {
19269+ .dentry = dentry,
19270+ .bdst = a->btgt,
19271+ .bsrc = -1,
19272+ .len = sz,
19273+ .pin = &a->pin
19274+ };
19275+ err = au_sio_cpup_wh(&cpg, /*file*/NULL);
19276+ if (unlikely(err))
19277+ goto out_unlock;
19278+ hi_wh = au_hi_wh(inode, a->btgt);
19279+ /* todo: revalidate hi_wh? */
19280+ }
19281+ }
19282+
19283+ if (parent) {
19284+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
19285+ di_downgrade_lock(parent, AuLock_IR);
19286+ dput(parent);
19287+ parent = NULL; /* so that out_parent does not unlock/put it again */
19288+ }
19289+ if (!au_ftest_icpup(a->flags, DID_CPUP))
19290+ goto out; /* success */
19291+
19292+ if (!d_unhashed(dentry)) {
19293+ struct au_cp_generic cpg = {
19294+ .dentry = dentry,
19295+ .bdst = a->btgt,
19296+ .bsrc = btop,
19297+ .len = sz,
19298+ .pin = &a->pin,
19299+ .flags = AuCpup_DTIME | AuCpup_HOPEN
19300+ };
19301+ err = au_sio_cpup_simple(&cpg);
19302+ if (!err)
19303+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19304+ } else if (!hi_wh)
19305+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19306+ else
19307+ a->h_path.dentry = hi_wh; /* do not dget here */
19308+
19309+out_unlock:
19310+ a->h_inode = d_inode(a->h_path.dentry);
19311+ if (!err)
19312+ goto out; /* success */
19313+ au_unpin(&a->pin);
19314+out_parent:
19315+ if (parent) {
19316+ di_write_unlock(parent);
19317+ dput(parent);
19318+ }
19319+out:
19320+ if (!err) /* the caller is expected to unlock a->h_inode */
19321+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
19322+ return err;
19323+}
19324+/* apply 'ia' to the lower inode (copied up first when needed), then refresh the aufs inode attributes */
19325+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
19326+{
19327+ int err;
19328+ struct inode *inode, *delegated;
19329+ struct super_block *sb;
19330+ struct file *file;
19331+ struct au_icpup_args *a;
19332+
19333+ inode = d_inode(dentry);
19334+ IMustLock(inode);
19335+
19336+ err = setattr_prepare(dentry, ia);
19337+ if (unlikely(err))
19338+ goto out;
19339+
19340+ err = -ENOMEM;
19341+ a = kzalloc(sizeof(*a), GFP_NOFS);
19342+ if (unlikely(!a))
19343+ goto out;
19344+
19345+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
19346+ ia->ia_valid &= ~ATTR_MODE;
19347+
19348+ file = NULL;
19349+ sb = dentry->d_sb;
19350+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19351+ if (unlikely(err))
19352+ goto out_kfree;
19353+
19354+ if (ia->ia_valid & ATTR_FILE) {
19355+ /* currently ftruncate(2) only */
19356+ AuDebugOn(!d_is_reg(dentry));
19357+ file = ia->ia_file;
19358+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
19359+ if (unlikely(err))
19360+ goto out_si;
19361+ ia->ia_file = au_hf_top(file); /* restored to 'file' at out_dentry */
19362+ a->udba = AuOpt_UDBA_NONE;
19363+ } else {
19364+ /* fchmod() doesn't pass ia_file */
19365+ a->udba = au_opt_udba(sb);
19366+ di_write_lock_child(dentry);
19367+ /* no d_unlinked(), to set UDBA_NONE for root */
19368+ if (d_unhashed(dentry))
19369+ a->udba = AuOpt_UDBA_NONE;
19370+ if (a->udba != AuOpt_UDBA_NONE) {
19371+ AuDebugOn(IS_ROOT(dentry));
19372+ err = au_reval_for_attr(dentry, au_sigen(sb));
19373+ if (unlikely(err))
19374+ goto out_dentry;
19375+ }
19376+ }
19377+
19378+ err = au_pin_and_icpup(dentry, ia, a);
19379+ if (unlikely(err < 0))
19380+ goto out_dentry;
19381+ if (au_ftest_icpup(a->flags, DID_CPUP)) { /* NOTE(review): presumably ia_file refers to the pre-copyup branch - confirm */
19382+ ia->ia_file = NULL;
19383+ ia->ia_valid &= ~ATTR_FILE;
19384+ }
19385+
19386+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
19387+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
19388+ == (ATTR_MODE | ATTR_CTIME)) {
19389+ err = security_path_chmod(&a->h_path, ia->ia_mode);
19390+ if (unlikely(err))
19391+ goto out_unlock;
19392+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
19393+ && (ia->ia_valid & ATTR_CTIME)) {
19394+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
19395+ if (unlikely(err))
19396+ goto out_unlock;
19397+ }
19398+
19399+ if (ia->ia_valid & ATTR_SIZE) {
19400+ struct file *f;
19401+
19402+ if (ia->ia_size < i_size_read(inode))
19403+ /* unmap only */
19404+ truncate_setsize(inode, ia->ia_size);
19405+
19406+ f = NULL;
19407+ if (ia->ia_valid & ATTR_FILE)
19408+ f = ia->ia_file;
19409+ inode_unlock(a->h_inode); /* h_inode is unlocked around vfsub_trunc() and re-locked after it */
19410+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
19411+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
19412+ } else {
19413+ delegated = NULL;
19414+ while (1) { /* retried after break_deleg_wait() succeeds */
19415+ err = vfsub_notify_change(&a->h_path, ia, &delegated);
19416+ if (delegated) {
19417+ err = break_deleg_wait(&delegated);
19418+ if (!err)
19419+ continue;
19420+ }
19421+ break;
19422+ }
19423+ }
19424+ /*
19425+ * regardless of the aufs 'acl' option setting.
19426+ * why don't all acl-aware fs call this func from their ->setattr()?
19427+ */
19428+ if (!err && (ia->ia_valid & ATTR_MODE))
19429+ err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
19430+ if (!err)
19431+ au_cpup_attr_changeable(inode);
19432+
19433+out_unlock:
19434+ inode_unlock(a->h_inode); /* locked by au_pin_and_icpup() on success */
19435+ au_unpin(&a->pin);
19436+ if (unlikely(err))
19437+ au_update_dbtop(dentry);
19438+out_dentry:
19439+ di_write_unlock(dentry);
19440+ if (file) {
19441+ fi_write_unlock(file);
19442+ ia->ia_file = file;
19443+ ia->ia_valid |= ATTR_FILE;
19444+ }
19445+out_si:
19446+ si_read_unlock(sb);
19447+out_kfree:
19448+ au_delayed_kfree(a);
19449+out:
19450+ AuTraceErr(err);
19451+ return err;
19452+}
19453+
19454+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
19455+static int au_h_path_to_set_attr(struct dentry *dentry,
19456+ struct au_icpup_args *a, struct path *h_path)
19457+{ /* decide a->udba, revalidate @dentry if necessary, pin it and fill @h_path */
19458+ int err;
19459+ struct super_block *sb;
19460+
19461+ sb = dentry->d_sb;
19462+ a->udba = au_opt_udba(sb);
19463+ /* no d_unlinked(), to set UDBA_NONE for root */
19464+ if (d_unhashed(dentry))
19465+ a->udba = AuOpt_UDBA_NONE;
19466+ if (a->udba != AuOpt_UDBA_NONE) {
19467+ AuDebugOn(IS_ROOT(dentry));
19468+ err = au_reval_for_attr(dentry, au_sigen(sb));
19469+ if (unlikely(err))
19470+ goto out;
19471+ }
19472+ err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
19473+ if (unlikely(err < 0)) /* only a negative value is handled as a failure */
19474+ goto out;
19475+
19476+ h_path->dentry = a->h_path.dentry;
19477+ h_path->mnt = au_sbr_mnt(sb, a->btgt);
19478+
19479+out:
19480+ return err; /* @h_path is filled only when both steps above passed */
19481+}
19482+/* set an xattr or a posix acl, selected by arg->type, on the dentry given by au_h_path_to_set_attr() */
19483+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
19484+ struct au_sxattr *arg)
19485+{
19486+ int err;
19487+ struct path h_path;
19488+ struct super_block *sb;
19489+ struct au_icpup_args *a;
19490+ struct inode *h_inode;
19491+
19492+ IMustLock(inode);
19493+
19494+ err = -ENOMEM;
19495+ a = kzalloc(sizeof(*a), GFP_NOFS);
19496+ if (unlikely(!a))
19497+ goto out;
19498+
19499+ sb = dentry->d_sb;
19500+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19501+ if (unlikely(err))
19502+ goto out_kfree;
19503+
19504+ h_path.dentry = NULL; /* silence gcc */
19505+ di_write_lock_child(dentry);
19506+ err = au_h_path_to_set_attr(dentry, a, &h_path);
19507+ if (unlikely(err))
19508+ goto out_di;
19509+
19510+ inode_unlock(a->h_inode); /* NOTE(review): presumably locked inside au_pin_and_icpup() -- confirm */
19511+ switch (arg->type) { /* no default: any other type leaves err as zero */
19512+ case AU_XATTR_SET:
19513+ AuDebugOn(d_is_negative(h_path.dentry));
19514+ err = vfsub_setxattr(h_path.dentry,
19515+ arg->u.set.name, arg->u.set.value,
19516+ arg->u.set.size, arg->u.set.flags);
19517+ break;
19518+ case AU_ACL_SET:
19519+ err = -EOPNOTSUPP; /* kept when the inode has no ->set_acl */
19520+ h_inode = d_inode(h_path.dentry);
19521+ if (h_inode->i_op->set_acl)
19522+ /* this will call posix_acl_update_mode */
19523+ err = h_inode->i_op->set_acl(h_inode,
19524+ arg->u.acl_set.acl,
19525+ arg->u.acl_set.type);
19526+ break;
19527+ }
19528+ if (!err)
19529+ au_cpup_attr_timesizes(inode);
19530+
19531+ au_unpin(&a->pin);
19532+ if (unlikely(err))
19533+ au_update_dbtop(dentry);
19534+
19535+out_di: /* the failure of au_h_path_to_set_attr() comes here without au_unpin() */
19536+ di_write_unlock(dentry);
19537+ si_read_unlock(sb);
19538+out_kfree:
19539+ au_delayed_kfree(a);
19540+out:
19541+ AuTraceErr(err);
19542+ return err;
19543+}
19544+#endif
19545+/* copy the attributes in @st to @inode. for a dir, @nlink is replaced by st->nlink within i_nlink */
19546+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
19547+ unsigned int nlink)
19548+{
19549+ unsigned int n;
19550+
19551+ inode->i_mode = st->mode;
19552+ /* don't i_[ug]id_write() here */
19553+ inode->i_uid = st->uid;
19554+ inode->i_gid = st->gid;
19555+ inode->i_atime = st->atime;
19556+ inode->i_mtime = st->mtime;
19557+ inode->i_ctime = st->ctime;
19558+
19559+ au_cpup_attr_nlink(inode, /*force*/0);
19560+ if (S_ISDIR(inode->i_mode)) {
19561+ n = inode->i_nlink;
19562+ n -= nlink; /* drop the count given by the caller */
19563+ n += st->nlink; /* and add the one just reported in @st */
19564+ smp_mb(); /* for i_nlink */
19565+ /* 0 can happen */
19566+ set_nlink(inode, n);
19567+ }
19568+
19569+ spin_lock(&inode->i_lock); /* i_blocks and i_size are updated together under i_lock */
19570+ inode->i_blocks = st->blocks;
19571+ i_size_write(inode, st->size);
19572+ spin_unlock(&inode->i_lock);
19573+}
19574+
19575+/*
19576+ * common routine for aufs_getattr() and au_getxattr(): revalidate @dentry if necessary and fill @h_path.
19577+ * returns zero or negative (an error). h_path->dentry may be left NULL even in success.
19578+ * @dentry will be read-locked (AuLock_IR) in success, and is unlocked in error.
19579+ */
19580+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
19581+{
19582+ int err;
19583+ unsigned int mnt_flags, sigen;
19584+ unsigned char udba_none;
19585+ aufs_bindex_t bindex;
19586+ struct super_block *sb, *h_sb;
19587+ struct inode *inode;
19588+
19589+ h_path->mnt = NULL;
19590+ h_path->dentry = NULL;
19591+
19592+ err = 0;
19593+ sb = dentry->d_sb;
19594+ mnt_flags = au_mntflags(sb);
19595+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
19596+
19597+ /* support fstat(2) */
19598+ if (!d_unlinked(dentry) && !udba_none) {
19599+ sigen = au_sigen(sb);
19600+ err = au_digen_test(dentry, sigen);
19601+ if (!err) { /* generation matches: a read lock is enough */
19602+ di_read_lock_child(dentry, AuLock_IR);
19603+ err = au_dbrange_test(dentry);
19604+ if (unlikely(err)) {
19605+ di_read_unlock(dentry, AuLock_IR);
19606+ goto out;
19607+ }
19608+ } else { /* revalidate under the write lock, then downgrade it */
19609+ AuDebugOn(IS_ROOT(dentry));
19610+ di_write_lock_child(dentry);
19611+ err = au_dbrange_test(dentry);
19612+ if (!err)
19613+ err = au_reval_for_attr(dentry, sigen);
19614+ if (!err)
19615+ di_downgrade_lock(dentry, AuLock_IR);
19616+ else {
19617+ di_write_unlock(dentry);
19618+ goto out;
19619+ }
19620+ }
19621+ } else
19622+ di_read_lock_child(dentry, AuLock_IR);
19623+
19624+ inode = d_inode(dentry);
19625+ bindex = au_ibtop(inode);
19626+ h_path->mnt = au_sbr_mnt(sb, bindex);
19627+ h_sb = h_path->mnt->mnt_sb;
19628+ if (!force
19629+ && !au_test_fs_bad_iattr(h_sb)
19630+ && udba_none)
19631+ goto out; /* success */
19632+
19633+ if (au_dbtop(dentry) == bindex)
19634+ h_path->dentry = au_h_dptr(dentry, bindex);
19635+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
19636+ h_path->dentry = au_plink_lkup(inode, bindex);
19637+ if (IS_ERR(h_path->dentry))
19638+ /* pretending success */
19639+ h_path->dentry = NULL;
19640+ else
19641+ dput(h_path->dentry); /* the reference is dropped at once, the pointer is still returned */
19642+ }
19643+
19644+out:
19645+ return err;
19646+}
19647+/* ->getattr(): refresh @inode from the dentry given by au_h_path_getattr() if any, then fill @st from @inode */
19648+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
19649+ struct dentry *dentry, struct kstat *st)
19650+{
19651+ int err;
19652+ unsigned char positive;
19653+ struct path h_path;
19654+ struct inode *inode;
19655+ struct super_block *sb;
19656+
19657+ inode = d_inode(dentry);
19658+ sb = dentry->d_sb;
19659+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19660+ if (unlikely(err))
19661+ goto out;
19662+ err = au_h_path_getattr(dentry, /*force*/0, &h_path);
19663+ if (unlikely(err))
19664+ goto out_si;
19665+ if (unlikely(!h_path.dentry))
19666+ /* illegally overlapped or something */
19667+ goto out_fill; /* pretending success */
19668+
19669+ positive = d_is_positive(h_path.dentry);
19670+ if (positive) /* a negative h_path.dentry skips vfs_getattr(), err stays zero */
19671+ err = vfs_getattr(&h_path, st);
19672+ if (!err) {
19673+ if (positive)
19674+ au_refresh_iattr(inode, st,
19675+ d_inode(h_path.dentry)->i_nlink);
19676+ goto out_fill; /* success */
19677+ }
19678+ AuTraceErr(err);
19679+ goto out_di; /* error: @st is not filled from @inode */
19680+
19681+out_fill:
19682+ generic_fillattr(inode, st);
19683+out_di:
19684+ di_read_unlock(dentry, AuLock_IR);
19685+out_si:
19686+ si_read_unlock(sb);
19687+out:
19688+ AuTraceErr(err);
19689+ return err;
19690+}
19691+
19692+/* ---------------------------------------------------------------------- */
19693+/* ->get_link(): find a dentry for the inode at au_ibtop() and pass it to vfs_get_link() */
19694+static const char *aufs_get_link(struct dentry *dentry, struct inode *inode,
19695+ struct delayed_call *done)
19696+{
19697+ const char *ret;
19698+ struct dentry *h_dentry;
19699+ struct inode *h_inode;
19700+ int err;
19701+ aufs_bindex_t bindex;
19702+
19703+ ret = NULL; /* suppress a warning */
19704+ err = -ECHILD;
19705+ if (!dentry) /* NOTE(review): presumably the RCU-walk case of ->get_link() -- confirm */
19706+ goto out;
19707+
19708+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
19709+ if (unlikely(err))
19710+ goto out;
19711+
19712+ err = au_d_hashed_positive(dentry);
19713+ if (unlikely(err))
19714+ goto out_unlock;
19715+
19716+ err = -EINVAL;
19717+ inode = d_inode(dentry); /* the @inode parameter is overwritten */
19718+ bindex = au_ibtop(inode);
19719+ h_inode = au_h_iptr(inode, bindex);
19720+ if (unlikely(!h_inode->i_op->get_link))
19721+ goto out_unlock;
19722+
19723+ err = -EBUSY;
19724+ h_dentry = NULL;
19725+ if (au_dbtop(dentry) <= bindex) {
19726+ h_dentry = au_h_dptr(dentry, bindex);
19727+ if (h_dentry)
19728+ dget(h_dentry);
19729+ }
19730+ if (!h_dentry) { /* fall back to any alias of h_inode */
19731+ h_dentry = d_find_any_alias(h_inode);
19732+ if (IS_ERR(h_dentry)) {
19733+ err = PTR_ERR(h_dentry);
19734+ goto out_unlock;
19735+ }
19736+ }
19737+ if (unlikely(!h_dentry)) /* still nothing, -EBUSY */
19738+ goto out_unlock;
19739+
19740+ err = 0;
19741+ AuDbg("%pf\n", h_inode->i_op->get_link);
19742+ AuDbgDentry(h_dentry);
19743+ ret = vfs_get_link(h_dentry, done);
19744+ dput(h_dentry); /* drops the reference taken by dget() or d_find_any_alias() above */
19745+ if (IS_ERR(ret))
19746+ err = PTR_ERR(ret);
19747+
19748+out_unlock:
19749+ aufs_read_unlock(dentry, AuLock_IR);
19750+out:
19751+ if (unlikely(err))
19752+ ret = ERR_PTR(err);
19753+ AuTraceErrPtr(ret);
19754+ return ret;
19755+}
19756+
19757+/* ---------------------------------------------------------------------- */
19758+/* non-zero only for a block/char device, a fifo or a socket. S_IFxxx are values under S_IFMT, not bit flags */
19759+static int au_is_special(struct inode *inode)
19760+{
19761+ return special_file(inode->i_mode); /* OR-ing S_IFBLK|S_IFCHR|S_IFIFO|S_IFSOCK equals S_IFMT and matches every type */
19762+}
19763+/* ->update_time(): update the time of the inode at au_ibtop() unless au_test_ro(), then refresh @inode */
19764+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
19765+{
19766+ int err;
19767+ aufs_bindex_t bindex;
19768+ struct super_block *sb;
19769+ struct inode *h_inode;
19770+ struct vfsmount *h_mnt;
19771+
19772+ sb = inode->i_sb;
19773+ WARN_ONCE((flags & S_ATIME) && !IS_NOATIME(inode),
19774+ "unexpected s_flags 0x%lx", sb->s_flags);
19775+
19776+ /* mmap_sem might be acquired already, cf. aufs_mmap() */
19777+ lockdep_off();
19778+ si_read_lock(sb, AuLock_FLUSH); /* the return value is not checked here */
19779+ ii_write_lock_child(inode);
19780+ lockdep_on();
19781+
19782+ err = 0;
19783+ bindex = au_ibtop(inode);
19784+ h_inode = au_h_iptr(inode, bindex);
19785+ if (!au_test_ro(sb, bindex, inode)) { /* update h_inode between mnt_want_write and mnt_drop_write */
19786+ h_mnt = au_sbr_mnt(sb, bindex);
19787+ err = vfsub_mnt_want_write(h_mnt);
19788+ if (!err) {
19789+ err = vfsub_update_time(h_inode, ts, flags);
19790+ vfsub_mnt_drop_write(h_mnt);
19791+ }
19792+ } else if (au_is_special(h_inode)) {
19793+ /*
19794+ * Never copy-up here.
19795+ * These special files may already be opened and used for
19796+ * communicating. If we copied it up, then the communication
19797+ * would be corrupted.
19798+ */
19799+ AuWarn1("timestamps for i%lu are ignored "
19800+ "since it is on readonly branch (hi%lu).\n",
19801+ inode->i_ino, h_inode->i_ino);
19802+ } else if (flags & ~S_ATIME) { /* anything other than S_ATIME is an error here */
19803+ err = -EIO;
19804+ AuIOErr1("unexpected flags 0x%x\n", flags);
19805+ AuDebugOn(1);
19806+ }
19807+
19808+ lockdep_off();
19809+ if (!err)
19810+ au_cpup_attr_timesizes(inode);
19811+ ii_write_unlock(inode);
19812+ si_read_unlock(sb);
19813+ lockdep_on();
19814+
19815+ if (!err && (flags & S_VERSION))
19816+ inode_inc_iversion(inode);
19817+
19818+ return err;
19819+}
19820+
19821+/* ---------------------------------------------------------------------- */
19822+
19823+/* inode operations indexed by AuIop_xxx. the no-getattr version will be set by module.c:aufs_init() */
19824+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
19825+ aufs_iop[] = {
19826+ [AuIop_SYMLINK] = { /* the only entry with ->readlink and ->get_link */
19827+ .permission = aufs_permission,
19828+#ifdef CONFIG_FS_POSIX_ACL
19829+ .get_acl = aufs_get_acl,
19830+ .set_acl = aufs_set_acl, /* unsupport for symlink? */
19831+#endif
19832+
19833+ .setattr = aufs_setattr,
19834+ .getattr = aufs_getattr,
19835+
19836+#ifdef CONFIG_AUFS_XATTR
19837+ .listxattr = aufs_listxattr,
19838+#endif
19839+
19840+ .readlink = generic_readlink,
19841+ .get_link = aufs_get_link,
19842+
19843+ /* .update_time = aufs_update_time */
19844+ },
19845+ [AuIop_DIR] = { /* adds the namespace operations (create, lookup, link, ...) */
19846+ .create = aufs_create,
19847+ .lookup = aufs_lookup,
19848+ .link = aufs_link,
19849+ .unlink = aufs_unlink,
19850+ .symlink = aufs_symlink,
19851+ .mkdir = aufs_mkdir,
19852+ .rmdir = aufs_rmdir,
19853+ .mknod = aufs_mknod,
19854+ .rename = aufs_rename,
19855+
19856+ .permission = aufs_permission,
19857+#ifdef CONFIG_FS_POSIX_ACL
19858+ .get_acl = aufs_get_acl,
19859+ .set_acl = aufs_set_acl,
19860+#endif
19861+
19862+ .setattr = aufs_setattr,
19863+ .getattr = aufs_getattr,
19864+
19865+#ifdef CONFIG_AUFS_XATTR
19866+ .listxattr = aufs_listxattr,
19867+#endif
19868+
19869+ .update_time = aufs_update_time,
19870+ .atomic_open = aufs_atomic_open,
19871+ .tmpfile = aufs_tmpfile
19872+ },
19873+ [AuIop_OTHER] = { /* attribute related operations only */
19874+ .permission = aufs_permission,
19875+#ifdef CONFIG_FS_POSIX_ACL
19876+ .get_acl = aufs_get_acl,
19877+ .set_acl = aufs_set_acl,
19878+#endif
19879+
19880+ .setattr = aufs_setattr,
19881+ .getattr = aufs_getattr,
19882+
19883+#ifdef CONFIG_AUFS_XATTR
19884+ .listxattr = aufs_listxattr,
19885+#endif
19886+
19887+ .update_time = aufs_update_time
19888+ }
19889+};
19890diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
19891--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
19892+++ linux/fs/aufs/i_op_del.c 2016-10-09 16:55:36.492701639 +0200
19893@@ -0,0 +1,511 @@
19894+/*
19895+ * Copyright (C) 2005-2016 Junjiro R. Okajima
19896+ *
19897+ * This program, aufs is free software; you can redistribute it and/or modify
19898+ * it under the terms of the GNU General Public License as published by
19899+ * the Free Software Foundation; either version 2 of the License, or
19900+ * (at your option) any later version.
19901+ *
19902+ * This program is distributed in the hope that it will be useful,
19903+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19904+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19905+ * GNU General Public License for more details.
19906+ *
19907+ * You should have received a copy of the GNU General Public License
19908+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
19909+ */
19910+
19911+/*
19912+ * inode operations (del entry)
19913+ */
19914+
19915+#include "aufs.h"
19916+
19917+/*
19918+ * decide if a new whiteout for @dentry is necessary or not.
19919+ * when it is necessary, prepare the parent dir for the upper branch whose
19920+ * branch index is @bcpup for creation. the actual creation of the whiteout will
19921+ * be done by caller. a negative *@bcpup is decided here and stored back. @isdir is unused.
19922+ * return value:
19923+ * 0: wh is unnecessary
19924+ * plus: wh is necessary
19925+ * minus: error
19926+ */
19927+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
19928+{
19929+ int need_wh, err;
19930+ aufs_bindex_t btop;
19931+ struct super_block *sb;
19932+
19933+ sb = dentry->d_sb;
19934+ btop = au_dbtop(dentry);
19935+ if (*bcpup < 0) { /* the caller did not specify the branch */
19936+ *bcpup = btop;
19937+ if (au_test_ro(sb, btop, d_inode(dentry))) {
19938+ err = AuWbrCopyup(au_sbi(sb), dentry);
19939+ *bcpup = err; /* stored even when it is a negative error */
19940+ if (unlikely(err < 0))
19941+ goto out;
19942+ }
19943+ } else
19944+ AuDebugOn(btop < *bcpup
19945+ || au_test_ro(sb, *bcpup, d_inode(dentry)));
19946+ AuDbg("bcpup %d, btop %d\n", *bcpup, btop);
19947+
19948+ if (*bcpup != btop) { /* operating on another branch always needs a whiteout */
19949+ err = au_cpup_dirs(dentry, *bcpup);
19950+ if (unlikely(err))
19951+ goto out;
19952+ need_wh = 1;
19953+ } else { /* look up from btop + 1 on a temporary copy of dinfo */
19954+ struct au_dinfo *dinfo, *tmp;
19955+
19956+ need_wh = -ENOMEM;
19957+ dinfo = au_di(dentry);
19958+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
19959+ if (tmp) {
19960+ au_di_cp(tmp, dinfo);
19961+ au_di_swap(tmp, dinfo);
19962+ /* returns the number of positive dentries */
19963+ need_wh = au_lkup_dentry(dentry, btop + 1,
19964+ /* AuLkup_IGNORE_PERM */ 0);
19965+ au_di_swap(tmp, dinfo); /* swap back, the original dinfo is restored */
19966+ au_rw_write_unlock(&tmp->di_rwsem);
19967+ au_di_free(tmp);
19968+ }
19969+ }
19970+ AuDbg("need_wh %d\n", need_wh);
19971+ err = need_wh;
19972+
19973+out:
19974+ return err;
19975+}
19976+
19977+/*
19978+ * simple tests for the del-entry operations.
19979+ * following the checks in vfs, plus the parent-child relationship.
19980+ * returns zero when every test passed, otherwise a negative error. */
19981+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
19982+ struct dentry *h_parent, int isdir)
19983+{
19984+ int err;
19985+ umode_t h_mode;
19986+ struct dentry *h_dentry, *h_latest;
19987+ struct inode *h_inode;
19988+
19989+ h_dentry = au_h_dptr(dentry, bindex);
19990+ if (d_really_is_positive(dentry)) {
19991+ err = -ENOENT;
19992+ if (unlikely(d_is_negative(h_dentry)))
19993+ goto out;
19994+ h_inode = d_inode(h_dentry);
19995+ if (unlikely(!h_inode->i_nlink))
19996+ goto out;
19997+
19998+ h_mode = h_inode->i_mode;
19999+ if (!isdir) { /* the type of h_dentry has to agree with @isdir */
20000+ err = -EISDIR;
20001+ if (unlikely(S_ISDIR(h_mode)))
20002+ goto out;
20003+ } else if (unlikely(!S_ISDIR(h_mode))) {
20004+ err = -ENOTDIR;
20005+ goto out;
20006+ }
20007+ } else {
20008+ /* rename(2) case */
20009+ err = -EIO;
20010+ if (unlikely(d_is_positive(h_dentry)))
20011+ goto out;
20012+ }
20013+
20014+ err = -ENOENT;
20015+ /* expected parent dir is locked */
20016+ if (unlikely(h_parent != h_dentry->d_parent))
20017+ goto out;
20018+ err = 0; /* overwritten just below */
20019+
20020+ /*
20021+ * rmdir a dir may break the consistency on some filesystem.
20022+ * let's try heavy test.
20023+ */
20024+ err = -EACCES;
20025+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
20026+ && au_test_h_perm(d_inode(h_parent),
20027+ MAY_EXEC | MAY_WRITE)))
20028+ goto out;
20029+
20030+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent); /* look up the name again */
20031+ err = -EIO;
20032+ if (IS_ERR(h_latest))
20033+ goto out;
20034+ if (h_latest == h_dentry) /* success only when it is still the very h_dentry */
20035+ err = 0;
20036+ dput(h_latest);
20037+
20038+out:
20039+ return err;
20040+}
20041+
20042+/*
20043+ * decide the branch where we operate for @dentry. the branch index will be set
20044+ * to @rbcpup. after deciding it, 'pin' it and store the timestamps of the parent
20045+ * dir for reverting.
20046+ * when a new whiteout is necessary, create and return it. otherwise return NULL, or ERR_PTR() in error.
20047+ */
20048+static struct dentry*
20049+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
20050+ struct au_dtime *dt, struct au_pin *pin)
20051+{
20052+ struct dentry *wh_dentry;
20053+ struct super_block *sb;
20054+ struct path h_path;
20055+ int err, need_wh;
20056+ unsigned int udba;
20057+ aufs_bindex_t bcpup;
20058+
20059+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
20060+ wh_dentry = ERR_PTR(need_wh);
20061+ if (unlikely(need_wh < 0))
20062+ goto out;
20063+
20064+ sb = dentry->d_sb;
20065+ udba = au_opt_udba(sb);
20066+ bcpup = *rbcpup;
20067+ err = au_pin(pin, dentry, bcpup, udba,
20068+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
20069+ wh_dentry = ERR_PTR(err);
20070+ if (unlikely(err))
20071+ goto out;
20072+
20073+ h_path.dentry = au_pinned_h_parent(pin);
20074+ if (udba != AuOpt_UDBA_NONE
20075+ && au_dbtop(dentry) == bcpup) {
20076+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
20077+ wh_dentry = ERR_PTR(err);
20078+ if (unlikely(err))
20079+ goto out_unpin;
20080+ }
20081+
20082+ h_path.mnt = au_sbr_mnt(sb, bcpup);
20083+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
20084+ wh_dentry = NULL;
20085+ if (!need_wh)
20086+ goto out; /* success, no need to create whiteout */
20087+
20088+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
20089+ if (IS_ERR(wh_dentry))
20090+ goto out_unpin;
20091+
20092+ /* returns with the parent is locked and wh_dentry is dget-ed */
20093+ goto out; /* success */
20094+
20095+out_unpin: /* errors after au_pin() only, both success paths keep @pin */
20096+ au_unpin(pin);
20097+out:
20098+ return wh_dentry;
20099+}
20100+
20101+/*
20102+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
20103+ * in order to be revertible and save time for removing many child whiteouts
20104+ * under the dir.
20105+ * returns 1 when there are too many child whiteouts and caller should remove
20106+ * them asynchronously. returns 0 when the number of children is small enough to
20107+ * remove now or the branch fs is a remote fs.
20108+ * otherwise return an error.
20109+ */
20110+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
20111+ struct au_nhash *whlist, struct inode *dir)
20112+{
20113+ int rmdir_later, err, dirwh;
20114+ struct dentry *h_dentry;
20115+ struct super_block *sb;
20116+ struct inode *inode;
20117+
20118+ sb = dentry->d_sb;
20119+ SiMustAnyLock(sb);
20120+ h_dentry = au_h_dptr(dentry, bindex);
20121+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
20122+ if (unlikely(err))
20123+ goto out;
20124+
20125+ /* stop monitoring */
20126+ inode = d_inode(dentry);
20127+ au_hn_free(au_hi(inode, bindex));
20128+
20129+ if (!au_test_fs_remote(h_dentry->d_sb)) {
20130+ dirwh = au_sbi(sb)->si_dirwh;
20131+ rmdir_later = (dirwh <= 1); /* si_dirwh of 1 or less always defers the removal */
20132+ if (!rmdir_later)
20133+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
20134+ dirwh);
20135+ if (rmdir_later)
20136+ return rmdir_later; /* returned directly, without AuTraceErr() */
20137+ }
20138+
20139+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
20140+ if (unlikely(err)) { /* the failure is reported and then ignored on purpose */
20141+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
20142+ h_dentry, bindex, err);
20143+ err = 0;
20144+ }
20145+
20146+out:
20147+ AuTraceErr(err);
20148+ return err;
20149+}
20150+
20151+/*
20152+ * final procedure for deleting an entry.
20153+ * maintain dentry and iattr: drop @dentry, and update the times and the version of @dir.
20154+ */
20155+static void epilog(struct inode *dir, struct dentry *dentry,
20156+ aufs_bindex_t bindex)
20157+{
20158+ struct inode *inode;
20159+
20160+ inode = d_inode(dentry);
20161+ d_drop(dentry);
20162+ inode->i_ctime = dir->i_ctime; /* taken before au_dir_ts() below refreshes @dir */
20163+
20164+ au_dir_ts(dir, bindex);
20165+ dir->i_version++;
20166+}
20167+
20168+/*
20169+ * when an error happened, remove the created whiteout and revert everything.
20170+ * returns zero when reverted, otherwise -EIO. @err is used for the message only. */
20171+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
20172+ aufs_bindex_t bwh, struct dentry *wh_dentry,
20173+ struct dentry *dentry, struct au_dtime *dt)
20174+{
20175+ int rerr;
20176+ struct path h_path = {
20177+ .dentry = wh_dentry,
20178+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
20179+ };
20180+
20181+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
20182+ if (!rerr) {
20183+ au_set_dbwh(dentry, bwh); /* restore the value saved by the caller */
20184+ au_dtime_revert(dt);
20185+ return 0;
20186+ }
20187+
20188+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
20189+ return -EIO;
20190+}
20191+
20192+/* ---------------------------------------------------------------------- */
20193+/* ->unlink(): unlink on the top branch when it is the chosen one, otherwise the whiteout alone hides the entry */
20194+int aufs_unlink(struct inode *dir, struct dentry *dentry)
20195+{
20196+ int err;
20197+ aufs_bindex_t bwh, bindex, btop;
20198+ struct inode *inode, *h_dir, *delegated;
20199+ struct dentry *parent, *wh_dentry;
20200+ /* to reduce stack size */
20201+ struct {
20202+ struct au_dtime dt;
20203+ struct au_pin pin;
20204+ struct path h_path;
20205+ } *a;
20206+
20207+ IMustLock(dir);
20208+
20209+ err = -ENOMEM;
20210+ a = kmalloc(sizeof(*a), GFP_NOFS);
20211+ if (unlikely(!a))
20212+ goto out;
20213+
20214+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
20215+ if (unlikely(err))
20216+ goto out_free;
20217+ err = au_d_hashed_positive(dentry);
20218+ if (unlikely(err))
20219+ goto out_unlock;
20220+ inode = d_inode(dentry);
20221+ IMustLock(inode);
20222+ err = -EISDIR;
20223+ if (unlikely(d_is_dir(dentry)))
20224+ goto out_unlock; /* possible? */
20225+
20226+ btop = au_dbtop(dentry);
20227+ bwh = au_dbwh(dentry); /* saved for do_revert() */
20228+ bindex = -1; /* negative: the branch is decided via lock_hdir_create_wh() */
20229+ parent = dentry->d_parent; /* dir inode is locked */
20230+ di_write_lock_parent(parent);
20231+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
20232+ &a->pin);
20233+ err = PTR_ERR(wh_dentry);
20234+ if (IS_ERR(wh_dentry))
20235+ goto out_parent;
20236+
20237+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, btop);
20238+ a->h_path.dentry = au_h_dptr(dentry, btop);
20239+ dget(a->h_path.dentry);
20240+ if (bindex == btop) {
20241+ h_dir = au_pinned_h_dir(&a->pin);
20242+ delegated = NULL;
20243+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
20244+ if (unlikely(err == -EWOULDBLOCK)) {
20245+ pr_warn("cannot retry for NFSv4 delegation"
20246+ " for an internal unlink\n");
20247+ iput(delegated);
20248+ }
20249+ } else { /* NOTE(review): wh_dentry is dereferenced, presumably never NULL when bindex != btop -- confirm */
20250+ /* dir inode is locked */
20251+ h_dir = d_inode(wh_dentry->d_parent);
20252+ IMustLock(h_dir);
20253+ err = 0;
20254+ }
20255+
20256+ if (!err) {
20257+ vfsub_drop_nlink(inode);
20258+ epilog(dir, dentry, bindex);
20259+
20260+ /* update target timestamps */
20261+ if (bindex == btop) {
20262+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
20263+ /*ignore*/
20264+ inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
20265+ } else
20266+ /* todo: this timestamp may be reverted later */
20267+ inode->i_ctime = h_dir->i_ctime;
20268+ goto out_unpin; /* success */
20269+ }
20270+
20271+ /* revert */
20272+ if (wh_dentry) {
20273+ int rerr;
20274+
20275+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20276+ &a->dt);
20277+ if (rerr) /* a failed revert replaces the original error */
20278+ err = rerr;
20279+ }
20280+
20281+out_unpin:
20282+ au_unpin(&a->pin);
20283+ dput(wh_dentry);
20284+ dput(a->h_path.dentry);
20285+out_parent:
20286+ di_write_unlock(parent);
20287+out_unlock:
20288+ aufs_read_unlock(dentry, AuLock_DW);
20289+out_free:
20290+ au_delayed_kfree(a);
20291+out:
20292+ return err;
20293+}
20294+/* ->rmdir(): test emptiness, then rename-and-remove on the top branch or rely on the whiteout alone */
20295+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
20296+{
20297+ int err, rmdir_later;
20298+ aufs_bindex_t bwh, bindex, btop;
20299+ struct inode *inode;
20300+ struct dentry *parent, *wh_dentry, *h_dentry;
20301+ struct au_whtmp_rmdir *args;
20302+ /* to reduce stack size */
20303+ struct {
20304+ struct au_dtime dt;
20305+ struct au_pin pin;
20306+ } *a;
20307+
20308+ IMustLock(dir);
20309+
20310+ err = -ENOMEM;
20311+ a = kmalloc(sizeof(*a), GFP_NOFS);
20312+ if (unlikely(!a))
20313+ goto out;
20314+
20315+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
20316+ if (unlikely(err))
20317+ goto out_free;
20318+ err = au_alive_dir(dentry);
20319+ if (unlikely(err))
20320+ goto out_unlock;
20321+ inode = d_inode(dentry);
20322+ IMustLock(inode);
20323+ err = -ENOTDIR;
20324+ if (unlikely(!d_is_dir(dentry)))
20325+ goto out_unlock; /* possible? */
20326+
20327+ err = -ENOMEM;
20328+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
20329+ if (unlikely(!args))
20330+ goto out_unlock;
20331+
20332+ parent = dentry->d_parent; /* dir inode is locked */
20333+ di_write_lock_parent(parent);
20334+ err = au_test_empty(dentry, &args->whlist);
20335+ if (unlikely(err))
20336+ goto out_parent;
20337+
20338+ btop = au_dbtop(dentry);
20339+ bwh = au_dbwh(dentry); /* saved for do_revert() */
20340+ bindex = -1; /* negative: the branch is decided via lock_hdir_create_wh() */
20341+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
20342+ &a->pin);
20343+ err = PTR_ERR(wh_dentry);
20344+ if (IS_ERR(wh_dentry))
20345+ goto out_parent;
20346+
20347+ h_dentry = au_h_dptr(dentry, btop);
20348+ dget(h_dentry);
20349+ rmdir_later = 0;
20350+ if (bindex == btop) {
20351+ err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
20352+ if (err > 0) { /* positive is not an error but a request to remove later */
20353+ rmdir_later = err;
20354+ err = 0;
20355+ }
20356+ } else {
20357+ /* stop monitoring */
20358+ au_hn_free(au_hi(inode, btop));
20359+
20360+ /* dir inode is locked */
20361+ IMustLock(d_inode(wh_dentry->d_parent));
20362+ err = 0;
20363+ }
20364+
20365+ if (!err) {
20366+ vfsub_dead_dir(inode);
20367+ au_set_dbdiropq(dentry, -1);
20368+ epilog(dir, dentry, bindex);
20369+
20370+ if (rmdir_later) {
20371+ au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
20372+ args = NULL; /* not freed below. NOTE(review): presumably handed over to the kicked job -- confirm */
20373+ }
20374+
20375+ goto out_unpin; /* success */
20376+ }
20377+
20378+ /* revert */
20379+ AuLabel(revert);
20380+ if (wh_dentry) {
20381+ int rerr;
20382+
20383+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20384+ &a->dt);
20385+ if (rerr) /* a failed revert replaces the original error */
20386+ err = rerr;
20387+ }
20388+
20389+out_unpin:
20390+ au_unpin(&a->pin);
20391+ dput(wh_dentry);
20392+ dput(h_dentry);
20393+out_parent:
20394+ di_write_unlock(parent);
20395+ if (args)
20396+ au_whtmp_rmdir_free(args);
20397+out_unlock:
20398+ aufs_read_unlock(dentry, AuLock_DW);
20399+out_free:
20400+ au_delayed_kfree(a);
20401+out:
20402+ AuTraceErr(err);
20403+ return err;
20404+}
20405diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
20406--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
20407+++ linux/fs/aufs/i_op_ren.c 2016-12-17 12:28:17.595211562 +0100
20408@@ -0,0 +1,1165 @@
20409+/*
20410+ * Copyright (C) 2005-2016 Junjiro R. Okajima
20411+ *
20412+ * This program, aufs is free software; you can redistribute it and/or modify
20413+ * it under the terms of the GNU General Public License as published by
20414+ * the Free Software Foundation; either version 2 of the License, or
20415+ * (at your option) any later version.
20416+ *
20417+ * This program is distributed in the hope that it will be useful,
20418+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20419+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20420+ * GNU General Public License for more details.
20421+ *
20422+ * You should have received a copy of the GNU General Public License
20423+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
20424+ */
20425+
20426+/*
20427+ * inode operation (rename entry)
20428+ * todo: this is crazy monster
20429+ */
20430+
20431+#include "aufs.h"
20432+
20433+enum { AuSRC, AuDST, AuSrcDst }; /* indices for au_ren_args.sd[], AuSrcDst is the array size */
20434+enum { AuPARENT, AuCHILD, AuParentChild }; /* indices for sd[].dt[], AuParentChild is the array size */
20435+
20436+#define AuRen_ISDIR_SRC 1 /* AuRen_xxx are bits for au_ren_args.auren_flags */
20437+#define AuRen_ISDIR_DST (1 << 1)
20438+#define AuRen_ISSAMEDIR (1 << 2)
20439+#define AuRen_WHSRC (1 << 3)
20440+#define AuRen_WHDST (1 << 4)
20441+#define AuRen_MNT_WRITE (1 << 5)
20442+#define AuRen_DT_DSTDIR (1 << 6)
20443+#define AuRen_DIROPQ_SRC (1 << 7)
20444+#define AuRen_DIROPQ_DST (1 << 8)
20445+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
20446+#define au_fset_ren(flags, name) \
20447+ do { (flags) |= AuRen_##name; } while (0)
20448+#define au_fclr_ren(flags, name) \
20449+ do { (flags) &= ~AuRen_##name; } while (0)
20450+
20451+struct au_ren_args { /* the working set shared by the rename helpers */
20452+ struct {
20453+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
20454+ *wh_dentry;
20455+ struct inode *dir, *inode;
20456+ struct au_hinode *hdir, *hinode;
20457+ struct au_dtime dt[AuParentChild];
20458+ aufs_bindex_t btop, bdiropq;
20459+ } sd[AuSrcDst]; /* one set for the source (AuSRC) and one for the destination (AuDST) */
20460+
20461+#define src_dentry sd[AuSRC].dentry
20462+#define src_dir sd[AuSRC].dir
20463+#define src_inode sd[AuSRC].inode
20464+#define src_h_dentry sd[AuSRC].h_dentry
20465+#define src_parent sd[AuSRC].parent
20466+#define src_h_parent sd[AuSRC].h_parent
20467+#define src_wh_dentry sd[AuSRC].wh_dentry
20468+#define src_hdir sd[AuSRC].hdir
20469+#define src_hinode sd[AuSRC].hinode
20470+#define src_h_dir sd[AuSRC].hdir->hi_inode
20471+#define src_dt sd[AuSRC].dt
20472+#define src_btop sd[AuSRC].btop
20473+#define src_bdiropq sd[AuSRC].bdiropq
20474+
20475+#define dst_dentry sd[AuDST].dentry
20476+#define dst_dir sd[AuDST].dir
20477+#define dst_inode sd[AuDST].inode
20478+#define dst_h_dentry sd[AuDST].h_dentry
20479+#define dst_parent sd[AuDST].parent
20480+#define dst_h_parent sd[AuDST].h_parent
20481+#define dst_wh_dentry sd[AuDST].wh_dentry
20482+#define dst_hdir sd[AuDST].hdir
20483+#define dst_hinode sd[AuDST].hinode
20484+#define dst_h_dir sd[AuDST].hdir->hi_inode
20485+#define dst_dt sd[AuDST].dt
20486+#define dst_btop sd[AuDST].btop
20487+#define dst_bdiropq sd[AuDST].bdiropq
20488+
20489+ struct dentry *h_trap;
20490+ struct au_branch *br;
20491+ struct path h_path;
20492+ struct au_nhash whlist;
20493+ aufs_bindex_t btgt, src_bwh;
20494+
20495+ struct {
20496+ unsigned short auren_flags; /* AuRen_xxx bits */
20497+ unsigned char flags; /* syscall parameter */
20498+ unsigned char exchange;
20499+ } __packed;
20500+
20501+ struct au_whtmp_rmdir *thargs;
20502+ struct dentry *h_dst;
20503+};
20504+
20505+/* ---------------------------------------------------------------------- */
20506+
20507+/*
20508+ * functions for reverting.
20509+ * when an error happens in a single rename system call, we should revert
20510+ * everything as if nothing happened.
20511+ * we don't need to revert the copied-up/down parent dirs since they are
20512+ * harmless.
20513+ */
20514+/* log a failed revert step and set err to -EIO; needs locals 'err' and 'rerr' in scope. NOTE(review): the users below take 'err' by value, so their callers never see this -EIO */
20515+#define RevertFailure(fmt, ...) do { \
20516+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
20517+ ##__VA_ARGS__, err, rerr); \
20518+ err = -EIO; \
20519+} while (0)
20520+/* revert au_ren_do_diropq() for side @idx (AuSRC or AuDST): remove the diropq on branch a->btgt and restore the saved bdiropq */
20521+static void au_ren_do_rev_diropq(int err, struct au_ren_args *a, int idx)
20522+{
20523+ int rerr;
20524+ struct dentry *d;
20525+#define src_or_dst(member) a->sd[idx].member
20526+
20527+ d = src_or_dst(dentry); /* {src,dst}_dentry */
20528+ au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
20529+ rerr = au_diropq_remove(d, a->btgt);
20530+ au_hn_inode_unlock(src_or_dst(hinode));
20531+ au_set_dbdiropq(d, src_or_dst(bdiropq)); /* restored whether or not the removal succeeded */
20532+ if (rerr)
20533+ RevertFailure("remove diropq %pd", d);
20534+
20535+#undef src_or_dst /* keep the helper macro local to this function */
20536+}
20537+/* revert whichever diropq(s) au_ren_diropq() created, as recorded by the DIROPQ_SRC/DIROPQ_DST flags */
20538+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
20539+{
20540+ if (au_ftest_ren(a->auren_flags, DIROPQ_SRC))
20541+ au_ren_do_rev_diropq(err, a, AuSRC);
20542+ if (au_ftest_ren(a->auren_flags, DIROPQ_DST))
20543+ au_ren_do_rev_diropq(err, a, AuDST);
20544+}
20545+/* revert the lower rename: look up the original src name under src_h_parent and rename the lower dentry on a->btgt back to it */
20546+static void au_ren_rev_rename(int err, struct au_ren_args *a)
20547+{
20548+ int rerr;
20549+ struct inode *delegated;
20550+
20551+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
20552+ a->src_h_parent);
20553+ rerr = PTR_ERR(a->h_path.dentry); /* only meaningful when IS_ERR() below is true */
20554+ if (IS_ERR(a->h_path.dentry)) {
20555+ RevertFailure("lkup one %pd", a->src_dentry);
20556+ return;
20557+ }
20558+
20559+ delegated = NULL;
20560+ rerr = vfsub_rename(a->dst_h_dir,
20561+ au_h_dptr(a->src_dentry, a->btgt),
20562+ a->src_h_dir, &a->h_path, &delegated, a->flags);
20563+ if (unlikely(rerr == -EWOULDBLOCK)) {
20564+ pr_warn("cannot retry for NFSv4 delegation"
20565+ " for an internal rename\n");
20566+ iput(delegated);
20567+ }
20568+ d_drop(a->h_path.dentry);
20569+ dput(a->h_path.dentry); /* release the reference taken by the lookup above */
20570+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
20571+ if (rerr)
20572+ RevertFailure("rename %pd", a->src_dentry);
20573+}
20574+/* revert the rename-to-whtmp of the dst dir: rename a->h_dst back to the dst name unless a positive lower dentry already exists there */
20575+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
20576+{
20577+ int rerr;
20578+ struct inode *delegated;
20579+
20580+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
20581+ a->dst_h_parent);
20582+ rerr = PTR_ERR(a->h_path.dentry); /* only meaningful when IS_ERR() below is true */
20583+ if (IS_ERR(a->h_path.dentry)) {
20584+ RevertFailure("lkup one %pd", a->dst_dentry);
20585+ return;
20586+ }
20587+ if (d_is_positive(a->h_path.dentry)) { /* the dst name is in use on the branch: nothing is renamed back */
20588+ d_drop(a->h_path.dentry);
20589+ dput(a->h_path.dentry);
20590+ return;
20591+ }
20592+
20593+ delegated = NULL;
20594+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
20595+ &delegated, a->flags);
20596+ if (unlikely(rerr == -EWOULDBLOCK)) {
20597+ pr_warn("cannot retry for NFSv4 delegation"
20598+ " for an internal rename\n");
20599+ iput(delegated);
20600+ }
20601+ d_drop(a->h_path.dentry);
20602+ dput(a->h_path.dentry);
20603+ if (!rerr)
20604+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst)); /* dst points to its original lower dir again */
20605+ else
20606+ RevertFailure("rename %pd", a->h_dst);
20607+}
20608+/* revert the whiteout created for src: unlink a->src_wh_dentry and restore the saved bwh of src_dentry */
20609+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
20610+{
20611+ int rerr;
20612+
20613+ a->h_path.dentry = a->src_wh_dentry;
20614+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
20615+ au_set_dbwh(a->src_dentry, a->src_bwh); /* restored whether or not the unlink succeeded */
20616+ if (rerr)
20617+ RevertFailure("unlink %pd", a->src_wh_dentry);
20618+}
20619+#undef RevertFailure /* the macro is used by the revert functions above only */
20620+
20621+/* ---------------------------------------------------------------------- */
20622+
20623+/*
20624+ * when we have to copyup the renaming entry, do it with the rename-target name
20625+ * in order to minimize the cost (the later actual rename is unnecessary).
20626+ * otherwise rename it on the target branch.
20627+ */
20628+static int au_ren_or_cpup(struct au_ren_args *a)
20629+{
20630+ int err;
20631+ struct dentry *d;
20632+ struct inode *delegated;
20633+
20634+ d = a->src_dentry;
20635+ if (au_dbtop(d) == a->btgt) { /* src already sits on the target branch: rename the lower dentry */
20636+ a->h_path.dentry = a->dst_h_dentry;
20637+ AuDebugOn(au_dbtop(d) != a->btgt);
20638+ delegated = NULL;
20639+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
20640+ a->dst_h_dir, &a->h_path, &delegated,
20641+ a->flags);
20642+ if (unlikely(err == -EWOULDBLOCK)) {
20643+ pr_warn("cannot retry for NFSv4 delegation"
20644+ " for an internal rename\n");
20645+ iput(delegated);
20646+ }
20647+ } else
20648+ BUG(); /* no copy-up is done here; aufs_rename() copies src up to btgt before calling do_rename() */
20649+
20650+ if (!err && a->h_dst)
20651+ /* it will be set to dinfo later */
20652+ dget(a->h_dst); /* consumed by au_ren_refresh(), or put at out_rename in do_rename() */
20653+
20654+ return err;
20655+}
20656+
20657+/* cf. aufs_rmdir(); removes the whtmp dir a->h_dst either directly by au_whtmp_rmdir() or via au_whtmp_kick_rmdir(); always returns 0 */
20658+static int au_ren_del_whtmp(struct au_ren_args *a)
20659+{
20660+ int err;
20661+ struct inode *dir;
20662+
20663+ dir = a->dst_dir;
20664+ SiMustAnyLock(dir->i_sb);
20665+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
20666+ au_sbi(dir->i_sb)->si_dirwh)
20667+ || au_test_fs_remote(a->h_dst->d_sb)) {
20668+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
20669+ if (unlikely(err))
20670+ pr_warn("failed removing whtmp dir %pd (%d), "
20671+ "ignored.\n", a->h_dst, err);
20672+ } else {
20673+ au_nhash_wh_free(&a->thargs->whlist);
20674+ a->thargs->whlist = a->whlist; /* hand the whiteout list over to thargs */
20675+ a->whlist.nh_num = 0;
20676+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs); /* presumably takes over a->thargs, hence the NULL below -- TODO confirm */
20677+ dput(a->h_dst);
20678+ a->thargs = NULL;
20679+ }
20680+
20681+ return 0;
20682+}
20683+
20684+/* make it 'opaque' dir. */
20685+static int au_ren_do_diropq(struct au_ren_args *a, int idx)
20686+{
20687+ int err;
20688+ struct dentry *d, *diropq;
20689+#define src_or_dst(member) a->sd[idx].member
20690+
20691+ err = 0;
20692+ d = src_or_dst(dentry); /* {src,dst}_dentry */
20693+ src_or_dst(bdiropq) = au_dbdiropq(d);
20694+ src_or_dst(hinode) = au_hi(src_or_dst(inode), a->btgt);
20695+ au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
20696+ diropq = au_diropq_create(d, a->btgt);
20697+ au_hn_inode_unlock(src_or_dst(hinode));
20698+ if (IS_ERR(diropq))
20699+ err = PTR_ERR(diropq);
20700+ else
20701+ dput(diropq);
20702+
20703+#undef src_or_dst_
20704+ return err;
20705+}
20706+/* after the lower rename, create diropq on a->btgt for src (and for dst on exchange) when the conditions below hold; sets DIROPQ_SRC/DIROPQ_DST; returns 0 or a negative errno */
20707+static int au_ren_diropq(struct au_ren_args *a)
20708+{
20709+ int err;
20710+ unsigned char always;
20711+ struct dentry *d;
20712+
20713+ err = 0;
20714+ d = a->dst_dentry; /* already renamed on the branch */
20715+ always = !!au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ);
20716+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
20717+ && a->btgt != au_dbdiropq(a->src_dentry)
20718+ && (a->dst_wh_dentry
20719+ || a->btgt <= au_dbdiropq(d)
20720+ /* hide the lower to keep xino */
20721+ /* the lowers may not be a dir, but we hide them anyway */
20722+ || a->btgt < au_dbbot(d)
20723+ || always)) {
20724+ AuDbg("here\n");
20725+ err = au_ren_do_diropq(a, AuSRC);
20726+ if (unlikely(err))
20727+ goto out;
20728+ au_fset_ren(a->auren_flags, DIROPQ_SRC);
20729+ }
20730+ if (!a->exchange)
20731+ goto out; /* success */
20732+
20733+ d = a->src_dentry; /* already renamed on the branch */
20734+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)
20735+ && a->btgt != au_dbdiropq(a->dst_dentry)
20736+ && (a->btgt < au_dbdiropq(d)
20737+ || a->btgt < au_dbbot(d)
20738+ || always)) {
20739+ AuDbgDentry(a->src_dentry);
20740+ AuDbgDentry(a->dst_dentry);
20741+ err = au_ren_do_diropq(a, AuDST);
20742+ if (unlikely(err))
20743+ goto out_rev_src;
20744+ au_fset_ren(a->auren_flags, DIROPQ_DST);
20745+ }
20746+ goto out; /* success */
20747+
20748+out_rev_src:
20749+ AuDbg("err %d, reverting src\n", err);
20750+ au_ren_rev_diropq(err, a); /* DIROPQ_DST is not set here, so only a src diropq is removed */
20751+out:
20752+ return err;
20753+}
20754+/* the rename proper on branch a->btgt: prepare whtmp/whiteouts, rename, make dirs opaque, update ctime and clean up; a failure unwinds through the out_* labels in reverse order */
20755+static int do_rename(struct au_ren_args *a)
20756+{
20757+ int err;
20758+ struct dentry *d, *h_d;
20759+
20760+ if (!a->exchange) {
20761+ /* prepare workqueue args for asynchronous rmdir */
20762+ h_d = a->dst_h_dentry;
20763+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)
20764+ && d_is_positive(h_d)) {
20765+ err = -ENOMEM;
20766+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb,
20767+ GFP_NOFS);
20768+ if (unlikely(!a->thargs))
20769+ goto out;
20770+ a->h_dst = dget(h_d);
20771+ }
20772+
20773+ /* create whiteout for src_dentry */
20774+ if (au_ftest_ren(a->auren_flags, WHSRC)) {
20775+ a->src_bwh = au_dbwh(a->src_dentry); /* saved for au_ren_rev_whsrc() */
20776+ AuDebugOn(a->src_bwh >= 0);
20777+ a->src_wh_dentry = au_wh_create(a->src_dentry, a->btgt,
20778+ a->src_h_parent);
20779+ err = PTR_ERR(a->src_wh_dentry);
20780+ if (IS_ERR(a->src_wh_dentry))
20781+ goto out_thargs;
20782+ }
20783+
20784+ /* lookup whiteout for dentry */
20785+ if (au_ftest_ren(a->auren_flags, WHDST)) {
20786+ h_d = au_wh_lkup(a->dst_h_parent,
20787+ &a->dst_dentry->d_name, a->br);
20788+ err = PTR_ERR(h_d);
20789+ if (IS_ERR(h_d))
20790+ goto out_whsrc;
20791+ if (d_is_negative(h_d))
20792+ dput(h_d);
20793+ else
20794+ a->dst_wh_dentry = h_d;
20795+ }
20796+
20797+ /* rename dentry to tmpwh */
20798+ if (a->thargs) {
20799+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
20800+ if (unlikely(err))
20801+ goto out_whdst;
20802+
20803+ d = a->dst_dentry;
20804+ au_set_h_dptr(d, a->btgt, NULL);
20805+ err = au_lkup_neg(d, a->btgt, /*wh*/0);
20806+ if (unlikely(err))
20807+ goto out_whtmp;
20808+ a->dst_h_dentry = au_h_dptr(d, a->btgt); /* the lower dentry set by au_lkup_neg() */
20809+ }
20810+ }
20811+
20812+ BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_btop != a->btgt);
20813+
20814+ /* rename by vfs_rename or cpup */
20815+ err = au_ren_or_cpup(a);
20816+ if (unlikely(err))
20817+ /* leave the copied-up one */
20818+ goto out_whtmp;
20819+
20820+ /* make dir opaque */
20821+ err = au_ren_diropq(a);
20822+ if (unlikely(err))
20823+ goto out_rename;
20824+
20825+ /* update target timestamps */
20826+ if (a->exchange) {
20827+ AuDebugOn(au_dbtop(a->dst_dentry) != a->btgt);
20828+ a->h_path.dentry = au_h_dptr(a->dst_dentry, a->btgt);
20829+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
20830+ a->dst_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
20831+ }
20832+ AuDebugOn(au_dbtop(a->src_dentry) != a->btgt);
20833+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
20834+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
20835+ a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
20836+
20837+ if (!a->exchange) {
20838+ /* remove whiteout for dentry */
20839+ if (a->dst_wh_dentry) {
20840+ a->h_path.dentry = a->dst_wh_dentry;
20841+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
20842+ a->dst_dentry);
20843+ if (unlikely(err))
20844+ goto out_diropq;
20845+ }
20846+
20847+ /* remove whtmp */
20848+ if (a->thargs)
20849+ au_ren_del_whtmp(a); /* ignore this error */
20850+
20851+ au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
20852+ }
20853+ err = 0;
20854+ goto out_success;
20855+
20856+out_diropq:
20857+ au_ren_rev_diropq(err, a);
20858+out_rename:
20859+ au_ren_rev_rename(err, a);
20860+ dput(a->h_dst); /* drops the extra reference taken in au_ren_or_cpup() */
20861+out_whtmp:
20862+ if (a->thargs)
20863+ au_ren_rev_whtmp(err, a);
20864+out_whdst:
20865+ dput(a->dst_wh_dentry);
20866+ a->dst_wh_dentry = NULL; /* so that the dput() at out_success does not put it twice */
20867+out_whsrc:
20868+ if (a->src_wh_dentry)
20869+ au_ren_rev_whsrc(err, a);
20870+out_success: /* shared tail: the error paths above fall through to here too */
20871+ dput(a->src_wh_dentry);
20872+ dput(a->dst_wh_dentry);
20873+out_thargs:
20874+ if (a->thargs) {
20875+ dput(a->h_dst);
20876+ au_whtmp_rmdir_free(a->thargs);
20877+ a->thargs = NULL;
20878+ }
20879+out:
20880+ return err;
20881+}
20882+
20883+/* ---------------------------------------------------------------------- */
20884+
20885+/*
20886+ * test if @dentry dir can be rename destination or not.
20887+ * success means, it is a logically empty dir.
20888+ */
20889+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
20890+{
20891+ return au_test_empty(dentry, whlist); /* thin wrapper: the result of au_test_empty() is returned as it is */
20892+}
20893+
20894+/*
20895+ * test if @dentry dir can be rename source or not.
20896+ * if it can, return 0. -ENOTEMPTY from the emptiness tests is returned as -EXDEV.
20897+ * success means,
20898+ * - it is a logically empty dir.
20899+ * - or, it exists on writable branch and has no children including whiteouts
20900+ * on the lower branch.
20901+ */
20902+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
20903+{
20904+ int err;
20905+ unsigned int rdhash;
20906+ aufs_bindex_t btop;
20907+
20908+ btop = au_dbtop(dentry);
20909+ if (btop != btgt) { /* the dir is not on the target branch: it must be logically empty */
20910+ struct au_nhash whlist;
20911+
20912+ SiMustAnyLock(dentry->d_sb);
20913+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
20914+ if (!rdhash)
20915+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
20916+ dentry));
20917+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
20918+ if (unlikely(err))
20919+ goto out;
20920+ err = au_test_empty(dentry, &whlist);
20921+ au_nhash_wh_free(&whlist); /* the list is local and only needed for the test */
20922+ goto out;
20923+ }
20924+
20925+ if (btop == au_dbtaildir(dentry))
20926+ return 0; /* success */
20927+
20928+ err = au_test_empty_lower(dentry);
20929+
20930+out:
20931+ if (err == -ENOTEMPTY) {
20932+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
20933+ " is not supported\n");
20934+ err = -EXDEV;
20935+ }
20936+ return err;
20937+}
20938+
20939+/* test if dst (when it is an existing dir) and src (when it is a dir) may be renamed; side effect: sets whlist and {src,dst}_h_dentry */
20940+static int au_ren_may_dir(struct au_ren_args *a)
20941+{
20942+ int err;
20943+ unsigned int rdhash;
20944+ struct dentry *d;
20945+
20946+ d = a->dst_dentry;
20947+ SiMustAnyLock(d->d_sb);
20948+
20949+ err = 0;
20950+ if (au_ftest_ren(a->auren_flags, ISDIR_DST) && a->dst_inode) {
20951+ rdhash = au_sbi(d->d_sb)->si_rdhash;
20952+ if (!rdhash)
20953+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
20954+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
20955+ if (unlikely(err))
20956+ goto out;
20957+
20958+ if (!a->exchange) {
20959+ au_set_dbtop(d, a->dst_btop); /* the emptiness test runs with the saved dst_btop as top */
20960+ err = may_rename_dstdir(d, &a->whlist);
20961+ au_set_dbtop(d, a->btgt); /* top is set to btgt afterwards */
20962+ } else
20963+ err = may_rename_srcdir(d, a->btgt); /* on exchange, dst is tested like a rename source */
20964+ }
20965+ a->dst_h_dentry = au_h_dptr(d, au_dbtop(d));
20966+ if (unlikely(err))
20967+ goto out;
20968+
20969+ d = a->src_dentry;
20970+ a->src_h_dentry = au_h_dptr(d, au_dbtop(d));
20971+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
20972+ err = may_rename_srcdir(d, a->btgt);
20973+ if (unlikely(err)) {
20974+ au_nhash_wh_free(&a->whlist);
20975+ a->whlist.nh_num = 0;
20976+ }
20977+ }
20978+out:
20979+ return err;
20980+}
20981+
20982+/* ---------------------------------------------------------------------- */
20983+
20984+/*
20985+ * simple tests for rename.
20986+ * following the checks in vfs, plus the parent-child relationship.
20987+ */
20988+static int au_may_ren(struct au_ren_args *a)
20989+{
20990+ int err, isdir;
20991+ struct inode *h_inode;
20992+
20993+ if (a->src_btop == a->btgt) {
20994+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
20995+ au_ftest_ren(a->auren_flags, ISDIR_SRC));
20996+ if (unlikely(err))
20997+ goto out;
20998+ err = -EINVAL;
20999+ if (unlikely(a->src_h_dentry == a->h_trap)) /* h_trap is the vfsub_lock_rename() result */
21000+ goto out;
21001+ }
21002+
21003+ err = 0;
21004+ if (a->dst_btop != a->btgt) /* dst is not on the target branch: done, err is 0 */
21005+ goto out;
21006+
21007+ err = -ENOTEMPTY;
21008+ if (unlikely(a->dst_h_dentry == a->h_trap))
21009+ goto out;
21010+
21011+ err = -EIO; /* kept when the aufs dentry and the lower dentry disagree below */
21012+ isdir = !!au_ftest_ren(a->auren_flags, ISDIR_DST);
21013+ if (d_really_is_negative(a->dst_dentry)) {
21014+ if (d_is_negative(a->dst_h_dentry))
21015+ err = au_may_add(a->dst_dentry, a->btgt,
21016+ a->dst_h_parent, isdir);
21017+ } else {
21018+ if (unlikely(d_is_negative(a->dst_h_dentry)))
21019+ goto out;
21020+ h_inode = d_inode(a->dst_h_dentry);
21021+ if (h_inode->i_nlink)
21022+ err = au_may_del(a->dst_dentry, a->btgt,
21023+ a->dst_h_parent, isdir);
21024+ }
21025+
21026+out:
21027+ if (unlikely(err == -ENOENT || err == -EEXIST)) /* these two are reported as -EIO */
21028+ err = -EIO;
21029+ AuTraceErr(err);
21030+ return err;
21031+}
21032+
21033+/* ---------------------------------------------------------------------- */
21034+
21035+/*
21036+ * locking order
21037+ * (VFS)
21038+ * - src_dir and dir by lock_rename()
21039+ * - inode if it exists
21040+ * (aufs)
21041+ * - lock all
21042+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
21043+ * + si_read_lock
21044+ * + di_write_lock2_child()
21045+ * + di_write_lock_child()
21046+ * + ii_write_lock_child()
21047+ * + di_write_lock_child2()
21048+ * + ii_write_lock_child2()
21049+ * + src_parent and parent
21050+ * + di_write_lock_parent()
21051+ * + ii_write_lock_parent()
21052+ * + di_write_lock_parent2()
21053+ * + ii_write_lock_parent2()
21054+ * + lower src_dir and dir by vfsub_lock_rename()
21055+ * + verify every relationship between child and parent. if any
21056+ * of them fails, unlock all and return -EBUSY.
21057+ */
21058+static void au_ren_unlock(struct au_ren_args *a)
21059+{
21060+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
21061+ a->dst_h_parent, a->dst_hdir);
21062+ if (au_ftest_ren(a->auren_flags, MNT_WRITE)) /* set by au_ren_lock() after vfsub_mnt_want_write() succeeded */
21063+ vfsub_mnt_drop_write(au_br_mnt(a->br));
21064+}
21065+/* get write access to the branch, lock both lower parent dirs and verify the lower parent/child relations; returns 0, the vfsub_mnt_want_write() error, or au_busy_or_stale() after unlocking */
21066+static int au_ren_lock(struct au_ren_args *a)
21067+{
21068+ int err;
21069+ unsigned int udba;
21070+
21071+ err = 0;
21072+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
21073+ a->src_hdir = au_hi(a->src_dir, a->btgt);
21074+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
21075+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
21076+
21077+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
21078+ if (unlikely(err))
21079+ goto out;
21080+ au_fset_ren(a->auren_flags, MNT_WRITE); /* tells au_ren_unlock() to drop the write access */
21081+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
21082+ a->dst_h_parent, a->dst_hdir);
21083+ udba = au_opt_udba(a->src_dentry->d_sb);
21084+ if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
21085+ || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
21086+ err = au_busy_or_stale();
21087+ if (!err && au_dbtop(a->src_dentry) == a->btgt)
21088+ err = au_h_verify(a->src_h_dentry, udba,
21089+ d_inode(a->src_h_parent), a->src_h_parent,
21090+ a->br);
21091+ if (!err && au_dbtop(a->dst_dentry) == a->btgt)
21092+ err = au_h_verify(a->dst_h_dentry, udba,
21093+ d_inode(a->dst_h_parent), a->dst_h_parent,
21094+ a->br);
21095+ if (!err)
21096+ goto out; /* success */
21097+
21098+ err = au_busy_or_stale(); /* whatever the verification returned is replaced */
21099+ au_ren_unlock(a);
21100+
21101+out:
21102+ return err;
21103+}
21104+
21105+/* ---------------------------------------------------------------------- */
21106+/* after a successful rename: bump i_version of the aufs parent dir(s) and update them by au_cpup_attr_*() and au_dir_ts() */
21107+static void au_ren_refresh_dir(struct au_ren_args *a)
21108+{
21109+ struct inode *dir;
21110+
21111+ dir = a->dst_dir;
21112+ dir->i_version++;
21113+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
21114+ /* is this updating defined in POSIX? */
21115+ au_cpup_attr_timesizes(a->src_inode);
21116+ au_cpup_attr_nlink(dir, /*force*/1);
21117+ }
21118+ au_dir_ts(dir, a->btgt);
21119+
21120+ if (a->exchange) { /* on exchange, do the same for src_dir with the dst inode */
21121+ dir = a->src_dir;
21122+ dir->i_version++;
21123+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)) {
21124+ /* is this updating defined in POSIX? */
21125+ au_cpup_attr_timesizes(a->dst_inode);
21126+ au_cpup_attr_nlink(dir, /*force*/1);
21127+ }
21128+ au_dir_ts(dir, a->btgt);
21129+ }
21130+
21131+ if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
21132+ return;
21133+
21134+ dir = a->src_dir;
21135+ dir->i_version++;
21136+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC))
21137+ au_cpup_attr_nlink(dir, /*force*/1);
21138+ au_dir_ts(dir, a->btgt);
21139+}
21140+/* after a successful rename: d_drop() both aufs dentries and unset the lower dentries/inodes which are not on a->btgt */
21141+static void au_ren_refresh(struct au_ren_args *a)
21142+{
21143+ aufs_bindex_t bbot, bindex;
21144+ struct dentry *d, *h_d;
21145+ struct inode *i, *h_i;
21146+ struct super_block *sb;
21147+
21148+ d = a->dst_dentry;
21149+ d_drop(d);
21150+ if (a->h_dst)
21151+ /* already dget-ed by au_ren_or_cpup() */
21152+ au_set_h_dptr(d, a->btgt, a->h_dst);
21153+
21154+ i = a->dst_inode;
21155+ if (i) {
21156+ if (!a->exchange) {
21157+ if (!au_ftest_ren(a->auren_flags, ISDIR_DST))
21158+ vfsub_drop_nlink(i);
21159+ else {
21160+ vfsub_dead_dir(i);
21161+ au_cpup_attr_timesizes(i);
21162+ }
21163+ au_update_dbrange(d, /*do_put_zero*/1);
21164+ } else
21165+ au_cpup_attr_nlink(i, /*force*/1);
21166+ } else { /* dst was negative: unset every lower dentry except the one on btgt */
21167+ bbot = a->btgt;
21168+ for (bindex = au_dbtop(d); bindex < bbot; bindex++)
21169+ au_set_h_dptr(d, bindex, NULL);
21170+ bbot = au_dbbot(d);
21171+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++)
21172+ au_set_h_dptr(d, bindex, NULL);
21173+ au_update_dbrange(d, /*do_put_zero*/0);
21174+ }
21175+
21176+ d = a->src_dentry;
21177+ if (!a->exchange) {
21178+ au_set_dbwh(d, -1);
21179+ bbot = au_dbbot(d);
21180+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) { /* unset the lower dentries after btgt */
21181+ h_d = au_h_dptr(d, bindex);
21182+ if (h_d)
21183+ au_set_h_dptr(d, bindex, NULL);
21184+ }
21185+ au_set_dbbot(d, a->btgt);
21186+
21187+ sb = d->d_sb;
21188+ i = a->src_inode;
21189+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
21190+ return; /* success */
21191+
21192+ bbot = au_ibbot(i);
21193+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) { /* likewise for the lower inodes and their xino entries */
21194+ h_i = au_h_iptr(i, bindex);
21195+ if (h_i) {
21196+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
21197+ /* ignore this error */
21198+ au_set_h_iptr(i, bindex, NULL, 0);
21199+ }
21200+ }
21201+ au_set_ibbot(i, a->btgt);
21202+ }
21203+ d_drop(a->src_dentry);
21204+}
21205+
21206+/* ---------------------------------------------------------------------- */
21207+
21208+/* mainly for link(2) and rename(2): returns @btgt, or -1 when that branch is read-only or its index is larger than the parent's diropq or @dentry's whiteout branch */
21209+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
21210+{
21211+ aufs_bindex_t bdiropq, bwh;
21212+ struct dentry *parent;
21213+ struct au_branch *br;
21214+
21215+ parent = dentry->d_parent;
21216+ IMustLock(d_inode(parent)); /* dir is locked */
21217+
21218+ bdiropq = au_dbdiropq(parent);
21219+ bwh = au_dbwh(dentry);
21220+ br = au_sbr(dentry->d_sb, btgt);
21221+ if (au_br_rdonly(br)
21222+ || (0 <= bdiropq && bdiropq < btgt)
21223+ || (0 <= bwh && bwh < btgt))
21224+ btgt = -1; /* this branch cannot be used */
21225+
21226+ AuDbg("btgt %d\n", btgt);
21227+ return btgt;
21228+}
21229+
21230+/* sets src_btop, dst_btop and btgt; returns the au_wr_dir() result, which is also what is stored in btgt (the caller treats a negative value as an error) */
21231+static int au_ren_wbr(struct au_ren_args *a)
21232+{
21233+ int err;
21234+ struct au_wr_dir_args wr_dir_args = {
21235+ /* .force_btgt = -1, */
21236+ .flags = AuWrDir_ADD_ENTRY
21237+ };
21238+
21239+ a->src_btop = au_dbtop(a->src_dentry);
21240+ a->dst_btop = au_dbtop(a->dst_dentry);
21241+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
21242+ || au_ftest_ren(a->auren_flags, ISDIR_DST))
21243+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
21244+ wr_dir_args.force_btgt = a->src_btop;
21245+ if (a->dst_inode && a->dst_btop < a->src_btop) /* prefer the smaller top index of the two */
21246+ wr_dir_args.force_btgt = a->dst_btop;
21247+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
21248+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
21249+ a->btgt = err;
21250+ if (a->exchange)
21251+ au_update_dbtop(a->dst_dentry);
21252+
21253+ return err;
21254+}
21255+/* store the timestamps of the lower parent dir(s), and of the lower children when src is a dir or on exchange, so that au_ren_rev_dt() can revert them */
21256+static void au_ren_dt(struct au_ren_args *a)
21257+{
21258+ a->h_path.dentry = a->src_h_parent;
21259+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
21260+ if (!au_ftest_ren(a->auren_flags, ISSAMEDIR)) {
21261+ a->h_path.dentry = a->dst_h_parent;
21262+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
21263+ }
21264+
21265+ au_fclr_ren(a->auren_flags, DT_DSTDIR);
21266+ if (!au_ftest_ren(a->auren_flags, ISDIR_SRC)
21267+ && !a->exchange)
21268+ return;
21269+
21270+ a->h_path.dentry = a->src_h_dentry;
21271+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
21272+ if (d_is_positive(a->dst_h_dentry)) {
21273+ au_fset_ren(a->auren_flags, DT_DSTDIR); /* remember that dst_dt[AuCHILD] is valid */
21274+ a->h_path.dentry = a->dst_h_dentry;
21275+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
21276+ }
21277+}
21278+/* revert the timestamps stored by au_ren_dt(); the child ones only when src is a dir and @err is not -EIO */
21279+static void au_ren_rev_dt(int err, struct au_ren_args *a)
21280+{
21281+ struct dentry *h_d;
21282+ struct inode *h_inode;
21283+
21284+ au_dtime_revert(a->src_dt + AuPARENT);
21285+ if (!au_ftest_ren(a->auren_flags, ISSAMEDIR))
21286+ au_dtime_revert(a->dst_dt + AuPARENT);
21287+
21288+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC) && err != -EIO) {
21289+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
21290+ h_inode = d_inode(h_d);
21291+ inode_lock_nested(h_inode, AuLsc_I_CHILD); /* the child inode is locked around its revert */
21292+ au_dtime_revert(a->src_dt + AuCHILD);
21293+ inode_unlock(h_inode);
21294+
21295+ if (au_ftest_ren(a->auren_flags, DT_DSTDIR)) {
21296+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
21297+ h_inode = d_inode(h_d);
21298+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
21299+ au_dtime_revert(a->dst_dt + AuCHILD);
21300+ inode_unlock(h_inode);
21301+ }
21302+ }
21303+}
21304+
21305+/* ---------------------------------------------------------------------- */
21306+/* rename(2) entry of aufs (presumably the ->rename() inode operation): renames or exchanges on the single branch a->btgt, copying-up src (and dst on exchange) when needed; RENAME_WHITEOUT gets -EINVAL; returns 0 or a negative errno */
21307+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
21308+ struct inode *_dst_dir, struct dentry *_dst_dentry,
21309+ unsigned int _flags)
21310+{
21311+ int err, lock_flags;
21312+ /* reduce stack space */
21313+ struct au_ren_args *a;
21314+ struct au_pin pin;
21315+
21316+ AuDbg("%pd, %pd, 0x%x\n", _src_dentry, _dst_dentry, _flags);
21317+ IMustLock(_src_dir);
21318+ IMustLock(_dst_dir);
21319+
21320+ err = -EINVAL;
21321+ if (unlikely(_flags & RENAME_WHITEOUT))
21322+ goto out;
21323+
21324+ err = -ENOMEM;
21325+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
21326+ a = kzalloc(sizeof(*a), GFP_NOFS);
21327+ if (unlikely(!a))
21328+ goto out;
21329+
21330+ a->flags = _flags;
21331+ a->exchange = _flags & RENAME_EXCHANGE;
21332+ a->src_dir = _src_dir;
21333+ a->src_dentry = _src_dentry;
21334+ a->src_inode = NULL;
21335+ if (d_really_is_positive(a->src_dentry))
21336+ a->src_inode = d_inode(a->src_dentry);
21337+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
21338+ a->dst_dir = _dst_dir;
21339+ a->dst_dentry = _dst_dentry;
21340+ a->dst_inode = NULL;
21341+ if (d_really_is_positive(a->dst_dentry))
21342+ a->dst_inode = d_inode(a->dst_dentry);
21343+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
21344+ if (a->dst_inode) {
21345+ /*
21346+ * if EXCHANGE && src is non-dir && dst is dir,
21347+ * dst is not locked.
21348+ */
21349+ /* IMustLock(a->dst_inode); */
21350+ au_igrab(a->dst_inode); /* paired with the iput() at out_free */
21351+ }
21352+
21353+ err = -ENOTDIR;
21354+ lock_flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
21355+ if (d_is_dir(a->src_dentry)) {
21356+ au_fset_ren(a->auren_flags, ISDIR_SRC);
21357+ if (unlikely(!a->exchange
21358+ && d_really_is_positive(a->dst_dentry)
21359+ && !d_is_dir(a->dst_dentry)))
21360+ goto out_free;
21361+ lock_flags |= AuLock_DIRS;
21362+ }
21363+ if (a->dst_inode && d_is_dir(a->dst_dentry)) {
21364+ au_fset_ren(a->auren_flags, ISDIR_DST);
21365+ if (unlikely(!a->exchange
21366+ && d_really_is_positive(a->src_dentry)
21367+ && !d_is_dir(a->src_dentry)))
21368+ goto out_free;
21369+ lock_flags |= AuLock_DIRS;
21370+ }
21371+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, lock_flags);
21372+ if (unlikely(err))
21373+ goto out_free;
21374+
21375+ err = au_d_hashed_positive(a->src_dentry);
21376+ if (unlikely(err))
21377+ goto out_unlock;
21378+ err = -ENOENT;
21379+ if (a->dst_inode) {
21380+ /*
21381+ * If it is a dir, VFS unhash it before this
21382+ * function. It means we cannot rely upon d_unhashed().
21383+ */
21384+ if (unlikely(!a->dst_inode->i_nlink))
21385+ goto out_unlock;
21386+ if (!au_ftest_ren(a->auren_flags, ISDIR_DST)) {
21387+ err = au_d_hashed_positive(a->dst_dentry);
21388+ if (unlikely(err && !a->exchange))
21389+ goto out_unlock;
21390+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
21391+ goto out_unlock;
21392+ } else if (unlikely(d_unhashed(a->dst_dentry)))
21393+ goto out_unlock;
21394+
21395+ /*
21396+ * is it possible?
21397+ * yes, it happened (in linux-3.3-rcN) but I don't know why.
21398+ * there may exist a problem somewhere else.
21399+ */
21400+ err = -EINVAL;
21401+ if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
21402+ goto out_unlock;
21403+
21404+ au_fset_ren(a->auren_flags, ISSAMEDIR); /* temporary */
21405+ di_write_lock_parent(a->dst_parent);
21406+
21407+ /* which branch we process */
21408+ err = au_ren_wbr(a);
21409+ if (unlikely(err < 0))
21410+ goto out_parent;
21411+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
21412+ a->h_path.mnt = au_br_mnt(a->br);
21413+
21414+ /* are they available to be renamed */
21415+ err = au_ren_may_dir(a);
21416+ if (unlikely(err))
21417+ goto out_children;
21418+
21419+ /* prepare the writable parent dir on the same branch */
21420+ if (a->dst_btop == a->btgt) {
21421+ au_fset_ren(a->auren_flags, WHDST);
21422+ } else {
21423+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
21424+ if (unlikely(err))
21425+ goto out_children;
21426+ }
21427+
21428+ err = 0;
21429+ if (!a->exchange) {
21430+ if (a->src_dir != a->dst_dir) {
21431+ /*
21432+ * this temporary unlock is safe,
21433+ * because both dir->i_mutex are locked.
21434+ */
21435+ di_write_unlock(a->dst_parent);
21436+ di_write_lock_parent(a->src_parent);
21437+ err = au_wr_dir_need_wh(a->src_dentry,
21438+ au_ftest_ren(a->auren_flags,
21439+ ISDIR_SRC),
21440+ &a->btgt);
21441+ di_write_unlock(a->src_parent);
21442+ di_write_lock2_parent(a->src_parent, a->dst_parent,
21443+ /*isdir*/1);
21444+ au_fclr_ren(a->auren_flags, ISSAMEDIR);
21445+ } else
21446+ err = au_wr_dir_need_wh(a->src_dentry,
21447+ au_ftest_ren(a->auren_flags,
21448+ ISDIR_SRC),
21449+ &a->btgt);
21450+ }
21451+ if (unlikely(err < 0))
21452+ goto out_children;
21453+ if (err) /* a positive au_wr_dir_need_wh() result requests a whiteout for src */
21454+ au_fset_ren(a->auren_flags, WHSRC);
21455+
21456+ /* cpup src */
21457+ if (a->src_btop != a->btgt) {
21458+ err = au_pin(&pin, a->src_dentry, a->btgt,
21459+ au_opt_udba(a->src_dentry->d_sb),
21460+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
21461+ if (!err) {
21462+ struct au_cp_generic cpg = {
21463+ .dentry = a->src_dentry,
21464+ .bdst = a->btgt,
21465+ .bsrc = a->src_btop,
21466+ .len = -1,
21467+ .pin = &pin,
21468+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21469+ };
21470+ AuDebugOn(au_dbtop(a->src_dentry) != a->src_btop);
21471+ err = au_sio_cpup_simple(&cpg);
21472+ au_unpin(&pin);
21473+ }
21474+ if (unlikely(err))
21475+ goto out_children;
21476+ a->src_btop = a->btgt;
21477+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
21478+ if (!a->exchange)
21479+ au_fset_ren(a->auren_flags, WHSRC);
21480+ }
21481+
21482+ /* cpup dst */
21483+ if (a->exchange && a->dst_inode
21484+ && a->dst_btop != a->btgt) {
21485+ err = au_pin(&pin, a->dst_dentry, a->btgt,
21486+ au_opt_udba(a->dst_dentry->d_sb),
21487+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
21488+ if (!err) {
21489+ struct au_cp_generic cpg = {
21490+ .dentry = a->dst_dentry,
21491+ .bdst = a->btgt,
21492+ .bsrc = a->dst_btop,
21493+ .len = -1,
21494+ .pin = &pin,
21495+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21496+ };
21497+ err = au_sio_cpup_simple(&cpg);
21498+ au_unpin(&pin);
21499+ }
21500+ if (unlikely(err))
21501+ goto out_children;
21502+ a->dst_btop = a->btgt;
21503+ a->dst_h_dentry = au_h_dptr(a->dst_dentry, a->btgt);
21504+ }
21505+
21506+ /* lock them all */
21507+ err = au_ren_lock(a);
21508+ if (unlikely(err))
21509+ /* leave the copied-up one */
21510+ goto out_children;
21511+
21512+ if (!a->exchange) {
21513+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
21514+ err = au_may_ren(a);
21515+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
21516+ err = -ENAMETOOLONG;
21517+ if (unlikely(err))
21518+ goto out_hdir;
21519+ }
21520+
21521+ /* store timestamps to be revertible */
21522+ au_ren_dt(a);
21523+
21524+ /* here we go */
21525+ err = do_rename(a);
21526+ if (unlikely(err))
21527+ goto out_dt;
21528+
21529+ /* update dir attributes */
21530+ au_ren_refresh_dir(a);
21531+
21532+ /* dput/iput all lower dentries */
21533+ au_ren_refresh(a);
21534+
21535+ goto out_hdir; /* success */
21536+
21537+out_dt:
21538+ au_ren_rev_dt(err, a);
21539+out_hdir:
21540+ au_ren_unlock(a);
21541+out_children:
21542+ au_nhash_wh_free(&a->whlist);
21543+ if (err && a->dst_inode && a->dst_btop != a->btgt) {
21544+ AuDbg("btop %d, btgt %d\n", a->dst_btop, a->btgt);
21545+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
21546+ au_set_dbtop(a->dst_dentry, a->dst_btop);
21547+ }
21548+out_parent:
21549+ if (!err) { /* success: reflect the rename in the dcache */
21550+ if (!a->exchange)
21551+ d_move(a->src_dentry, a->dst_dentry);
21552+ else
21553+ d_exchange(a->src_dentry, a->dst_dentry);
21554+ } else {
21555+ au_update_dbtop(a->dst_dentry);
21556+ if (!a->dst_inode)
21557+ d_drop(a->dst_dentry);
21558+ }
21559+ if (au_ftest_ren(a->auren_flags, ISSAMEDIR)) /* ISSAMEDIR set: only dst_parent is unlocked here */
21560+ di_write_unlock(a->dst_parent);
21561+ else
21562+ di_write_unlock2(a->src_parent, a->dst_parent);
21563+out_unlock:
21564+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
21565+out_free:
21566+ iput(a->dst_inode);
21567+ if (a->thargs)
21568+ au_whtmp_rmdir_free(a->thargs);
21569+ au_delayed_kfree(a);
21570+out:
21571+ AuTraceErr(err);
21572+ return err;
21573+}
21574diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
21575--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
21576+++ linux/fs/aufs/Kconfig 2016-10-09 16:55:36.482701377 +0200
21577@@ -0,0 +1,185 @@
21578+config AUFS_FS
21579+ tristate "Aufs (Advanced multi layered unification filesystem) support"
21580+ help
21581+ Aufs is a stackable unification filesystem such as Unionfs,
21582+ which unifies several directories and provides a merged single
21583+ directory.
21584+ In the early days, aufs was entirely re-designed and
21585+ re-implemented Unionfs Version 1.x series. Introducing many
21586+ original ideas, approaches and improvements, it becomes totally
21587+ different from Unionfs while keeping the basic features.
21588+
21589+if AUFS_FS
21590+choice
21591+ prompt "Maximum number of branches"
21592+ default AUFS_BRANCH_MAX_127
21593+ help
21594+ Specifies the maximum number of branches (or member directories)
21595+ in a single aufs. The larger value consumes more system
21596+ resources and has a minor impact to performance.
21597+config AUFS_BRANCH_MAX_127
21598+ bool "127"
21599+ help
21600+ Specifies the maximum number of branches (or member directories)
21601+ in a single aufs. The larger value consumes more system
21602+ resources and has a minor impact to performance.
21603+config AUFS_BRANCH_MAX_511
21604+ bool "511"
21605+ help
21606+ Specifies the maximum number of branches (or member directories)
21607+ in a single aufs. The larger value consumes more system
21608+ resources and has a minor impact to performance.
21609+config AUFS_BRANCH_MAX_1023
21610+ bool "1023"
21611+ help
21612+ Specifies the maximum number of branches (or member directories)
21613+ in a single aufs. The larger value consumes more system
21614+ resources and has a minor impact to performance.
21615+config AUFS_BRANCH_MAX_32767
21616+ bool "32767"
21617+ help
21618+ Specifies the maximum number of branches (or member directories)
21619+ in a single aufs. The larger value consumes more system
21620+ resources and has a minor impact to performance.
21621+endchoice
21622+
21623+config AUFS_SBILIST
21624+ bool
21625+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
21626+ default y
21627+ help
21628+ Automatic configuration for internal use.
21629+ When aufs supports Magic SysRq or /proc, enabled automatically.
21630+
21631+config AUFS_HNOTIFY
21632+ bool "Detect direct branch access (bypassing aufs)"
21633+ help
21634+ If you want to modify files on branches directly, eg. bypassing aufs,
21635+ and want aufs to detect the changes of them fully, then enable this
21636+ option and use 'udba=notify' mount option.
21637+ Currently there is only one available configuration, "fsnotify".
21638+ It will have a negative impact to the performance.
21639+ See detail in aufs.5.
21640+
21641+choice
21642+ prompt "method" if AUFS_HNOTIFY
21643+ default AUFS_HFSNOTIFY
21644+config AUFS_HFSNOTIFY
21645+ bool "fsnotify"
21646+ select FSNOTIFY
21647+endchoice
21648+
21649+config AUFS_EXPORT
21650+ bool "NFS-exportable aufs"
21651+ depends on EXPORTFS
21652+ help
21653+ If you want to export your mounted aufs via NFS, then enable this
21654+ option. There are several requirements for this configuration.
21655+ See detail in aufs.5.
21656+
21657+config AUFS_INO_T_64
21658+ bool
21659+ depends on AUFS_EXPORT
21660+ depends on 64BIT && !(ALPHA || S390)
21661+ default y
21662+ help
21663+ Automatic configuration for internal use.
21664+ /* typedef unsigned long/int __kernel_ino_t */
21665+ /* alpha and s390x are int */
21666+
21667+config AUFS_XATTR
21668+ bool "support for XATTR/EA (including Security Labels)"
21669+ help
21670+ If your branch fs supports XATTR/EA and you want to make them
21671+ available in aufs too, then enable this option and specify the
21672+ branch attributes for EA.
21673+ See detail in aufs.5.
21674+
21675+config AUFS_FHSM
21676+ bool "File-based Hierarchical Storage Management"
21677+ help
21678+ Hierarchical Storage Management (or HSM) is a well-known feature
21679+ in the storage world. Aufs provides this feature as file-based
21680+ with multiple branches.
21681+ These multiple branches are prioritized, ie. the topmost one
21682+ should be the fastest drive and be used heavily.
21683+
21684+config AUFS_RDU
21685+ bool "Readdir in userspace"
21686+ help
21687+ Aufs has two methods to provide a merged view for a directory,
21688+ by a user-space library and by kernel-space natively. The latter
21689+ is always enabled but sometimes large and slow.
21690+ If you enable this option, install the library in aufs2-util
21691+ package, and set some environment variables for your readdir(3),
21692+ then the work will be handled in user-space which generally
21693+ shows better performance in most cases.
21694+ See detail in aufs.5.
21695+
21696+config AUFS_SHWH
21697+ bool "Show whiteouts"
21698+ help
21699+ If you want to make the whiteouts in aufs visible, then enable
21700+ this option and specify 'shwh' mount option. Although it may
21701+ sound like philosophy or something, technically it
21702+ simply shows the name of whiteout while keeping its behaviour.
21703+
21704+config AUFS_BR_RAMFS
21705+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
21706+ help
21707+ If you want to use ramfs as an aufs branch fs, then enable this
21708+ option. Generally tmpfs is recommended.
21709+ Aufs prohibited them to be a branch fs by default, because
21710+ initramfs becomes unusable after switch_root or something
21711+ generally. If you set initramfs as an aufs branch and boot your
21712+ system by switch_root, you will meet a problem easily since the
21713+ files in initramfs may be inaccessible.
21714+ Unless you are going to use ramfs as an aufs branch fs without
21715+ switch_root or something, leave it N.
21716+
21717+config AUFS_BR_FUSE
21718+ bool "Fuse fs as an aufs branch"
21719+ depends on FUSE_FS
21720+ select AUFS_POLL
21721+ help
21722+ If you want to use fuse-based userspace filesystem as an aufs
21723+ branch fs, then enable this option.
21724+ It implements the internal poll(2) operation which is
21725+ implemented by fuse only (currently).
21726+
21727+config AUFS_POLL
21728+ bool
21729+ help
21730+ Automatic configuration for internal use.
21731+
21732+config AUFS_BR_HFSPLUS
21733+ bool "Hfsplus as an aufs branch"
21734+ depends on HFSPLUS_FS
21735+ default y
21736+ help
21737+ If you want to use hfsplus fs as an aufs branch fs, then enable
21738+ this option. This option introduces a small overhead at
21739+ copying-up a file on hfsplus.
21740+
21741+config AUFS_BDEV_LOOP
21742+ bool
21743+ depends on BLK_DEV_LOOP
21744+ default y
21745+ help
21746+ Automatic configuration for internal use.
21747+ Convert =[ym] into =y.
21748+
21749+config AUFS_DEBUG
21750+ bool "Debug aufs"
21751+ help
21752+ Enable this to compile aufs internal debug code.
21753+ It will have a negative impact to the performance.
21754+
21755+config AUFS_MAGIC_SYSRQ
21756+ bool
21757+ depends on AUFS_DEBUG && MAGIC_SYSRQ
21758+ default y
21759+ help
21760+ Automatic configuration for internal use.
21761+ When aufs supports Magic SysRq, enabled automatically.
21762+endif
21763diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
21764--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
21765+++ linux/fs/aufs/loop.c 2016-10-09 16:55:38.889431135 +0200
21766@@ -0,0 +1,147 @@
21767+/*
21768+ * Copyright (C) 2005-2016 Junjiro R. Okajima
21769+ *
21770+ * This program, aufs is free software; you can redistribute it and/or modify
21771+ * it under the terms of the GNU General Public License as published by
21772+ * the Free Software Foundation; either version 2 of the License, or
21773+ * (at your option) any later version.
21774+ *
21775+ * This program is distributed in the hope that it will be useful,
21776+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21777+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21778+ * GNU General Public License for more details.
21779+ *
21780+ * You should have received a copy of the GNU General Public License
21781+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
21782+ */
21783+
21784+/*
21785+ * support for loopback block device as a branch
21786+ */
21787+
21788+#include "aufs.h"
21789+
21790+/* added into drivers/block/loop.c */
21791+static struct file *(*backing_file_func)(struct super_block *sb);
21792+
21793+/*
21794+ * test if two lower dentries have overlapping branches.
21795+ */
21796+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
21797+{
21798+ struct super_block *h_sb;
21799+ struct file *backing_file;
21800+
21801+ if (unlikely(!backing_file_func)) {
21802+ /* don't load "loop" module here */
21803+ backing_file_func = symbol_get(loop_backing_file);
21804+ if (unlikely(!backing_file_func))
21805+ /* "loop" module is not loaded */
21806+ return 0;
21807+ }
21808+
21809+ h_sb = h_adding->d_sb;
21810+ backing_file = backing_file_func(h_sb);
21811+ if (!backing_file)
21812+ return 0;
21813+
21814+ h_adding = backing_file->f_path.dentry;
21815+ /*
21816+ * h_adding can be local NFS.
21817+ * in this case aufs cannot detect the loop.
21818+ */
21819+ if (unlikely(h_adding->d_sb == sb))
21820+ return 1;
21821+ return !!au_test_subdir(h_adding, sb->s_root);
21822+}
21823+
21824+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
21825+int au_test_loopback_kthread(void)
21826+{
21827+ int ret;
21828+ struct task_struct *tsk = current;
21829+ char c, comm[sizeof(tsk->comm)];
21830+
21831+ ret = 0;
21832+ if (tsk->flags & PF_KTHREAD) {
21833+ get_task_comm(comm, tsk);
21834+ c = comm[4];
21835+ ret = ('0' <= c && c <= '9'
21836+ && !strncmp(comm, "loop", 4));
21837+ }
21838+
21839+ return ret;
21840+}
21841+
21842+/* ---------------------------------------------------------------------- */
21843+
21844+#define au_warn_loopback_step 16
21845+static int au_warn_loopback_nelem = au_warn_loopback_step;
21846+static unsigned long *au_warn_loopback_array;
21847+
21848+void au_warn_loopback(struct super_block *h_sb)
21849+{
21850+ int i, new_nelem;
21851+ unsigned long *a, magic;
21852+ static DEFINE_SPINLOCK(spin);
21853+
21854+ magic = h_sb->s_magic;
21855+ spin_lock(&spin);
21856+ a = au_warn_loopback_array;
21857+ for (i = 0; i < au_warn_loopback_nelem && a[i]; i++)
21858+ if (a[i] == magic) {
21859+ spin_unlock(&spin);
21860+ return;
21861+ }
21862+
21863+ /* h_sb is new to us, store it in the first free (zero) slot and print it */
21864+ if (i < au_warn_loopback_nelem) {
21865+ a[i] = magic;
21866+ goto pr;
21867+ }
21868+
21869+ /* no free slot is left, expand the array (new slots are zeroed) */
21870+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
21871+ a = au_kzrealloc(au_warn_loopback_array,
21872+ au_warn_loopback_nelem * sizeof(unsigned long),
21873+ new_nelem * sizeof(unsigned long), GFP_ATOMIC,
21874+ /*may_shrink*/0);
21875+ if (a) {
21876+ au_warn_loopback_nelem = new_nelem;
21877+ au_warn_loopback_array = a;
21878+ a[i] = magic;
21879+ goto pr;
21880+ }
21881+
21882+ spin_unlock(&spin);
21883+ AuWarn1("realloc failed, ignored\n");
21884+ return;
21885+
21886+pr:
21887+ spin_unlock(&spin);
21888+ pr_warn("you may want to try another patch for loopback file "
21889+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
21890+}
21891+
21892+int au_loopback_init(void)
21893+{
21894+ int err;
21895+ struct super_block *sb __maybe_unused;
21896+
21897+ BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
21898+
21899+ err = 0;
21900+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
21901+ sizeof(unsigned long), GFP_NOFS);
21902+ if (unlikely(!au_warn_loopback_array))
21903+ err = -ENOMEM;
21904+
21905+ return err;
21906+}
21907+
21908+void au_loopback_fin(void)
21909+{
21910+ if (backing_file_func)
21911+ symbol_put(loop_backing_file);
21912+ au_delayed_kfree(au_warn_loopback_array);
21913+}
21914diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
21915--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
21916+++ linux/fs/aufs/loop.h 2016-10-09 16:55:36.492701639 +0200
21917@@ -0,0 +1,52 @@
21918+/*
21919+ * Copyright (C) 2005-2016 Junjiro R. Okajima
21920+ *
21921+ * This program, aufs is free software; you can redistribute it and/or modify
21922+ * it under the terms of the GNU General Public License as published by
21923+ * the Free Software Foundation; either version 2 of the License, or
21924+ * (at your option) any later version.
21925+ *
21926+ * This program is distributed in the hope that it will be useful,
21927+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21928+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21929+ * GNU General Public License for more details.
21930+ *
21931+ * You should have received a copy of the GNU General Public License
21932+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
21933+ */
21934+
21935+/*
21936+ * support for loopback mount as a branch
21937+ */
21938+
21939+#ifndef __AUFS_LOOP_H__
21940+#define __AUFS_LOOP_H__
21941+
21942+#ifdef __KERNEL__
21943+
21944+struct dentry;
21945+struct super_block;
21946+
21947+#ifdef CONFIG_AUFS_BDEV_LOOP
21948+/* drivers/block/loop.c */
21949+struct file *loop_backing_file(struct super_block *sb);
21950+
21951+/* loop.c */
21952+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
21953+int au_test_loopback_kthread(void);
21954+void au_warn_loopback(struct super_block *h_sb);
21955+
21956+int au_loopback_init(void);
21957+void au_loopback_fin(void);
21958+#else
21959+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
21960+ struct dentry *h_adding)
21961+AuStubInt0(au_test_loopback_kthread, void)
21962+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
21963+
21964+AuStubInt0(au_loopback_init, void)
21965+AuStubVoid(au_loopback_fin, void)
21966+#endif /* CONFIG_AUFS_BDEV_LOOP */
21967+
21968+#endif /* __KERNEL__ */
21969+#endif /* __AUFS_LOOP_H__ */
21970diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
21971--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
21972+++ linux/fs/aufs/magic.mk 2016-10-09 16:55:36.492701639 +0200
21973@@ -0,0 +1,30 @@
21974+
21975+# defined in ${srctree}/fs/fuse/inode.c
21976+# tristate
21977+ifdef CONFIG_FUSE_FS
21978+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
21979+endif
21980+
21981+# defined in ${srctree}/fs/xfs/xfs_sb.h
21982+# tristate
21983+ifdef CONFIG_XFS_FS
21984+ccflags-y += -DXFS_SB_MAGIC=0x58465342
21985+endif
21986+
21987+# defined in ${srctree}/fs/configfs/mount.c
21988+# tristate
21989+ifdef CONFIG_CONFIGFS_FS
21990+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
21991+endif
21992+
21993+# defined in ${srctree}/fs/ubifs/ubifs.h
21994+# tristate
21995+ifdef CONFIG_UBIFS_FS
21996+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
21997+endif
21998+
21999+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
22000+# tristate
22001+ifdef CONFIG_HFSPLUS_FS
22002+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
22003+endif
22004diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
22005--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
22006+++ linux/fs/aufs/Makefile 2016-10-09 16:55:36.486034798 +0200
22007@@ -0,0 +1,44 @@
22008+
22009+include ${src}/magic.mk
22010+ifeq (${CONFIG_AUFS_FS},m)
22011+include ${src}/conf.mk
22012+endif
22013+-include ${src}/priv_def.mk
22014+
22015+# cf. include/linux/kernel.h
22016+# enable pr_debug
22017+ccflags-y += -DDEBUG
22018+# sparse requires the full pathname
22019+ifdef M
22020+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
22021+else
22022+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
22023+endif
22024+
22025+obj-$(CONFIG_AUFS_FS) += aufs.o
22026+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
22027+ wkq.o vfsub.o dcsub.o \
22028+ cpup.o whout.o wbr_policy.o \
22029+ dinfo.o dentry.o \
22030+ dynop.o \
22031+ finfo.o file.o f_op.o \
22032+ dir.o vdir.o \
22033+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
22034+ mvdown.o ioctl.o
22035+
22036+# all are boolean
22037+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
22038+aufs-$(CONFIG_SYSFS) += sysfs.o
22039+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
22040+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
22041+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
22042+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
22043+aufs-$(CONFIG_AUFS_EXPORT) += export.o
22044+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
22045+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
22046+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
22047+aufs-$(CONFIG_AUFS_POLL) += poll.o
22048+aufs-$(CONFIG_AUFS_RDU) += rdu.o
22049+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
22050+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
22051+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
22052diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
22053--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
22054+++ linux/fs/aufs/module.c 2016-10-09 16:55:38.889431135 +0200
22055@@ -0,0 +1,333 @@
22056+/*
22057+ * Copyright (C) 2005-2016 Junjiro R. Okajima
22058+ *
22059+ * This program, aufs is free software; you can redistribute it and/or modify
22060+ * it under the terms of the GNU General Public License as published by
22061+ * the Free Software Foundation; either version 2 of the License, or
22062+ * (at your option) any later version.
22063+ *
22064+ * This program is distributed in the hope that it will be useful,
22065+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22066+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22067+ * GNU General Public License for more details.
22068+ *
22069+ * You should have received a copy of the GNU General Public License
22070+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22071+ */
22072+
22073+/*
22074+ * module global variables and operations
22075+ */
22076+
22077+#include <linux/module.h>
22078+#include <linux/seq_file.h>
22079+#include "aufs.h"
22080+
22081+/* shrinkable realloc */
22082+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink)
22083+{
22084+ size_t sz;
22085+ int diff;
22086+
22087+ sz = 0;
22088+ diff = -1;
22089+ if (p) {
22090+#if 0 /* unused */
22091+ if (!new_sz) {
22092+ au_delayed_kfree(p);
22093+ p = NULL;
22094+ goto out;
22095+ }
22096+#else
22097+ AuDebugOn(!new_sz);
22098+#endif
22099+ sz = ksize(p);
22100+ diff = au_kmidx_sub(sz, new_sz);
22101+ }
22102+ if (sz && !diff)
22103+ goto out;
22104+
22105+ if (sz < new_sz)
22106+ /* expand or SLOB */
22107+ p = krealloc(p, new_sz, gfp);
22108+ else if (new_sz < sz && may_shrink) {
22109+ /* shrink */
22110+ void *q;
22111+
22112+ q = kmalloc(new_sz, gfp);
22113+ if (q) {
22114+ if (p) {
22115+ memcpy(q, p, new_sz);
22116+ au_delayed_kfree(p);
22117+ }
22118+ p = q;
22119+ } else
22120+ p = NULL;
22121+ }
22122+
22123+out:
22124+ return p;
22125+}
22126+
22127+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
22128+ int may_shrink)
22129+{
22130+ p = au_krealloc(p, new_sz, gfp, may_shrink);
22131+ if (p && new_sz > nused)
22132+ memset(p + nused, 0, new_sz - nused);
22133+ return p;
22134+}
22135+
22136+/* ---------------------------------------------------------------------- */
22137+/*
22138+ * aufs caches
22139+ */
22140+
22141+struct au_dfree au_dfree;
22142+
22143+/* delayed free */
22144+static void au_do_dfree(struct work_struct *work __maybe_unused)
22145+{
22146+ struct llist_head *head;
22147+ struct llist_node *node, *next;
22148+
22149+#define AU_CACHE_DFREE_DO_BODY(name, idx, lnode) do { \
22150+ head = &au_dfree.cache[AuCache_##idx].llist; \
22151+ node = llist_del_all(head); \
22152+ for (; node; node = next) { \
22153+ struct au_##name *p \
22154+ = llist_entry(node, struct au_##name, \
22155+ lnode); \
22156+ next = llist_next(node); \
22157+ au_cache_free_##name(p); \
22158+ } \
22159+ } while (0)
22160+
22161+ AU_CACHE_DFREE_DO_BODY(dinfo, DINFO, di_lnode);
22162+ AU_CACHE_DFREE_DO_BODY(icntnr, ICNTNR, lnode);
22163+ AU_CACHE_DFREE_DO_BODY(finfo, FINFO, fi_lnode);
22164+ AU_CACHE_DFREE_DO_BODY(vdir, VDIR, vd_lnode);
22165+ AU_CACHE_DFREE_DO_BODY(vdir_dehstr, DEHSTR, lnode);
22166+#ifdef CONFIG_AUFS_HNOTIFY
22167+ AU_CACHE_DFREE_DO_BODY(hnotify, HNOTIFY, hn_lnode);
22168+#endif
22169+
22170+#define AU_DFREE_DO_BODY(llist, func) do { \
22171+ node = llist_del_all(llist); \
22172+ for (; node; node = next) { \
22173+ next = llist_next(node); \
22174+ func(node); \
22175+ } \
22176+ } while (0)
22177+
22178+ AU_DFREE_DO_BODY(au_dfree.llist + AU_DFREE_KFREE, kfree);
22179+ AU_DFREE_DO_BODY(au_dfree.llist + AU_DFREE_FREE_PAGE, au_free_page);
22180+
22181+#undef AU_CACHE_DFREE_DO_BODY
22182+#undef AU_DFREE_DO_BODY
22183+}
22184+
22185+AU_CACHE_DFREE_FUNC(dinfo, DINFO, di_lnode);
22186+AU_CACHE_DFREE_FUNC(icntnr, ICNTNR, lnode);
22187+AU_CACHE_DFREE_FUNC(finfo, FINFO, fi_lnode);
22188+AU_CACHE_DFREE_FUNC(vdir, VDIR, vd_lnode);
22189+AU_CACHE_DFREE_FUNC(vdir_dehstr, DEHSTR, lnode);
22190+
22191+static void au_cache_fin(void)
22192+{
22193+ int i;
22194+ struct au_cache *cp;
22195+
22196+ /*
22197+ * Make sure all delayed rcu free inodes are flushed before we
22198+ * destroy cache.
22199+ */
22200+ rcu_barrier();
22201+
22202+ /* excluding AuCache_HNOTIFY */
22203+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
22204+ flush_delayed_work(&au_dfree.dwork);
22205+ for (i = 0; i < AuCache_HNOTIFY; i++) {
22206+ cp = au_dfree.cache + i;
22207+ AuDebugOn(!llist_empty(&cp->llist));
22208+ kmem_cache_destroy(cp->cache);
22209+ cp->cache = NULL;
22210+ }
22211+}
22212+
22213+static int __init au_cache_init(void)
22214+{
22215+ struct au_cache *cp;
22216+
22217+ cp = au_dfree.cache;
22218+ cp[AuCache_DINFO].cache = AuCacheCtor(au_dinfo, au_di_init_once);
22219+ if (cp[AuCache_DINFO].cache)
22220+ /* SLAB_DESTROY_BY_RCU */
22221+ cp[AuCache_ICNTNR].cache = AuCacheCtor(au_icntnr,
22222+ au_icntnr_init_once);
22223+ if (cp[AuCache_ICNTNR].cache)
22224+ cp[AuCache_FINFO].cache = AuCacheCtor(au_finfo,
22225+ au_fi_init_once);
22226+ if (cp[AuCache_FINFO].cache)
22227+ cp[AuCache_VDIR].cache = AuCache(au_vdir);
22228+ if (cp[AuCache_VDIR].cache)
22229+ cp[AuCache_DEHSTR].cache = AuCache(au_vdir_dehstr);
22230+ if (cp[AuCache_DEHSTR].cache)
22231+ return 0;
22232+
22233+ au_cache_fin();
22234+ return -ENOMEM;
22235+}
22236+
22237+/* ---------------------------------------------------------------------- */
22238+
22239+int au_dir_roflags;
22240+
22241+#ifdef CONFIG_AUFS_SBILIST
22242+/*
22243+ * iterate_supers_type() doesn't protect us from
22244+ * remounting (branch management)
22245+ */
22246+struct au_sphlhead au_sbilist;
22247+#endif
22248+
22249+/*
22250+ * functions for module interface.
22251+ */
22252+MODULE_LICENSE("GPL");
22253+/* MODULE_LICENSE("GPL v2"); */
22254+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
22255+MODULE_DESCRIPTION(AUFS_NAME
22256+ " -- Advanced multi layered unification filesystem");
22257+MODULE_VERSION(AUFS_VERSION);
22258+MODULE_ALIAS_FS(AUFS_NAME);
22259+
22260+/* this module parameter has no meaning when SYSFS is disabled */
22261+int sysaufs_brs = 1;
22262+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
22263+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
22264+
22265+/* this module parameter has no meaning when USER_NS is disabled */
22266+bool au_userns;
22267+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
22268+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
22269+
22270+/* ---------------------------------------------------------------------- */
22271+
22272+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
22273+
22274+int au_seq_path(struct seq_file *seq, struct path *path)
22275+{
22276+ int err;
22277+
22278+ err = seq_path(seq, path, au_esc_chars);
22279+ if (err > 0)
22280+ err = 0;
22281+ else if (err < 0)
22282+ err = -ENOMEM;
22283+
22284+ return err;
22285+}
22286+
22287+/* ---------------------------------------------------------------------- */
22288+
22289+static int __init aufs_init(void)
22290+{
22291+ int err, i;
22292+ char *p;
22293+ struct au_cache *cp;
22294+
22295+ p = au_esc_chars;
22296+ for (i = 1; i <= ' '; i++)
22297+ *p++ = i;
22298+ *p++ = '\\';
22299+ *p++ = '\x7f';
22300+ *p = 0;
22301+
22302+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
22303+
22304+ memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
22305+ for (i = 0; i < AuIop_Last; i++)
22306+ aufs_iop_nogetattr[i].getattr = NULL;
22307+
22308+ /* First, initialize au_dfree */
22309+ for (i = 0; i < AuCache_Last; i++) { /* including hnotify */
22310+ cp = au_dfree.cache + i;
22311+ cp->cache = NULL;
22312+ init_llist_head(&cp->llist);
22313+ }
22314+ for (i = 0; i < AU_DFREE_Last; i++)
22315+ init_llist_head(au_dfree.llist + i);
22316+ INIT_DELAYED_WORK(&au_dfree.dwork, au_do_dfree);
22317+
22318+ au_sbilist_init();
22319+ sysaufs_brs_init();
22320+ au_debug_init();
22321+ au_dy_init();
22322+ err = sysaufs_init();
22323+ if (unlikely(err))
22324+ goto out;
22325+ err = au_procfs_init();
22326+ if (unlikely(err))
22327+ goto out_sysaufs;
22328+ err = au_wkq_init();
22329+ if (unlikely(err))
22330+ goto out_procfs;
22331+ err = au_loopback_init();
22332+ if (unlikely(err))
22333+ goto out_wkq;
22334+ err = au_hnotify_init();
22335+ if (unlikely(err))
22336+ goto out_loopback;
22337+ err = au_sysrq_init();
22338+ if (unlikely(err))
22339+ goto out_hin;
22340+ err = au_cache_init();
22341+ if (unlikely(err))
22342+ goto out_sysrq;
22343+
22344+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
22345+ err = register_filesystem(&aufs_fs_type);
22346+ if (unlikely(err))
22347+ goto out_cache;
22348+
22349+ /* since we define pr_fmt, call printk directly */
22350+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
22351+ goto out; /* success */
22352+
22353+out_cache:
22354+ au_cache_fin();
22355+out_sysrq:
22356+ au_sysrq_fin();
22357+out_hin:
22358+ au_hnotify_fin();
22359+out_loopback:
22360+ au_loopback_fin();
22361+out_wkq:
22362+ au_wkq_fin();
22363+out_procfs:
22364+ au_procfs_fin();
22365+out_sysaufs:
22366+ sysaufs_fin();
22367+ au_dy_fin();
22368+ flush_delayed_work(&au_dfree.dwork);
22369+out:
22370+ return err;
22371+}
22372+
22373+static void __exit aufs_exit(void)
22374+{
22375+ unregister_filesystem(&aufs_fs_type);
22376+ au_cache_fin();
22377+ au_sysrq_fin();
22378+ au_hnotify_fin();
22379+ au_loopback_fin();
22380+ au_wkq_fin();
22381+ au_procfs_fin();
22382+ sysaufs_fin();
22383+ au_dy_fin();
22384+ flush_delayed_work(&au_dfree.dwork);
22385+}
22386+
22387+module_init(aufs_init);
22388+module_exit(aufs_exit);
22389diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
22390--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
22391+++ linux/fs/aufs/module.h 2016-10-09 16:55:38.889431135 +0200
22392@@ -0,0 +1,156 @@
22393+/*
22394+ * Copyright (C) 2005-2016 Junjiro R. Okajima
22395+ *
22396+ * This program, aufs is free software; you can redistribute it and/or modify
22397+ * it under the terms of the GNU General Public License as published by
22398+ * the Free Software Foundation; either version 2 of the License, or
22399+ * (at your option) any later version.
22400+ *
22401+ * This program is distributed in the hope that it will be useful,
22402+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22403+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22404+ * GNU General Public License for more details.
22405+ *
22406+ * You should have received a copy of the GNU General Public License
22407+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22408+ */
22409+
22410+/*
22411+ * module initialization and module-global
22412+ */
22413+
22414+#ifndef __AUFS_MODULE_H__
22415+#define __AUFS_MODULE_H__
22416+
22417+#ifdef __KERNEL__
22418+
22419+#include <linux/slab.h>
22420+#include "debug.h"
22421+
22422+struct path;
22423+struct seq_file;
22424+
22425+/* module parameters */
22426+extern int sysaufs_brs;
22427+extern bool au_userns;
22428+
22429+/* ---------------------------------------------------------------------- */
22430+
22431+extern int au_dir_roflags;
22432+
22433+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink);
22434+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
22435+ int may_shrink);
22436+
22437+static inline int au_kmidx_sub(size_t sz, size_t new_sz)
22438+{
22439+#ifndef CONFIG_SLOB
22440+ return kmalloc_index(sz) - kmalloc_index(new_sz);
22441+#else
22442+ return -1; /* SLOB is untested */
22443+#endif
22444+}
22445+
22446+int au_seq_path(struct seq_file *seq, struct path *path);
22447+
22448+#ifdef CONFIG_PROC_FS
22449+/* procfs.c */
22450+int __init au_procfs_init(void);
22451+void au_procfs_fin(void);
22452+#else
22453+AuStubInt0(au_procfs_init, void);
22454+AuStubVoid(au_procfs_fin, void);
22455+#endif
22456+
22457+/* ---------------------------------------------------------------------- */
22458+
22459+/* kmem cache and delayed free */
22460+enum {
22461+ AuCache_DINFO,
22462+ AuCache_ICNTNR,
22463+ AuCache_FINFO,
22464+ AuCache_VDIR,
22465+ AuCache_DEHSTR,
22466+ AuCache_HNOTIFY, /* must be last */
22467+ AuCache_Last
22468+};
22469+
22470+enum {
22471+ AU_DFREE_KFREE,
22472+ AU_DFREE_FREE_PAGE,
22473+ AU_DFREE_Last
22474+};
22475+
22476+struct au_cache {
22477+ struct kmem_cache *cache;
22478+ struct llist_head llist; /* delayed free */
22479+};
22480+
22481+/*
22482+ * in order to reduce the cost of the internal timer, consolidate all the
22483+ * delayed free works into a single delayed_work.
22484+ */
22485+struct au_dfree {
22486+ struct au_cache cache[AuCache_Last];
22487+ struct llist_head llist[AU_DFREE_Last];
22488+ struct delayed_work dwork;
22489+};
22490+
22491+extern struct au_dfree au_dfree;
22492+
22493+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
22494+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
22495+#define AuCacheCtor(type, ctor) \
22496+ kmem_cache_create(#type, sizeof(struct type), \
22497+ __alignof__(struct type), AuCacheFlags, ctor)
22498+
22499+#define AU_DFREE_DELAY msecs_to_jiffies(10) /* delay passed to schedule_delayed_work() */
22500+#define AU_DFREE_BODY(lnode, llist) do { \
22501+ if (llist_add(lnode, llist)) /* true when the list was empty */ \
22502+ schedule_delayed_work(&au_dfree.dwork, \
22503+ AU_DFREE_DELAY); \
22504+ } while (0)
22505+#define AU_CACHE_DFREE_FUNC(name, idx, lnode) /* defines au_cache_dfree_<name>() */ \
22506+ void au_cache_dfree_##name(struct au_##name *p) \
22507+ { \
22508+ struct au_cache *cp = au_dfree.cache + AuCache_##idx; \
22509+ AU_DFREE_BODY(&p->lnode, &cp->llist); \
22510+ }
22511+
22512+#define AuCacheFuncs(name, index) /* alloc/free inlines and the dfree prototype */ \
22513+static inline struct au_##name *au_cache_alloc_##name(void) \
22514+{ return kmem_cache_alloc(au_dfree.cache[AuCache_##index].cache, GFP_NOFS); } \
22515+static inline void au_cache_free_##name(struct au_##name *p) \
22516+{ kmem_cache_free(au_dfree.cache[AuCache_##index].cache, p); } \
22517+void au_cache_dfree_##name(struct au_##name *p)
22518+
22519+AuCacheFuncs(dinfo, DINFO);
22520+AuCacheFuncs(icntnr, ICNTNR);
22521+AuCacheFuncs(finfo, FINFO);
22522+AuCacheFuncs(vdir, VDIR);
22523+AuCacheFuncs(vdir_dehstr, DEHSTR);
22524+#ifdef CONFIG_AUFS_HNOTIFY
22525+AuCacheFuncs(hnotify, HNOTIFY);
22526+#endif /* CONFIG_AUFS_HNOTIFY */
22527+
22528+static inline void au_delayed_kfree(const void *p)
22529+{
22530+ AuDebugOn(!p);
22531+ AuDebugOn(ksize(p) < sizeof(struct llist_node));
22532+ /* the memory being freed is itself queued as the llist node */
22533+ AU_DFREE_BODY((void *)p, au_dfree.llist + AU_DFREE_KFREE);
22534+}
22535+
22536+/* cast only: free_page() taking a pointer instead of unsigned long */
22537+static inline void au_free_page(void *p)
22538+{
22539+ free_page((unsigned long)p);
22540+}
22541+
22542+static inline void au_delayed_free_page(unsigned long addr)
22543+{ /* the page at addr is itself queued as the llist node */
22544+ AU_DFREE_BODY((void *)addr, au_dfree.llist + AU_DFREE_FREE_PAGE);
22545+}
22546+
22547+#endif /* __KERNEL__ */
22548+#endif /* __AUFS_MODULE_H__ */
22549diff -urN /usr/share/empty/fs/aufs/mvdown.c linux/fs/aufs/mvdown.c
22550--- /usr/share/empty/fs/aufs/mvdown.c 1970-01-01 01:00:00.000000000 +0100
22551+++ linux/fs/aufs/mvdown.c 2016-10-09 16:55:36.492701639 +0200
22552@@ -0,0 +1,704 @@
22553+/*
22554+ * Copyright (C) 2011-2016 Junjiro R. Okajima
22555+ *
22556+ * This program, aufs is free software; you can redistribute it and/or modify
22557+ * it under the terms of the GNU General Public License as published by
22558+ * the Free Software Foundation; either version 2 of the License, or
22559+ * (at your option) any later version.
22560+ *
22561+ * This program is distributed in the hope that it will be useful,
22562+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22563+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22564+ * GNU General Public License for more details.
22565+ *
22566+ * You should have received a copy of the GNU General Public License
22567+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22568+ */
22569+
22570+/*
22571+ * move-down, opposite of copy-up
22572+ */
22573+
22574+#include "aufs.h"
22575+
22576+struct au_mvd_args {
22577+ struct { /* per-branch state, indexed by AUFS_MVDOWN_UPPER/LOWER */
22578+ struct super_block *h_sb;
22579+ struct dentry *h_parent;
22580+ struct au_hinode *hdir;
22581+ struct inode *h_dir, *h_inode;
22582+ struct au_pin pin;
22583+ } info[AUFS_MVDOWN_NARRAY];
22584+
22585+ struct aufs_mvdown mvdown; /* copied from and back to the user */
22586+ struct dentry *dentry, *parent;
22587+ struct inode *inode, *dir; /* d_inode() of dentry and of parent */
22588+ struct super_block *sb;
22589+ aufs_bindex_t bopq, bwh, bfound; /* filled while checking the args */
22590+ unsigned char rename_lock; /* set by au_do_lock() for au_do_unlock() */
22591+};
22592+
22593+#define mvd_errno mvdown.au_errno /* shorthands for struct au_mvd_args members */
22594+#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
22595+#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
22596+#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
22597+#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
22598+
22599+#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb /* source (upper) branch */
22600+#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
22601+#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
22602+#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
22603+#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
22604+#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
22605+
22606+#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb /* destination (lower) branch */
22607+#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
22608+#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
22609+#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
22610+#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
22611+#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
22612+
22613+#define AU_MVD_PR(flag, ...) do { /* pr_err() only when flag is set */ \
22614+ if (flag) \
22615+ pr_err(__VA_ARGS__); \
22616+ } while (0)
22617+
22618+static int find_lower_writable(struct au_mvd_args *a)
22619+{ /* returns the first acceptable branch index below bsrc, or -1 */
22620+ struct super_block *sb;
22621+ aufs_bindex_t bindex, bbot;
22622+ struct au_branch *br;
22623+
22624+ sb = a->sb;
22625+ bindex = a->mvd_bsrc;
22626+ bbot = au_sbbot(sb);
22627+ if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
22628+ for (bindex++; bindex <= bbot; bindex++) { /* scan bsrc + 1 .. bbot */
22629+ br = au_sbr(sb, bindex);
22630+ if (au_br_fhsm(br->br_perm)
22631+ && (!(au_br_sb(br)->s_flags & MS_RDONLY)))
22632+ return bindex;
22633+ }
22634+ else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
22635+ for (bindex++; bindex <= bbot; bindex++) {
22636+ br = au_sbr(sb, bindex);
22637+ if (!au_br_rdonly(br))
22638+ return bindex;
22639+ }
22640+ else /* ROLOWER: only the branch's super_block must not be MS_RDONLY */
22641+ for (bindex++; bindex <= bbot; bindex++) {
22642+ br = au_sbr(sb, bindex);
22643+ if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
22644+ if (au_br_rdonly(br))
22645+ a->mvdown.flags
22646+ |= AUFS_MVDOWN_ROLOWER_R; /* record it in the flags */
22647+ return bindex;
22648+ }
22649+ }
22650+
22651+ return -1; /* nothing found */
22652+}
22653+
22654+/* make the parent dir on bdst, unless it already exists there */
22655+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
22656+{
22657+ int err;
22658+
22659+ err = 0;
22660+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
22661+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
22662+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
22663+ a->mvd_h_dst_parent = NULL;
22664+ if (au_dbbot(a->parent) >= a->mvd_bdst) /* bdst is within the parent's range */
22665+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22666+ if (!a->mvd_h_dst_parent) { /* no parent dir on bdst yet */
22667+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
22668+ if (unlikely(err)) {
22669+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
22670+ goto out;
22671+ }
22672+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst); /* fetch it again */
22673+ }
22674+
22675+out:
22676+ AuTraceErr(err);
22677+ return err;
22678+}
22679+
22680+/* lock them all: pin bdst and bsrc, and record the scheme in a->rename_lock */
22681+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
22682+{
22683+ int err;
22684+ struct dentry *h_trap;
22685+
22686+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
22687+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
22688+ err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
22689+ au_opt_udba(a->sb),
22690+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22691+ AuTraceErr(err);
22692+ if (unlikely(err)) {
22693+ AU_MVD_PR(dmsg, "pin_dst failed\n");
22694+ goto out;
22695+ }
22696+
22697+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) { /* branches on different super_blocks */
22698+ a->rename_lock = 0;
22699+ au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22700+ AuLsc_DI_PARENT, AuLsc_I_PARENT3,
22701+ au_opt_udba(a->sb),
22702+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22703+ err = au_do_pin(&a->mvd_pin_src);
22704+ AuTraceErr(err);
22705+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
22706+ if (unlikely(err)) {
22707+ AU_MVD_PR(dmsg, "pin_src failed\n");
22708+ goto out_dst;
22709+ }
22710+ goto out; /* success */
22711+ }
22712+
22713+ a->rename_lock = 1; /* same super_block: use vfsub_lock_rename() below */
22714+ au_pin_hdir_unlock(&a->mvd_pin_dst);
22715+ err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22716+ au_opt_udba(a->sb),
22717+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22718+ AuTraceErr(err);
22719+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
22720+ if (unlikely(err)) {
22721+ AU_MVD_PR(dmsg, "pin_src failed\n");
22722+ au_pin_hdir_lock(&a->mvd_pin_dst); /* re-lock before au_unpin() at out_dst */
22723+ goto out_dst;
22724+ }
22725+ au_pin_hdir_unlock(&a->mvd_pin_src);
22726+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22727+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22728+ if (h_trap) { /* err stays non-zero only if h_trap is neither parent */
22729+ err = (h_trap != a->mvd_h_src_parent);
22730+ if (err)
22731+ err = (h_trap != a->mvd_h_dst_parent);
22732+ }
22733+ BUG_ON(err); /* it should never happen */
22734+ if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
22735+ err = -EBUSY;
22736+ AuTraceErr(err);
22737+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22738+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22739+ au_pin_hdir_lock(&a->mvd_pin_src); /* same unwinding as au_do_unlock() */
22740+ au_unpin(&a->mvd_pin_src);
22741+ au_pin_hdir_lock(&a->mvd_pin_dst);
22742+ goto out_dst;
22743+ }
22744+ goto out; /* success */
22745+
22746+out_dst:
22747+ au_unpin(&a->mvd_pin_dst);
22748+out:
22749+ AuTraceErr(err);
22750+ return err;
22751+}
22752+
22753+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
22754+{ /* undo au_do_lock(), following the scheme recorded in a->rename_lock */
22755+ if (!a->rename_lock)
22756+ au_unpin(&a->mvd_pin_src);
22757+ else {
22758+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22759+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22760+ au_pin_hdir_lock(&a->mvd_pin_src); /* re-lock each hdir before unpinning */
22761+ au_unpin(&a->mvd_pin_src);
22762+ au_pin_hdir_lock(&a->mvd_pin_dst);
22763+ }
22764+ au_unpin(&a->mvd_pin_dst);
22765+}
22766+
22767+/* copy-down the file from bsrc to bdst */
22768+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
22769+{
22770+ int err;
22771+ struct au_cp_generic cpg = {
22772+ .dentry = a->dentry,
22773+ .bdst = a->mvd_bdst,
22774+ .bsrc = a->mvd_bsrc,
22775+ .len = -1,
22776+ .pin = &a->mvd_pin_dst,
22777+ .flags = AuCpup_DTIME | AuCpup_HOPEN
22778+ };
22779+
22780+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
22781+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER) /* user flag -> copy-up flag */
22782+ au_fset_cpup(cpg.flags, OVERWRITE);
22783+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
22784+ au_fset_cpup(cpg.flags, RWDST);
22785+ err = au_sio_cpdown_simple(&cpg);
22786+ if (unlikely(err))
22787+ AU_MVD_PR(dmsg, "cpdown failed\n");
22788+
22789+ AuTraceErr(err);
22790+ return err;
22791+}
22792+
22793+/*
22794+ * unlink the whiteout on bdst if it exists; it may have been created by
22795+ * UDBA while we were sleeping
22796+ */
22797+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
22798+{
22799+ int err;
22800+ struct path h_path;
22801+ struct au_branch *br;
22802+ struct inode *delegated;
22803+
22804+ br = au_sbr(a->sb, a->mvd_bdst);
22805+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
22806+ err = PTR_ERR(h_path.dentry);
22807+ if (IS_ERR(h_path.dentry)) {
22808+ AU_MVD_PR(dmsg, "wh_lkup failed\n");
22809+ goto out;
22810+ }
22811+
22812+ err = 0;
22813+ if (d_is_positive(h_path.dentry)) { /* a whiteout exists on bdst */
22814+ h_path.mnt = au_br_mnt(br);
22815+ delegated = NULL;
22816+ err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
22817+ &delegated, /*force*/0);
22818+ if (unlikely(err == -EWOULDBLOCK)) { /* no retry here: warn and drop the inode */
22819+ pr_warn("cannot retry for NFSv4 delegation"
22820+ " for an internal unlink\n");
22821+ iput(delegated);
22822+ }
22823+ if (unlikely(err))
22824+ AU_MVD_PR(dmsg, "wh_unlink failed\n");
22825+ }
22826+ dput(h_path.dentry);
22827+
22828+out:
22829+ AuTraceErr(err);
22830+ return err;
22831+}
22832+
22833+/*
22834+ * unlink the topmost h_dentry, i.e. the one on bsrc
22835+ */
22836+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
22837+{
22838+ int err;
22839+ struct path h_path;
22840+ struct inode *delegated;
22841+
22842+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
22843+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
22844+ delegated = NULL;
22845+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
22846+ if (unlikely(err == -EWOULDBLOCK)) { /* no retry here: warn and drop the inode */
22847+ pr_warn("cannot retry for NFSv4 delegation"
22848+ " for an internal unlink\n");
22849+ iput(delegated);
22850+ }
22851+ if (unlikely(err))
22852+ AU_MVD_PR(dmsg, "unlink failed\n");
22853+
22854+ AuTraceErr(err);
22855+ return err;
22856+}
22857+
22858+/* Since mvdown succeeded, we ignore an error of this function */
22859+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
22860+{
22861+ int err;
22862+ struct au_branch *br;
22863+
22864+ a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED; /* cleared below on success */
22865+ br = au_sbr(a->sb, a->mvd_bsrc);
22866+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
22867+ if (!err) {
22868+ br = au_sbr(a->sb, a->mvd_bdst);
22869+ a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id; /* report bdst's branch id */
22870+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
22871+ }
22872+ if (!err)
22873+ a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
22874+ else
22875+ AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
22876+}
22877+
22878+/*
22879+ * copy-down the file and unlink the bsrc file.
22880+ * - unlink the bdst whiteout if it exists
22881+ * - copy-down the file (with whtmp name and rename)
22882+ * - unlink the bsrc file
22883+ */
22884+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
22885+{
22886+ int err;
22887+
22888+ err = au_do_mkdir(dmsg, a);
22889+ if (!err)
22890+ err = au_do_lock(dmsg, a);
22891+ if (unlikely(err))
22892+ goto out;
22893+
22894+ /*
22895+ * do not revert the activities we made on bdst since they should be
22896+ * harmless in aufs.
22897+ */
22898+
22899+ err = au_do_cpdown(dmsg, a);
22900+ if (!err)
22901+ err = au_do_unlink_wh(dmsg, a);
22902+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) /* KUPPER: keep the bsrc file */
22903+ err = au_do_unlink(dmsg, a);
22904+ if (unlikely(err))
22905+ goto out_unlock;
22906+
22907+ AuDbg("%pd2, 0x%x, %d --> %d\n",
22908+ a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
22909+ if (find_lower_writable(a) < 0) /* nothing acceptable below bsrc */
22910+ a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
22911+
22912+ if (a->mvdown.flags & AUFS_MVDOWN_STFS)
22913+ au_do_stfs(dmsg, a);
22914+
22915+ /* maintain internal array */
22916+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
22917+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL); /* bsrc is gone, bdst becomes the top */
22918+ au_set_dbtop(a->dentry, a->mvd_bdst);
22919+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
22920+ au_set_ibtop(a->inode, a->mvd_bdst);
22921+ } else {
22922+ /* hide the lower */
22923+ au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
22924+ au_set_dbbot(a->dentry, a->mvd_bsrc);
22925+ au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
22926+ au_set_ibbot(a->inode, a->mvd_bsrc);
22927+ }
22928+ if (au_dbbot(a->dentry) < a->mvd_bdst) /* extend the bottom down to bdst */
22929+ au_set_dbbot(a->dentry, a->mvd_bdst);
22930+ if (au_ibbot(a->inode) < a->mvd_bdst)
22931+ au_set_ibbot(a->inode, a->mvd_bdst);
22932+
22933+out_unlock:
22934+ au_do_unlock(dmsg, a);
22935+out:
22936+ AuTraceErr(err);
22937+ return err;
22938+}
22939+
22940+/* ---------------------------------------------------------------------- */
22941+
22942+/* make sure the file is idle; returns 0, or -EBUSY otherwise */
22943+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
22944+{
22945+ int err, plinked;
22946+
22947+ err = 0;
22948+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
22949+ if (au_dbtop(a->dentry) == a->mvd_bsrc /* every condition must hold */
22950+ && au_dcount(a->dentry) == 1
22951+ && atomic_read(&a->inode->i_count) == 1
22952+ /* && a->mvd_h_src_inode->i_nlink == 1 */
22953+ && (!plinked || !au_plink_test(a->inode))
22954+ && a->inode->i_nlink == 1)
22955+ goto out;
22956+
22957+ err = -EBUSY;
22958+ AU_MVD_PR(dmsg, /* print the values tested above */
22959+ "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
22960+ a->mvd_bsrc, au_dbtop(a->dentry), au_dcount(a->dentry),
22961+ atomic_read(&a->inode->i_count), a->inode->i_nlink,
22962+ a->mvd_h_src_inode->i_nlink,
22963+ plinked, plinked ? au_plink_test(a->inode) : 0);
22964+
22965+out:
22966+ AuTraceErr(err);
22967+ return err;
22968+}
22969+
22970+/* make sure the parent dir is fine; also sets a->bopq */
22971+static int au_mvd_args_parent(const unsigned char dmsg,
22972+ struct au_mvd_args *a)
22973+{
22974+ int err;
22975+ aufs_bindex_t bindex;
22976+
22977+ err = 0;
22978+ if (unlikely(au_alive_dir(a->parent))) { /* non-zero is treated as dead */
22979+ err = -ENOENT;
22980+ AU_MVD_PR(dmsg, "parent dir is dead\n");
22981+ goto out;
22982+ }
22983+
22984+ a->bopq = au_dbdiropq(a->parent);
22985+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
22986+ AuDbg("b%d\n", bindex);
22987+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst) /* either index lies above bdst */
22988+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
22989+ err = -EINVAL;
22990+ a->mvd_errno = EAU_MVDOWN_OPAQUE;
22991+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
22992+ a->bopq, a->mvd_bdst);
22993+ }
22994+
22995+out:
22996+ AuTraceErr(err);
22997+ return err;
22998+}
22999+
23000+static int au_mvd_args_intermediate(const unsigned char dmsg,
23001+ struct au_mvd_args *a)
23002+{ /* sets a->bfound and a->bwh from a lookup below bsrc, then checks them */
23003+ int err;
23004+ struct au_dinfo *dinfo, *tmp;
23005+
23006+ /* lookup the next lower positive entry */
23007+ err = -ENOMEM;
23008+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
23009+ if (unlikely(!tmp))
23010+ goto out;
23011+
23012+ a->bfound = -1;
23013+ a->bwh = -1;
23014+ dinfo = au_di(a->dentry);
23015+ au_di_cp(tmp, dinfo);
23016+ au_di_swap(tmp, dinfo); /* swapped back after the lookup */
23017+
23018+ /* returns the number of positive dentries */
23019+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1,
23020+ /* AuLkup_IGNORE_PERM */ 0);
23021+ if (!err)
23022+ a->bwh = au_dbwh(a->dentry);
23023+ else if (err > 0)
23024+ a->bfound = au_dbtop(a->dentry);
23025+
23026+ au_di_swap(tmp, dinfo);
23027+ au_rw_write_unlock(&tmp->di_rwsem);
23028+ au_di_free(tmp);
23029+ if (unlikely(err < 0)) /* NOTE(review): only logged; err is overwritten below */
23030+ AU_MVD_PR(dmsg, "failed look-up lower\n");
23031+
23032+ /*
23033+ * here, we have these cases.
23034+ * bfound == -1
23035+ * no positive dentry under bsrc. there are more sub-cases.
23036+ * bwh < 0
23037+ * there is no whiteout, we can safely move-down.
23038+ * bwh <= bsrc
23039+ * impossible
23040+ * bsrc < bwh && bwh < bdst
23041+ * there is a whiteout on RO branch. cannot proceed.
23042+ * bwh == bdst
23043+ * there is a whiteout on the RW target branch. it should
23044+ * be removed.
23045+ * bdst < bwh
23046+ * there is a whiteout on some unrelated branch.
23047+ * -1 < bfound && bfound <= bsrc
23048+ * impossible.
23049+ * bfound < bdst
23050+ * found, but it is on RO branch between bsrc and bdst. cannot
23051+ * proceed.
23052+ * bfound == bdst
23053+ * found, replace it if AUFS_MVDOWN_OWLOWER is set. otherwise return
23054+ * error.
23055+ * bdst < bfound
23056+ * found, after we create the file on bdst, it will be hidden.
23057+ */
23058+
23059+ AuDebugOn(a->bfound == -1
23060+ && a->bwh != -1
23061+ && a->bwh <= a->mvd_bsrc);
23062+ AuDebugOn(-1 < a->bfound
23063+ && a->bfound <= a->mvd_bsrc);
23064+
23065+ err = -EINVAL;
23066+ if (a->bfound == -1
23067+ && a->mvd_bsrc < a->bwh
23068+ && a->bwh != -1
23069+ && a->bwh < a->mvd_bdst) { /* whiteout between bsrc and bdst */
23070+ a->mvd_errno = EAU_MVDOWN_WHITEOUT;
23071+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
23072+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
23073+ goto out;
23074+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) { /* entry above bdst */
23075+ a->mvd_errno = EAU_MVDOWN_UPPER;
23076+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
23077+ a->mvd_bdst, a->bfound);
23078+ goto out;
23079+ }
23080+
23081+ err = 0; /* success */
23082+
23083+out:
23084+ AuTraceErr(err);
23085+ return err;
23086+}
23087+
23088+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
23089+{ /* -EEXIST when an entry was found on bdst and OWLOWER is not set */
23090+ int err;
23091+
23092+ err = 0;
23093+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
23094+ && a->bfound == a->mvd_bdst)
23095+ err = -EEXIST;
23096+ AuTraceErr(err);
23097+ return err;
23098+}
23099+
23100+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
23101+{ /* validate the request and choose bsrc/bdst; returns 0 or -errno */
23102+ int err; /* a->mvd_errno carries the aufs-specific detail, when set */
23103+ struct au_branch *br;
23104+
23105+ err = -EISDIR;
23106+ if (unlikely(S_ISDIR(a->inode->i_mode)))
23107+ goto out;
23108+
23109+ err = -EINVAL;
23110+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
23111+ a->mvd_bsrc = au_ibtop(a->inode);
23112+ else { /* the user named the source branch by id */
23113+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
23114+ if (unlikely(a->mvd_bsrc < 0
23115+ || (a->mvd_bsrc < au_dbtop(a->dentry)
23116+ || au_dbbot(a->dentry) < a->mvd_bsrc
23117+ || !au_h_dptr(a->dentry, a->mvd_bsrc))
23118+ || (a->mvd_bsrc < au_ibtop(a->inode)
23119+ || au_ibbot(a->inode) < a->mvd_bsrc
23120+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
23121+ a->mvd_errno = EAU_MVDOWN_NOUPPER;
23122+ AU_MVD_PR(dmsg, "no upper\n");
23123+ goto out;
23124+ }
23125+ }
23126+ if (unlikely(a->mvd_bsrc == au_sbbot(a->sb))) {
23127+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
23128+ AU_MVD_PR(dmsg, "on the bottom\n");
23129+ goto out;
23130+ }
23131+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
23132+ br = au_sbr(a->sb, a->mvd_bsrc);
23133+ err = au_br_rdonly(br);
23134+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
23135+ if (unlikely(err))
23136+ goto out;
23137+ } else if (vfsub_native_ro(a->mvd_h_src_inode)
23138+ || IS_APPEND(a->mvd_h_src_inode)) {
23139+ if (!err) /* never return 0 before bdst is chosen and checked */
23140+ err = -EPERM;
23141+ goto out;
23142+ } else if (err)
23143+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
23144+
23145+ err = -EINVAL;
23146+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
23147+ a->mvd_bdst = find_lower_writable(a);
23148+ if (unlikely(a->mvd_bdst < 0)) {
23149+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
23150+ AU_MVD_PR(dmsg, "no writable lower branch\n");
23151+ goto out;
23152+ }
23153+ } else { /* the user named the destination branch by id */
23154+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
23155+ if (unlikely(a->mvd_bdst < 0
23156+ || au_sbbot(a->sb) < a->mvd_bdst)) {
23157+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
23158+ AU_MVD_PR(dmsg, "no lower brid\n");
23159+ goto out;
23160+ }
23161+ }
23162+
23163+ err = au_mvd_args_busy(dmsg, a); /* each check runs only if the previous passed */
23164+ if (!err)
23165+ err = au_mvd_args_parent(dmsg, a);
23166+ if (!err)
23167+ err = au_mvd_args_intermediate(dmsg, a);
23168+ if (!err)
23169+ err = au_mvd_args_exist(dmsg, a);
23170+ if (!err)
23171+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
23172+
23173+out:
23174+ AuTraceErr(err);
23175+ return err;
23176+}
23177+
23178+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
23179+{ /* entry point: move dentry's file down a branch, as requested in *uarg */
23180+ int err, e;
23181+ unsigned char dmsg;
23182+ struct au_mvd_args *args;
23183+ struct inode *inode;
23184+
23185+ inode = d_inode(dentry);
23186+ err = -EPERM;
23187+ if (unlikely(!capable(CAP_SYS_ADMIN)))
23188+ goto out;
23189+
23190+ err = -ENOMEM;
23191+ args = kmalloc(sizeof(*args), GFP_NOFS);
23192+ if (unlikely(!args))
23193+ goto out;
23194+
23195+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
23196+ if (!err) /* the result is written back to uarg at out_free */
23197+ err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
23198+ if (unlikely(err)) {
23199+ err = -EFAULT;
23200+ AuTraceErr(err);
23201+ goto out_free;
23202+ }
23203+ AuDbg("flags 0x%x\n", args->mvdown.flags);
23204+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R); /* set only by the kernel side */
23205+ args->mvdown.au_errno = 0;
23206+ args->dentry = dentry;
23207+ args->inode = inode;
23208+ args->sb = dentry->d_sb;
23209+
23210+ err = -ENOENT;
23211+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
23212+ args->parent = dget_parent(dentry);
23213+ args->dir = d_inode(args->parent);
23214+ inode_lock_nested(args->dir, I_MUTEX_PARENT);
23215+ dput(args->parent);
23216+ if (unlikely(args->parent != dentry->d_parent)) { /* re-checked with the dir locked */
23217+ AU_MVD_PR(dmsg, "parent dir is moved\n");
23218+ goto out_dir;
23219+ }
23220+
23221+ inode_lock_nested(inode, I_MUTEX_CHILD);
23222+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
23223+ if (unlikely(err))
23224+ goto out_inode;
23225+
23226+ di_write_lock_parent(args->parent);
23227+ err = au_mvd_args(dmsg, args); /* validate first, then do the move */
23228+ if (unlikely(err))
23229+ goto out_parent;
23230+
23231+ err = au_do_mvdown(dmsg, args);
23232+ if (unlikely(err))
23233+ goto out_parent;
23234+
23235+ au_cpup_attr_timesizes(args->dir);
23236+ au_cpup_attr_timesizes(inode);
23237+ if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
23238+ au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
23239+ /* au_digen_dec(dentry); */
23240+
23241+out_parent:
23242+ di_write_unlock(args->parent);
23243+ aufs_read_unlock(dentry, AuLock_DW);
23244+out_inode:
23245+ inode_unlock(inode);
23246+out_dir:
23247+ inode_unlock(args->dir);
23248+out_free: /* reached on success and on failure: flags and au_errno go back */
23249+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
23250+ if (unlikely(e))
23251+ err = -EFAULT;
23252+ au_delayed_kfree(args);
23253+out:
23254+ AuTraceErr(err);
23255+ return err;
23256+}
23257diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
23258--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
23259+++ linux/fs/aufs/opts.c 2016-12-17 12:28:17.598545045 +0100
23260@@ -0,0 +1,1870 @@
23261+/*
23262+ * Copyright (C) 2005-2016 Junjiro R. Okajima
23263+ *
23264+ * This program, aufs is free software; you can redistribute it and/or modify
23265+ * it under the terms of the GNU General Public License as published by
23266+ * the Free Software Foundation; either version 2 of the License, or
23267+ * (at your option) any later version.
23268+ *
23269+ * This program is distributed in the hope that it will be useful,
23270+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23271+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23272+ * GNU General Public License for more details.
23273+ *
23274+ * You should have received a copy of the GNU General Public License
23275+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
23276+ */
23277+
23278+/*
23279+ * mount options/flags
23280+ */
23281+
23282+#include <linux/namei.h>
23283+#include <linux/types.h> /* a distribution requires */
23284+#include <linux/parser.h>
23285+#include "aufs.h"
23286+
23287+/* ---------------------------------------------------------------------- */
23288+
23289+enum { /* token values used in the options[] match table */
23290+ Opt_br,
23291+ Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
23292+ Opt_idel, Opt_imod,
23293+ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
23294+ Opt_rdblk_def, Opt_rdhash_def,
23295+ Opt_xino, Opt_noxino,
23296+ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
23297+ Opt_trunc_xino_path, Opt_itrunc_xino,
23298+ Opt_trunc_xib, Opt_notrunc_xib,
23299+ Opt_shwh, Opt_noshwh,
23300+ Opt_plink, Opt_noplink, Opt_list_plink,
23301+ Opt_udba,
23302+ Opt_dio, Opt_nodio,
23303+ Opt_diropq_a, Opt_diropq_w,
23304+ Opt_warn_perm, Opt_nowarn_perm,
23305+ Opt_wbr_copyup, Opt_wbr_create,
23306+ Opt_fhsm_sec,
23307+ Opt_verbose, Opt_noverbose,
23308+ Opt_sum, Opt_nosum, Opt_wsum,
23309+ Opt_dirperm1, Opt_nodirperm1,
23310+ Opt_acl, Opt_noacl,
23311+ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
23312+};
23313+
23314+static match_table_t options = { /* mount option patterns -> Opt_xxx tokens */
23315+ {Opt_br, "br=%s"},
23316+ {Opt_br, "br:%s"},
23317+
23318+ {Opt_add, "add=%d:%s"},
23319+ {Opt_add, "add:%d:%s"},
23320+ {Opt_add, "ins=%d:%s"},
23321+ {Opt_add, "ins:%d:%s"},
23322+ {Opt_append, "append=%s"},
23323+ {Opt_append, "append:%s"},
23324+ {Opt_prepend, "prepend=%s"},
23325+ {Opt_prepend, "prepend:%s"},
23326+
23327+ {Opt_del, "del=%s"},
23328+ {Opt_del, "del:%s"},
23329+ /* {Opt_idel, "idel:%d"}, */
23330+ {Opt_mod, "mod=%s"},
23331+ {Opt_mod, "mod:%s"},
23332+ /* {Opt_imod, "imod:%d:%s"}, */
23333+
23334+ {Opt_dirwh, "dirwh=%d"},
23335+
23336+ {Opt_xino, "xino=%s"},
23337+ {Opt_noxino, "noxino"},
23338+ {Opt_trunc_xino, "trunc_xino"},
23339+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
23340+ {Opt_notrunc_xino, "notrunc_xino"},
23341+ {Opt_trunc_xino_path, "trunc_xino=%s"},
23342+ {Opt_itrunc_xino, "itrunc_xino=%d"},
23343+ /* {Opt_zxino, "zxino=%s"}, */
23344+ {Opt_trunc_xib, "trunc_xib"},
23345+ {Opt_notrunc_xib, "notrunc_xib"},
23346+
23347+#ifdef CONFIG_PROC_FS
23348+ {Opt_plink, "plink"},
23349+#else /* !CONFIG_PROC_FS: "plink" maps to Opt_ignore_silent */
23350+ {Opt_ignore_silent, "plink"},
23351+#endif
23352+
23353+ {Opt_noplink, "noplink"},
23354+
23355+#ifdef CONFIG_AUFS_DEBUG
23356+ {Opt_list_plink, "list_plink"},
23357+#endif
23358+
23359+ {Opt_udba, "udba=%s"},
23360+
23361+ {Opt_dio, "dio"},
23362+ {Opt_nodio, "nodio"},
23363+
23364+#ifdef CONFIG_AUFS_FHSM
23365+ {Opt_fhsm_sec, "fhsm_sec=%d"},
23366+#else /* !CONFIG_AUFS_FHSM: "fhsm_sec" maps to Opt_ignore_silent */
23367+ {Opt_ignore_silent, "fhsm_sec=%d"},
23368+#endif
23369+
23370+ {Opt_diropq_a, "diropq=always"},
23371+ {Opt_diropq_a, "diropq=a"},
23372+ {Opt_diropq_w, "diropq=whiteouted"},
23373+ {Opt_diropq_w, "diropq=w"},
23374+
23375+ {Opt_warn_perm, "warn_perm"},
23376+ {Opt_nowarn_perm, "nowarn_perm"},
23377+
23378+ /* keep them temporarily */
23379+ {Opt_ignore_silent, "nodlgt"},
23380+ {Opt_ignore_silent, "clean_plink"},
23381+
23382+#ifdef CONFIG_AUFS_SHWH
23383+ {Opt_shwh, "shwh"},
23384+#endif
23385+ {Opt_noshwh, "noshwh"},
23386+
23387+ {Opt_dirperm1, "dirperm1"},
23388+ {Opt_nodirperm1, "nodirperm1"},
23389+
23390+ {Opt_verbose, "verbose"},
23391+ {Opt_verbose, "v"},
23392+ {Opt_noverbose, "noverbose"},
23393+ {Opt_noverbose, "quiet"},
23394+ {Opt_noverbose, "q"},
23395+ {Opt_noverbose, "silent"},
23396+
23397+ {Opt_sum, "sum"},
23398+ {Opt_nosum, "nosum"},
23399+ {Opt_wsum, "wsum"},
23400+
23401+ {Opt_rdcache, "rdcache=%d"},
23402+ {Opt_rdblk, "rdblk=%d"},
23403+ {Opt_rdblk_def, "rdblk=def"},
23404+ {Opt_rdhash, "rdhash=%d"},
23405+ {Opt_rdhash_def, "rdhash=def"},
23406+
23407+ {Opt_wbr_create, "create=%s"},
23408+ {Opt_wbr_create, "create_policy=%s"},
23409+ {Opt_wbr_copyup, "cpup=%s"},
23410+ {Opt_wbr_copyup, "copyup=%s"},
23411+ {Opt_wbr_copyup, "copyup_policy=%s"},
23412+
23413+ /* generic VFS flag */
23414+#ifdef CONFIG_FS_POSIX_ACL
23415+ {Opt_acl, "acl"},
23416+ {Opt_noacl, "noacl"},
23417+#else /* !CONFIG_FS_POSIX_ACL: both map to Opt_ignore_silent */
23418+ {Opt_ignore_silent, "acl"},
23419+ {Opt_ignore_silent, "noacl"},
23420+#endif
23421+
23422+ /* internal use for the scripts */
23423+ {Opt_ignore_silent, "si=%s"},
23424+
23425+ {Opt_br, "dirs=%s"},
23426+ {Opt_ignore, "debug=%d"},
23427+ {Opt_ignore, "delete=whiteout"},
23428+ {Opt_ignore, "delete=all"},
23429+ {Opt_ignore, "imap=%s"},
23430+
23431+ /* temporary workaround, due to old mount(8)? */
23432+ {Opt_ignore_silent, "relatime"},
23433+
23434+ {Opt_err, NULL} /* the NULL pattern ends the table */
23435+};
23436+
23437+/* ---------------------------------------------------------------------- */
23438+
23439+static const char *au_parser_pattern(int val, match_table_t tbl)
23440+{ /* returns the pattern of the first entry whose token equals val */
23441+ struct match_token *p;
23442+
23443+ p = tbl;
23444+ while (p->pattern) { /* the table ends at a NULL pattern */
23445+ if (p->token == val)
23446+ return p->pattern;
23447+ p++;
23448+ }
23449+ BUG(); /* val is not in tbl */
23450+ return "??";
23451+}
23452+
23453+static const char *au_optstr(int *val, match_table_t tbl)
23454+{ /* pops one flag from the bitmask *val and returns its pattern, or NULL */
23455+ struct match_token *p;
23456+ int v;
23457+
23458+ v = *val;
23459+ if (!v)
23460+ goto out;
23461+ p = tbl;
23462+ while (p->pattern) {
23463+ if (p->token
23464+ && (v & p->token) == p->token) { /* all bits of the token are set */
23465+ *val &= ~p->token; /* consumed, so the next call finds another */
23466+ return p->pattern;
23467+ }
23468+ p++;
23469+ }
23470+
23471+out:
23472+ return NULL;
23473+}
23474+
23475+/* ---------------------------------------------------------------------- */
23476+
23477+static match_table_t brperm = { /* branch permission names */
23478+ {AuBrPerm_RO, AUFS_BRPERM_RO},
23479+ {AuBrPerm_RR, AUFS_BRPERM_RR},
23480+ {AuBrPerm_RW, AUFS_BRPERM_RW},
23481+ {0, NULL} /* token 0 also means "no match" in br_perm_val() */
23482+};
23483+
23484+static match_table_t brattr = { /* branch attribute names, joined by '+' */
23485+ /* general */
23486+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
23487+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
23488+ /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
23489+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
23490+#ifdef CONFIG_AUFS_FHSM
23491+ {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
23492+#endif
23493+#ifdef CONFIG_AUFS_XATTR
23494+ {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
23495+ {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
23496+ {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
23497+ {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
23498+ {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
23499+ {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
23500+#endif
23501+
23502+ /* ro/rr branch */
23503+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
23504+
23505+ /* rw branch */
23506+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
23507+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
23508+
23509+ {0, NULL} /* token 0 also means "no match" in br_attr_val() */
23510+};
23511+
23512+static int br_attr_val(char *str, match_table_t table, substring_t args[])
23513+{ /* parses "attr+attr+..." and returns the OR of the matched tokens */
23514+ int attr, v;
23515+ char *p;
23516+
23517+ attr = 0;
23518+ do {
23519+ p = strchr(str, '+');
23520+ if (p)
23521+ *p = 0; /* cut str at '+'; restored only in the no-match case */
23522+ v = match_token(str, table, args);
23523+ if (v) {
23524+ if (v & AuBrAttr_CMOO_Mask) /* a new CMOO bit replaces the earlier ones */
23525+ attr &= ~AuBrAttr_CMOO_Mask;
23526+ attr |= v;
23527+ } else {
23528+ if (p)
23529+ *p = '+';
23530+ pr_warn("ignored branch attribute %s\n", str);
23531+ break; /* the rest of the string is not parsed */
23532+ }
23533+ if (p)
23534+ str = p + 1;
23535+ } while (p);
23536+
23537+ return attr;
23538+}
23539+
23540+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
23541+{ /* writes the attributes in perm to str->a as "a+b+..."; returns the length */
23542+ int sz;
23543+ const char *p;
23544+ char *q;
23545+
23546+ q = str->a;
23547+ *q = 0;
23548+ p = au_optstr(&perm, brattr); /* the first name, without a leading '+' */
23549+ if (p) {
23550+ sz = strlen(p);
23551+ memcpy(q, p, sz + 1); /* includes the terminating NUL */
23552+ q += sz;
23553+ } else
23554+ goto out;
23555+
23556+ do { /* the remaining names, each preceded by '+' */
23557+ p = au_optstr(&perm, brattr);
23558+ if (p) {
23559+ *q++ = '+';
23560+ sz = strlen(p);
23561+ memcpy(q, p, sz + 1);
23562+ q += sz;
23563+ }
23564+ } while (p);
23565+
23566+out:
23567+ return q - str->a;
23568+}
23569+
23570+static int noinline_for_stack br_perm_val(char *perm)
23571+{
23572+ int val, bad, sz;
23573+ char *p;
23574+ substring_t args[MAX_OPT_ARGS];
23575+ au_br_perm_str_t attr;
23576+
23577+ p = strchr(perm, '+');
23578+ if (p)
23579+ *p = 0;
23580+ val = match_token(perm, brperm, args);
23581+ if (!val) {
23582+ if (p)
23583+ *p = '+';
23584+ pr_warn("ignored branch permission %s\n", perm);
23585+ val = AuBrPerm_RO;
23586+ goto out;
23587+ }
23588+ if (!p)
23589+ goto out;
23590+
23591+ val |= br_attr_val(p + 1, brattr, args);
23592+
23593+ bad = 0;
23594+ switch (val & AuBrPerm_Mask) {
23595+ case AuBrPerm_RO:
23596+ case AuBrPerm_RR:
23597+ bad = val & AuBrWAttr_Mask;
23598+ val &= ~AuBrWAttr_Mask;
23599+ break;
23600+ case AuBrPerm_RW:
23601+ bad = val & AuBrRAttr_Mask;
23602+ val &= ~AuBrRAttr_Mask;
23603+ break;
23604+ }
23605+
23606+ /*
23607+ * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
23608+ * does not treat it as an error, just warning.
23609+ * this is a tiny guard for the user operation.
23610+ */
23611+ if (val & AuBrAttr_UNPIN) {
23612+ bad |= AuBrAttr_UNPIN;
23613+ val &= ~AuBrAttr_UNPIN;
23614+ }
23615+
23616+ if (unlikely(bad)) {
23617+ sz = au_do_optstr_br_attr(&attr, bad);
23618+ AuDebugOn(!sz);
23619+ pr_warn("ignored branch attribute %s\n", attr.a);
23620+ }
23621+
23622+out:
23623+ return val;
23624+}
23625+
23626+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
23627+{
23628+ au_br_perm_str_t attr;
23629+ const char *p;
23630+ char *q;
23631+ int sz;
23632+
23633+ q = str->a;
23634+ p = au_optstr(&perm, brperm);
23635+ AuDebugOn(!p || !*p);
23636+ sz = strlen(p);
23637+ memcpy(q, p, sz + 1);
23638+ q += sz;
23639+
23640+ sz = au_do_optstr_br_attr(&attr, perm);
23641+ if (sz) {
23642+ *q++ = '+';
23643+ memcpy(q, attr.a, sz + 1);
23644+ }
23645+
23646+ AuDebugOn(strlen(str->a) >= sizeof(str->a));
23647+}
23648+
23649+/* ---------------------------------------------------------------------- */
23650+
23651+static match_table_t udbalevel = {
23652+ {AuOpt_UDBA_REVAL, "reval"},
23653+ {AuOpt_UDBA_NONE, "none"},
23654+#ifdef CONFIG_AUFS_HNOTIFY
23655+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
23656+#ifdef CONFIG_AUFS_HFSNOTIFY
23657+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
23658+#endif
23659+#endif
23660+ {-1, NULL}
23661+};
23662+
23663+static int noinline_for_stack udba_val(char *str)
23664+{
23665+ substring_t args[MAX_OPT_ARGS];
23666+
23667+ return match_token(str, udbalevel, args);
23668+}
23669+
23670+const char *au_optstr_udba(int udba)
23671+{
23672+ return au_parser_pattern(udba, udbalevel);
23673+}
23674+
23675+/* ---------------------------------------------------------------------- */
23676+
23677+static match_table_t au_wbr_create_policy = {
23678+ {AuWbrCreate_TDP, "tdp"},
23679+ {AuWbrCreate_TDP, "top-down-parent"},
23680+ {AuWbrCreate_RR, "rr"},
23681+ {AuWbrCreate_RR, "round-robin"},
23682+ {AuWbrCreate_MFS, "mfs"},
23683+ {AuWbrCreate_MFS, "most-free-space"},
23684+ {AuWbrCreate_MFSV, "mfs:%d"},
23685+ {AuWbrCreate_MFSV, "most-free-space:%d"},
23686+
23687+ /* top-down regardless the parent, and then mfs */
23688+ {AuWbrCreate_TDMFS, "tdmfs:%d"},
23689+ {AuWbrCreate_TDMFSV, "tdmfs:%d:%d"},
23690+
23691+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
23692+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
23693+ {AuWbrCreate_PMFS, "pmfs"},
23694+ {AuWbrCreate_PMFSV, "pmfs:%d"},
23695+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
23696+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
23697+
23698+ {-1, NULL}
23699+};
23700+
23701+/*
23702+ * cf. linux/lib/parser.c and cmdline.c
23703+ * gave up calling memparse() since it uses simple_strtoull() instead of
23704+ * kstrto...().
23705+ */
23706+static int noinline_for_stack
23707+au_match_ull(substring_t *s, unsigned long long *result)
23708+{
23709+ int err;
23710+ unsigned int len;
23711+ char a[32];
23712+
23713+ err = -ERANGE;
23714+ len = s->to - s->from;
23715+ if (len + 1 <= sizeof(a)) {
23716+ memcpy(a, s->from, len);
23717+ a[len] = '\0';
23718+ err = kstrtoull(a, 0, result);
23719+ }
23720+ return err;
23721+}
23722+
23723+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
23724+ struct au_opt_wbr_create *create)
23725+{
23726+ int err;
23727+ unsigned long long ull;
23728+
23729+ err = 0;
23730+ if (!au_match_ull(arg, &ull))
23731+ create->mfsrr_watermark = ull;
23732+ else {
23733+ pr_err("bad integer in %s\n", str);
23734+ err = -EINVAL;
23735+ }
23736+
23737+ return err;
23738+}
23739+
23740+static int au_wbr_mfs_sec(substring_t *arg, char *str,
23741+ struct au_opt_wbr_create *create)
23742+{
23743+ int n, err;
23744+
23745+ err = 0;
23746+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
23747+ create->mfs_second = n;
23748+ else {
23749+ pr_err("bad integer in %s\n", str);
23750+ err = -EINVAL;
23751+ }
23752+
23753+ return err;
23754+}
23755+
23756+static int noinline_for_stack
23757+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
23758+{
23759+ int err, e;
23760+ substring_t args[MAX_OPT_ARGS];
23761+
23762+ err = match_token(str, au_wbr_create_policy, args);
23763+ create->wbr_create = err;
23764+ switch (err) {
23765+ case AuWbrCreate_MFSRRV:
23766+ case AuWbrCreate_TDMFSV:
23767+ case AuWbrCreate_PMFSRRV:
23768+ e = au_wbr_mfs_wmark(&args[0], str, create);
23769+ if (!e)
23770+ e = au_wbr_mfs_sec(&args[1], str, create);
23771+ if (unlikely(e))
23772+ err = e;
23773+ break;
23774+ case AuWbrCreate_MFSRR:
23775+ case AuWbrCreate_TDMFS:
23776+ case AuWbrCreate_PMFSRR:
23777+ e = au_wbr_mfs_wmark(&args[0], str, create);
23778+ if (unlikely(e)) {
23779+ err = e;
23780+ break;
23781+ }
23782+ /*FALLTHROUGH*/
23783+ case AuWbrCreate_MFS:
23784+ case AuWbrCreate_PMFS:
23785+ create->mfs_second = AUFS_MFS_DEF_SEC;
23786+ break;
23787+ case AuWbrCreate_MFSV:
23788+ case AuWbrCreate_PMFSV:
23789+ e = au_wbr_mfs_sec(&args[0], str, create);
23790+ if (unlikely(e))
23791+ err = e;
23792+ break;
23793+ }
23794+
23795+ return err;
23796+}
23797+
23798+const char *au_optstr_wbr_create(int wbr_create)
23799+{
23800+ return au_parser_pattern(wbr_create, au_wbr_create_policy);
23801+}
23802+
23803+static match_table_t au_wbr_copyup_policy = {
23804+ {AuWbrCopyup_TDP, "tdp"},
23805+ {AuWbrCopyup_TDP, "top-down-parent"},
23806+ {AuWbrCopyup_BUP, "bup"},
23807+ {AuWbrCopyup_BUP, "bottom-up-parent"},
23808+ {AuWbrCopyup_BU, "bu"},
23809+ {AuWbrCopyup_BU, "bottom-up"},
23810+ {-1, NULL}
23811+};
23812+
23813+static int noinline_for_stack au_wbr_copyup_val(char *str)
23814+{
23815+ substring_t args[MAX_OPT_ARGS];
23816+
23817+ return match_token(str, au_wbr_copyup_policy, args);
23818+}
23819+
23820+const char *au_optstr_wbr_copyup(int wbr_copyup)
23821+{
23822+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
23823+}
23824+
23825+/* ---------------------------------------------------------------------- */
23826+
23827+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
23828+
/*
 * print every parsed option in @opts through AuDbg()/AuLabel().
 * compiled to an empty function unless CONFIG_AUFS_DEBUG is set.
 * the list is walked until the Opt_tail terminator; an option type which is
 * not listed here is a BUG().
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	/* reduce stack space */
	union {
		struct au_opt_add *add;
		struct au_opt_del *del;
		struct au_opt_mod *mod;
		struct au_opt_xino *xino;
		struct au_opt_xino_itrunc *xino_itrunc;
		struct au_opt_wbr_create *create;
	} u;
	struct au_opt *opt;

	opt = opts->opt;
	while (opt->type != Opt_tail) {
		switch (opt->type) {
		case Opt_add:
			u.add = &opt->add;
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      u.add->bindex, u.add->pathname, u.add->perm,
			      u.add->path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			u.del = &opt->del;
			AuDbg("del {%s, %p}\n",
			      u.del->pathname, u.del->h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			u.mod = &opt->mod;
			AuDbg("mod {%s, 0x%x, %p}\n",
			      u.mod->path, u.mod->perm, u.mod->h_root);
			break;
		case Opt_append:
			u.add = &opt->add;
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      u.add->bindex, u.add->pathname, u.add->perm,
			      u.add->path.dentry);
			break;
		case Opt_prepend:
			u.add = &opt->add;
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      u.add->bindex, u.add->pathname, u.add->perm,
			      u.add->path.dentry);
			break;
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;
		case Opt_xino:
			u.xino = &opt->xino;
			AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			u.xino_itrunc = &opt->xino_itrunc;
			AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
			break;
		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_dirperm1:
			AuLabel(dirperm1);
			break;
		case Opt_nodirperm1:
			AuLabel(nodirperm1);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
			      opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_dio:
			AuLabel(dio);
			break;
		case Opt_nodio:
			AuLabel(nodio);
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;
		case Opt_wbr_create:
			u.create = &opt->wbr_create;
			AuDbg("create %d, %s\n", u.create->wbr_create,
			      au_optstr_wbr_create(u.create->wbr_create));
			/* print the arguments the policy was parsed with */
			switch (u.create->wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", u.create->mfs_second);
				break;
			case AuWbrCreate_MFSRR:
			case AuWbrCreate_TDMFS:
			/* pmfsrr:%d carries a watermark as well */
			case AuWbrCreate_PMFSRR:
				AuDbg("%llu watermark\n",
				      u.create->mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
			case AuWbrCreate_TDMFSV:
			case AuWbrCreate_PMFSRRV:
				AuDbg("%llu watermark, %d sec\n",
				      u.create->mfsrr_watermark,
				      u.create->mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
			      au_optstr_wbr_copyup(opt->wbr_copyup));
			break;
		case Opt_fhsm_sec:
			AuDbg("fhsm_sec %u\n", opt->fhsm_second);
			break;
		case Opt_acl:
			AuLabel(acl);
			break;
		case Opt_noacl:
			AuLabel(noacl);
			break;
		default:
			BUG();
		}
		opt++;
	}
#endif
}
24019+
24020+void au_opts_free(struct au_opts *opts)
24021+{
24022+ struct au_opt *opt;
24023+
24024+ opt = opts->opt;
24025+ while (opt->type != Opt_tail) {
24026+ switch (opt->type) {
24027+ case Opt_add:
24028+ case Opt_append:
24029+ case Opt_prepend:
24030+ path_put(&opt->add.path);
24031+ break;
24032+ case Opt_del:
24033+ case Opt_idel:
24034+ path_put(&opt->del.h_path);
24035+ break;
24036+ case Opt_mod:
24037+ case Opt_imod:
24038+ dput(opt->mod.h_root);
24039+ break;
24040+ case Opt_xino:
24041+ fput(opt->xino.file);
24042+ break;
24043+ }
24044+ opt++;
24045+ }
24046+}
24047+
24048+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
24049+ aufs_bindex_t bindex)
24050+{
24051+ int err;
24052+ struct au_opt_add *add = &opt->add;
24053+ char *p;
24054+
24055+ add->bindex = bindex;
24056+ add->perm = AuBrPerm_RO;
24057+ add->pathname = opt_str;
24058+ p = strchr(opt_str, '=');
24059+ if (p) {
24060+ *p++ = 0;
24061+ if (*p)
24062+ add->perm = br_perm_val(p);
24063+ }
24064+
24065+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
24066+ if (!err) {
24067+ if (!p) {
24068+ add->perm = AuBrPerm_RO;
24069+ if (au_test_fs_rr(add->path.dentry->d_sb))
24070+ add->perm = AuBrPerm_RR;
24071+ else if (!bindex && !(sb_flags & MS_RDONLY))
24072+ add->perm = AuBrPerm_RW;
24073+ }
24074+ opt->type = Opt_add;
24075+ goto out;
24076+ }
24077+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
24078+ err = -EINVAL;
24079+
24080+out:
24081+ return err;
24082+}
24083+
24084+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
24085+{
24086+ int err;
24087+
24088+ del->pathname = args[0].from;
24089+ AuDbg("del path %s\n", del->pathname);
24090+
24091+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
24092+ if (unlikely(err))
24093+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
24094+
24095+ return err;
24096+}
24097+
#if 0 /* reserved for future use */
/*
 * indexed variant of the del option: take the branch by @bindex instead of
 * by path. -EINVAL when @bindex is out of range.
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int ret;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex < 0 || au_sbbot(sb) < bindex) {
		pr_err("out of bounds, %d\n", bindex);
		ret = -EINVAL;
	} else {
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
		ret = 0;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return ret;
}
#endif
24122+
24123+static int noinline_for_stack
24124+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
24125+{
24126+ int err;
24127+ struct path path;
24128+ char *p;
24129+
24130+ err = -EINVAL;
24131+ mod->path = args[0].from;
24132+ p = strchr(mod->path, '=');
24133+ if (unlikely(!p)) {
24134+ pr_err("no permssion %s\n", args[0].from);
24135+ goto out;
24136+ }
24137+
24138+ *p++ = 0;
24139+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
24140+ if (unlikely(err)) {
24141+ pr_err("lookup failed %s (%d)\n", mod->path, err);
24142+ goto out;
24143+ }
24144+
24145+ mod->perm = br_perm_val(p);
24146+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
24147+ mod->h_root = dget(path.dentry);
24148+ path_put(&path);
24149+
24150+out:
24151+ return err;
24152+}
24153+
#if 0 /* reserved for future use */
/*
 * indexed variant of the mod option: take the branch by @bindex and the
 * permission from the second matched argument. -EINVAL when @bindex is out
 * of range.
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int ret;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex < 0 || au_sbbot(sb) < bindex) {
		pr_err("out of bounds, %d\n", bindex);
		ret = -EINVAL;
	} else {
		ret = 0;
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
	}
	aufs_read_unlock(root, !AuLock_IR);

	return ret;
}
#endif
24180+
24181+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
24182+ substring_t args[])
24183+{
24184+ int err;
24185+ struct file *file;
24186+
24187+ file = au_xino_create(sb, args[0].from, /*silent*/0);
24188+ err = PTR_ERR(file);
24189+ if (IS_ERR(file))
24190+ goto out;
24191+
24192+ err = -EINVAL;
24193+ if (unlikely(file->f_path.dentry->d_sb == sb)) {
24194+ fput(file);
24195+ pr_err("%s must be outside\n", args[0].from);
24196+ goto out;
24197+ }
24198+
24199+ err = 0;
24200+ xino->file = file;
24201+ xino->path = args[0].from;
24202+
24203+out:
24204+ return err;
24205+}
24206+
24207+static int noinline_for_stack
24208+au_opts_parse_xino_itrunc_path(struct super_block *sb,
24209+ struct au_opt_xino_itrunc *xino_itrunc,
24210+ substring_t args[])
24211+{
24212+ int err;
24213+ aufs_bindex_t bbot, bindex;
24214+ struct path path;
24215+ struct dentry *root;
24216+
24217+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
24218+ if (unlikely(err)) {
24219+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
24220+ goto out;
24221+ }
24222+
24223+ xino_itrunc->bindex = -1;
24224+ root = sb->s_root;
24225+ aufs_read_lock(root, AuLock_FLUSH);
24226+ bbot = au_sbbot(sb);
24227+ for (bindex = 0; bindex <= bbot; bindex++) {
24228+ if (au_h_dptr(root, bindex) == path.dentry) {
24229+ xino_itrunc->bindex = bindex;
24230+ break;
24231+ }
24232+ }
24233+ aufs_read_unlock(root, !AuLock_IR);
24234+ path_put(&path);
24235+
24236+ if (unlikely(xino_itrunc->bindex < 0)) {
24237+ pr_err("no such branch %s\n", args[0].from);
24238+ err = -EINVAL;
24239+ }
24240+
24241+out:
24242+ return err;
24243+}
24244+
24245+/* called without aufs lock */
24246+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
24247+{
24248+ int err, n, token;
24249+ aufs_bindex_t bindex;
24250+ unsigned char skipped;
24251+ struct dentry *root;
24252+ struct au_opt *opt, *opt_tail;
24253+ char *opt_str;
24254+ /* reduce the stack space */
24255+ union {
24256+ struct au_opt_xino_itrunc *xino_itrunc;
24257+ struct au_opt_wbr_create *create;
24258+ } u;
24259+ struct {
24260+ substring_t args[MAX_OPT_ARGS];
24261+ } *a;
24262+
24263+ err = -ENOMEM;
24264+ a = kmalloc(sizeof(*a), GFP_NOFS);
24265+ if (unlikely(!a))
24266+ goto out;
24267+
24268+ root = sb->s_root;
24269+ err = 0;
24270+ bindex = 0;
24271+ opt = opts->opt;
24272+ opt_tail = opt + opts->max_opt - 1;
24273+ opt->type = Opt_tail;
24274+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
24275+ err = -EINVAL;
24276+ skipped = 0;
24277+ token = match_token(opt_str, options, a->args);
24278+ switch (token) {
24279+ case Opt_br:
24280+ err = 0;
24281+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
24282+ && *opt_str) {
24283+ err = opt_add(opt, opt_str, opts->sb_flags,
24284+ bindex++);
24285+ if (unlikely(!err && ++opt > opt_tail)) {
24286+ err = -E2BIG;
24287+ break;
24288+ }
24289+ opt->type = Opt_tail;
24290+ skipped = 1;
24291+ }
24292+ break;
24293+ case Opt_add:
24294+ if (unlikely(match_int(&a->args[0], &n))) {
24295+ pr_err("bad integer in %s\n", opt_str);
24296+ break;
24297+ }
24298+ bindex = n;
24299+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
24300+ bindex);
24301+ if (!err)
24302+ opt->type = token;
24303+ break;
24304+ case Opt_append:
24305+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
24306+ /*dummy bindex*/1);
24307+ if (!err)
24308+ opt->type = token;
24309+ break;
24310+ case Opt_prepend:
24311+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
24312+ /*bindex*/0);
24313+ if (!err)
24314+ opt->type = token;
24315+ break;
24316+ case Opt_del:
24317+ err = au_opts_parse_del(&opt->del, a->args);
24318+ if (!err)
24319+ opt->type = token;
24320+ break;
24321+#if 0 /* reserved for future use */
24322+ case Opt_idel:
24323+ del->pathname = "(indexed)";
24324+ if (unlikely(match_int(&args[0], &n))) {
24325+ pr_err("bad integer in %s\n", opt_str);
24326+ break;
24327+ }
24328+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
24329+ if (!err)
24330+ opt->type = token;
24331+ break;
24332+#endif
24333+ case Opt_mod:
24334+ err = au_opts_parse_mod(&opt->mod, a->args);
24335+ if (!err)
24336+ opt->type = token;
24337+ break;
24338+#ifdef IMOD /* reserved for future use */
24339+ case Opt_imod:
24340+ u.mod->path = "(indexed)";
24341+ if (unlikely(match_int(&a->args[0], &n))) {
24342+ pr_err("bad integer in %s\n", opt_str);
24343+ break;
24344+ }
24345+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
24346+ if (!err)
24347+ opt->type = token;
24348+ break;
24349+#endif
24350+ case Opt_xino:
24351+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
24352+ if (!err)
24353+ opt->type = token;
24354+ break;
24355+
24356+ case Opt_trunc_xino_path:
24357+ err = au_opts_parse_xino_itrunc_path
24358+ (sb, &opt->xino_itrunc, a->args);
24359+ if (!err)
24360+ opt->type = token;
24361+ break;
24362+
24363+ case Opt_itrunc_xino:
24364+ u.xino_itrunc = &opt->xino_itrunc;
24365+ if (unlikely(match_int(&a->args[0], &n))) {
24366+ pr_err("bad integer in %s\n", opt_str);
24367+ break;
24368+ }
24369+ u.xino_itrunc->bindex = n;
24370+ aufs_read_lock(root, AuLock_FLUSH);
24371+ if (n < 0 || au_sbbot(sb) < n) {
24372+ pr_err("out of bounds, %d\n", n);
24373+ aufs_read_unlock(root, !AuLock_IR);
24374+ break;
24375+ }
24376+ aufs_read_unlock(root, !AuLock_IR);
24377+ err = 0;
24378+ opt->type = token;
24379+ break;
24380+
24381+ case Opt_dirwh:
24382+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
24383+ break;
24384+ err = 0;
24385+ opt->type = token;
24386+ break;
24387+
24388+ case Opt_rdcache:
24389+ if (unlikely(match_int(&a->args[0], &n))) {
24390+ pr_err("bad integer in %s\n", opt_str);
24391+ break;
24392+ }
24393+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
24394+ pr_err("rdcache must be smaller than %d\n",
24395+ AUFS_RDCACHE_MAX);
24396+ break;
24397+ }
24398+ opt->rdcache = n;
24399+ err = 0;
24400+ opt->type = token;
24401+ break;
24402+ case Opt_rdblk:
24403+ if (unlikely(match_int(&a->args[0], &n)
24404+ || n < 0
24405+ || n > KMALLOC_MAX_SIZE)) {
24406+ pr_err("bad integer in %s\n", opt_str);
24407+ break;
24408+ }
24409+ if (unlikely(n && n < NAME_MAX)) {
24410+ pr_err("rdblk must be larger than %d\n",
24411+ NAME_MAX);
24412+ break;
24413+ }
24414+ opt->rdblk = n;
24415+ err = 0;
24416+ opt->type = token;
24417+ break;
24418+ case Opt_rdhash:
24419+ if (unlikely(match_int(&a->args[0], &n)
24420+ || n < 0
24421+ || n * sizeof(struct hlist_head)
24422+ > KMALLOC_MAX_SIZE)) {
24423+ pr_err("bad integer in %s\n", opt_str);
24424+ break;
24425+ }
24426+ opt->rdhash = n;
24427+ err = 0;
24428+ opt->type = token;
24429+ break;
24430+
24431+ case Opt_trunc_xino:
24432+ case Opt_notrunc_xino:
24433+ case Opt_noxino:
24434+ case Opt_trunc_xib:
24435+ case Opt_notrunc_xib:
24436+ case Opt_shwh:
24437+ case Opt_noshwh:
24438+ case Opt_dirperm1:
24439+ case Opt_nodirperm1:
24440+ case Opt_plink:
24441+ case Opt_noplink:
24442+ case Opt_list_plink:
24443+ case Opt_dio:
24444+ case Opt_nodio:
24445+ case Opt_diropq_a:
24446+ case Opt_diropq_w:
24447+ case Opt_warn_perm:
24448+ case Opt_nowarn_perm:
24449+ case Opt_verbose:
24450+ case Opt_noverbose:
24451+ case Opt_sum:
24452+ case Opt_nosum:
24453+ case Opt_wsum:
24454+ case Opt_rdblk_def:
24455+ case Opt_rdhash_def:
24456+ case Opt_acl:
24457+ case Opt_noacl:
24458+ err = 0;
24459+ opt->type = token;
24460+ break;
24461+
24462+ case Opt_udba:
24463+ opt->udba = udba_val(a->args[0].from);
24464+ if (opt->udba >= 0) {
24465+ err = 0;
24466+ opt->type = token;
24467+ } else
24468+ pr_err("wrong value, %s\n", opt_str);
24469+ break;
24470+
24471+ case Opt_wbr_create:
24472+ u.create = &opt->wbr_create;
24473+ u.create->wbr_create
24474+ = au_wbr_create_val(a->args[0].from, u.create);
24475+ if (u.create->wbr_create >= 0) {
24476+ err = 0;
24477+ opt->type = token;
24478+ } else
24479+ pr_err("wrong value, %s\n", opt_str);
24480+ break;
24481+ case Opt_wbr_copyup:
24482+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
24483+ if (opt->wbr_copyup >= 0) {
24484+ err = 0;
24485+ opt->type = token;
24486+ } else
24487+ pr_err("wrong value, %s\n", opt_str);
24488+ break;
24489+
24490+ case Opt_fhsm_sec:
24491+ if (unlikely(match_int(&a->args[0], &n)
24492+ || n < 0)) {
24493+ pr_err("bad integer in %s\n", opt_str);
24494+ break;
24495+ }
24496+ if (sysaufs_brs) {
24497+ opt->fhsm_second = n;
24498+ opt->type = token;
24499+ } else
24500+ pr_warn("ignored %s\n", opt_str);
24501+ err = 0;
24502+ break;
24503+
24504+ case Opt_ignore:
24505+ pr_warn("ignored %s\n", opt_str);
24506+ /*FALLTHROUGH*/
24507+ case Opt_ignore_silent:
24508+ skipped = 1;
24509+ err = 0;
24510+ break;
24511+ case Opt_err:
24512+ pr_err("unknown option %s\n", opt_str);
24513+ break;
24514+ }
24515+
24516+ if (!err && !skipped) {
24517+ if (unlikely(++opt > opt_tail)) {
24518+ err = -E2BIG;
24519+ opt--;
24520+ opt->type = Opt_tail;
24521+ break;
24522+ }
24523+ opt->type = Opt_tail;
24524+ }
24525+ }
24526+
24527+ au_delayed_kfree(a);
24528+ dump_opts(opts);
24529+ if (unlikely(err))
24530+ au_opts_free(opts);
24531+
24532+out:
24533+ return err;
24534+}
24535+
24536+static int au_opt_wbr_create(struct super_block *sb,
24537+ struct au_opt_wbr_create *create)
24538+{
24539+ int err;
24540+ struct au_sbinfo *sbinfo;
24541+
24542+ SiMustWriteLock(sb);
24543+
24544+ err = 1; /* handled */
24545+ sbinfo = au_sbi(sb);
24546+ if (sbinfo->si_wbr_create_ops->fin) {
24547+ err = sbinfo->si_wbr_create_ops->fin(sb);
24548+ if (!err)
24549+ err = 1;
24550+ }
24551+
24552+ sbinfo->si_wbr_create = create->wbr_create;
24553+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
24554+ switch (create->wbr_create) {
24555+ case AuWbrCreate_MFSRRV:
24556+ case AuWbrCreate_MFSRR:
24557+ case AuWbrCreate_TDMFS:
24558+ case AuWbrCreate_TDMFSV:
24559+ case AuWbrCreate_PMFSRR:
24560+ case AuWbrCreate_PMFSRRV:
24561+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
24562+ /*FALLTHROUGH*/
24563+ case AuWbrCreate_MFS:
24564+ case AuWbrCreate_MFSV:
24565+ case AuWbrCreate_PMFS:
24566+ case AuWbrCreate_PMFSV:
24567+ sbinfo->si_wbr_mfs.mfs_expire
24568+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
24569+ break;
24570+ }
24571+
24572+ if (sbinfo->si_wbr_create_ops->init)
24573+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
24574+
24575+ return err;
24576+}
24577+
24578+/*
24579+ * returns,
24580+ * plus: processed without an error
24581+ * zero: unprocessed
24582+ */
24583+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
24584+ struct au_opts *opts)
24585+{
24586+ int err;
24587+ struct au_sbinfo *sbinfo;
24588+
24589+ SiMustWriteLock(sb);
24590+
24591+ err = 1; /* handled */
24592+ sbinfo = au_sbi(sb);
24593+ switch (opt->type) {
24594+ case Opt_udba:
24595+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
24596+ sbinfo->si_mntflags |= opt->udba;
24597+ opts->given_udba |= opt->udba;
24598+ break;
24599+
24600+ case Opt_plink:
24601+ au_opt_set(sbinfo->si_mntflags, PLINK);
24602+ break;
24603+ case Opt_noplink:
24604+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
24605+ au_plink_put(sb, /*verbose*/1);
24606+ au_opt_clr(sbinfo->si_mntflags, PLINK);
24607+ break;
24608+ case Opt_list_plink:
24609+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
24610+ au_plink_list(sb);
24611+ break;
24612+
24613+ case Opt_dio:
24614+ au_opt_set(sbinfo->si_mntflags, DIO);
24615+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24616+ break;
24617+ case Opt_nodio:
24618+ au_opt_clr(sbinfo->si_mntflags, DIO);
24619+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24620+ break;
24621+
24622+ case Opt_fhsm_sec:
24623+ au_fhsm_set(sbinfo, opt->fhsm_second);
24624+ break;
24625+
24626+ case Opt_diropq_a:
24627+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24628+ break;
24629+ case Opt_diropq_w:
24630+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24631+ break;
24632+
24633+ case Opt_warn_perm:
24634+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
24635+ break;
24636+ case Opt_nowarn_perm:
24637+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
24638+ break;
24639+
24640+ case Opt_verbose:
24641+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
24642+ break;
24643+ case Opt_noverbose:
24644+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
24645+ break;
24646+
24647+ case Opt_sum:
24648+ au_opt_set(sbinfo->si_mntflags, SUM);
24649+ break;
24650+ case Opt_wsum:
24651+ au_opt_clr(sbinfo->si_mntflags, SUM);
24652+ au_opt_set(sbinfo->si_mntflags, SUM_W);
24653+ case Opt_nosum:
24654+ au_opt_clr(sbinfo->si_mntflags, SUM);
24655+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
24656+ break;
24657+
24658+ case Opt_wbr_create:
24659+ err = au_opt_wbr_create(sb, &opt->wbr_create);
24660+ break;
24661+ case Opt_wbr_copyup:
24662+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
24663+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
24664+ break;
24665+
24666+ case Opt_dirwh:
24667+ sbinfo->si_dirwh = opt->dirwh;
24668+ break;
24669+
24670+ case Opt_rdcache:
24671+ sbinfo->si_rdcache
24672+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
24673+ break;
24674+ case Opt_rdblk:
24675+ sbinfo->si_rdblk = opt->rdblk;
24676+ break;
24677+ case Opt_rdblk_def:
24678+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
24679+ break;
24680+ case Opt_rdhash:
24681+ sbinfo->si_rdhash = opt->rdhash;
24682+ break;
24683+ case Opt_rdhash_def:
24684+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
24685+ break;
24686+
24687+ case Opt_shwh:
24688+ au_opt_set(sbinfo->si_mntflags, SHWH);
24689+ break;
24690+ case Opt_noshwh:
24691+ au_opt_clr(sbinfo->si_mntflags, SHWH);
24692+ break;
24693+
24694+ case Opt_dirperm1:
24695+ au_opt_set(sbinfo->si_mntflags, DIRPERM1);
24696+ break;
24697+ case Opt_nodirperm1:
24698+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
24699+ break;
24700+
24701+ case Opt_trunc_xino:
24702+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
24703+ break;
24704+ case Opt_notrunc_xino:
24705+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
24706+ break;
24707+
24708+ case Opt_trunc_xino_path:
24709+ case Opt_itrunc_xino:
24710+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
24711+ if (!err)
24712+ err = 1;
24713+ break;
24714+
24715+ case Opt_trunc_xib:
24716+ au_fset_opts(opts->flags, TRUNC_XIB);
24717+ break;
24718+ case Opt_notrunc_xib:
24719+ au_fclr_opts(opts->flags, TRUNC_XIB);
24720+ break;
24721+
24722+ case Opt_acl:
24723+ sb->s_flags |= MS_POSIXACL;
24724+ break;
24725+ case Opt_noacl:
24726+ sb->s_flags &= ~MS_POSIXACL;
24727+ break;
24728+
24729+ default:
24730+ err = 0;
24731+ break;
24732+ }
24733+
24734+ return err;
24735+}
24736+
24737+/*
24738+ * apply one branch add/del/mod option. returns tri-state.
24739+ * plus: processed without an error
24740+ * zero: unprocessed
24741+ * minus: error
24742+ */
24743+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
24744+ struct au_opts *opts)
24745+{
24746+ int err, do_refresh;
24747+
24748+ err = 0; /* stays zero when opt->type is not handled below */
24749+ switch (opt->type) {
24750+ case Opt_append:
24751+ opt->add.bindex = au_sbbot(sb) + 1; /* one past the current bottom */
24752+ if (opt->add.bindex < 0)
24753+ opt->add.bindex = 0;
24754+ goto add;
24755+ case Opt_prepend:
24756+ opt->add.bindex = 0;
24757+ add: /* indented label */
24758+ case Opt_add:
24759+ err = au_br_add(sb, &opt->add,
24760+ au_ftest_opts(opts->flags, REMOUNT));
24761+ if (!err) {
24762+ err = 1; /* processed */
24763+ au_fset_opts(opts->flags, REFRESH);
24764+ }
24765+ break;
24766+
24767+ case Opt_del:
24768+ case Opt_idel:
24769+ err = au_br_del(sb, &opt->del,
24770+ au_ftest_opts(opts->flags, REMOUNT));
24771+ if (!err) {
24772+ err = 1; /* processed */
24773+ au_fset_opts(opts->flags, TRUNC_XIB); /* a successful delete also requests TRUNC_XIB */
24774+ au_fset_opts(opts->flags, REFRESH);
24775+ }
24776+ break;
24777+
24778+ case Opt_mod:
24779+ case Opt_imod:
24780+ err = au_br_mod(sb, &opt->mod,
24781+ au_ftest_opts(opts->flags, REMOUNT),
24782+ &do_refresh);
24783+ if (!err) {
24784+ err = 1; /* processed */
24785+ if (do_refresh) /* REFRESH only when au_br_mod() reported it */
24786+ au_fset_opts(opts->flags, REFRESH);
24787+ }
24788+ break;
24789+ }
24790+
24791+ return err;
24792+}
24793+
24794+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
24795+ struct au_opt_xino **opt_xino,
24796+ struct au_opts *opts)
24797+{ /* handle Opt_xino/Opt_noxino; returns zero or the au_xino_set() error */
24798+ int err;
24799+ aufs_bindex_t bbot, bindex;
24800+ struct dentry *root, *parent, *h_root;
24801+
24802+ err = 0; /* other option types are ignored and leave *opt_xino untouched */
24803+ switch (opt->type) {
24804+ case Opt_xino:
24805+ err = au_xino_set(sb, &opt->xino,
24806+ !!au_ftest_opts(opts->flags, REMOUNT));
24807+ if (unlikely(err))
24808+ break;
24809+
24810+ *opt_xino = &opt->xino; /* tell the caller which xino option was applied */
24811+ au_xino_brid_set(sb, -1);
24812+
24813+ /* safe d_parent access */
24814+ parent = opt->xino.file->f_path.dentry->d_parent;
24815+ root = sb->s_root;
24816+ bbot = au_sbbot(sb);
24817+ for (bindex = 0; bindex <= bbot; bindex++) { /* is the xino file directly under a branch root? */
24818+ h_root = au_h_dptr(root, bindex);
24819+ if (h_root == parent) {
24820+ au_xino_brid_set(sb, au_sbr_id(sb, bindex)); /* record that branch's id */
24821+ break;
24822+ }
24823+ }
24824+ break;
24825+
24826+ case Opt_noxino:
24827+ au_xino_clr(sb);
24828+ au_xino_brid_set(sb, -1);
24829+ *opt_xino = (void *)-1; /* non-NULL marker without a real au_opt_xino behind it */
24830+ break;
24831+ }
24832+
24833+ return err;
24834+}
24835+
24836+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24837+ unsigned int pending)
24838+{ /* warn about odd option combinations, run au_wh_init() on the branches that need it, update the NO_DREVAL/FHSM si flags */
24839+ int err, fhsm;
24840+ aufs_bindex_t bindex, bbot;
24841+ unsigned char do_plink, skip, do_free, can_no_dreval;
24842+ struct au_branch *br;
24843+ struct au_wbr *wbr;
24844+ struct dentry *root, *dentry;
24845+ struct inode *dir, *h_dir;
24846+ struct au_sbinfo *sbinfo;
24847+ struct au_hinode *hdir;
24848+
24849+ SiMustAnyLock(sb);
24850+
24851+ sbinfo = au_sbi(sb);
24852+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
24853+
24854+ if (!(sb_flags & MS_RDONLY)) { /* warnings only, not errors */
24855+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
24856+ pr_warn("first branch should be rw\n");
24857+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
24858+ pr_warn_once("shwh should be used with ro\n");
24859+ }
24860+
24861+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY) /* @pending: flags not yet stored in si_mntflags */
24862+ && !au_opt_test(sbinfo->si_mntflags, XINO))
24863+ pr_warn_once("udba=*notify requires xino\n");
24864+
24865+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
24866+ pr_warn_once("dirperm1 breaks the protection"
24867+ " by the permission bits on the lower branch\n");
24868+
24869+ err = 0;
24870+ fhsm = 0; /* counts the branches for which au_br_fhsm() is true */
24871+ root = sb->s_root;
24872+ dir = d_inode(root);
24873+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
24874+ can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
24875+ UDBA_NONE);
24876+ bbot = au_sbbot(sb);
24877+ for (bindex = 0; !err && bindex <= bbot; bindex++) { /* stops at the first au_wh_init() error */
24878+ skip = 0;
24879+ h_dir = au_h_iptr(dir, bindex);
24880+ br = au_sbr(sb, bindex);
24881+
24882+ if ((br->br_perm & AuBrAttr_ICEX)
24883+ && !h_dir->i_op->listxattr)
24884+ br->br_perm &= ~AuBrAttr_ICEX; /* drop ICEX when the branch dir has no ->listxattr */
24885+#if 0
24886+ if ((br->br_perm & AuBrAttr_ICEX_SEC)
24887+ && (au_br_sb(br)->s_flags & MS_NOSEC))
24888+ br->br_perm &= ~AuBrAttr_ICEX_SEC;
24889+#endif
24890+
24891+ do_free = 0;
24892+ wbr = br->br_wbr;
24893+ if (wbr)
24894+ wbr_wh_read_lock(wbr);
24895+
24896+ if (!au_br_writable(br->br_perm)) { /* non-writable branch */
24897+ do_free = !!wbr; /* a leftover wbr is freed after au_wh_init() succeeds */
24898+ skip = (!wbr
24899+ || (!wbr->wbr_whbase
24900+ && !wbr->wbr_plink
24901+ && !wbr->wbr_orph));
24902+ } else if (!au_br_wh_linkable(br->br_perm)) {
24903+ /* skip = (!br->br_whbase && !br->br_orph); */
24904+ skip = (!wbr || !wbr->wbr_whbase);
24905+ if (skip && wbr) {
24906+ if (do_plink)
24907+ skip = !!wbr->wbr_plink;
24908+ else
24909+ skip = !wbr->wbr_plink;
24910+ }
24911+ } else {
24912+ /* skip = (br->br_whbase && br->br_orph); */
24913+ skip = (wbr && wbr->wbr_whbase);
24914+ if (skip) { /* skip only if wbr_plink presence matches the PLINK option */
24915+ if (do_plink)
24916+ skip = !!wbr->wbr_plink;
24917+ else
24918+ skip = !wbr->wbr_plink;
24919+ }
24920+ }
24921+ if (wbr)
24922+ wbr_wh_read_unlock(wbr);
24923+
24924+ if (can_no_dreval) { /* any branch root with a revalidate op clears it */
24925+ dentry = br->br_path.dentry;
24926+ spin_lock(&dentry->d_lock);
24927+ if (dentry->d_flags &
24928+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
24929+ can_no_dreval = 0;
24930+ spin_unlock(&dentry->d_lock);
24931+ }
24932+
24933+ if (au_br_fhsm(br->br_perm)) {
24934+ fhsm++;
24935+ AuDebugOn(!br->br_fhsm);
24936+ }
24937+
24938+ if (skip)
24939+ continue;
24940+
24941+ hdir = au_hi(dir, bindex);
24942+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
24943+ if (wbr)
24944+ wbr_wh_write_lock(wbr);
24945+ err = au_wh_init(br, sb);
24946+ if (wbr)
24947+ wbr_wh_write_unlock(wbr);
24948+ au_hn_inode_unlock(hdir);
24949+
24950+ if (!err && do_free) {
24951+ if (wbr)
24952+ au_delayed_kfree(wbr);
24953+ br->br_wbr = NULL;
24954+ }
24955+ }
24956+
24957+ if (can_no_dreval)
24958+ au_fset_si(sbinfo, NO_DREVAL);
24959+ else
24960+ au_fclr_si(sbinfo, NO_DREVAL);
24961+
24962+ if (fhsm >= 2) { /* the FHSM flag needs at least two fhsm branches */
24963+ au_fset_si(sbinfo, FHSM);
24964+ for (bindex = bbot; bindex >= 0; bindex--) { /* highest-index fhsm branch becomes the bottom */
24965+ br = au_sbr(sb, bindex);
24966+ if (au_br_fhsm(br->br_perm)) {
24967+ au_fhsm_set_bottom(sb, bindex);
24968+ break;
24969+ }
24970+ }
24971+ } else {
24972+ au_fclr_si(sbinfo, FHSM);
24973+ au_fhsm_set_bottom(sb, -1);
24974+ }
24975+
24976+ return err;
24977+}
24978+
24979+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
24980+{ /* apply the option list in passes: simple options, branches, xino, verify, then restore xino/udba */
24981+ int err;
24982+ unsigned int tmp;
24983+ aufs_bindex_t bindex, bbot;
24984+ struct au_opt *opt;
24985+ struct au_opt_xino *opt_xino, xino;
24986+ struct au_sbinfo *sbinfo;
24987+ struct au_branch *br;
24988+ struct inode *dir;
24989+
24990+ SiMustWriteLock(sb);
24991+
24992+ err = 0;
24993+ opt_xino = NULL;
24994+ opt = opts->opt;
24995+ while (err >= 0 && opt->type != Opt_tail) /* the list ends at Opt_tail */
24996+ err = au_opt_simple(sb, opt++, opts);
24997+ if (err > 0) /* positive means "processed", not an error */
24998+ err = 0;
24999+ else if (unlikely(err < 0))
25000+ goto out;
25001+
25002+ /* disable xino and udba temporarily */
25003+ sbinfo = au_sbi(sb);
25004+ tmp = sbinfo->si_mntflags; /* remember the requested flags */
25005+ au_opt_clr(sbinfo->si_mntflags, XINO);
25006+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
25007+
25008+ opt = opts->opt;
25009+ while (err >= 0 && opt->type != Opt_tail)
25010+ err = au_opt_br(sb, opt++, opts);
25011+ if (err > 0)
25012+ err = 0;
25013+ else if (unlikely(err < 0))
25014+ goto out;
25015+
25016+ bbot = au_sbbot(sb);
25017+ if (unlikely(bbot < 0)) {
25018+ err = -EINVAL;
25019+ pr_err("no branches\n");
25020+ goto out;
25021+ }
25022+
25023+ if (au_opt_test(tmp, XINO))
25024+ au_opt_set(sbinfo->si_mntflags, XINO);
25025+ opt = opts->opt;
25026+ while (!err && opt->type != Opt_tail)
25027+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
25028+ if (unlikely(err))
25029+ goto out;
25030+
25031+ err = au_opts_verify(sb, sb->s_flags, tmp); /* tmp carries the udba flags not yet restored */
25032+ if (unlikely(err))
25033+ goto out;
25034+
25035+ /* restore xino */
25036+ if (au_opt_test(tmp, XINO) && !opt_xino) { /* no xino/noxino option was given: use the default file */
25037+ xino.file = au_xino_def(sb);
25038+ err = PTR_ERR(xino.file);
25039+ if (IS_ERR(xino.file))
25040+ goto out;
25041+
25042+ err = au_xino_set(sb, &xino, /*remount*/0);
25043+ fput(xino.file); /* drop the local reference whether or not au_xino_set() succeeded */
25044+ if (unlikely(err))
25045+ goto out;
25046+ }
25047+
25048+ /* restore udba */
25049+ tmp &= AuOptMask_UDBA;
25050+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
25051+ sbinfo->si_mntflags |= tmp;
25052+ bbot = au_sbbot(sb);
25053+ for (bindex = 0; bindex <= bbot; bindex++) {
25054+ br = au_sbr(sb, bindex);
25055+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
25056+ if (unlikely(err))
25057+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
25058+ bindex, err);
25059+ /* go on even if err */
25060+ }
25061+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
25062+ dir = d_inode(sb->s_root);
25063+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
25064+ }
25065+
25066+out:
25067+ return err; /* NOTE(review): a failure from the last au_hnotify_reset_br() call is still returned here */
25068+}
25069+
25070+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
25071+{ /* apply the option list in a single pass, verify, then decide whether REFRESH is needed */
25072+ int err, rerr;
25073+ unsigned char no_dreval;
25074+ struct inode *dir;
25075+ struct au_opt_xino *opt_xino;
25076+ struct au_opt *opt;
25077+ struct au_sbinfo *sbinfo;
25078+
25079+ SiMustWriteLock(sb);
25080+
25081+ err = 0;
25082+ dir = d_inode(sb->s_root);
25083+ sbinfo = au_sbi(sb);
25084+ opt_xino = NULL;
25085+ opt = opts->opt;
25086+ while (err >= 0 && opt->type != Opt_tail) { /* a positive err (processed) keeps the loop going */
25087+ err = au_opt_simple(sb, opt, opts);
25088+ if (!err) /* zero: not handled yet, try the next handler */
25089+ err = au_opt_br(sb, opt, opts);
25090+ if (!err)
25091+ err = au_opt_xino(sb, opt, &opt_xino, opts);
25092+ opt++;
25093+ }
25094+ if (err > 0)
25095+ err = 0;
25096+ AuTraceErr(err);
25097+ /* go on even if err */
25098+
25099+ no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL); /* state before au_opts_verify() may change it */
25100+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
25101+ if (unlikely(rerr && !err)) /* keep the first error */
25102+ err = rerr;
25103+
25104+ if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
25105+ au_fset_opts(opts->flags, REFRESH_IDOP);
25106+
25107+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
25108+ rerr = au_xib_trunc(sb);
25109+ if (unlikely(rerr && !err))
25110+ err = rerr;
25111+ }
25112+
25113+ /* will be handled by the caller */
25114+ if (!au_ftest_opts(opts->flags, REFRESH)
25115+ && (opts->given_udba
25116+ || au_opt_test(sbinfo->si_mntflags, XINO)
25117+ || au_ftest_opts(opts->flags, REFRESH_IDOP)
25118+ ))
25119+ au_fset_opts(opts->flags, REFRESH);
25120+
25121+ AuDbg("status 0x%x\n", opts->flags);
25122+ return err;
25123+}
25124+
25125+/* ---------------------------------------------------------------------- */
25126+
25127+unsigned int au_opt_udba(struct super_block *sb)
25128+{ /* returns only the AuOpt_UDBA_* bits of the mount flags */
25129+ return au_mntflags(sb) & AuOptMask_UDBA;
25130+}
25131diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
25132--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
25133+++ linux/fs/aufs/opts.h 2016-12-17 12:28:17.598545045 +0100
25134@@ -0,0 +1,213 @@
25135+/*
25136+ * Copyright (C) 2005-2016 Junjiro R. Okajima
25137+ *
25138+ * This program, aufs is free software; you can redistribute it and/or modify
25139+ * it under the terms of the GNU General Public License as published by
25140+ * the Free Software Foundation; either version 2 of the License, or
25141+ * (at your option) any later version.
25142+ *
25143+ * This program is distributed in the hope that it will be useful,
25144+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25145+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25146+ * GNU General Public License for more details.
25147+ *
25148+ * You should have received a copy of the GNU General Public License
25149+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25150+ */
25151+
25152+/*
25153+ * mount options/flags
25154+ */
25155+
25156+#ifndef __AUFS_OPTS_H__
25157+#define __AUFS_OPTS_H__
25158+
25159+#ifdef __KERNEL__
25160+
25161+#include <linux/path.h>
25162+
25163+struct file;
25164+struct super_block;
25165+
25166+/* ---------------------------------------------------------------------- */
25167+
25168+/* mount flags (bit 8 is not assigned in this list) */
25169+#define AuOpt_XINO 1 /* external inode number bitmap
25170+ and translation table */
25171+#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
25172+#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
25173+#define AuOpt_UDBA_REVAL (1 << 3)
25174+#define AuOpt_UDBA_HNOTIFY (1 << 4)
25175+#define AuOpt_SHWH (1 << 5) /* show whiteout */
25176+#define AuOpt_PLINK (1 << 6) /* pseudo-link */
25177+#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm
25178+ bits */
25179+#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy for creating diropq */
25180+#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */
25181+#define AuOpt_SUM_W (1 << 11) /* unimplemented */
25182+#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */
25183+#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */
25184+#define AuOpt_DIO (1 << 14) /* direct io */
25185+
25186+#ifndef CONFIG_AUFS_HNOTIFY
25187+#undef AuOpt_UDBA_HNOTIFY
25188+#define AuOpt_UDBA_HNOTIFY 0 /* compiled out: au_opt_test() on it is always zero */
25189+#endif
25190+#ifndef CONFIG_AUFS_SHWH
25191+#undef AuOpt_SHWH
25192+#define AuOpt_SHWH 0 /* compiled out: au_opt_test() on it is always zero */
25193+#endif
25194+
25195+#define AuOpt_Def (AuOpt_XINO \
25196+ | AuOpt_UDBA_REVAL \
25197+ | AuOpt_PLINK \
25198+ /* | AuOpt_DIRPERM1 */ \
25199+ | AuOpt_WARN_PERM)
25200+#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
25201+ | AuOpt_UDBA_REVAL \
25202+ | AuOpt_UDBA_HNOTIFY)
25203+
25204+#define au_opt_test(flags, name) ((flags) & AuOpt_##name) /* nonzero if AuOpt_<name> is set; @flags may be any expression */
25205+#define au_opt_set(flags, name) do { /* for non-UDBA bits only, enforced at build time */ \
25206+ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
25207+ ((flags) |= AuOpt_##name); \
25208+} while (0)
25209+#define au_opt_set_udba(flags, name) do { /* clears all UDBA bits, then sets AuOpt_<name> */ \
25210+ (flags) &= ~AuOptMask_UDBA; \
25211+ ((flags) |= AuOpt_##name); \
25212+} while (0)
25213+#define au_opt_clr(flags, name) do { /* clears AuOpt_<name> */ \
25214+ ((flags) &= ~AuOpt_##name); \
25215+} while (0)
25216+
25217+static inline unsigned int au_opts_plink(unsigned int mntflags)
25218+{ /* returns @mntflags unchanged with CONFIG_PROC_FS, otherwise with AuOpt_PLINK cleared */
25219+#ifdef CONFIG_PROC_FS
25220+ return mntflags;
25221+#else
25222+ return mntflags & ~AuOpt_PLINK;
25223+#endif
25224+}
25225+
25226+/* ---------------------------------------------------------------------- */
25227+
25228+/* policies to select one among multiple writable branches */
25229+enum {
25230+ AuWbrCreate_TDP, /* top down parent */
25231+ AuWbrCreate_RR, /* round robin */
25232+ AuWbrCreate_MFS, /* most free space */
25233+ AuWbrCreate_MFSV, /* mfs with seconds */
25234+ AuWbrCreate_MFSRR, /* mfs then rr */
25235+ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */
25236+ AuWbrCreate_TDMFS, /* top down regardless parent and mfs */
25237+ AuWbrCreate_TDMFSV, /* tdmfs with seconds */
25238+ AuWbrCreate_PMFS, /* parent and mfs */
25239+ AuWbrCreate_PMFSV, /* parent and mfs with seconds */
25240+ AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */
25241+ AuWbrCreate_PMFSRRV, /* plus seconds */
25242+
25243+ AuWbrCreate_Def = AuWbrCreate_TDP /* default policy */
25244+};
25245+
25246+enum { /* NOTE(review): presumably the branch-selection policies for copy-up -- confirm */
25247+ AuWbrCopyup_TDP, /* top down parent */
25248+ AuWbrCopyup_BUP, /* bottom up parent */
25249+ AuWbrCopyup_BU, /* bottom up */
25250+
25251+ AuWbrCopyup_Def = AuWbrCopyup_TDP /* default policy */
25252+};
25253+
25254+/* ---------------------------------------------------------------------- */
25255+
25256+struct au_opt_add { /* payload of the add/append/prepend options, handed to au_br_add() */
25257+ aufs_bindex_t bindex; /* insert position; overwritten for append (bottom + 1) and prepend (0) */
25258+ char *pathname;
25259+ int perm;
25260+ struct path path;
25261+};
25262+
25263+struct au_opt_del { /* payload of the del/idel options, handed to au_br_del() */
25264+ char *pathname;
25265+ struct path h_path;
25266+};
25267+
25268+struct au_opt_mod { /* payload of the mod/imod options, handed to au_br_mod() */
25269+ char *path;
25270+ int perm;
25271+ struct dentry *h_root;
25272+};
25273+
25274+struct au_opt_xino { /* payload of the xino option, handed to au_xino_set() */
25275+ char *path;
25276+ struct file *file; /* its parent dentry is compared with the branch roots */
25277+};
25278+
25279+struct au_opt_xino_itrunc { /* payload of trunc_xino_path/itrunc_xino */
25280+ aufs_bindex_t bindex; /* branch index handed to au_xino_trunc() */
25281+};
25282+
25283+struct au_opt_wbr_create { /* NOTE(review): presumably the parsed create-policy option -- confirm in opts.c */
25284+ int wbr_create; /* presumably an AuWbrCreate_* value -- TODO confirm */
25285+ int mfs_second;
25286+ unsigned long long mfsrr_watermark;
25287+};
25288+
25289+struct au_opt { /* one mount option; an array of these ends with type == Opt_tail */
25290+ int type; /* Opt_* value; decides which union member is read */
25291+ union {
25292+ struct au_opt_xino xino;
25293+ struct au_opt_xino_itrunc xino_itrunc;
25294+ struct au_opt_add add;
25295+ struct au_opt_del del;
25296+ struct au_opt_mod mod;
25297+ int dirwh;
25298+ int rdcache;
25299+ unsigned int rdblk;
25300+ unsigned int rdhash;
25301+ int udba;
25302+ struct au_opt_wbr_create wbr_create;
25303+ int wbr_copyup;
25304+ unsigned int fhsm_second;
25305+ };
25306+};
25307+
25308+/* opts flags: bits of struct au_opts.flags */
25309+#define AuOpts_REMOUNT 1 /* forwarded to au_br_add/del/mod() and au_xino_set() */
25310+#define AuOpts_REFRESH (1 << 1) /* set after a branch add/del, or a mod that asks for it */
25311+#define AuOpts_TRUNC_XIB (1 << 2) /* makes au_opts_remount() call au_xib_trunc() */
25312+#define AuOpts_REFRESH_DYAOP (1 << 3)
25313+#define AuOpts_REFRESH_IDOP (1 << 4) /* set by au_opts_remount() when NO_DREVAL flips */
25314+#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
25315+#define au_fset_opts(flags, name) \
25316+ do { (flags) |= AuOpts_##name; } while (0)
25317+#define au_fclr_opts(flags, name) \
25318+ do { (flags) &= ~AuOpts_##name; } while (0)
25319+
25320+struct au_opts { /* an option list plus the state gathered while applying it */
25321+ struct au_opt *opt; /* array walked until type == Opt_tail */
25322+ int max_opt;
25323+
25324+ unsigned int given_udba; /* nonzero makes au_opts_remount() request REFRESH */
25325+ unsigned int flags; /* AuOpts_* bits */
25326+ unsigned long sb_flags; /* passed to au_opts_verify() on remount */
25327+};
25328+
25329+/* ---------------------------------------------------------------------- */
25330+
25331+/* opts.c */
25332+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
25333+const char *au_optstr_udba(int udba);
25334+const char *au_optstr_wbr_copyup(int wbr_copyup);
25335+const char *au_optstr_wbr_create(int wbr_create);
25336+
25337+void au_opts_free(struct au_opts *opts);
25338+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
25339+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
25340+ unsigned int pending);
25341+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
25342+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
25343+
25344+unsigned int au_opt_udba(struct super_block *sb);
25345+
25346+#endif /* __KERNEL__ */
25347+#endif /* __AUFS_OPTS_H__ */
25348diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
25349--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
25350+++ linux/fs/aufs/plink.c 2016-10-09 16:55:36.496035060 +0200
25351@@ -0,0 +1,514 @@
25352+/*
25353+ * Copyright (C) 2005-2016 Junjiro R. Okajima
25354+ *
25355+ * This program, aufs is free software; you can redistribute it and/or modify
25356+ * it under the terms of the GNU General Public License as published by
25357+ * the Free Software Foundation; either version 2 of the License, or
25358+ * (at your option) any later version.
25359+ *
25360+ * This program is distributed in the hope that it will be useful,
25361+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25362+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25363+ * GNU General Public License for more details.
25364+ *
25365+ * You should have received a copy of the GNU General Public License
25366+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25367+ */
25368+
25369+/*
25370+ * pseudo-link
25371+ */
25372+
25373+#include "aufs.h"
25374+
25375+/*
25376+ * the pseudo-link maintenance mode.
25377+ * while a user process maintains the pseudo-links,
25378+ * prohibit adding a new plink and branch manipulation.
25379+ *
25380+ * Flags
25381+ * NOPLM:
25382+ * For entry functions which will handle plink, and i_mutex is already held
25383+ * in VFS.
25384+ * They cannot wait and should return an error at once.
25385+ * Callers have to check the error.
25386+ * NOPLMW:
25387+ * For entry functions which will handle plink, but i_mutex is not held
25388+ * in VFS.
25389+ * They can wait for the plink maintenance mode to finish.
25390+ *
25391+ * They behave like F_SETLK and F_SETLKW.
25392+ * If the caller never handles plink, then both flags are unnecessary.
25393+ */
25394+
25395+int au_plink_maint(struct super_block *sb, int flags)
25396+{ /* zero if the caller may go on; -EAGAIN under NOPLM while another process is in maintenance mode */
25397+ int err;
25398+ pid_t pid, ppid;
25399+ struct task_struct *parent, *prev;
25400+ struct au_sbinfo *sbi;
25401+
25402+ SiMustAnyLock(sb);
25403+
25404+ err = 0;
25405+ if (!au_opt_test(au_mntflags(sb), PLINK))
25406+ goto out;
25407+
25408+ sbi = au_sbi(sb);
25409+ pid = sbi->si_plink_maint_pid;
25410+ if (!pid || pid == current->pid) /* nobody is maintaining, or it is the caller itself */
25411+ goto out;
25412+
25413+ /* todo: it highly depends upon /sbin/mount.aufs */
25414+ prev = NULL;
25415+ parent = current;
25416+ ppid = 0;
25417+ rcu_read_lock();
25418+ while (1) { /* walk up real_parent until it stops changing */
25419+ parent = rcu_dereference(parent->real_parent);
25420+ if (parent == prev)
25421+ break;
25422+ ppid = task_pid_vnr(parent);
25423+ if (pid == ppid) { /* the maintainer is an ancestor: let the caller through */
25424+ rcu_read_unlock();
25425+ goto out;
25426+ }
25427+ prev = parent;
25428+ }
25429+ rcu_read_unlock();
25430+
25431+ if (au_ftest_lock(flags, NOPLMW)) {
25432+ /* if there is no i_mutex lock in VFS, we don't need to wait */
25433+ /* AuDebugOn(!lockdep_depth(current)); */
25434+ while (sbi->si_plink_maint_pid) { /* re-checked after the si read lock is retaken */
25435+ si_read_unlock(sb);
25436+ /* gave up wake_up_bit() */
25437+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
25438+
25439+ if (au_ftest_lock(flags, FLUSH))
25440+ au_nwt_flush(&sbi->si_nowait);
25441+ si_noflush_read_lock(sb);
25442+ }
25443+ } else if (au_ftest_lock(flags, NOPLM)) {
25444+ AuDbg("ppid %d, pid %d\n", ppid, pid);
25445+ err = -EAGAIN;
25446+ }
25447+
25448+out:
25449+ return err;
25450+}
25451+
25452+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
25453+{ /* clear the maintainer pid and wake everybody sleeping on si_plink_wq */
25454+ spin_lock(&sbinfo->si_plink_maint_lock);
25455+ sbinfo->si_plink_maint_pid = 0;
25456+ spin_unlock(&sbinfo->si_plink_maint_lock);
25457+ wake_up_all(&sbinfo->si_plink_wq);
25458+}
25459+
25460+int au_plink_maint_enter(struct super_block *sb)
25461+{ /* register current as the plink maintainer; -EBUSY if one is already registered */
25462+ int err;
25463+ struct au_sbinfo *sbinfo;
25464+
25465+ err = 0; /* also returned when the PLINK option is off and nothing is recorded */
25466+ sbinfo = au_sbi(sb);
25467+ /* make sure i am the only one in this fs */
25468+ si_write_lock(sb, AuLock_FLUSH);
25469+ if (au_opt_test(au_mntflags(sb), PLINK)) {
25470+ spin_lock(&sbinfo->si_plink_maint_lock);
25471+ if (!sbinfo->si_plink_maint_pid)
25472+ sbinfo->si_plink_maint_pid = current->pid;
25473+ else
25474+ err = -EBUSY;
25475+ spin_unlock(&sbinfo->si_plink_maint_lock);
25476+ }
25477+ si_write_unlock(sb);
25478+
25479+ return err;
25480+}
25481+
25482+/* ---------------------------------------------------------------------- */
25483+
25484+#ifdef CONFIG_AUFS_DEBUG
25485+void au_plink_list(struct super_block *sb)
25486+{ /* debug aid: print the inode number of every entry in all plink hash lists */
25487+ int i;
25488+ struct au_sbinfo *sbinfo;
25489+ struct hlist_head *plink_hlist;
25490+ struct au_icntnr *icntnr;
25491+
25492+ SiMustAnyLock(sb);
25493+
25494+ sbinfo = au_sbi(sb);
25495+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
25496+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
25497+
25498+ for (i = 0; i < AuPlink_NHASH; i++) {
25499+ plink_hlist = &sbinfo->si_plink[i].head;
25500+ rcu_read_lock(); /* taken per bucket, not around the whole walk */
25501+ hlist_for_each_entry_rcu(icntnr, plink_hlist, plink)
25502+ AuDbg("%lu\n", icntnr->vfs_inode.i_ino);
25503+ rcu_read_unlock();
25504+ }
25505+}
25506+#endif
25507+
25508+/* is the inode pseudo-linked? returns 1 if @inode is on its plink hash list, else 0 */
25509+int au_plink_test(struct inode *inode)
25510+{
25511+ int found, i;
25512+ struct au_sbinfo *sbinfo;
25513+ struct hlist_head *plink_hlist;
25514+ struct au_icntnr *icntnr;
25515+
25516+ sbinfo = au_sbi(inode->i_sb);
25517+ AuRwMustAnyLock(&sbinfo->si_rwsem);
25518+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
25519+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
25520+
25521+ found = 0;
25522+ i = au_plink_hash(inode->i_ino); /* the bucket is chosen by the aufs inode number */
25523+ plink_hlist = &sbinfo->si_plink[i].head;
25524+ rcu_read_lock();
25525+ hlist_for_each_entry_rcu(icntnr, plink_hlist, plink)
25526+ if (&icntnr->vfs_inode == inode) { /* match by address of the embedded inode */
25527+ found = 1;
25528+ break;
25529+ }
25530+ rcu_read_unlock();
25531+ return found;
25532+}
25533+
25534+/* ---------------------------------------------------------------------- */
25535+
25536+/*
25537+ * generate a name for plink, "<aufs inode number>.<branch inode number>".
25538+ * the file will be stored under AUFS_WH_PLINKDIR.
25539+ */
25540+/* 20 is max digits length of ulong 64; each number gets one extra byte */
25541+#define PLINK_NAME_LEN ((20 + 1) * 2)
25542+
25543+static int plink_name(char *name, int len, struct inode *inode,
25544+ aufs_bindex_t bindex)
25545+{
25546+ int rlen;
25547+ struct inode *h_inode;
25548+
25549+ h_inode = au_h_iptr(inode, bindex); /* the inode on branch @bindex */
25550+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
25551+ return rlen; /* snprintf()'s return value, unchecked */
25552+}
25553+
25554+struct au_do_plink_lkup_args { /* argument bundle for au_call_do_plink_lkup() */
25555+ struct dentry **errp; /* where the au_do_plink_lkup() result is stored */
25556+ struct qstr *tgtname;
25557+ struct dentry *h_parent;
25558+ struct au_branch *br;
25559+};
25560+
25561+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
25562+ struct dentry *h_parent,
25563+ struct au_branch *br)
25564+{ /* look @tgtname up under @h_parent with the parent inode locked; @br is not used here */
25565+ struct dentry *h_dentry;
25566+ struct inode *h_inode;
25567+
25568+ h_inode = d_inode(h_parent);
25569+ inode_lock_nested(h_inode, AuLsc_I_CHILD2);
25570+ h_dentry = vfsub_lkup_one(tgtname, h_parent);
25571+ inode_unlock(h_inode);
25572+ return h_dentry; /* whatever vfsub_lkup_one() returned, passed through as is */
25573+}
25574+
25575+static void au_call_do_plink_lkup(void *args)
25576+{ /* au_wkq_wait() callback: unpack @args, call au_do_plink_lkup(), store the result in *errp */
25577+ struct au_do_plink_lkup_args *a = args;
25578+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
25579+}
25580+
25581+/* lookup the plink-ed @inode under the branch at @bindex */
25582+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
25583+{
25584+ struct dentry *h_dentry, *h_parent;
25585+ struct au_branch *br;
25586+ int wkq_err;
25587+ char a[PLINK_NAME_LEN];
25588+ struct qstr tgtname = QSTR_INIT(a, 0); /* the length is filled in below */
25589+
25590+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
25591+
25592+ br = au_sbr(inode->i_sb, bindex);
25593+ h_parent = br->br_wbr->wbr_plink; /* br_wbr is dereferenced without a NULL check */
25594+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25595+
25596+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) { /* not fsuid root: run the lookup via au_wkq_wait() */
25597+ struct au_do_plink_lkup_args args = {
25598+ .errp = &h_dentry,
25599+ .tgtname = &tgtname,
25600+ .h_parent = h_parent,
25601+ .br = br
25602+ };
25603+
25604+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
25605+ if (unlikely(wkq_err)) /* the workqueue call itself failed */
25606+ h_dentry = ERR_PTR(wkq_err);
25607+ } else
25608+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
25609+
25610+ return h_dentry;
25611+}
25612+
25613+/* create a pseudo-link: hard-link @h_dentry as @tgt under @h_parent */
25614+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
25615+ struct dentry *h_dentry, struct au_branch *br)
25616+{
25617+ int err;
25618+ struct path h_path = {
25619+ .mnt = au_br_mnt(br)
25620+ };
25621+ struct inode *h_dir, *delegated;
25622+
25623+ h_dir = d_inode(h_parent);
25624+ inode_lock_nested(h_dir, AuLsc_I_CHILD2);
25625+again: /* re-lookup after unlinking a stale entry */
25626+ h_path.dentry = vfsub_lkup_one(tgt, h_parent);
25627+ err = PTR_ERR(h_path.dentry);
25628+ if (IS_ERR(h_path.dentry))
25629+ goto out;
25630+
25631+ err = 0;
25632+ /* wh.plink dir is not monitored */
25633+ /* todo: is it really safe? */
25634+ if (d_is_positive(h_path.dentry)
25635+ && d_inode(h_path.dentry) != d_inode(h_dentry)) { /* the name exists but refers to another inode */
25636+ delegated = NULL;
25637+ err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
25638+ if (unlikely(err == -EWOULDBLOCK)) {
25639+ pr_warn("cannot retry for NFSv4 delegation"
25640+ " for an internal unlink\n");
25641+ iput(delegated);
25642+ }
25643+ dput(h_path.dentry);
25644+ h_path.dentry = NULL; /* the dput() below then gets NULL on the error path */
25645+ if (!err)
25646+ goto again;
25647+ }
25648+ if (!err && d_is_negative(h_path.dentry)) { /* positive here means it is already the same inode */
25649+ delegated = NULL;
25650+ err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
25651+ if (unlikely(err == -EWOULDBLOCK)) {
25652+ pr_warn("cannot retry for NFSv4 delegation"
25653+ " for an internal link\n");
25654+ iput(delegated);
25655+ }
25656+ }
25657+ dput(h_path.dentry);
25658+
25659+out:
25660+ inode_unlock(h_dir);
25661+ return err;
25662+}
25663+
25664+struct do_whplink_args { /* argument bundle for call_do_whplink() */
25665+ int *errp; /* where the do_whplink() result is stored */
25666+ struct qstr *tgt;
25667+ struct dentry *h_parent;
25668+ struct dentry *h_dentry;
25669+ struct au_branch *br;
25670+};
25671+
25672+static void call_do_whplink(void *args)
25673+{ /* au_wkq_wait() callback: unpack @args, call do_whplink(), store the result in *errp */
25674+ struct do_whplink_args *a = args;
25675+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
25676+}
25677+
25678+static int whplink(struct dentry *h_dentry, struct inode *inode,
25679+ aufs_bindex_t bindex, struct au_branch *br)
25680+{ /* build the plink name for @inode on @bindex and create the link with do_whplink() */
25681+ int err, wkq_err;
25682+ struct au_wbr *wbr;
25683+ struct dentry *h_parent;
25684+ char a[PLINK_NAME_LEN];
25685+ struct qstr tgtname = QSTR_INIT(a, 0); /* the length is filled in below */
25686+
25687+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
25688+ h_parent = wbr->wbr_plink; /* wbr is dereferenced without a NULL check */
25689+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25690+
25691+ /* always superio. */
25692+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) { /* not fsuid root: go through au_wkq_wait() */
25693+ struct do_whplink_args args = {
25694+ .errp = &err,
25695+ .tgt = &tgtname,
25696+ .h_parent = h_parent,
25697+ .h_dentry = h_dentry,
25698+ .br = br
25699+ };
25700+ wkq_err = au_wkq_wait(call_do_whplink, &args);
25701+ if (unlikely(wkq_err)) /* the workqueue call itself failed */
25702+ err = wkq_err;
25703+ } else
25704+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
25705+
25706+ return err;
25707+}
25708+
25709+/*
25710+ * create a new pseudo-link for @h_dentry on @bindex.
25711+ * the linked inode is held in aufs @inode.
25712+ */
25713+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
25714+ struct dentry *h_dentry)
25715+{
25716+ struct super_block *sb;
25717+ struct au_sbinfo *sbinfo;
25718+ struct hlist_head *plink_hlist;
25719+ struct au_icntnr *icntnr;
25720+ struct au_sphlhead *sphl;
25721+ int found, err, cnt, i;
25722+
25723+ sb = inode->i_sb;
25724+ sbinfo = au_sbi(sb);
25725+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
25726+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
25727+
25728+ found = au_plink_test(inode); /* lockless pre-check; repeated under the spinlock below */
25729+ if (found)
25730+ return;
25731+
25732+ i = au_plink_hash(inode->i_ino);
25733+ sphl = sbinfo->si_plink + i;
25734+ plink_hlist = &sphl->head;
25735+ au_igrab(inode); /* dropped again below unless the entry is added and linked */
25736+
25737+ spin_lock(&sphl->spin);
25738+ hlist_for_each_entry(icntnr, plink_hlist, plink) {
25739+ if (&icntnr->vfs_inode == inode) { /* someone else added it meanwhile */
25740+ found = 1;
25741+ break;
25742+ }
25743+ }
25744+ if (!found) {
25745+ icntnr = container_of(inode, struct au_icntnr, vfs_inode);
25746+ hlist_add_head_rcu(&icntnr->plink, plink_hlist);
25747+ }
25748+ spin_unlock(&sphl->spin);
25749+ if (!found) {
25750+ cnt = au_sphl_count(sphl);
25751+#define msg "unexpectedly unbalanced or too many pseudo-links"
25752+ if (cnt > AUFS_PLINK_WARN)
25753+ AuWarn1(msg ", %d\n", cnt);
25754+#undef msg
25755+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
25756+ if (unlikely(err)) { /* roll back: unlist the entry and drop its reference */
25757+ pr_warn("err %d, damaged pseudo link.\n", err);
25758+ au_sphl_del_rcu(&icntnr->plink, sphl);
25759+ iput(&icntnr->vfs_inode);
25760+ }
25761+ } else
25762+ iput(&icntnr->vfs_inode); /* already listed: the extra reference is not needed */
25763+}
25764+
25765+/* free all plinks; with @verbose, warn once if any list is not empty */
25766+void au_plink_put(struct super_block *sb, int verbose)
25767+{
25768+ int i, warned;
25769+ struct au_sbinfo *sbinfo;
25770+ struct hlist_head *plink_hlist;
25771+ struct hlist_node *tmp;
25772+ struct au_icntnr *icntnr;
25773+
25774+ SiMustWriteLock(sb);
25775+
25776+ sbinfo = au_sbi(sb);
25777+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
25778+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
25779+
25780+ /* no spin_lock since sbinfo is write-locked */
25781+ warned = 0;
25782+ for (i = 0; i < AuPlink_NHASH; i++) {
25783+ plink_hlist = &sbinfo->si_plink[i].head;
25784+ if (!warned && verbose && !hlist_empty(plink_hlist)) {
25785+ pr_warn("pseudo-link is not flushed\n"); /* newline-terminated so the log line is complete */
25786+ warned = 1;
25787+ }
25788+ hlist_for_each_entry_safe(icntnr, tmp, plink_hlist, plink)
25789+ iput(&icntnr->vfs_inode); /* iput() every listed inode */
25790+ INIT_HLIST_HEAD(plink_hlist); /* then empty the bucket in one go */
25791+ }
25792+}
25793+
25794+void au_plink_clean(struct super_block *sb, int verbose)
25795+{ /* au_plink_put() under aufs_write_lock() on the root, only when the PLINK option is on */
25796+ struct dentry *root;
25797+
25798+ root = sb->s_root;
25799+ aufs_write_lock(root);
25800+ if (au_opt_test(au_mntflags(sb), PLINK))
25801+ au_plink_put(sb, verbose);
25802+ aufs_write_unlock(root);
25803+}
25804+
25805+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
25806+{ /* drop @inode's lower inode on branch @br_id; returns 1 if no lower inode is left (or btop < 0) */
25807+ int do_put;
25808+ aufs_bindex_t btop, bbot, bindex;
25809+
25810+ do_put = 0; /* stays zero when no lower inode belongs to @br_id */
25811+ btop = au_ibtop(inode);
25812+ bbot = au_ibbot(inode);
25813+ if (btop >= 0) {
25814+ for (bindex = btop; bindex <= bbot; bindex++) {
25815+ if (!au_h_iptr(inode, bindex)
25816+ || au_ii_br_id(inode, bindex) != br_id)
25817+ continue;
25818+ au_set_h_iptr(inode, bindex, NULL, 0); /* only the first match is cleared */
25819+ do_put = 1;
25820+ break;
25821+ }
25822+ if (do_put)
25823+ for (bindex = btop; bindex <= bbot; bindex++)
25824+ if (au_h_iptr(inode, bindex)) { /* still has a lower inode: keep the plink */
25825+ do_put = 0;
25826+ break;
25827+ }
25828+ } else
25829+ do_put = 1;
25830+
25831+ return do_put;
25832+}
25833+
25834+/* free the plinks on a branch specified by @br_id */
25835+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
25836+{
25837+ struct au_sbinfo *sbinfo;
25838+ struct hlist_head *plink_hlist;
25839+ struct hlist_node *tmp;
25840+ struct au_icntnr *icntnr;
25841+ struct inode *inode;
25842+ int i, do_put;
25843+
25844+ SiMustWriteLock(sb);
25845+
25846+ sbinfo = au_sbi(sb);
25847+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
25848+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
25849+
25850+ /* no spin_lock since sbinfo is write-locked */
25851+ for (i = 0; i < AuPlink_NHASH; i++) {
25852+ plink_hlist = &sbinfo->si_plink[i].head;
25853+ hlist_for_each_entry_safe(icntnr, tmp, plink_hlist, plink) { /* _safe: entries are deleted while walking */
25854+ inode = au_igrab(&icntnr->vfs_inode); /* temporary reference for this iteration */
25855+ ii_write_lock_child(inode);
25856+ do_put = au_plink_do_half_refresh(inode, br_id);
25857+ if (do_put) {
25858+ hlist_del(&icntnr->plink);
25859+ iput(inode); /* extra iput() for the entry just unlisted */
25860+ }
25861+ ii_write_unlock(inode);
25862+ iput(inode); /* drops the temporary reference taken above */
25863+ }
25864+ }
25865+}
25866diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
25867--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
25868+++ linux/fs/aufs/poll.c 2016-10-09 16:55:36.496035060 +0200
25869@@ -0,0 +1,52 @@
25870+/*
25871+ * Copyright (C) 2005-2016 Junjiro R. Okajima
25872+ *
25873+ * This program, aufs is free software; you can redistribute it and/or modify
25874+ * it under the terms of the GNU General Public License as published by
25875+ * the Free Software Foundation; either version 2 of the License, or
25876+ * (at your option) any later version.
25877+ *
25878+ * This program is distributed in the hope that it will be useful,
25879+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25880+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25881+ * GNU General Public License for more details.
25882+ *
25883+ * You should have received a copy of the GNU General Public License
25884+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25885+ */
25886+
25887+/*
25888+ * poll operation
25889+ * There is only one filesystem which implements ->poll operation, currently.
25890+ */
25891+
25892+#include "aufs.h"
25893+
25894+unsigned int aufs_poll(struct file *file, poll_table *wait)
25895+{
25896+ unsigned int mask;
25897+ int err;
25898+ struct file *h_file;
25899+ struct super_block *sb;
25900+
25901+ /* We should pretend an error happened. */
25902+ mask = POLLERR /* | POLLIN | POLLOUT */;
25903+ sb = file->f_path.dentry->d_sb;
25904+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
25905+
25906+ h_file = au_read_pre(file, /*keep_fi*/0);
25907+ err = PTR_ERR(h_file);
25908+ if (IS_ERR(h_file))
25909+ goto out;
25910+
25911+ /* it is not an error if h_file has no operation */
25912+ mask = DEFAULT_POLLMASK;
25913+ if (h_file->f_op->poll)
25914+ mask = h_file->f_op->poll(h_file, wait);
25915+ fput(h_file); /* instead of au_read_post() */
25916+
25917+out:
25918+ si_read_unlock(sb);
25919+ AuTraceErr((int)mask);
25920+ return mask;
25921+}
25922diff -urN /usr/share/empty/fs/aufs/posix_acl.c linux/fs/aufs/posix_acl.c
25923--- /usr/share/empty/fs/aufs/posix_acl.c 1970-01-01 01:00:00.000000000 +0100
25924+++ linux/fs/aufs/posix_acl.c 2016-12-17 12:28:17.598545045 +0100
25925@@ -0,0 +1,98 @@
25926+/*
25927+ * Copyright (C) 2014-2016 Junjiro R. Okajima
25928+ *
25929+ * This program, aufs is free software; you can redistribute it and/or modify
25930+ * it under the terms of the GNU General Public License as published by
25931+ * the Free Software Foundation; either version 2 of the License, or
25932+ * (at your option) any later version.
25933+ *
25934+ * This program is distributed in the hope that it will be useful,
25935+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25936+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25937+ * GNU General Public License for more details.
25938+ *
25939+ * You should have received a copy of the GNU General Public License
25940+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25941+ */
25942+
25943+/*
25944+ * posix acl operations
25945+ */
25946+
25947+#include <linux/fs.h>
25948+#include "aufs.h"
25949+
25950+struct posix_acl *aufs_get_acl(struct inode *inode, int type)
25951+{
25952+ struct posix_acl *acl;
25953+ int err;
25954+ aufs_bindex_t bindex;
25955+ struct inode *h_inode;
25956+ struct super_block *sb;
25957+
25958+ acl = NULL;
25959+ sb = inode->i_sb;
25960+ si_read_lock(sb, AuLock_FLUSH);
25961+ ii_read_lock_child(inode);
25962+ if (!(sb->s_flags & MS_POSIXACL))
25963+ goto out;
25964+
25965+ bindex = au_ibtop(inode);
25966+ h_inode = au_h_iptr(inode, bindex);
25967+ if (unlikely(!h_inode
25968+ || ((h_inode->i_mode & S_IFMT)
25969+ != (inode->i_mode & S_IFMT)))) {
25970+ err = au_busy_or_stale();
25971+ acl = ERR_PTR(err);
25972+ goto out;
25973+ }
25974+
25975+ /* always topmost only */
25976+ acl = get_acl(h_inode, type);
25977+
25978+out:
25979+ ii_read_unlock(inode);
25980+ si_read_unlock(sb);
25981+
25982+ AuTraceErrPtr(acl);
25983+ return acl;
25984+}
25985+
25986+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
25987+{
25988+ int err;
25989+ ssize_t ssz;
25990+ struct dentry *dentry;
25991+ struct au_sxattr arg = {
25992+ .type = AU_ACL_SET,
25993+ .u.acl_set = {
25994+ .acl = acl,
25995+ .type = type
25996+ },
25997+ };
25998+
25999+ IMustLock(inode);
26000+
26001+ if (inode->i_ino == AUFS_ROOT_INO)
26002+ dentry = dget(inode->i_sb->s_root);
26003+ else {
26004+ dentry = d_find_alias(inode);
26005+ if (!dentry)
26006+ dentry = d_find_any_alias(inode);
26007+ if (!dentry) {
26008+ pr_warn("cannot handle this inode, "
26009+ "please report to aufs-users ML\n");
26010+ err = -ENOENT;
26011+ goto out;
26012+ }
26013+ }
26014+
26015+ ssz = au_sxattr(dentry, inode, &arg);
26016+ dput(dentry);
26017+ err = ssz;
26018+ if (ssz >= 0)
26019+ err = 0;
26020+
26021+out:
26022+ return err;
26023+}
26024diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
26025--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
26026+++ linux/fs/aufs/procfs.c 2016-10-09 16:55:36.496035060 +0200
26027@@ -0,0 +1,169 @@
26028+/*
26029+ * Copyright (C) 2010-2016 Junjiro R. Okajima
26030+ *
26031+ * This program, aufs is free software; you can redistribute it and/or modify
26032+ * it under the terms of the GNU General Public License as published by
26033+ * the Free Software Foundation; either version 2 of the License, or
26034+ * (at your option) any later version.
26035+ *
26036+ * This program is distributed in the hope that it will be useful,
26037+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26038+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26039+ * GNU General Public License for more details.
26040+ *
26041+ * You should have received a copy of the GNU General Public License
26042+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
26043+ */
26044+
26045+/*
26046+ * procfs interfaces
26047+ */
26048+
26049+#include <linux/proc_fs.h>
26050+#include "aufs.h"
26051+
26052+static int au_procfs_plm_release(struct inode *inode, struct file *file)
26053+{
26054+ struct au_sbinfo *sbinfo;
26055+
26056+ sbinfo = file->private_data;
26057+ if (sbinfo) {
26058+ au_plink_maint_leave(sbinfo);
26059+ kobject_put(&sbinfo->si_kobj);
26060+ }
26061+
26062+ return 0;
26063+}
26064+
26065+static void au_procfs_plm_write_clean(struct file *file)
26066+{
26067+ struct au_sbinfo *sbinfo;
26068+
26069+ sbinfo = file->private_data;
26070+ if (sbinfo)
26071+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
26072+}
26073+
26074+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
26075+{
26076+ int err;
26077+ struct super_block *sb;
26078+ struct au_sbinfo *sbinfo;
26079+
26080+ err = -EBUSY;
26081+ if (unlikely(file->private_data))
26082+ goto out;
26083+
26084+ sb = NULL;
26085+ /* don't use au_sbilist_lock() here */
26086+ spin_lock(&au_sbilist.spin);
26087+ hlist_for_each_entry(sbinfo, &au_sbilist.head, si_list)
26088+ if (id == sysaufs_si_id(sbinfo)) {
26089+ kobject_get(&sbinfo->si_kobj);
26090+ sb = sbinfo->si_sb;
26091+ break;
26092+ }
26093+ spin_unlock(&au_sbilist.spin);
26094+
26095+ err = -EINVAL;
26096+ if (unlikely(!sb))
26097+ goto out;
26098+
26099+ err = au_plink_maint_enter(sb);
26100+ if (!err)
26101+ /* keep kobject_get() */
26102+ file->private_data = sbinfo;
26103+ else
26104+ kobject_put(&sbinfo->si_kobj);
26105+out:
26106+ return err;
26107+}
26108+
26109+/*
26110+ * Accept a valid "si=xxxx" only.
26111+ * Once it is accepted successfully, accept "clean" too.
26112+ */
26113+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
26114+ size_t count, loff_t *ppos)
26115+{
26116+ ssize_t err;
26117+ unsigned long id;
26118+ /* last newline is allowed */
26119+ char buf[3 + sizeof(unsigned long) * 2 + 1];
26120+
26121+ err = -EACCES;
26122+ if (unlikely(!capable(CAP_SYS_ADMIN)))
26123+ goto out;
26124+
26125+ err = -EINVAL;
26126+ if (unlikely(count > sizeof(buf)))
26127+ goto out;
26128+
26129+ err = copy_from_user(buf, ubuf, count);
26130+ if (unlikely(err)) {
26131+ err = -EFAULT;
26132+ goto out;
26133+ }
26134+ buf[count] = 0;
26135+
26136+ err = -EINVAL;
26137+ if (!strcmp("clean", buf)) {
26138+ au_procfs_plm_write_clean(file);
26139+ goto out_success;
26140+ } else if (unlikely(strncmp("si=", buf, 3)))
26141+ goto out;
26142+
26143+ err = kstrtoul(buf + 3, 16, &id);
26144+ if (unlikely(err))
26145+ goto out;
26146+
26147+ err = au_procfs_plm_write_si(file, id);
26148+ if (unlikely(err))
26149+ goto out;
26150+
26151+out_success:
26152+ err = count; /* success */
26153+out:
26154+ return err;
26155+}
26156+
26157+static const struct file_operations au_procfs_plm_fop = {
26158+ .write = au_procfs_plm_write,
26159+ .release = au_procfs_plm_release,
26160+ .owner = THIS_MODULE
26161+};
26162+
26163+/* ---------------------------------------------------------------------- */
26164+
26165+static struct proc_dir_entry *au_procfs_dir;
26166+
26167+void au_procfs_fin(void)
26168+{
26169+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
26170+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
26171+}
26172+
26173+int __init au_procfs_init(void)
26174+{
26175+ int err;
26176+ struct proc_dir_entry *entry;
26177+
26178+ err = -ENOMEM;
26179+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
26180+ if (unlikely(!au_procfs_dir))
26181+ goto out;
26182+
26183+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
26184+ au_procfs_dir, &au_procfs_plm_fop);
26185+ if (unlikely(!entry))
26186+ goto out_dir;
26187+
26188+ err = 0;
26189+ goto out; /* success */
26190+
26191+
26192+out_dir:
26193+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
26194+out:
26195+ return err;
26196+}
26197diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
26198--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
26199+++ linux/fs/aufs/rdu.c 2016-10-09 16:55:36.496035060 +0200
26200@@ -0,0 +1,381 @@
26201+/*
26202+ * Copyright (C) 2005-2016 Junjiro R. Okajima
26203+ *
26204+ * This program, aufs is free software; you can redistribute it and/or modify
26205+ * it under the terms of the GNU General Public License as published by
26206+ * the Free Software Foundation; either version 2 of the License, or
26207+ * (at your option) any later version.
26208+ *
26209+ * This program is distributed in the hope that it will be useful,
26210+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26211+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26212+ * GNU General Public License for more details.
26213+ *
26214+ * You should have received a copy of the GNU General Public License
26215+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
26216+ */
26217+
26218+/*
26219+ * readdir in userspace.
26220+ */
26221+
26222+#include <linux/compat.h>
26223+#include <linux/fs_stack.h>
26224+#include <linux/security.h>
26225+#include "aufs.h"
26226+
/* bits for struct aufs_rdu.flags */
#define AuRdu_CALLED	(1 << 0)
#define AuRdu_CONT	(1 << 1)
#define AuRdu_FULL	(1 << 2)

/* test, set and clear one AuRdu_xxx bit, named without its prefix */
#define au_ftest_rdu(flags, name)	((flags) & AuRdu_##name)
#define au_fset_rdu(flags, name) \
	do { (flags) |= AuRdu_##name; } while (0)
#define au_fclr_rdu(flags, name) \
	do { (flags) &= ~AuRdu_##name; } while (0)
26236+
26237+struct au_rdu_arg {
26238+ struct dir_context ctx;
26239+ struct aufs_rdu *rdu;
26240+ union au_rdu_ent_ul ent;
26241+ unsigned long end;
26242+
26243+ struct super_block *sb;
26244+ int err;
26245+};
26246+
26247+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
26248+ loff_t offset, u64 h_ino, unsigned int d_type)
26249+{
26250+ int err, len;
26251+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
26252+ struct aufs_rdu *rdu = arg->rdu;
26253+ struct au_rdu_ent ent;
26254+
26255+ err = 0;
26256+ arg->err = 0;
26257+ au_fset_rdu(rdu->cookie.flags, CALLED);
26258+ len = au_rdu_len(nlen);
26259+ if (arg->ent.ul + len < arg->end) {
26260+ ent.ino = h_ino;
26261+ ent.bindex = rdu->cookie.bindex;
26262+ ent.type = d_type;
26263+ ent.nlen = nlen;
26264+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
26265+ ent.type = DT_UNKNOWN;
26266+
26267+ /* unnecessary to support mmap_sem since this is a dir */
26268+ err = -EFAULT;
26269+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
26270+ goto out;
26271+ if (copy_to_user(arg->ent.e->name, name, nlen))
26272+ goto out;
26273+ /* the terminating NULL */
26274+ if (__put_user(0, arg->ent.e->name + nlen))
26275+ goto out;
26276+ err = 0;
26277+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
26278+ arg->ent.ul += len;
26279+ rdu->rent++;
26280+ } else {
26281+ err = -EFAULT;
26282+ au_fset_rdu(rdu->cookie.flags, FULL);
26283+ rdu->full = 1;
26284+ rdu->tail = arg->ent;
26285+ }
26286+
26287+out:
26288+ /* AuTraceErr(err); */
26289+ return err;
26290+}
26291+
26292+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
26293+{
26294+ int err;
26295+ loff_t offset;
26296+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
26297+
26298+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
26299+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
26300+ err = offset;
26301+ if (unlikely(offset != cookie->h_pos))
26302+ goto out;
26303+
26304+ err = 0;
26305+ do {
26306+ arg->err = 0;
26307+ au_fclr_rdu(cookie->flags, CALLED);
26308+ /* smp_mb(); */
26309+ err = vfsub_iterate_dir(h_file, &arg->ctx);
26310+ if (err >= 0)
26311+ err = arg->err;
26312+ } while (!err
26313+ && au_ftest_rdu(cookie->flags, CALLED)
26314+ && !au_ftest_rdu(cookie->flags, FULL));
26315+ cookie->h_pos = h_file->f_pos;
26316+
26317+out:
26318+ AuTraceErr(err);
26319+ return err;
26320+}
26321+
26322+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
26323+{
26324+ int err;
26325+ aufs_bindex_t bbot;
26326+ struct au_rdu_arg arg = {
26327+ .ctx = {
26328+ .actor = au_rdu_fill
26329+ }
26330+ };
26331+ struct dentry *dentry;
26332+ struct inode *inode;
26333+ struct file *h_file;
26334+ struct au_rdu_cookie *cookie = &rdu->cookie;
26335+
26336+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
26337+ if (unlikely(err)) {
26338+ err = -EFAULT;
26339+ AuTraceErr(err);
26340+ goto out;
26341+ }
26342+ rdu->rent = 0;
26343+ rdu->tail = rdu->ent;
26344+ rdu->full = 0;
26345+ arg.rdu = rdu;
26346+ arg.ent = rdu->ent;
26347+ arg.end = arg.ent.ul;
26348+ arg.end += rdu->sz;
26349+
26350+ err = -ENOTDIR;
26351+ if (unlikely(!file->f_op->iterate && !file->f_op->iterate_shared))
26352+ goto out;
26353+
26354+ err = security_file_permission(file, MAY_READ);
26355+ AuTraceErr(err);
26356+ if (unlikely(err))
26357+ goto out;
26358+
26359+ dentry = file->f_path.dentry;
26360+ inode = d_inode(dentry);
26361+ inode_lock_shared(inode);
26362+
26363+ arg.sb = inode->i_sb;
26364+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
26365+ if (unlikely(err))
26366+ goto out_mtx;
26367+ err = au_alive_dir(dentry);
26368+ if (unlikely(err))
26369+ goto out_si;
26370+ /* todo: reval? */
26371+ fi_read_lock(file);
26372+
26373+ err = -EAGAIN;
26374+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
26375+ && cookie->generation != au_figen(file)))
26376+ goto out_unlock;
26377+
26378+ err = 0;
26379+ if (!rdu->blk) {
26380+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
26381+ if (!rdu->blk)
26382+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
26383+ }
26384+ bbot = au_fbtop(file);
26385+ if (cookie->bindex < bbot)
26386+ cookie->bindex = bbot;
26387+ bbot = au_fbbot_dir(file);
26388+ /* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
26389+ for (; !err && cookie->bindex <= bbot;
26390+ cookie->bindex++, cookie->h_pos = 0) {
26391+ h_file = au_hf_dir(file, cookie->bindex);
26392+ if (!h_file)
26393+ continue;
26394+
26395+ au_fclr_rdu(cookie->flags, FULL);
26396+ err = au_rdu_do(h_file, &arg);
26397+ AuTraceErr(err);
26398+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
26399+ break;
26400+ }
26401+ AuDbg("rent %llu\n", rdu->rent);
26402+
26403+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
26404+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
26405+ au_fset_rdu(cookie->flags, CONT);
26406+ cookie->generation = au_figen(file);
26407+ }
26408+
26409+ ii_read_lock_child(inode);
26410+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
26411+ ii_read_unlock(inode);
26412+
26413+out_unlock:
26414+ fi_read_unlock(file);
26415+out_si:
26416+ si_read_unlock(arg.sb);
26417+out_mtx:
26418+ inode_unlock_shared(inode);
26419+out:
26420+ AuTraceErr(err);
26421+ return err;
26422+}
26423+
26424+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
26425+{
26426+ int err;
26427+ ino_t ino;
26428+ unsigned long long nent;
26429+ union au_rdu_ent_ul *u;
26430+ struct au_rdu_ent ent;
26431+ struct super_block *sb;
26432+
26433+ err = 0;
26434+ nent = rdu->nent;
26435+ u = &rdu->ent;
26436+ sb = file->f_path.dentry->d_sb;
26437+ si_read_lock(sb, AuLock_FLUSH);
26438+ while (nent-- > 0) {
26439+ /* unnecessary to support mmap_sem since this is a dir */
26440+ err = copy_from_user(&ent, u->e, sizeof(ent));
26441+ if (!err)
26442+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
26443+ if (unlikely(err)) {
26444+ err = -EFAULT;
26445+ AuTraceErr(err);
26446+ break;
26447+ }
26448+
26449+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
26450+ if (!ent.wh)
26451+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
26452+ else
26453+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
26454+ &ino);
26455+ if (unlikely(err)) {
26456+ AuTraceErr(err);
26457+ break;
26458+ }
26459+
26460+ err = __put_user(ino, &u->e->ino);
26461+ if (unlikely(err)) {
26462+ err = -EFAULT;
26463+ AuTraceErr(err);
26464+ break;
26465+ }
26466+ u->ul += au_rdu_len(ent.nlen);
26467+ }
26468+ si_read_unlock(sb);
26469+
26470+ return err;
26471+}
26472+
26473+/* ---------------------------------------------------------------------- */
26474+
26475+static int au_rdu_verify(struct aufs_rdu *rdu)
26476+{
26477+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
26478+ "%llu, b%d, 0x%x, g%u}\n",
26479+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
26480+ rdu->blk,
26481+ rdu->rent, rdu->shwh, rdu->full,
26482+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
26483+ rdu->cookie.generation);
26484+
26485+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
26486+ return 0;
26487+
26488+ AuDbg("%u:%u\n",
26489+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
26490+ return -EINVAL;
26491+}
26492+
26493+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
26494+{
26495+ long err, e;
26496+ struct aufs_rdu rdu;
26497+ void __user *p = (void __user *)arg;
26498+
26499+ err = copy_from_user(&rdu, p, sizeof(rdu));
26500+ if (unlikely(err)) {
26501+ err = -EFAULT;
26502+ AuTraceErr(err);
26503+ goto out;
26504+ }
26505+ err = au_rdu_verify(&rdu);
26506+ if (unlikely(err))
26507+ goto out;
26508+
26509+ switch (cmd) {
26510+ case AUFS_CTL_RDU:
26511+ err = au_rdu(file, &rdu);
26512+ if (unlikely(err))
26513+ break;
26514+
26515+ e = copy_to_user(p, &rdu, sizeof(rdu));
26516+ if (unlikely(e)) {
26517+ err = -EFAULT;
26518+ AuTraceErr(err);
26519+ }
26520+ break;
26521+ case AUFS_CTL_RDU_INO:
26522+ err = au_rdu_ino(file, &rdu);
26523+ break;
26524+
26525+ default:
26526+ /* err = -ENOTTY; */
26527+ err = -EINVAL;
26528+ }
26529+
26530+out:
26531+ AuTraceErr(err);
26532+ return err;
26533+}
26534+
#ifdef CONFIG_COMPAT
/*
 * Compat variant of au_rdu_ioctl(): the user pointers are converted with
 * compat_ptr() on the way in and with ptr_to_compat() before the structure
 * is copied back for AUFS_CTL_RDU.
 */
long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long err;
	struct aufs_rdu rdu;
	void __user *p = compat_ptr(arg);

	/* todo: get_user()? */
	if (unlikely(copy_from_user(&rdu, p, sizeof(rdu)))) {
		err = -EFAULT;
		AuTraceErr(err);
		goto out;
	}
	rdu.ent.e = compat_ptr(rdu.ent.ul);
	err = au_rdu_verify(&rdu);
	if (unlikely(err))
		goto out;

	if (cmd == AUFS_CTL_RDU) {
		err = au_rdu(file, &rdu);
		if (!err) {
			rdu.ent.ul = ptr_to_compat(rdu.ent.e);
			rdu.tail.ul = ptr_to_compat(rdu.tail.e);
			if (unlikely(copy_to_user(p, &rdu, sizeof(rdu)))) {
				err = -EFAULT;
				AuTraceErr(err);
			}
		}
	} else if (cmd == AUFS_CTL_RDU_INO) {
		err = au_rdu_ino(file, &rdu);
	} else {
		/* err = -ENOTTY; */
		err = -EINVAL;
	}

out:
	AuTraceErr(err);
	return err;
}
#endif
26582diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
26583--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
26584+++ linux/fs/aufs/rwsem.h 2016-10-09 16:55:36.496035060 +0200
26585@@ -0,0 +1,198 @@
26586+/*
26587+ * Copyright (C) 2005-2016 Junjiro R. Okajima
26588+ *
26589+ * This program, aufs is free software; you can redistribute it and/or modify
26590+ * it under the terms of the GNU General Public License as published by
26591+ * the Free Software Foundation; either version 2 of the License, or
26592+ * (at your option) any later version.
26593+ *
26594+ * This program is distributed in the hope that it will be useful,
26595+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26596+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26597+ * GNU General Public License for more details.
26598+ *
26599+ * You should have received a copy of the GNU General Public License
26600+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
26601+ */
26602+
26603+/*
26604+ * simple read-write semaphore wrappers
26605+ */
26606+
26607+#ifndef __AUFS_RWSEM_H__
26608+#define __AUFS_RWSEM_H__
26609+
26610+#ifdef __KERNEL__
26611+
26612+#include "debug.h"
26613+
26614+struct au_rwsem {
26615+ struct rw_semaphore rwsem;
26616+#ifdef CONFIG_AUFS_DEBUG
26617+ /* just for debugging, not almighty counter */
26618+ atomic_t rcnt, wcnt;
26619+#endif
26620+};
26621+
/*
 * With lockdep, name the lock class after the expression given as @rw while
 * keeping its original key. Without lockdep this expands to nothing.
 */
#ifdef CONFIG_LOCKDEP
#define au_lockdep_set_name(rw) \
	lockdep_set_class_and_name(&(rw)->rwsem, \
				   /*original key*/(rw)->rwsem.dep_map.key, \
				   /*name*/#rw)
#else
#define au_lockdep_set_name(rw)	do {} while (0)
#endif
26630+
/*
 * Debug-only accounting of readers (rcnt) and writers (wcnt).
 * Without CONFIG_AUFS_DEBUG, AuDbgCnt() reads as 0 and the rest are no-ops.
 */
#ifdef CONFIG_AUFS_DEBUG
#define AuDbgCntInit(rw) do { \
	atomic_set(&(rw)->rcnt, 0); \
	atomic_set(&(rw)->wcnt, 0); \
	smp_mb(); /* atomic set */ \
} while (0)

#define AuDbgCnt(rw, cnt)	atomic_read(&(rw)->cnt)
#define AuDbgCntInc(rw, cnt)	atomic_inc(&(rw)->cnt)
/* warn when a counter would drop below zero */
#define AuDbgCntDec(rw, cnt)	WARN_ON(atomic_dec_return(&(rw)->cnt) < 0)
#define AuDbgRcntInc(rw)	AuDbgCntInc(rw, rcnt)
#define AuDbgRcntDec(rw)	AuDbgCntDec(rw, rcnt)
#define AuDbgWcntInc(rw)	AuDbgCntInc(rw, wcnt)
#define AuDbgWcntDec(rw)	AuDbgCntDec(rw, wcnt)
#else
#define AuDbgCnt(rw, cnt)	0
#define AuDbgCntInit(rw)	do {} while (0)
#define AuDbgRcntInc(rw)	do {} while (0)
#define AuDbgRcntDec(rw)	do {} while (0)
#define AuDbgWcntInc(rw)	do {} while (0)
#define AuDbgWcntDec(rw)	do {} while (0)
#endif /* CONFIG_AUFS_DEBUG */
26653+
/*
 * Lock-state assertions built on the debug counters.
 * to debug easier, do not make them inlined functions
 */
#define AuRwMustNoWaiters(rw)	AuDebugOn(rwsem_is_contended(&(rw)->rwsem))
/* rwsem_is_locked() is unusable */
#define AuRwMustReadLock(rw)	AuDebugOn(AuDbgCnt(rw, rcnt) <= 0)
#define AuRwMustWriteLock(rw)	AuDebugOn(AuDbgCnt(rw, wcnt) <= 0)
#define AuRwMustAnyLock(rw)	AuDebugOn(AuDbgCnt(rw, rcnt) <= 0 \
					&& AuDbgCnt(rw, wcnt) <= 0)
/* at destruction time neither readers nor writers may remain */
#define AuRwDestroy(rw)		AuDebugOn(AuDbgCnt(rw, rcnt) \
					|| AuDbgCnt(rw, wcnt))
26663+
/* initialize the debug counters, the semaphore and its lockdep name */
#define au_rw_init(rw) do { \
		AuDbgCntInit(rw); \
		init_rwsem(&(rw)->rwsem); \
		au_lockdep_set_name(rw); \
	} while (0)

/* initialize and immediately take the write lock */
#define au_rw_init_wlock(rw) do { \
		au_rw_init(rw); \
		down_write(&(rw)->rwsem); \
		AuDbgWcntInc(rw); \
	} while (0)

/* same as au_rw_init_wlock(), with the lockdep subclass @lsc */
#define au_rw_init_wlock_nested(rw, lsc) do { \
		au_rw_init(rw); \
		down_write_nested(&(rw)->rwsem, lsc); \
		AuDbgWcntInc(rw); \
	} while (0)
26681+
26682+static inline void au_rw_read_lock(struct au_rwsem *rw)
26683+{
26684+ down_read(&rw->rwsem);
26685+ AuDbgRcntInc(rw);
26686+}
26687+
26688+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
26689+{
26690+ down_read_nested(&rw->rwsem, lsc);
26691+ AuDbgRcntInc(rw);
26692+}
26693+
26694+static inline void au_rw_read_unlock(struct au_rwsem *rw)
26695+{
26696+ AuRwMustReadLock(rw);
26697+ AuDbgRcntDec(rw);
26698+ up_read(&rw->rwsem);
26699+}
26700+
26701+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
26702+{
26703+ AuRwMustWriteLock(rw);
26704+ AuDbgRcntInc(rw);
26705+ AuDbgWcntDec(rw);
26706+ downgrade_write(&rw->rwsem);
26707+}
26708+
26709+static inline void au_rw_write_lock(struct au_rwsem *rw)
26710+{
26711+ down_write(&rw->rwsem);
26712+ AuDbgWcntInc(rw);
26713+}
26714+
26715+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
26716+ unsigned int lsc)
26717+{
26718+ down_write_nested(&rw->rwsem, lsc);
26719+ AuDbgWcntInc(rw);
26720+}
26721+
26722+static inline void au_rw_write_unlock(struct au_rwsem *rw)
26723+{
26724+ AuRwMustWriteLock(rw);
26725+ AuDbgWcntDec(rw);
26726+ up_write(&rw->rwsem);
26727+}
26728+
26729+/* why is not _nested version defined */
26730+static inline int au_rw_read_trylock(struct au_rwsem *rw)
26731+{
26732+ int ret;
26733+
26734+ ret = down_read_trylock(&rw->rwsem);
26735+ if (ret)
26736+ AuDbgRcntInc(rw);
26737+ return ret;
26738+}
26739+
26740+static inline int au_rw_write_trylock(struct au_rwsem *rw)
26741+{
26742+ int ret;
26743+
26744+ ret = down_write_trylock(&rw->rwsem);
26745+ if (ret)
26746+ AuDbgWcntInc(rw);
26747+ return ret;
26748+}
26749+
/*
 * these helpers are for the inline functions above only;
 * AuDbgWcntInc stays defined since the au_rw_init_wlock*() macros expand it
 */
#undef AuDbgCntDec
#undef AuDbgRcntInc
#undef AuDbgRcntDec
#undef AuDbgWcntDec
26754+
/*
 * Generate prefix##_read_lock(), prefix##_write_lock() and the two trylock
 * variants, each taking @param and operating on the au_rwsem @rwsem.
 */
#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
static inline void prefix##_read_lock(param) \
{ au_rw_read_lock(rwsem); } \
static inline void prefix##_write_lock(param) \
{ au_rw_write_lock(rwsem); } \
static inline int prefix##_read_trylock(param) \
{ return au_rw_read_trylock(rwsem); } \
static inline int prefix##_write_trylock(param) \
{ return au_rw_write_trylock(rwsem); }
/* why is not _nested version defined */
/* static inline void prefix##_read_trylock_nested(param, lsc)
{ au_rw_read_trylock_nested(rwsem, lsc); }
static inline void prefix##_write_trylock_nested(param, lsc)
{ au_rw_write_trylock_nested(rwsem, lsc); } */
26769+
/* generate prefix##_read_unlock(), _write_unlock() and _downgrade_lock() */
#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
static inline void prefix##_read_unlock(param) \
{ au_rw_read_unlock(rwsem); } \
static inline void prefix##_write_unlock(param) \
{ au_rw_write_unlock(rwsem); } \
static inline void prefix##_downgrade_lock(param) \
{ au_rw_dgrade_lock(rwsem); }
26777+
/* generate both the lock and the unlock families for one prefix */
#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
	AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
	AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
26781+
26782+#endif /* __KERNEL__ */
26783+#endif /* __AUFS_RWSEM_H__ */
26784diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
26785--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
26786+++ linux/fs/aufs/sbinfo.c 2016-10-09 16:55:38.889431135 +0200
26787@@ -0,0 +1,355 @@
26788+/*
26789+ * Copyright (C) 2005-2016 Junjiro R. Okajima
26790+ *
26791+ * This program, aufs is free software; you can redistribute it and/or modify
26792+ * it under the terms of the GNU General Public License as published by
26793+ * the Free Software Foundation; either version 2 of the License, or
26794+ * (at your option) any later version.
26795+ *
26796+ * This program is distributed in the hope that it will be useful,
26797+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26798+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26799+ * GNU General Public License for more details.
26800+ *
26801+ * You should have received a copy of the GNU General Public License
26802+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
26803+ */
26804+
26805+/*
26806+ * superblock private data
26807+ */
26808+
26809+#include "aufs.h"
26810+
26811+/*
26812+ * they are necessary regardless sysfs is disabled.
26813+ */
26814+void au_si_free(struct kobject *kobj)
26815+{
26816+ int i;
26817+ struct au_sbinfo *sbinfo;
26818+ char *locked __maybe_unused; /* debug only */
26819+
26820+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
26821+ for (i = 0; i < AuPlink_NHASH; i++)
26822+ AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head));
26823+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
26824+
26825+ AuDebugOn(percpu_counter_sum(&sbinfo->si_ninodes));
26826+ percpu_counter_destroy(&sbinfo->si_ninodes);
26827+ AuDebugOn(percpu_counter_sum(&sbinfo->si_nfiles));
26828+ percpu_counter_destroy(&sbinfo->si_nfiles);
26829+
26830+ au_rw_write_lock(&sbinfo->si_rwsem);
26831+ au_br_free(sbinfo);
26832+ au_rw_write_unlock(&sbinfo->si_rwsem);
26833+
26834+ au_delayed_kfree(sbinfo->si_branch);
26835+ for (i = 0; i < AU_NPIDMAP; i++)
26836+ if (sbinfo->au_si_pid.pid_bitmap[i])
26837+ au_delayed_kfree(sbinfo->au_si_pid.pid_bitmap[i]);
26838+ mutex_destroy(&sbinfo->au_si_pid.pid_mtx);
26839+ mutex_destroy(&sbinfo->si_xib_mtx);
26840+ AuRwDestroy(&sbinfo->si_rwsem);
26841+
26842+ au_delayed_kfree(sbinfo);
26843+}
26844+
26845+int au_si_alloc(struct super_block *sb)
26846+{
26847+ int err, i;
26848+ struct au_sbinfo *sbinfo;
26849+
26850+ err = -ENOMEM;
26851+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
26852+ if (unlikely(!sbinfo))
26853+ goto out;
26854+
26855+ /* will be reallocated separately */
26856+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
26857+ if (unlikely(!sbinfo->si_branch))
26858+ goto out_sbinfo;
26859+
26860+ err = sysaufs_si_init(sbinfo);
26861+ if (unlikely(err))
26862+ goto out_br;
26863+
26864+ au_nwt_init(&sbinfo->si_nowait);
26865+ au_rw_init_wlock(&sbinfo->si_rwsem);
26866+ mutex_init(&sbinfo->au_si_pid.pid_mtx);
26867+
26868+ percpu_counter_init(&sbinfo->si_ninodes, 0, GFP_NOFS);
26869+ percpu_counter_init(&sbinfo->si_nfiles, 0, GFP_NOFS);
26870+
26871+ sbinfo->si_bbot = -1;
26872+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
26873+
26874+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
26875+ sbinfo->si_wbr_create = AuWbrCreate_Def;
26876+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
26877+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
26878+
26879+ au_fhsm_init(sbinfo);
26880+
26881+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
26882+
26883+ sbinfo->si_xino_jiffy = jiffies;
26884+ sbinfo->si_xino_expire
26885+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
26886+ mutex_init(&sbinfo->si_xib_mtx);
26887+ sbinfo->si_xino_brid = -1;
26888+ /* leave si_xib_last_pindex and si_xib_next_bit */
26889+
26890+ au_sphl_init(&sbinfo->si_aopen);
26891+
26892+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
26893+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
26894+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
26895+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
26896+
26897+ for (i = 0; i < AuPlink_NHASH; i++)
26898+ au_sphl_init(sbinfo->si_plink + i);
26899+ init_waitqueue_head(&sbinfo->si_plink_wq);
26900+ spin_lock_init(&sbinfo->si_plink_maint_lock);
26901+
26902+ au_sphl_init(&sbinfo->si_files);
26903+
26904+ /* with getattr by default */
26905+ sbinfo->si_iop_array = aufs_iop;
26906+
26907+ /* leave other members for sysaufs and si_mnt. */
26908+ sbinfo->si_sb = sb;
26909+ sb->s_fs_info = sbinfo;
26910+ si_pid_set(sb);
26911+ return 0; /* success */
26912+
26913+out_br:
26914+ au_delayed_kfree(sbinfo->si_branch);
26915+out_sbinfo:
26916+ au_delayed_kfree(sbinfo);
26917+out:
26918+ return err;
26919+}
26920+
26921+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink)
26922+{
26923+ int err, sz;
26924+ struct au_branch **brp;
26925+
26926+ AuRwMustWriteLock(&sbinfo->si_rwsem);
26927+
26928+ err = -ENOMEM;
26929+ sz = sizeof(*brp) * (sbinfo->si_bbot + 1);
26930+ if (unlikely(!sz))
26931+ sz = sizeof(*brp);
26932+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS,
26933+ may_shrink);
26934+ if (brp) {
26935+ sbinfo->si_branch = brp;
26936+ err = 0;
26937+ }
26938+
26939+ return err;
26940+}
26941+
26942+/* ---------------------------------------------------------------------- */
26943+
26944+unsigned int au_sigen_inc(struct super_block *sb)
26945+{
26946+ unsigned int gen;
26947+ struct inode *inode;
26948+
26949+ SiMustWriteLock(sb);
26950+
26951+ gen = ++au_sbi(sb)->si_generation;
26952+ au_update_digen(sb->s_root);
26953+ inode = d_inode(sb->s_root);
26954+ au_update_iigen(inode, /*half*/0);
26955+ inode->i_version++;
26956+ return gen;
26957+}
26958+
26959+aufs_bindex_t au_new_br_id(struct super_block *sb)
26960+{
26961+ aufs_bindex_t br_id;
26962+ int i;
26963+ struct au_sbinfo *sbinfo;
26964+
26965+ SiMustWriteLock(sb);
26966+
26967+ sbinfo = au_sbi(sb);
26968+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
26969+ br_id = ++sbinfo->si_last_br_id;
26970+ AuDebugOn(br_id < 0);
26971+ if (br_id && au_br_index(sb, br_id) < 0)
26972+ return br_id;
26973+ }
26974+
26975+ return -1;
26976+}
26977+
26978+/* ---------------------------------------------------------------------- */
26979+
26980+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
26981+int si_read_lock(struct super_block *sb, int flags)
26982+{
26983+ int err;
26984+
26985+ err = 0;
26986+ if (au_ftest_lock(flags, FLUSH))
26987+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26988+
26989+ si_noflush_read_lock(sb);
26990+ err = au_plink_maint(sb, flags);
26991+ if (unlikely(err))
26992+ si_read_unlock(sb);
26993+
26994+ return err;
26995+}
26996+
26997+int si_write_lock(struct super_block *sb, int flags)
26998+{
26999+ int err;
27000+
27001+ if (au_ftest_lock(flags, FLUSH))
27002+ au_nwt_flush(&au_sbi(sb)->si_nowait);
27003+
27004+ si_noflush_write_lock(sb);
27005+ err = au_plink_maint(sb, flags);
27006+ if (unlikely(err))
27007+ si_write_unlock(sb);
27008+
27009+ return err;
27010+}
27011+
27012+/* dentry and super_block lock. call at entry point */
27013+int aufs_read_lock(struct dentry *dentry, int flags)
27014+{
27015+ int err;
27016+ struct super_block *sb;
27017+
27018+ sb = dentry->d_sb;
27019+ err = si_read_lock(sb, flags);
27020+ if (unlikely(err))
27021+ goto out;
27022+
27023+ if (au_ftest_lock(flags, DW))
27024+ di_write_lock_child(dentry);
27025+ else
27026+ di_read_lock_child(dentry, flags);
27027+
27028+ if (au_ftest_lock(flags, GEN)) {
27029+ err = au_digen_test(dentry, au_sigen(sb));
27030+ if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
27031+ AuDebugOn(!err && au_dbrange_test(dentry));
27032+ else if (!err)
27033+ err = au_dbrange_test(dentry);
27034+ if (unlikely(err))
27035+ aufs_read_unlock(dentry, flags);
27036+ }
27037+
27038+out:
27039+ return err;
27040+}
27041+
27042+void aufs_read_unlock(struct dentry *dentry, int flags)
27043+{
27044+ if (au_ftest_lock(flags, DW))
27045+ di_write_unlock(dentry);
27046+ else
27047+ di_read_unlock(dentry, flags);
27048+ si_read_unlock(dentry->d_sb);
27049+}
27050+
27051+void aufs_write_lock(struct dentry *dentry)
27052+{
27053+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
27054+ di_write_lock_child(dentry);
27055+}
27056+
27057+void aufs_write_unlock(struct dentry *dentry)
27058+{
27059+ di_write_unlock(dentry);
27060+ si_write_unlock(dentry->d_sb);
27061+}
27062+
27063+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
27064+{
27065+ int err;
27066+ unsigned int sigen;
27067+ struct super_block *sb;
27068+
27069+ sb = d1->d_sb;
27070+ err = si_read_lock(sb, flags);
27071+ if (unlikely(err))
27072+ goto out;
27073+
27074+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
27075+
27076+ if (au_ftest_lock(flags, GEN)) {
27077+ sigen = au_sigen(sb);
27078+ err = au_digen_test(d1, sigen);
27079+ AuDebugOn(!err && au_dbrange_test(d1));
27080+ if (!err) {
27081+ err = au_digen_test(d2, sigen);
27082+ AuDebugOn(!err && au_dbrange_test(d2));
27083+ }
27084+ if (unlikely(err))
27085+ aufs_read_and_write_unlock2(d1, d2);
27086+ }
27087+
27088+out:
27089+ return err;
27090+}
27091+
27092+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
27093+{
27094+ di_write_unlock2(d1, d2);
27095+ si_read_unlock(d1->d_sb);
27096+}
27097+
27098+/* ---------------------------------------------------------------------- */
27099+
27100+static void si_pid_alloc(struct au_si_pid *au_si_pid, int idx)
27101+{
27102+ unsigned long *p;
27103+
27104+ BUILD_BUG_ON(sizeof(unsigned long) !=
27105+ sizeof(*au_si_pid->pid_bitmap));
27106+
27107+ mutex_lock(&au_si_pid->pid_mtx);
27108+ p = au_si_pid->pid_bitmap[idx];
27109+ while (!p) {
27110+ /*
27111+ * bad approach.
27112+ * but keeping 'si_pid_set()' void is more important.
27113+ */
27114+ p = kcalloc(BITS_TO_LONGS(AU_PIDSTEP),
27115+ sizeof(*au_si_pid->pid_bitmap),
27116+ GFP_NOFS);
27117+ if (p)
27118+ break;
27119+ cond_resched();
27120+ }
27121+ au_si_pid->pid_bitmap[idx] = p;
27122+ mutex_unlock(&au_si_pid->pid_mtx);
27123+}
27124+
27125+void si_pid_set(struct super_block *sb)
27126+{
27127+ pid_t bit;
27128+ int idx;
27129+ unsigned long *bitmap;
27130+ struct au_si_pid *au_si_pid;
27131+
27132+ si_pid_idx_bit(&idx, &bit);
27133+ au_si_pid = &au_sbi(sb)->au_si_pid;
27134+ bitmap = au_si_pid->pid_bitmap[idx];
27135+ if (!bitmap) {
27136+ si_pid_alloc(au_si_pid, idx);
27137+ bitmap = au_si_pid->pid_bitmap[idx];
27138+ }
27139+ AuDebugOn(test_bit(bit, bitmap));
27140+ set_bit(bit, bitmap);
27141+ /* smp_mb(); */
27142+}
27143diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
27144--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
27145+++ linux/fs/aufs/spl.h 2016-10-09 16:55:36.496035060 +0200
27146@@ -0,0 +1,113 @@
27147+/*
27148+ * Copyright (C) 2005-2016 Junjiro R. Okajima
27149+ *
27150+ * This program, aufs is free software; you can redistribute it and/or modify
27151+ * it under the terms of the GNU General Public License as published by
27152+ * the Free Software Foundation; either version 2 of the License, or
27153+ * (at your option) any later version.
27154+ *
27155+ * This program is distributed in the hope that it will be useful,
27156+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27157+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27158+ * GNU General Public License for more details.
27159+ *
27160+ * You should have received a copy of the GNU General Public License
27161+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
27162+ */
27163+
27164+/*
27165+ * simple list protected by a spinlock
27166+ */
27167+
27168+#ifndef __AUFS_SPL_H__
27169+#define __AUFS_SPL_H__
27170+
27171+#ifdef __KERNEL__
27172+
/*
 * list_head based variant of the spinlock-protected list.
 * Everything up to the matching #endif is compiled out by '#if 0'; only
 * the hlist based au_sphlhead helpers that follow are built.
 */
#if 0
struct au_splhead {
	spinlock_t spin;
	struct list_head head;
};

/* initialize the lock and make the list empty */
static inline void au_spl_init(struct au_splhead *spl)
{
	spin_lock_init(&spl->spin);
	INIT_LIST_HEAD(&spl->head);
}

/* add @list to @spl under the lock */
static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
{
	spin_lock(&spl->spin);
	list_add(list, &spl->head);
	spin_unlock(&spl->spin);
}

/* remove @list under the lock of @spl */
static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
{
	spin_lock(&spl->spin);
	list_del(list);
	spin_unlock(&spl->spin);
}

/* same as au_spl_del() but removes with list_del_rcu() */
static inline void au_spl_del_rcu(struct list_head *list,
				  struct au_splhead *spl)
{
	spin_lock(&spl->spin);
	list_del_rcu(list);
	spin_unlock(&spl->spin);
}
#endif
27207+
27208+/* ---------------------------------------------------------------------- */
27209+
/* an hlist head paired with the spinlock the au_sphl_*() helpers take */
struct au_sphlhead {
	spinlock_t spin;		/* taken around every helper's list access */
	struct hlist_head head;		/* the list itself */
};
27214+
27215+static inline void au_sphl_init(struct au_sphlhead *sphl)
27216+{
27217+ spin_lock_init(&sphl->spin);
27218+ INIT_HLIST_HEAD(&sphl->head);
27219+}
27220+
27221+static inline void au_sphl_add(struct hlist_node *hlist,
27222+ struct au_sphlhead *sphl)
27223+{
27224+ spin_lock(&sphl->spin);
27225+ hlist_add_head(hlist, &sphl->head);
27226+ spin_unlock(&sphl->spin);
27227+}
27228+
27229+static inline void au_sphl_del(struct hlist_node *hlist,
27230+ struct au_sphlhead *sphl)
27231+{
27232+ spin_lock(&sphl->spin);
27233+ hlist_del(hlist);
27234+ spin_unlock(&sphl->spin);
27235+}
27236+
27237+static inline void au_sphl_del_rcu(struct hlist_node *hlist,
27238+ struct au_sphlhead *sphl)
27239+{
27240+ spin_lock(&sphl->spin);
27241+ hlist_del_rcu(hlist);
27242+ spin_unlock(&sphl->spin);
27243+}
27244+
27245+static inline unsigned long au_sphl_count(struct au_sphlhead *sphl)
27246+{
27247+ unsigned long cnt;
27248+ struct hlist_node *pos;
27249+
27250+ cnt = 0;
27251+ spin_lock(&sphl->spin);
27252+ hlist_for_each(pos, &sphl->head)
27253+ cnt++;
27254+ spin_unlock(&sphl->spin);
27255+ return cnt;
27256+}
27257+
27258+#endif /* __KERNEL__ */
27259+#endif /* __AUFS_SPL_H__ */
27260diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
27261--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
27262+++ linux/fs/aufs/super.c 2016-12-17 12:28:17.598545045 +0100
27263@@ -0,0 +1,1046 @@
27264+/*
27265+ * Copyright (C) 2005-2016 Junjiro R. Okajima
27266+ *
27267+ * This program, aufs is free software; you can redistribute it and/or modify
27268+ * it under the terms of the GNU General Public License as published by
27269+ * the Free Software Foundation; either version 2 of the License, or
27270+ * (at your option) any later version.
27271+ *
27272+ * This program is distributed in the hope that it will be useful,
27273+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27274+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27275+ * GNU General Public License for more details.
27276+ *
27277+ * You should have received a copy of the GNU General Public License
27278+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
27279+ */
27280+
27281+/*
27282+ * mount and super_block operations
27283+ */
27284+
27285+#include <linux/mm.h>
27286+#include <linux/seq_file.h>
27287+#include <linux/statfs.h>
27288+#include <linux/vmalloc.h>
27289+#include "aufs.h"
27290+
27291+/*
27292+ * super_operations
27293+ */
27294+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
27295+{
27296+ struct au_icntnr *c;
27297+
27298+ c = au_cache_alloc_icntnr();
27299+ if (c) {
27300+ au_icntnr_init(c);
27301+ c->vfs_inode.i_version = 1; /* sigen(sb); */
27302+ c->iinfo.ii_hinode = NULL;
27303+ return &c->vfs_inode;
27304+ }
27305+ return NULL;
27306+}
27307+
27308+static void aufs_destroy_inode_cb(struct rcu_head *head)
27309+{
27310+ struct inode *inode = container_of(head, struct inode, i_rcu);
27311+
27312+ au_cache_dfree_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
27313+}
27314+
27315+static void aufs_destroy_inode(struct inode *inode)
27316+{
27317+ if (!au_is_bad_inode(inode))
27318+ au_iinfo_fin(inode);
27319+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
27320+}
27321+
27322+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
27323+{
27324+ struct inode *inode;
27325+ int err;
27326+
27327+ inode = iget_locked(sb, ino);
27328+ if (unlikely(!inode)) {
27329+ inode = ERR_PTR(-ENOMEM);
27330+ goto out;
27331+ }
27332+ if (!(inode->i_state & I_NEW))
27333+ goto out;
27334+
27335+ err = au_xigen_new(inode);
27336+ if (!err)
27337+ err = au_iinfo_init(inode);
27338+ if (!err)
27339+ inode->i_version++;
27340+ else {
27341+ iget_failed(inode);
27342+ inode = ERR_PTR(err);
27343+ }
27344+
27345+out:
27346+ /* never return NULL */
27347+ AuDebugOn(!inode);
27348+ AuTraceErrPtr(inode);
27349+ return inode;
27350+}
27351+
27352+/* lock free root dinfo */
27353+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
27354+{
27355+ int err;
27356+ aufs_bindex_t bindex, bbot;
27357+ struct path path;
27358+ struct au_hdentry *hdp;
27359+ struct au_branch *br;
27360+ au_br_perm_str_t perm;
27361+
27362+ err = 0;
27363+ bbot = au_sbbot(sb);
27364+ bindex = 0;
27365+ hdp = au_hdentry(au_di(sb->s_root), bindex);
27366+ for (; !err && bindex <= bbot; bindex++, hdp++) {
27367+ br = au_sbr(sb, bindex);
27368+ path.mnt = au_br_mnt(br);
27369+ path.dentry = hdp->hd_dentry;
27370+ err = au_seq_path(seq, &path);
27371+ if (!err) {
27372+ au_optstr_br_perm(&perm, br->br_perm);
27373+ seq_printf(seq, "=%s", perm.a);
27374+ if (bindex != bbot)
27375+ seq_putc(seq, ':');
27376+ }
27377+ }
27378+ if (unlikely(err || seq_has_overflowed(seq)))
27379+ err = -E2BIG;
27380+
27381+ return err;
27382+}
27383+
/*
 * Build a printf format in @fmt from an option pattern.
 *
 * The leading part of @pat up to and including its first ':' is copied and
 * @append is attached after it, e.g. pat "mfsrr:%d" and append "%llu" give
 * "mfsrr:%llu".
 *
 * @fmt:    destination buffer
 * @len:    size of @fmt in bytes
 * @pat:    option pattern, expected to contain a ':'
 * @append: conversion specifier(s) to put after the ':'
 *
 * The length is checked before anything is written.  When the result would
 * not fit into @len bytes, or @pat has no ':', @fmt becomes the empty
 * string; a partially copied conversion specifier is never handed back.
 */
static void au_gen_fmt(char *fmt, int len, const char *pat,
		       const char *append)
{
	const char *colon;
	size_t prefix_len, append_len;

	if (len <= 0)
		return;
	fmt[0] = '\0';

	colon = strchr(pat, ':');
	if (!colon)
		return;

	prefix_len = colon - pat + 1;	/* includes the ':' */
	append_len = strlen(append);
	if (prefix_len + append_len >= (size_t)len)
		return;

	memcpy(fmt, pat, prefix_len);
	/* +1 copies the terminating NUL of @append as well */
	memcpy(fmt + prefix_len, append, append_len + 1);
}
27396+
27397+static void au_show_wbr_create(struct seq_file *m, int v,
27398+ struct au_sbinfo *sbinfo)
27399+{
27400+ const char *pat;
27401+ char fmt[32];
27402+ struct au_wbr_mfs *mfs;
27403+
27404+ AuRwMustAnyLock(&sbinfo->si_rwsem);
27405+
27406+ seq_puts(m, ",create=");
27407+ pat = au_optstr_wbr_create(v);
27408+ mfs = &sbinfo->si_wbr_mfs;
27409+ switch (v) {
27410+ case AuWbrCreate_TDP:
27411+ case AuWbrCreate_RR:
27412+ case AuWbrCreate_MFS:
27413+ case AuWbrCreate_PMFS:
27414+ seq_puts(m, pat);
27415+ break;
27416+ case AuWbrCreate_MFSRR:
27417+ case AuWbrCreate_TDMFS:
27418+ case AuWbrCreate_PMFSRR:
27419+ au_gen_fmt(fmt, sizeof(fmt), pat, "%llu");
27420+ seq_printf(m, fmt, mfs->mfsrr_watermark);
27421+ break;
27422+ case AuWbrCreate_MFSV:
27423+ case AuWbrCreate_PMFSV:
27424+ au_gen_fmt(fmt, sizeof(fmt), pat, "%lu");
27425+ seq_printf(m, fmt,
27426+ jiffies_to_msecs(mfs->mfs_expire)
27427+ / MSEC_PER_SEC);
27428+ break;
27429+ case AuWbrCreate_MFSRRV:
27430+ case AuWbrCreate_TDMFSV:
27431+ case AuWbrCreate_PMFSRRV:
27432+ au_gen_fmt(fmt, sizeof(fmt), pat, "%llu:%lu");
27433+ seq_printf(m, fmt, mfs->mfsrr_watermark,
27434+ jiffies_to_msecs(mfs->mfs_expire) / MSEC_PER_SEC);
27435+ break;
27436+ default:
27437+ BUG();
27438+ }
27439+}
27440+
27441+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
27442+{
27443+#ifdef CONFIG_SYSFS
27444+ return 0;
27445+#else
27446+ int err;
27447+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
27448+ aufs_bindex_t bindex, brid;
27449+ struct qstr *name;
27450+ struct file *f;
27451+ struct dentry *d, *h_root;
27452+
27453+ AuRwMustAnyLock(&sbinfo->si_rwsem);
27454+
27455+ err = 0;
27456+ f = au_sbi(sb)->si_xib;
27457+ if (!f)
27458+ goto out;
27459+
27460+ /* stop printing the default xino path on the first writable branch */
27461+ h_root = NULL;
27462+ brid = au_xino_brid(sb);
27463+ if (brid >= 0) {
27464+ bindex = au_br_index(sb, brid);
27465+ h_root = au_hdentry(au_di(sb->s_root), bindex)->hd_dentry;
27466+ }
27467+ d = f->f_path.dentry;
27468+ name = &d->d_name;
27469+ /* safe ->d_parent because the file is unlinked */
27470+ if (d->d_parent == h_root
27471+ && name->len == len
27472+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
27473+ goto out;
27474+
27475+ seq_puts(seq, ",xino=");
27476+ err = au_xino_path(seq, f);
27477+
27478+out:
27479+ return err;
27480+#endif
27481+}
27482+
/*
 * ->show_options: print the aufs mount options to @m.
 *
 * An option is printed only when its current value differs from the
 * default (AuOpt_Def / AUFS_*_DEF / Au*_Def); ",si=" is always printed and
 * ",acl" follows MS_POSIXACL.  The superblock info is read-locked for the
 * duration.  Always returns 0; an error from au_show_xino() only skips the
 * remaining options up to the branch list.
 *
 * seq_file will re-call me in case of too long string
 */
static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
{
	int err;
	unsigned int mnt_flags, v;
	struct super_block *sb;
	struct au_sbinfo *sbinfo;

/* print ",<str>" or ",no<str>" when the boolean option differs from the default */
#define AuBool(name, str) do { \
	v = au_opt_test(mnt_flags, name); \
	if (v != au_opt_test(AuOpt_Def, name)) \
		seq_printf(m, ",%s" #str, v ? "" : "no"); \
} while (0)

/* print ",<str>=<value name>" when the masked option differs from the default */
#define AuStr(name, str) do { \
	v = mnt_flags & AuOptMask_##name; \
	if (v != (AuOpt_Def & AuOptMask_##name)) \
		seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
} while (0)

/* print ",<str>=<val>" when val differs from AUFS_<name>_DEF */
#define AuUInt(name, str, val) do { \
	if (val != AUFS_##name##_DEF) \
		seq_printf(m, "," #str "=%u", val); \
} while (0)

	sb = dentry->d_sb;
	if (sb->s_flags & MS_POSIXACL)
		seq_puts(m, ",acl");

	/* lock free root dinfo */
	si_noflush_read_lock(sb);
	sbinfo = au_sbi(sb);
	seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));

	mnt_flags = au_mntflags(sb);
	if (au_opt_test(mnt_flags, XINO)) {
		err = au_show_xino(m, sb);
		if (unlikely(err))
			goto out;
	} else
		seq_puts(m, ",noxino");

	AuBool(TRUNC_XINO, trunc_xino);
	AuStr(UDBA, udba);
	AuBool(SHWH, shwh);
	AuBool(PLINK, plink);
	AuBool(DIO, dio);
	AuBool(DIRPERM1, dirperm1);

	v = sbinfo->si_wbr_create;
	if (v != AuWbrCreate_Def)
		au_show_wbr_create(m, v, sbinfo);

	v = sbinfo->si_wbr_copyup;
	if (v != AuWbrCopyup_Def)
		seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));

	/* 'a' when ALWAYS_DIROPQ is set, 'w' otherwise */
	v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
	if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
		seq_printf(m, ",diropq=%c", v ? 'a' : 'w');

	AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);

	/* si_rdcache is kept in jiffies; the option is shown in seconds */
	v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
	AuUInt(RDCACHE, rdcache, v);

	AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
	AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);

	au_fhsm_show(m, sbinfo);

	AuBool(SUM, sum);
	/* AuBool(SUM_W, wsum); */
	AuBool(WARN_PERM, warn_perm);
	AuBool(VERBOSE, verbose);

out:
	/* be sure to print "br:" last */
	if (!sysaufs_brs) {
		seq_puts(m, ",br:");
		au_show_brs(m, sb);
	}
	si_read_unlock(sb);
	return 0;

#undef AuBool
#undef AuStr
#undef AuUInt
}
27572+
27573+/* ---------------------------------------------------------------------- */
27574+
27575+/* sum mode which returns the summation for statfs(2) */
27576+
/* saturating addition: a + b, or ULLONG_MAX when the sum wraps around */
static u64 au_add_till_max(u64 a, u64 b)
{
	const u64 sum = a + b;

	/* an unsigned sum smaller than an operand means it wrapped */
	return sum < a ? ULLONG_MAX : sum;
}
27587+
/*
 * Saturating multiplication: a * mul, or ULLONG_MAX when the product does
 * not fit into 64 bits.
 *
 * Comparing the wrapped product with @a does not detect every overflow
 * (2^63 * 3 wraps to 2^63), so the product is built from 32-bit halves and
 * each step is checked.  No 64-bit division is used, which keeps this
 * usable on 32-bit builds.
 *
 * A zero operand yields 0.  @mul is converted to u64 exactly as the plain
 * multiplication would convert it, so a negative @mul acts as a huge
 * factor and saturates for any @a above 1.
 */
static u64 au_mul_till_max(u64 a, long mul)
{
	const u64 m = mul;
	const u64 a_hi = a >> 32, a_lo = a & 0xffffffffULL;
	const u64 m_hi = m >> 32, m_lo = m & 0xffffffffULL;
	u64 cross, low, res;

	/* hi * hi would land at bit 64 and above */
	if (a_hi && m_hi)
		return ULLONG_MAX;

	/* at most one term is non-zero, and a 32x32 product cannot wrap */
	cross = a_hi * m_lo + a_lo * m_hi;
	if (cross >> 32)
		return ULLONG_MAX;

	low = a_lo * m_lo;
	res = (cross << 32) + low;
	if (res < low)
		return ULLONG_MAX;

	return res;
}
27598+
27599+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
27600+{
27601+ int err;
27602+ long bsize, factor;
27603+ u64 blocks, bfree, bavail, files, ffree;
27604+ aufs_bindex_t bbot, bindex, i;
27605+ unsigned char shared;
27606+ struct path h_path;
27607+ struct super_block *h_sb;
27608+
27609+ err = 0;
27610+ bsize = LONG_MAX;
27611+ files = 0;
27612+ ffree = 0;
27613+ blocks = 0;
27614+ bfree = 0;
27615+ bavail = 0;
27616+ bbot = au_sbbot(sb);
27617+ for (bindex = 0; bindex <= bbot; bindex++) {
27618+ h_path.mnt = au_sbr_mnt(sb, bindex);
27619+ h_sb = h_path.mnt->mnt_sb;
27620+ shared = 0;
27621+ for (i = 0; !shared && i < bindex; i++)
27622+ shared = (au_sbr_sb(sb, i) == h_sb);
27623+ if (shared)
27624+ continue;
27625+
27626+ /* sb->s_root for NFS is unreliable */
27627+ h_path.dentry = h_path.mnt->mnt_root;
27628+ err = vfs_statfs(&h_path, buf);
27629+ if (unlikely(err))
27630+ goto out;
27631+
27632+ if (bsize > buf->f_bsize) {
27633+ /*
27634+ * we will reduce bsize, so we have to expand blocks
27635+ * etc. to match them again
27636+ */
27637+ factor = (bsize / buf->f_bsize);
27638+ blocks = au_mul_till_max(blocks, factor);
27639+ bfree = au_mul_till_max(bfree, factor);
27640+ bavail = au_mul_till_max(bavail, factor);
27641+ bsize = buf->f_bsize;
27642+ }
27643+
27644+ factor = (buf->f_bsize / bsize);
27645+ blocks = au_add_till_max(blocks,
27646+ au_mul_till_max(buf->f_blocks, factor));
27647+ bfree = au_add_till_max(bfree,
27648+ au_mul_till_max(buf->f_bfree, factor));
27649+ bavail = au_add_till_max(bavail,
27650+ au_mul_till_max(buf->f_bavail, factor));
27651+ files = au_add_till_max(files, buf->f_files);
27652+ ffree = au_add_till_max(ffree, buf->f_ffree);
27653+ }
27654+
27655+ buf->f_bsize = bsize;
27656+ buf->f_blocks = blocks;
27657+ buf->f_bfree = bfree;
27658+ buf->f_bavail = bavail;
27659+ buf->f_files = files;
27660+ buf->f_ffree = ffree;
27661+ buf->f_frsize = 0;
27662+
27663+out:
27664+ return err;
27665+}
27666+
27667+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
27668+{
27669+ int err;
27670+ struct path h_path;
27671+ struct super_block *sb;
27672+
27673+ /* lock free root dinfo */
27674+ sb = dentry->d_sb;
27675+ si_noflush_read_lock(sb);
27676+ if (!au_opt_test(au_mntflags(sb), SUM)) {
27677+ /* sb->s_root for NFS is unreliable */
27678+ h_path.mnt = au_sbr_mnt(sb, 0);
27679+ h_path.dentry = h_path.mnt->mnt_root;
27680+ err = vfs_statfs(&h_path, buf);
27681+ } else
27682+ err = au_statfs_sum(sb, buf);
27683+ si_read_unlock(sb);
27684+
27685+ if (!err) {
27686+ buf->f_type = AUFS_SUPER_MAGIC;
27687+ buf->f_namelen = AUFS_MAX_NAMELEN;
27688+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
27689+ }
27690+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
27691+
27692+ return err;
27693+}
27694+
27695+/* ---------------------------------------------------------------------- */
27696+
27697+static int aufs_sync_fs(struct super_block *sb, int wait)
27698+{
27699+ int err, e;
27700+ aufs_bindex_t bbot, bindex;
27701+ struct au_branch *br;
27702+ struct super_block *h_sb;
27703+
27704+ err = 0;
27705+ si_noflush_read_lock(sb);
27706+ bbot = au_sbbot(sb);
27707+ for (bindex = 0; bindex <= bbot; bindex++) {
27708+ br = au_sbr(sb, bindex);
27709+ if (!au_br_writable(br->br_perm))
27710+ continue;
27711+
27712+ h_sb = au_sbr_sb(sb, bindex);
27713+ if (h_sb->s_op->sync_fs) {
27714+ e = h_sb->s_op->sync_fs(h_sb, wait);
27715+ if (unlikely(e && !err))
27716+ err = e;
27717+ /* go on even if an error happens */
27718+ }
27719+ }
27720+ si_read_unlock(sb);
27721+
27722+ return err;
27723+}
27724+
27725+/* ---------------------------------------------------------------------- */
27726+
27727+/* final actions when unmounting a file system */
27728+static void aufs_put_super(struct super_block *sb)
27729+{
27730+ struct au_sbinfo *sbinfo;
27731+
27732+ sbinfo = au_sbi(sb);
27733+ if (!sbinfo)
27734+ return;
27735+
27736+ dbgaufs_si_fin(sbinfo);
27737+ kobject_put(&sbinfo->si_kobj);
27738+}
27739+
27740+/* ---------------------------------------------------------------------- */
27741+
27742+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
27743+ struct super_block *sb, void *arg)
27744+{
27745+ void *array;
27746+ unsigned long long n, sz;
27747+
27748+ array = NULL;
27749+ n = 0;
27750+ if (!*hint)
27751+ goto out;
27752+
27753+ if (*hint > ULLONG_MAX / sizeof(array)) {
27754+ array = ERR_PTR(-EMFILE);
27755+ pr_err("hint %llu\n", *hint);
27756+ goto out;
27757+ }
27758+
27759+ sz = sizeof(array) * *hint;
27760+ array = kzalloc(sz, GFP_NOFS);
27761+ if (unlikely(!array))
27762+ array = vzalloc(sz);
27763+ if (unlikely(!array)) {
27764+ array = ERR_PTR(-ENOMEM);
27765+ goto out;
27766+ }
27767+
27768+ n = cb(sb, array, *hint, arg);
27769+ AuDebugOn(n > *hint);
27770+
27771+out:
27772+ *hint = n;
27773+ return array;
27774+}
27775+
27776+static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
27777+ unsigned long long max __maybe_unused,
27778+ void *arg)
27779+{
27780+ unsigned long long n;
27781+ struct inode **p, *inode;
27782+ struct list_head *head;
27783+
27784+ n = 0;
27785+ p = a;
27786+ head = arg;
27787+ spin_lock(&sb->s_inode_list_lock);
27788+ list_for_each_entry(inode, head, i_sb_list) {
27789+ if (!au_is_bad_inode(inode)
27790+ && au_ii(inode)->ii_btop >= 0) {
27791+ spin_lock(&inode->i_lock);
27792+ if (atomic_read(&inode->i_count)) {
27793+ au_igrab(inode);
27794+ *p++ = inode;
27795+ n++;
27796+ AuDebugOn(n > max);
27797+ }
27798+ spin_unlock(&inode->i_lock);
27799+ }
27800+ }
27801+ spin_unlock(&sb->s_inode_list_lock);
27802+
27803+ return n;
27804+}
27805+
27806+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
27807+{
27808+ *max = au_ninodes(sb);
27809+ return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
27810+}
27811+
/* iput() the first @max inodes of @a and free the array with kvfree() */
void au_iarray_free(struct inode **a, unsigned long long max)
{
	unsigned long long idx = 0;

	while (idx < max)
		iput(a[idx++]);

	kvfree(a);
}
27820+
27821+/* ---------------------------------------------------------------------- */
27822+
27823+/*
27824+ * refresh dentry and inode at remount time.
27825+ */
27826+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
27827+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
27828+ struct dentry *parent)
27829+{
27830+ int err;
27831+
27832+ di_write_lock_child(dentry);
27833+ di_read_lock_parent(parent, AuLock_IR);
27834+ err = au_refresh_dentry(dentry, parent);
27835+ if (!err && dir_flags)
27836+ au_hn_reset(d_inode(dentry), dir_flags);
27837+ di_read_unlock(parent, AuLock_IR);
27838+ di_write_unlock(dentry);
27839+
27840+ return err;
27841+}
27842+
27843+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
27844+ struct au_sbinfo *sbinfo,
27845+ const unsigned int dir_flags, unsigned int do_idop)
27846+{
27847+ int err;
27848+ struct dentry *parent;
27849+
27850+ err = 0;
27851+ parent = dget_parent(dentry);
27852+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
27853+ if (d_really_is_positive(dentry)) {
27854+ if (!d_is_dir(dentry))
27855+ err = au_do_refresh(dentry, /*dir_flags*/0,
27856+ parent);
27857+ else {
27858+ err = au_do_refresh(dentry, dir_flags, parent);
27859+ if (unlikely(err))
27860+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
27861+ }
27862+ } else
27863+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
27864+ AuDbgDentry(dentry);
27865+ }
27866+ dput(parent);
27867+
27868+ if (!err) {
27869+ if (do_idop)
27870+ au_refresh_dop(dentry, /*force_reval*/0);
27871+ } else
27872+ au_refresh_dop(dentry, /*force_reval*/1);
27873+
27874+ AuTraceErr(err);
27875+ return err;
27876+}
27877+
27878+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
27879+{
27880+ int err, i, j, ndentry, e;
27881+ unsigned int sigen;
27882+ struct au_dcsub_pages dpages;
27883+ struct au_dpage *dpage;
27884+ struct dentry **dentries, *d;
27885+ struct au_sbinfo *sbinfo;
27886+ struct dentry *root = sb->s_root;
27887+ const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
27888+
27889+ if (do_idop)
27890+ au_refresh_dop(root, /*force_reval*/0);
27891+
27892+ err = au_dpages_init(&dpages, GFP_NOFS);
27893+ if (unlikely(err))
27894+ goto out;
27895+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
27896+ if (unlikely(err))
27897+ goto out_dpages;
27898+
27899+ sigen = au_sigen(sb);
27900+ sbinfo = au_sbi(sb);
27901+ for (i = 0; i < dpages.ndpage; i++) {
27902+ dpage = dpages.dpages + i;
27903+ dentries = dpage->dentries;
27904+ ndentry = dpage->ndentry;
27905+ for (j = 0; j < ndentry; j++) {
27906+ d = dentries[j];
27907+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
27908+ do_idop);
27909+ if (unlikely(e && !err))
27910+ err = e;
27911+ /* go on even err */
27912+ }
27913+ }
27914+
27915+out_dpages:
27916+ au_dpages_free(&dpages);
27917+out:
27918+ return err;
27919+}
27920+
27921+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
27922+{
27923+ int err, e;
27924+ unsigned int sigen;
27925+ unsigned long long max, ull;
27926+ struct inode *inode, **array;
27927+
27928+ array = au_iarray_alloc(sb, &max);
27929+ err = PTR_ERR(array);
27930+ if (IS_ERR(array))
27931+ goto out;
27932+
27933+ err = 0;
27934+ sigen = au_sigen(sb);
27935+ for (ull = 0; ull < max; ull++) {
27936+ inode = array[ull];
27937+ if (unlikely(!inode))
27938+ break;
27939+
27940+ e = 0;
27941+ ii_write_lock_child(inode);
27942+ if (au_iigen(inode, NULL) != sigen) {
27943+ e = au_refresh_hinode_self(inode);
27944+ if (unlikely(e)) {
27945+ au_refresh_iop(inode, /*force_getattr*/1);
27946+ pr_err("error %d, i%lu\n", e, inode->i_ino);
27947+ if (!err)
27948+ err = e;
27949+ /* go on even if err */
27950+ }
27951+ }
27952+ if (!e && do_idop)
27953+ au_refresh_iop(inode, /*force_getattr*/0);
27954+ ii_write_unlock(inode);
27955+ }
27956+
27957+ au_iarray_free(array, max);
27958+
27959+out:
27960+ return err;
27961+}
27962+
/*
 * Refresh the whole aufs mount after a remount changed it.
 *
 * Bumps the superblock generation, resets hnotify on every branch and on
 * the root inode, optionally switches the dentry/inode operation tables
 * (@do_idop), and then refreshes the dentries and inodes.  The root dinfo
 * write lock is dropped around the refresh and taken again before
 * returning, so the caller is expected to hold it on entry (the unlock
 * below has no matching lock in this function).
 *
 * Errors are logged and otherwise ignored; nothing is returned.
 */
static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
{
	int err, e;
	unsigned int udba;
	aufs_bindex_t bindex, bbot;
	struct dentry *root;
	struct inode *inode;
	struct au_branch *br;
	struct au_sbinfo *sbi;

	au_sigen_inc(sb);
	sbi = au_sbi(sb);
	/* au_do_refresh_d() sets this again when a directory fails */
	au_fclr_si(sbi, FAILED_REFRESH_DIR);

	root = sb->s_root;
	DiMustNoWaiters(root);
	inode = d_inode(root);
	IiMustNoWaiters(inode);

	udba = au_opt_udba(sb);
	bbot = au_sbbot(sb);
	for (bindex = 0; bindex <= bbot; bindex++) {
		br = au_sbr(sb, bindex);
		err = au_hnotify_reset_br(udba, br, br->br_perm);
		if (unlikely(err))
			AuIOErr("hnotify failed on br %d, %d, ignored\n",
				bindex, err);
		/* go on even if err */
	}
	au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));

	if (do_idop) {
		/*
		 * pick the operation tables matching the NO_DREVAL flag;
		 * the debug checks expect an actual switch to happen
		 */
		if (au_ftest_si(sbi, NO_DREVAL)) {
			AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
			sb->s_d_op = &aufs_dop_noreval;
			AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
			sbi->si_iop_array = aufs_iop_nogetattr;
		} else {
			AuDebugOn(sb->s_d_op == &aufs_dop);
			sb->s_d_op = &aufs_dop;
			AuDebugOn(sbi->si_iop_array == aufs_iop);
			sbi->si_iop_array = aufs_iop;
		}
		pr_info("reset to %pf and %pf\n",
			sb->s_d_op, sbi->si_iop_array);
	}

	/* the refresh helpers take the dentry/inode info locks themselves */
	di_write_unlock(root);
	err = au_refresh_d(sb, do_idop);
	e = au_refresh_i(sb, do_idop);
	if (unlikely(e && !err))
		err = e;
	/* aufs_write_lock() calls ..._child() */
	di_write_lock_child(root);

	au_cpup_attr_all(inode, /*force*/1);

	if (unlikely(err))
		AuIOErr("refresh failed, ignored, %d\n", err);
}
28023+
28024+/* stop extra interpretation of errno in mount(8), and strange error messages */
28025+static int cvt_err(int err)
28026+{
28027+ AuTraceErr(err);
28028+
28029+ switch (err) {
28030+ case -ENOENT:
28031+ case -ENOTDIR:
28032+ case -EEXIST:
28033+ case -EIO:
28034+ err = -EINVAL;
28035+ }
28036+ return err;
28037+}
28038+
/*
 * ->remount_fs.
 *
 * Without option data only au_opts_verify() is run under the superblock
 * and root dinfo write locks.  Otherwise the options are parsed into a
 * freshly allocated page, applied with au_opts_remount() under the root
 * inode lock plus the same two aufs locks, and the refreshes requested
 * through opts.flags are carried out.
 *
 * Returns 0 or a negative errno filtered through cvt_err().
 */
static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
{
	int err, do_dx;
	unsigned int mntflags;
	struct au_opts opts = {
		.opt = NULL
	};
	struct dentry *root;
	struct inode *inode;
	struct au_sbinfo *sbinfo;

	err = 0;
	root = sb->s_root;
	if (!data || !*data) {
		/* no aufs options: only verify the current ones against *flags */
		err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
		if (!err) {
			di_write_lock_child(root);
			err = au_opts_verify(sb, *flags, /*pending*/0);
			/* releases both the dinfo and the sbinfo write lock */
			aufs_write_unlock(root);
		}
		goto out;
	}

	err = -ENOMEM;
	opts.opt = (void *)__get_free_page(GFP_NOFS);
	if (unlikely(!opts.opt))
		goto out;
	/* the page holds as many option slots as fit into it */
	opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
	opts.flags = AuOpts_REMOUNT;
	opts.sb_flags = *flags;

	/* parse it before aufs lock */
	err = au_opts_parse(sb, data, &opts);
	if (unlikely(err))
		goto out_opts;

	sbinfo = au_sbi(sb);
	inode = d_inode(root);
	inode_lock(inode);
	err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
	if (unlikely(err))
		goto out_mtx;
	di_write_lock_child(root);

	/* au_opts_remount() may return an error */
	err = au_opts_remount(sb, &opts);
	au_opts_free(&opts);

	/* opts.flags is still read after au_opts_free(); the refreshes run even when err is set */
	if (au_ftest_opts(opts.flags, REFRESH))
		au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));

	if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
		mntflags = au_mntflags(sb);
		do_dx = !!au_opt_test(mntflags, DIO);
		au_dy_arefresh(do_dx);
	}

	au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
	aufs_write_unlock(root);

out_mtx:
	inode_unlock(inode);
out_opts:
	au_delayed_free_page((unsigned long)opts.opt);
out:
	err = cvt_err(err);
	AuTraceErr(err);
	return err;
}
28108+
28109+static const struct super_operations aufs_sop = {
28110+ .alloc_inode = aufs_alloc_inode,
28111+ .destroy_inode = aufs_destroy_inode,
28112+ /* always deleting, no clearing */
28113+ .drop_inode = generic_delete_inode,
28114+ .show_options = aufs_show_options,
28115+ .statfs = aufs_statfs,
28116+ .put_super = aufs_put_super,
28117+ .sync_fs = aufs_sync_fs,
28118+ .remount_fs = aufs_remount_fs
28119+};
28120+
28121+/* ---------------------------------------------------------------------- */
28122+
28123+static int alloc_root(struct super_block *sb)
28124+{
28125+ int err;
28126+ struct inode *inode;
28127+ struct dentry *root;
28128+
28129+ /* create the root dir inode and dentry, then publish them in sb->s_root */
28130+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
28131+ err = PTR_ERR(inode);
28132+ if (IS_ERR(inode))
28133+ goto out;
28134+
28135+ inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
28136+ inode->i_fop = &aufs_dir_fop;
28137+ inode->i_mode = S_IFDIR;
28138+ set_nlink(inode, 2);
28139+ unlock_new_inode(inode);
28140+
28141+ /* d_make_root() reports failure as NULL, never as ERR_PTR() */
28142+ err = -ENOMEM;
28143+ root = d_make_root(inode);
28144+ if (unlikely(!root))
28145+ goto out; /* the inode was already put by d_make_root() */
28146+
28147+ /* set up the aufs dentry info; drop the new root if that fails */
28148+ err = au_di_init(root);
28149+ if (!err) {
28150+ sb->s_root = root;
28151+ return 0; /* success */
28152+ }
28153+ dput(root);
28154+
28155+out:
28156+ return err; /* negative errno on every failure path */
28157+}
28158+
28159+static int aufs_fill_super(struct super_block *sb, void *raw_data,
28160+ int silent __maybe_unused)
28161+{
28162+ int err;
28163+ struct au_opts opts = {
28164+ .opt = NULL
28165+ };
28166+ struct au_sbinfo *sbinfo;
28167+ struct dentry *root;
28168+ struct inode *inode;
28169+ char *arg = raw_data;
28170+ /* fill_super callback for mount_nodev(); a non-empty option string is required */
28171+ if (unlikely(!arg || !*arg)) {
28172+ err = -EINVAL;
28173+ pr_err("no arg\n");
28174+ goto out;
28175+ }
28176+
28177+ err = -ENOMEM;
28178+ opts.opt = (void *)__get_free_page(GFP_NOFS); /* one page for parsed options */
28179+ if (unlikely(!opts.opt))
28180+ goto out;
28181+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
28182+ opts.sb_flags = sb->s_flags;
28183+
28184+ err = au_si_alloc(sb);
28185+ if (unlikely(err))
28186+ goto out_opts;
28187+ sbinfo = au_sbi(sb);
28188+
28189+ /* all timestamps always follow the ones on the branch */
28190+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
28191+ sb->s_op = &aufs_sop;
28192+ sb->s_d_op = &aufs_dop;
28193+ sb->s_magic = AUFS_SUPER_MAGIC;
28194+ sb->s_maxbytes = 0;
28195+ sb->s_stack_depth = 1;
28196+ au_export_init(sb);
28197+ au_xattr_init(sb);
28198+
28199+ err = alloc_root(sb);
28200+ if (unlikely(err)) {
28201+ si_write_unlock(sb); /* NOTE(review): presumably taken in au_si_alloc() - confirm */
28202+ goto out_info;
28203+ }
28204+ root = sb->s_root;
28205+ inode = d_inode(root);
28206+
28207+ /*
28208+ * actually we can parse options regardless of the aufs lock here.
28209+ * but at remount time, parsing must be done before aufs lock.
28210+ * so we follow the same rule.
28211+ */
28212+ ii_write_lock_parent(inode);
28213+ aufs_write_unlock(root);
28214+ err = au_opts_parse(sb, arg, &opts);
28215+ if (unlikely(err))
28216+ goto out_root;
28217+
28218+ /* lock vfs_inode first, then aufs. */
28219+ inode_lock(inode);
28220+ aufs_write_lock(root);
28221+ err = au_opts_mount(sb, &opts);
28222+ au_opts_free(&opts);
28223+ if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
28224+ sb->s_d_op = &aufs_dop_noreval;
28225+ pr_info("%pf\n", sb->s_d_op);
28226+ au_refresh_dop(root, /*force_reval*/0);
28227+ sbinfo->si_iop_array = aufs_iop_nogetattr;
28228+ au_refresh_iop(inode, /*force_getattr*/0);
28229+ }
28230+ aufs_write_unlock(root);
28231+ inode_unlock(inode);
28232+ if (!err)
28233+ goto out_opts; /* success: only the option page is released */
28234+
28235+out_root:
28236+ dput(root);
28237+ sb->s_root = NULL;
28238+out_info:
28239+ dbgaufs_si_fin(sbinfo);
28240+ kobject_put(&sbinfo->si_kobj);
28241+ sb->s_fs_info = NULL; /* aufs_kill_sb() tests au_sbi(sb) for NULL */
28242+out_opts:
28243+ au_delayed_free_page((unsigned long)opts.opt);
28244+out:
28245+ AuTraceErr(err);
28246+ err = cvt_err(err); /* fold some errnos into -EINVAL for mount(8) */
28247+ AuTraceErr(err);
28248+ return err;
28249+}
28250+
28251+/* ---------------------------------------------------------------------- */
28252+
28253+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
28254+ const char *dev_name __maybe_unused,
28255+ void *raw_data)
28256+{
28257+ struct dentry *root;
28258+ struct super_block *sb;
28259+ /* .mount handler: returns whatever mount_nodev() returns (dentry or ERR_PTR) */
28260+ /* all timestamps always follow the ones on the branch */
28261+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
28262+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
28263+ if (IS_ERR(root))
28264+ goto out;
28265+
28266+ sb = root->d_sb;
28267+ si_write_lock(sb, !AuLock_FLUSH); /* flags == 0; return value is ignored */
28268+ sysaufs_brs_add(sb, 0);
28269+ si_write_unlock(sb);
28270+ au_sbilist_add(sb); /* removed again by au_sbilist_del() in aufs_kill_sb() */
28271+
28272+out:
28273+ return root;
28274+}
28275+
28276+static void aufs_kill_sb(struct super_block *sb)
28277+{
28278+ struct au_sbinfo *sbinfo;
28279+ /* .kill_sb handler: tear down aufs state if present, then kill_anon_super() */
28280+ sbinfo = au_sbi(sb);
28281+ if (sbinfo) { /* s_fs_info is NULL when aufs_fill_super() failed */
28282+ au_sbilist_del(sb);
28283+ aufs_write_lock(sb->s_root);
28284+ au_fhsm_fin(sb);
28285+ if (sbinfo->si_wbr_create_ops->fin) /* ->fin is tested before the call */
28286+ sbinfo->si_wbr_create_ops->fin(sb);
28287+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
28288+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
28289+ au_remount_refresh(sb, /*do_idop*/0); /* refresh with udba now NONE */
28290+ }
28291+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
28292+ au_plink_put(sb, /*verbose*/1);
28293+ au_xino_clr(sb);
28294+ sbinfo->si_sb = NULL;
28295+ aufs_write_unlock(sb->s_root);
28296+ au_nwt_flush(&sbinfo->si_nowait); /* called after the aufs lock is dropped */
28297+ }
28298+ kill_anon_super(sb);
28299+}
28300+
28301+struct file_system_type aufs_fs_type = {
28302+ .name = AUFS_FSTYPE,
28303+ /* a race between rename and others */
28304+ .fs_flags = FS_RENAME_DOES_D_MOVE,
28305+ .mount = aufs_mount,
28306+ .kill_sb = aufs_kill_sb,
28307+ /* no need to __module_get() and module_put(). */
28308+ .owner = THIS_MODULE,
28309+};
28310diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
28311--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
28312+++ linux/fs/aufs/super.h 2016-12-17 12:28:17.598545045 +0100
28313@@ -0,0 +1,639 @@
28314+/*
28315+ * Copyright (C) 2005-2016 Junjiro R. Okajima
28316+ *
28317+ * This program, aufs is free software; you can redistribute it and/or modify
28318+ * it under the terms of the GNU General Public License as published by
28319+ * the Free Software Foundation; either version 2 of the License, or
28320+ * (at your option) any later version.
28321+ *
28322+ * This program is distributed in the hope that it will be useful,
28323+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28324+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28325+ * GNU General Public License for more details.
28326+ *
28327+ * You should have received a copy of the GNU General Public License
28328+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
28329+ */
28330+
28331+/*
28332+ * super_block operations
28333+ */
28334+
28335+#ifndef __AUFS_SUPER_H__
28336+#define __AUFS_SUPER_H__
28337+
28338+#ifdef __KERNEL__
28339+
28340+#include <linux/fs.h>
28341+#include <linux/kobject.h>
28342+#include "rwsem.h"
28343+#include "spl.h"
28344+#include "wkq.h"
28345+
28346+/* policies to select one among multiple writable branches */
28347+struct au_wbr_copyup_operations {
28348+ int (*copyup)(struct dentry *dentry);
28349+};
28350+
28351+#define AuWbr_DIR 1 /* target is a dir */
28352+#define AuWbr_PARENT (1 << 1) /* always require a parent */
28353+
28354+#define au_ftest_wbr(flags, name) ((flags) & AuWbr_##name)
28355+#define au_fset_wbr(flags, name) { (flags) |= AuWbr_##name; }
28356+#define au_fclr_wbr(flags, name) { (flags) &= ~AuWbr_##name; }
28357+
28358+struct au_wbr_create_operations {
28359+ int (*create)(struct dentry *dentry, unsigned int flags);
28360+ int (*init)(struct super_block *sb);
28361+ int (*fin)(struct super_block *sb);
28362+};
28363+
28364+struct au_wbr_mfs {
28365+ struct mutex mfs_lock; /* protect this structure */
28366+ unsigned long mfs_jiffy;
28367+ unsigned long mfs_expire;
28368+ aufs_bindex_t mfs_bindex;
28369+
28370+ unsigned long long mfsrr_bytes;
28371+ unsigned long long mfsrr_watermark;
28372+};
28373+
28374+#define AuPlink_NHASH 100
28375+static inline int au_plink_hash(ino_t ino)
28376+{
28377+ return ino % AuPlink_NHASH;
28378+}
28379+
28380+/* File-based Hierarchical Storage Management */
28381+struct au_fhsm {
28382+#ifdef CONFIG_AUFS_FHSM
28383+ /* allow only one process who can receive the notification */
28384+ spinlock_t fhsm_spin;
28385+ pid_t fhsm_pid;
28386+ wait_queue_head_t fhsm_wqh;
28387+ atomic_t fhsm_readable;
28388+
28389+ /* these are protected by si_rwsem */
28390+ unsigned long fhsm_expire;
28391+ aufs_bindex_t fhsm_bottom;
28392+#endif
28393+};
28394+
28395+#define AU_PIDSTEP (int)(BITS_TO_LONGS(PID_MAX_DEFAULT) * BITS_PER_LONG)
28396+#define AU_NPIDMAP (int)DIV_ROUND_UP(PID_MAX_LIMIT, AU_PIDSTEP)
28397+struct au_si_pid {
28398+ unsigned long *pid_bitmap[AU_NPIDMAP];
28399+ struct mutex pid_mtx;
28400+};
28401+
28402+struct au_branch;
28403+struct au_sbinfo {
28404+ /* nowait tasks in the system-wide workqueue */
28405+ struct au_nowait_tasks si_nowait;
28406+
28407+ /*
28408+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
28409+ * rwsem for au_sbinfo is necessary.
28410+ */
28411+ struct au_rwsem si_rwsem;
28412+
28413+ /* prevent recursive locking in deleting inode */
28414+ struct au_si_pid au_si_pid;
28415+
28416+ /*
28417+ * dirty approach to protect sb->sb_inodes and ->s_files (gone) from
28418+ * remount.
28419+ */
28420+ struct percpu_counter si_ninodes, si_nfiles;
28421+
28422+ /* branch management */
28423+ unsigned int si_generation;
28424+
28425+ /* see AuSi_ flags */
28426+ unsigned char au_si_status;
28427+
28428+ aufs_bindex_t si_bbot; /* index of the bottom (last) branch */
28429+
28430+ /* dirty trick to keep br_id plus */
28431+ unsigned int si_last_br_id :
28432+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
28433+ struct au_branch **si_branch;
28434+
28435+ /* policy to select a writable branch */
28436+ unsigned char si_wbr_copyup;
28437+ unsigned char si_wbr_create;
28438+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
28439+ struct au_wbr_create_operations *si_wbr_create_ops;
28440+
28441+ /* round robin */
28442+ atomic_t si_wbr_rr_next;
28443+
28444+ /* most free space */
28445+ struct au_wbr_mfs si_wbr_mfs;
28446+
28447+ /* File-based Hierarchical Storage Management */
28448+ struct au_fhsm si_fhsm;
28449+
28450+ /* mount flags */
28451+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
28452+ unsigned int si_mntflags;
28453+
28454+ /* external inode number (bitmap and translation table) */
28455+ vfs_readf_t si_xread;
28456+ vfs_writef_t si_xwrite;
28457+ struct file *si_xib;
28458+ struct mutex si_xib_mtx; /* protect xib members */
28459+ unsigned long *si_xib_buf;
28460+ unsigned long si_xib_last_pindex;
28461+ int si_xib_next_bit;
28462+ aufs_bindex_t si_xino_brid;
28463+ unsigned long si_xino_jiffy;
28464+ unsigned long si_xino_expire;
28465+ /* reserved for future use */
28466+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
28467+
28468+#ifdef CONFIG_AUFS_EXPORT
28469+ /* i_generation */
28470+ struct file *si_xigen;
28471+ atomic_t si_xigen_next;
28472+#endif
28473+
28474+ /* dirty trick to support atomic_open */
28475+ struct au_sphlhead si_aopen;
28476+
28477+ /* vdir parameters */
28478+ unsigned long si_rdcache; /* max cache time in jiffies */
28479+ unsigned int si_rdblk; /* deblk size */
28480+ unsigned int si_rdhash; /* hash size */
28481+
28482+ /*
28483+ * If the number of whiteouts is larger than si_dirwh, leave all of
28484+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
28485+ * future fsck.aufs or kernel thread will remove them later.
28486+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
28487+ */
28488+ unsigned int si_dirwh;
28489+
28490+ /* pseudo_link list */
28491+ struct au_sphlhead si_plink[AuPlink_NHASH];
28492+ wait_queue_head_t si_plink_wq;
28493+ spinlock_t si_plink_maint_lock;
28494+ pid_t si_plink_maint_pid;
28495+
28496+ /* file list */
28497+ struct au_sphlhead si_files;
28498+
28499+ /* with/without getattr, brother of sb->s_d_op */
28500+ struct inode_operations *si_iop_array;
28501+
28502+ /*
28503+ * sysfs and lifetime management.
28504+ * this is not a small structure and it may be a waste of memory in case
28505+ * sysfs is disabled, particularly when many aufs-es are mounted.
28506+ * but using sysfs is the majority case.
28507+ */
28508+ struct kobject si_kobj;
28509+#ifdef CONFIG_DEBUG_FS
28510+ struct dentry *si_dbgaufs;
28511+ struct dentry *si_dbgaufs_plink;
28512+ struct dentry *si_dbgaufs_xib;
28513+#ifdef CONFIG_AUFS_EXPORT
28514+ struct dentry *si_dbgaufs_xigen;
28515+#endif
28516+#endif
28517+
28518+#ifdef CONFIG_AUFS_SBILIST
28519+ struct hlist_node si_list;
28520+#endif
28521+
28522+ /* dirty, necessary for unmounting, sysfs and sysrq */
28523+ struct super_block *si_sb;
28524+};
28525+
28526+/* sbinfo status flags */
28527+/*
28528+ * set true when refresh_dirs() failed at remount time.
28529+ * then try refreshing dirs at access time again.
28530+ * if it is false, refreshing dirs at access time is unnecessary
28531+ */
28532+#define AuSi_FAILED_REFRESH_DIR 1
28533+#define AuSi_FHSM (1 << 1) /* fhsm is active now */
28534+#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
28535+
28536+#ifndef CONFIG_AUFS_FHSM
28537+#undef AuSi_FHSM
28538+#define AuSi_FHSM 0
28539+#endif
28540+
28541+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
28542+ unsigned int flag)
28543+{
28544+ AuRwMustAnyLock(&sbi->si_rwsem);
28545+ return sbi->au_si_status & flag; /* non-zero iff the AuSi_ bit is set */
28546+}
28547+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
28548+#define au_fset_si(sbinfo, name) do { \
28549+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28550+ (sbinfo)->au_si_status |= AuSi_##name; \
28551+} while (0)
28552+#define au_fclr_si(sbinfo, name) do { \
28553+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28554+ (sbinfo)->au_si_status &= ~AuSi_##name; \
28555+} while (0)
28556+
28557+/* ---------------------------------------------------------------------- */
28558+
28559+/* policy to select one among writable branches */
28560+#define AuWbrCopyup(sbinfo, ...) \
28561+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
28562+#define AuWbrCreate(sbinfo, ...) \
28563+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
28564+
28565+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
28566+#define AuLock_DW 1 /* write-lock dentry */
28567+#define AuLock_IR (1 << 1) /* read-lock inode */
28568+#define AuLock_IW (1 << 2) /* write-lock inode */
28569+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
28570+#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
28571+ /* except RENAME_EXCHANGE */
28572+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
28573+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
28574+#define AuLock_GEN (1 << 7) /* test digen/iigen */
28575+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
28576+#define au_fset_lock(flags, name) \
28577+ do { (flags) |= AuLock_##name; } while (0)
28578+#define au_fclr_lock(flags, name) \
28579+ do { (flags) &= ~AuLock_##name; } while (0)
28580+
28581+/* ---------------------------------------------------------------------- */
28582+
28583+/* super.c */
28584+extern struct file_system_type aufs_fs_type;
28585+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
28586+typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
28587+ unsigned long long max, void *arg);
28588+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
28589+ struct super_block *sb, void *arg);
28590+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
28591+void au_iarray_free(struct inode **a, unsigned long long max);
28592+
28593+/* sbinfo.c */
28594+void au_si_free(struct kobject *kobj);
28595+int au_si_alloc(struct super_block *sb);
28596+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink);
28597+
28598+unsigned int au_sigen_inc(struct super_block *sb);
28599+aufs_bindex_t au_new_br_id(struct super_block *sb);
28600+
28601+int si_read_lock(struct super_block *sb, int flags);
28602+int si_write_lock(struct super_block *sb, int flags);
28603+int aufs_read_lock(struct dentry *dentry, int flags);
28604+void aufs_read_unlock(struct dentry *dentry, int flags);
28605+void aufs_write_lock(struct dentry *dentry);
28606+void aufs_write_unlock(struct dentry *dentry);
28607+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
28608+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
28609+
28610+/* wbr_policy.c */
28611+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
28612+extern struct au_wbr_create_operations au_wbr_create_ops[];
28613+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
28614+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
28615+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop);
28616+
28617+/* mvdown.c */
28618+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
28619+
28620+#ifdef CONFIG_AUFS_FHSM
28621+/* fhsm.c */
28622+
28623+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
28624+{
28625+ pid_t pid;
28626+
28627+ spin_lock(&fhsm->fhsm_spin);
28628+ pid = fhsm->fhsm_pid;
28629+ spin_unlock(&fhsm->fhsm_spin);
28630+
28631+ return pid;
28632+}
28633+
28634+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
28635+void au_fhsm_wrote_all(struct super_block *sb, int force);
28636+int au_fhsm_fd(struct super_block *sb, int oflags);
28637+int au_fhsm_br_alloc(struct au_branch *br);
28638+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
28639+void au_fhsm_fin(struct super_block *sb);
28640+void au_fhsm_init(struct au_sbinfo *sbinfo);
28641+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
28642+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
28643+#else
28644+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
28645+ int force)
28646+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
28647+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
28648+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
28649+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
28650+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
28651+AuStubVoid(au_fhsm_fin, struct super_block *sb)
28652+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
28653+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
28654+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
28655+#endif
28656+
28657+/* ---------------------------------------------------------------------- */
28658+
28659+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
28660+{
28661+ return sb->s_fs_info;
28662+}
28663+
28664+/* ---------------------------------------------------------------------- */
28665+
28666+#ifdef CONFIG_AUFS_EXPORT
28667+int au_test_nfsd(void);
28668+void au_export_init(struct super_block *sb);
28669+void au_xigen_inc(struct inode *inode);
28670+int au_xigen_new(struct inode *inode);
28671+int au_xigen_set(struct super_block *sb, struct file *base);
28672+void au_xigen_clr(struct super_block *sb);
28673+
28674+static inline int au_busy_or_stale(void)
28675+{
28676+ if (!au_test_nfsd())
28677+ return -EBUSY;
28678+ return -ESTALE;
28679+}
28680+#else
28681+AuStubInt0(au_test_nfsd, void)
28682+AuStubVoid(au_export_init, struct super_block *sb)
28683+AuStubVoid(au_xigen_inc, struct inode *inode)
28684+AuStubInt0(au_xigen_new, struct inode *inode)
28685+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
28686+AuStubVoid(au_xigen_clr, struct super_block *sb)
28687+AuStub(int, au_busy_or_stale, return -EBUSY, void)
28688+#endif /* CONFIG_AUFS_EXPORT */
28689+
28690+/* ---------------------------------------------------------------------- */
28691+
28692+#ifdef CONFIG_AUFS_SBILIST
28693+/* module.c */
28694+extern struct au_sphlhead au_sbilist;
28695+
28696+static inline void au_sbilist_init(void)
28697+{
28698+ au_sphl_init(&au_sbilist);
28699+}
28700+
28701+static inline void au_sbilist_add(struct super_block *sb)
28702+{
28703+ au_sphl_add(&au_sbi(sb)->si_list, &au_sbilist);
28704+}
28705+
28706+static inline void au_sbilist_del(struct super_block *sb)
28707+{
28708+ au_sphl_del(&au_sbi(sb)->si_list, &au_sbilist);
28709+}
28710+
28711+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
28712+static inline void au_sbilist_lock(void)
28713+{
28714+ spin_lock(&au_sbilist.spin);
28715+}
28716+
28717+static inline void au_sbilist_unlock(void)
28718+{
28719+ spin_unlock(&au_sbilist.spin);
28720+}
28721+#define AuGFP_SBILIST GFP_ATOMIC
28722+#else
28723+AuStubVoid(au_sbilist_lock, void)
28724+AuStubVoid(au_sbilist_unlock, void)
28725+#define AuGFP_SBILIST GFP_NOFS
28726+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
28727+#else
28728+AuStubVoid(au_sbilist_init, void)
28729+AuStubVoid(au_sbilist_add, struct super_block *sb)
28730+AuStubVoid(au_sbilist_del, struct super_block *sb)
28731+AuStubVoid(au_sbilist_lock, void)
28732+AuStubVoid(au_sbilist_unlock, void)
28733+#define AuGFP_SBILIST GFP_NOFS
28734+#endif
28735+
28736+/* ---------------------------------------------------------------------- */
28737+
28738+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
28739+{
28740+ /*
28741+ * This function is a dynamic '__init' function actually,
28742+ * so the tiny check for si_rwsem is unnecessary.
28743+ */
28744+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
28745+#ifdef CONFIG_DEBUG_FS
28746+ sbinfo->si_dbgaufs = NULL;
28747+ sbinfo->si_dbgaufs_plink = NULL;
28748+ sbinfo->si_dbgaufs_xib = NULL;
28749+#ifdef CONFIG_AUFS_EXPORT
28750+ sbinfo->si_dbgaufs_xigen = NULL;
28751+#endif
28752+#endif
28753+}
28754+
28755+/* ---------------------------------------------------------------------- */
28756+
28757+static inline void si_pid_idx_bit(int *idx, pid_t *bit)
28758+{
28759+ /* the origin of pid is 1, but the bitmap's is 0 */
28760+ *bit = current->pid - 1;
28761+ *idx = *bit / AU_PIDSTEP; /* which pid_bitmap[] slot */
28762+ *bit %= AU_PIDSTEP; /* bit offset inside that slot */
28763+}
28764+
28765+static inline int si_pid_test(struct super_block *sb)
28766+{
28767+ pid_t bit;
28768+ int idx;
28769+ unsigned long *bitmap;
28770+ /* non-zero iff the bit for current->pid is set in this sb's pid bitmap */
28771+ si_pid_idx_bit(&idx, &bit);
28772+ bitmap = au_sbi(sb)->au_si_pid.pid_bitmap[idx];
28773+ if (bitmap)
28774+ return test_bit(bit, bitmap);
28775+ return 0; /* no bitmap in this slot: treated as "not set" */
28776+}
28777+
28778+static inline void si_pid_clr(struct super_block *sb)
28779+{
28780+ pid_t bit;
28781+ int idx;
28782+ unsigned long *bitmap;
28783+ /* clear the bit for current->pid; the slot's bitmap must already exist */
28784+ si_pid_idx_bit(&idx, &bit);
28785+ bitmap = au_sbi(sb)->au_si_pid.pid_bitmap[idx];
28786+ BUG_ON(!bitmap);
28787+ AuDebugOn(!test_bit(bit, bitmap)); /* debug check: the bit should be set */
28788+ clear_bit(bit, bitmap);
28789+ /* smp_mb(); */
28790+}
28791+
28792+void si_pid_set(struct super_block *sb);
28793+
28794+/* ---------------------------------------------------------------------- */
28795+
28796+/* lock superblock. mainly for entry point functions */
28797+/*
28798+ * __si_read_lock, __si_write_lock,
28799+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
28800+ */
28801+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
28802+
28803+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
28804+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
28805+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
28806+
28807+static inline void si_noflush_read_lock(struct super_block *sb)
28808+{
28809+ __si_read_lock(sb);
28810+ si_pid_set(sb);
28811+}
28812+
28813+static inline int si_noflush_read_trylock(struct super_block *sb)
28814+{
28815+ int locked;
28816+
28817+ locked = __si_read_trylock(sb);
28818+ if (locked)
28819+ si_pid_set(sb);
28820+ return locked;
28821+}
28822+
28823+static inline void si_noflush_write_lock(struct super_block *sb)
28824+{
28825+ __si_write_lock(sb);
28826+ si_pid_set(sb);
28827+}
28828+
28829+static inline int si_noflush_write_trylock(struct super_block *sb)
28830+{
28831+ int locked;
28832+
28833+ locked = __si_write_trylock(sb);
28834+ if (locked)
28835+ si_pid_set(sb);
28836+ return locked;
28837+}
28838+
28839+#if 0 /* reserved */
28840+static inline int si_read_trylock(struct super_block *sb, int flags)
28841+{
28842+ if (au_ftest_lock(flags, FLUSH))
28843+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28844+ return si_noflush_read_trylock(sb);
28845+}
28846+#endif
28847+
28848+static inline void si_read_unlock(struct super_block *sb)
28849+{
28850+ si_pid_clr(sb);
28851+ __si_read_unlock(sb);
28852+}
28853+
28854+#if 0 /* reserved */
28855+static inline int si_write_trylock(struct super_block *sb, int flags)
28856+{
28857+ if (au_ftest_lock(flags, FLUSH))
28858+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28859+ return si_noflush_write_trylock(sb);
28860+}
28861+#endif
28862+
28863+static inline void si_write_unlock(struct super_block *sb)
28864+{
28865+ si_pid_clr(sb);
28866+ __si_write_unlock(sb);
28867+}
28868+
28869+#if 0 /* reserved */
28870+static inline void si_downgrade_lock(struct super_block *sb)
28871+{
28872+ __si_downgrade_lock(sb);
28873+}
28874+#endif
28875+
28876+/* ---------------------------------------------------------------------- */
28877+
28878+static inline aufs_bindex_t au_sbbot(struct super_block *sb)
28879+{
28880+ SiMustAnyLock(sb);
28881+ return au_sbi(sb)->si_bbot;
28882+}
28883+
28884+static inline unsigned int au_mntflags(struct super_block *sb)
28885+{
28886+ SiMustAnyLock(sb);
28887+ return au_sbi(sb)->si_mntflags;
28888+}
28889+
28890+static inline unsigned int au_sigen(struct super_block *sb)
28891+{
28892+ SiMustAnyLock(sb);
28893+ return au_sbi(sb)->si_generation;
28894+}
28895+
28896+static inline unsigned long long au_ninodes(struct super_block *sb)
28897+{
28898+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_ninodes);
28899+
28900+ BUG_ON(n < 0);
28901+ return n;
28902+}
28903+
28904+static inline void au_ninodes_inc(struct super_block *sb)
28905+{
28906+ percpu_counter_inc(&au_sbi(sb)->si_ninodes);
28907+}
28908+
28909+static inline void au_ninodes_dec(struct super_block *sb)
28910+{
28911+ percpu_counter_dec(&au_sbi(sb)->si_ninodes);
28912+}
28913+
28914+static inline unsigned long long au_nfiles(struct super_block *sb)
28915+{
28916+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_nfiles);
28917+
28918+ BUG_ON(n < 0);
28919+ return n;
28920+}
28921+
28922+static inline void au_nfiles_inc(struct super_block *sb)
28923+{
28924+ percpu_counter_inc(&au_sbi(sb)->si_nfiles);
28925+}
28926+
28927+static inline void au_nfiles_dec(struct super_block *sb)
28928+{
28929+ percpu_counter_dec(&au_sbi(sb)->si_nfiles);
28930+}
28931+
28932+static inline struct au_branch *au_sbr(struct super_block *sb,
28933+ aufs_bindex_t bindex)
28934+{
28935+ SiMustAnyLock(sb);
28936+ return au_sbi(sb)->si_branch[0 + bindex];
28937+}
28938+
28939+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
28940+{
28941+ SiMustWriteLock(sb);
28942+ au_sbi(sb)->si_xino_brid = brid;
28943+}
28944+
28945+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
28946+{
28947+ SiMustAnyLock(sb);
28948+ return au_sbi(sb)->si_xino_brid;
28949+}
28950+
28951+#endif /* __KERNEL__ */
28952+#endif /* __AUFS_SUPER_H__ */
28953diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
28954--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
28955+++ linux/fs/aufs/sysaufs.c 2016-10-09 16:55:36.496035060 +0200
28956@@ -0,0 +1,104 @@
28957+/*
28958+ * Copyright (C) 2005-2016 Junjiro R. Okajima
28959+ *
28960+ * This program, aufs is free software; you can redistribute it and/or modify
28961+ * it under the terms of the GNU General Public License as published by
28962+ * the Free Software Foundation; either version 2 of the License, or
28963+ * (at your option) any later version.
28964+ *
28965+ * This program is distributed in the hope that it will be useful,
28966+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28967+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28968+ * GNU General Public License for more details.
28969+ *
28970+ * You should have received a copy of the GNU General Public License
28971+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
28972+ */
28973+
28974+/*
28975+ * sysfs interface and lifetime management
28976+ * they are necessary regardless of whether sysfs is disabled.
28977+ */
28978+
28979+#include <linux/random.h>
28980+#include "aufs.h"
28981+
28982+unsigned long sysaufs_si_mask;
28983+struct kset *sysaufs_kset;
28984+
28985+#define AuSiAttr(_name) { \
28986+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
28987+ .show = sysaufs_si_##_name, \
28988+}
28989+
28990+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
28991+struct attribute *sysaufs_si_attrs[] = {
28992+ &sysaufs_si_attr_xi_path.attr,
28993+ NULL,
28994+};
28995+
28996+static const struct sysfs_ops au_sbi_ops = {
28997+ .show = sysaufs_si_show
28998+};
28999+
29000+static struct kobj_type au_sbi_ktype = {
29001+ .release = au_si_free,
29002+ .sysfs_ops = &au_sbi_ops,
29003+ .default_attrs = sysaufs_si_attrs
29004+};
29005+
29006+/* ---------------------------------------------------------------------- */
29007+
29008+int sysaufs_si_init(struct au_sbinfo *sbinfo)
29009+{
29010+ int err;
29011+ /* add the per-mount kobject to sysaufs_kset, then call dbgaufs_si_init() */
29012+ sbinfo->si_kobj.kset = sysaufs_kset;
29013+ /* cf. sysaufs_name() */
29014+ err = kobject_init_and_add
29015+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
29016+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
29017+ /* NOTE(review): no kobject_put() if the call above fails - confirm caller cleanup */
29018+ dbgaufs_si_null(sbinfo);
29019+ if (!err) {
29020+ err = dbgaufs_si_init(sbinfo);
29021+ if (unlikely(err))
29022+ kobject_put(&sbinfo->si_kobj); /* release hook is au_si_free() */
29023+ }
29024+ return err;
29025+}
29026+
29027+void sysaufs_fin(void)
29028+{
29029+ dbgaufs_fin(); /* teardown runs in the reverse order of sysaufs_init() */
29030+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
29031+ kset_unregister(sysaufs_kset);
29032+}
29033+
29034+int __init sysaufs_init(void)
29035+{
29036+ int err;
29037+
29038+ do {
29039+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
29040+ } while (!sysaufs_si_mask);
29041+
29042+ err = -EINVAL;
29043+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
29044+ if (unlikely(!sysaufs_kset))
29045+ goto out;
29046+ err = PTR_ERR(sysaufs_kset);
29047+ if (IS_ERR(sysaufs_kset))
29048+ goto out;
29049+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
29050+ if (unlikely(err)) {
29051+ kset_unregister(sysaufs_kset);
29052+ goto out;
29053+ }
29054+
29055+ err = dbgaufs_init();
29056+ if (unlikely(err))
29057+ sysaufs_fin();
29058+out:
29059+ return err;
29060+}
29061diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
29062--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
29063+++ linux/fs/aufs/sysaufs.h 2016-10-09 16:55:36.496035060 +0200
29064@@ -0,0 +1,101 @@
29065+/*
29066+ * Copyright (C) 2005-2016 Junjiro R. Okajima
29067+ *
29068+ * This program, aufs is free software; you can redistribute it and/or modify
29069+ * it under the terms of the GNU General Public License as published by
29070+ * the Free Software Foundation; either version 2 of the License, or
29071+ * (at your option) any later version.
29072+ *
29073+ * This program is distributed in the hope that it will be useful,
29074+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29075+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29076+ * GNU General Public License for more details.
29077+ *
29078+ * You should have received a copy of the GNU General Public License
29079+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
29080+ */
29081+
29082+/*
29083+ * sysfs interface and mount lifetime management
29084+ */
29085+
29086+#ifndef __SYSAUFS_H__
29087+#define __SYSAUFS_H__
29088+
29089+#ifdef __KERNEL__
29090+
29091+#include <linux/sysfs.h>
29092+#include "module.h"
29093+
29094+struct super_block;
29095+struct au_sbinfo;
29096+
29097+struct sysaufs_si_attr {
29098+ struct attribute attr;
29099+ int (*show)(struct seq_file *seq, struct super_block *sb);
29100+};
29101+
29102+/* ---------------------------------------------------------------------- */
29103+
29104+/* sysaufs.c */
29105+extern unsigned long sysaufs_si_mask;
29106+extern struct kset *sysaufs_kset;
29107+extern struct attribute *sysaufs_si_attrs[];
29108+int sysaufs_si_init(struct au_sbinfo *sbinfo);
29109+int __init sysaufs_init(void);
29110+void sysaufs_fin(void);
29111+
29112+/* ---------------------------------------------------------------------- */
29113+
29114+/* some people don't like to show a pointer in kernel */
29115+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
29116+{
29117+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
29118+}
29119+
29120+#define SysaufsSiNamePrefix "si_"
29121+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
29122+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
29123+{
29124+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
29125+ sysaufs_si_id(sbinfo));
29126+}
29127+
29128+struct au_branch;
29129+#ifdef CONFIG_SYSFS
29130+/* sysfs.c */
29131+extern struct attribute_group *sysaufs_attr_group;
29132+
29133+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
29134+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
29135+ char *buf);
29136+long au_brinfo_ioctl(struct file *file, unsigned long arg);
29137+#ifdef CONFIG_COMPAT
29138+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
29139+#endif
29140+
29141+void sysaufs_br_init(struct au_branch *br);
29142+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
29143+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
29144+
29145+#define sysaufs_brs_init() do {} while (0)
29146+
29147+#else
29148+#define sysaufs_attr_group NULL
29149+
29150+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
29151+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
29152+ struct attribute *attr, char *buf)
29153+AuStubVoid(sysaufs_br_init, struct au_branch *br)
29154+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
29155+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
29156+
29157+static inline void sysaufs_brs_init(void)
29158+{
29159+ sysaufs_brs = 0;
29160+}
29161+
29162+#endif /* CONFIG_SYSFS */
29163+
29164+#endif /* __KERNEL__ */
29165+#endif /* __SYSAUFS_H__ */
29166diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
29167--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
29168+++ linux/fs/aufs/sysfs.c 2016-10-09 16:55:36.496035060 +0200
29169@@ -0,0 +1,376 @@
29170+/*
29171+ * Copyright (C) 2005-2016 Junjiro R. Okajima
29172+ *
29173+ * This program, aufs is free software; you can redistribute it and/or modify
29174+ * it under the terms of the GNU General Public License as published by
29175+ * the Free Software Foundation; either version 2 of the License, or
29176+ * (at your option) any later version.
29177+ *
29178+ * This program is distributed in the hope that it will be useful,
29179+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29180+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29181+ * GNU General Public License for more details.
29182+ *
29183+ * You should have received a copy of the GNU General Public License
29184+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
29185+ */
29186+
29187+/*
29188+ * sysfs interface
29189+ */
29190+
29191+#include <linux/compat.h>
29192+#include <linux/seq_file.h>
29193+#include "aufs.h"
29194+
29195+#ifdef CONFIG_AUFS_FS_MODULE
29196+/* this entry violates the "one line per file" policy of sysfs */
29197+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
29198+ char *buf)
29199+{
29200+ ssize_t err;
29201+ static char *conf =
29202+/* conf.str is generated at build time; it is data, never a format string */
29203+#include "conf.str"
29204+ ;
29205+
29206+ err = snprintf(buf, PAGE_SIZE, "%s", conf);
29207+ if (unlikely(err >= PAGE_SIZE))
29208+ err = -EFBIG;
29209+ return err;
29210+}
29211+
29212+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
29213+#endif
29214+
29215+static struct attribute *au_attr[] = {
29216+#ifdef CONFIG_AUFS_FS_MODULE
29217+ &au_config_attr.attr,
29218+#endif
29219+ NULL, /* need to NULL terminate the list of attributes */
29220+};
29221+
29222+static struct attribute_group sysaufs_attr_group_body = {
29223+ .attrs = au_attr
29224+};
29225+
29226+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
29227+
29228+/* ---------------------------------------------------------------------- */
29229+
29230+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
29231+{
29232+ int err;
29233+
29234+ SiMustAnyLock(sb);
29235+
29236+ err = 0;
29237+ if (au_opt_test(au_mntflags(sb), XINO)) {
29238+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
29239+ seq_putc(seq, '\n');
29240+ }
29241+ return err;
29242+}
29243+
29244+/*
29245+ * the lifetime of branch is independent from the entry under sysfs.
29246+ * sysfs handles the lifetime of the entry, and never calls ->show() after it is
29247+ * unlinked.
29248+ */
29249+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
29250+ aufs_bindex_t bindex, int idx)
29251+{
29252+ int err;
29253+ struct path path;
29254+ struct dentry *root;
29255+ struct au_branch *br;
29256+ au_br_perm_str_t perm;
29257+
29258+ AuDbg("b%d\n", bindex);
29259+
29260+ err = 0;
29261+ root = sb->s_root;
29262+ di_read_lock_parent(root, !AuLock_IR);
29263+ br = au_sbr(sb, bindex);
29264+
29265+ switch (idx) {
29266+ case AuBrSysfs_BR:
29267+ path.mnt = au_br_mnt(br);
29268+ path.dentry = au_h_dptr(root, bindex);
29269+ err = au_seq_path(seq, &path);
29270+ if (!err) {
29271+ au_optstr_br_perm(&perm, br->br_perm);
29272+ seq_printf(seq, "=%s\n", perm.a);
29273+ }
29274+ break;
29275+ case AuBrSysfs_BRID:
29276+ seq_printf(seq, "%d\n", br->br_id);
29277+ break;
29278+ }
29279+ di_read_unlock(root, !AuLock_IR);
29280+ if (unlikely(err || seq_has_overflowed(seq)))
29281+ err = -E2BIG;
29282+
29283+ return err;
29284+}
29285+
29286+/* ---------------------------------------------------------------------- */
29287+
29288+static struct seq_file *au_seq(char *p, ssize_t len)
29289+{
29290+ struct seq_file *seq;
29291+
29292+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
29293+ if (seq) {
29294+ /* mutex_init(&seq.lock); */
29295+ seq->buf = p;
29296+ seq->size = len;
29297+ return seq; /* success */
29298+ }
29299+
29300+ seq = ERR_PTR(-ENOMEM);
29301+ return seq;
29302+}
29303+
29304+#define SysaufsBr_PREFIX "br"
29305+#define SysaufsBrid_PREFIX "brid"
29306+
29307+/* todo: file size may exceed PAGE_SIZE */
29308+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
29309+ char *buf)
29310+{
29311+ ssize_t err;
29312+ int idx;
29313+ long l;
29314+ aufs_bindex_t bbot;
29315+ struct au_sbinfo *sbinfo;
29316+ struct super_block *sb;
29317+ struct seq_file *seq;
29318+ char *name;
29319+ struct attribute **cattr;
29320+
29321+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
29322+ sb = sbinfo->si_sb;
29323+
29324+ /*
29325+ * prevent a race condition between sysfs and aufs.
29326+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
29327+ * prohibits maintaining the sysfs entries.
29328+ * here we acquire the read lock after sysfs_get_active_two().
29329+ * on the other hand, the remount process may maintain the sysfs/aufs
29330+ * entries after acquiring the write lock.
29331+ * it can cause a deadlock.
29332+ * so we simply give up processing the read here.
29333+ */
29334+ err = -EBUSY;
29335+ if (unlikely(!si_noflush_read_trylock(sb)))
29336+ goto out;
29337+
29338+ seq = au_seq(buf, PAGE_SIZE);
29339+ err = PTR_ERR(seq);
29340+ if (IS_ERR(seq))
29341+ goto out_unlock;
29342+
29343+ name = (void *)attr->name;
29344+ cattr = sysaufs_si_attrs;
29345+ while (*cattr) {
29346+ if (!strcmp(name, (*cattr)->name)) {
29347+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
29348+ ->show(seq, sb);
29349+ goto out_seq;
29350+ }
29351+ cattr++;
29352+ }
29353+
29354+ if (!strncmp(name, SysaufsBrid_PREFIX,
29355+ sizeof(SysaufsBrid_PREFIX) - 1)) {
29356+ idx = AuBrSysfs_BRID;
29357+ name += sizeof(SysaufsBrid_PREFIX) - 1;
29358+ } else if (!strncmp(name, SysaufsBr_PREFIX,
29359+ sizeof(SysaufsBr_PREFIX) - 1)) {
29360+ idx = AuBrSysfs_BR;
29361+ name += sizeof(SysaufsBr_PREFIX) - 1;
29362+ } else
29363+ BUG();
29364+
29365+ err = kstrtol(name, 10, &l);
29366+ if (!err) {
29367+ bbot = au_sbbot(sb);
29368+ if (l <= bbot)
29369+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
29370+ else
29371+ err = -ENOENT;
29372+ }
29373+
29374+out_seq:
29375+ if (!err) {
29376+ err = seq->count;
29377+ /* sysfs limit */
29378+ if (unlikely(err == PAGE_SIZE))
29379+ err = -EFBIG;
29380+ }
29381+ au_delayed_kfree(seq);
29382+out_unlock:
29383+ si_read_unlock(sb);
29384+out:
29385+ return err;
29386+}
29387+
29388+/* ---------------------------------------------------------------------- */
29389+
29390+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
29391+{
29392+ int err;
29393+ int16_t brid;
29394+ aufs_bindex_t bindex, bbot;
29395+ size_t sz;
29396+ char *buf;
29397+ struct seq_file *seq;
29398+ struct au_branch *br;
29399+
29400+ si_read_lock(sb, AuLock_FLUSH);
29401+ bbot = au_sbbot(sb);
29402+ err = bbot + 1; /* NULL arg: only bbot + 1 is returned */
29403+ if (!arg)
29404+ goto out;
29405+
29406+ err = -ENOMEM;
29407+ buf = (void *)__get_free_page(GFP_NOFS);
29408+ if (unlikely(!buf))
29409+ goto out;
29410+
29411+ seq = au_seq(buf, PAGE_SIZE);
29412+ err = PTR_ERR(seq);
29413+ if (IS_ERR(seq))
29414+ goto out_buf;
29415+
29416+ sz = sizeof(*arg) - offsetof(union aufs_brinfo, path); /* room for path */
29417+ for (bindex = 0; bindex <= bbot; bindex++, arg++) {
29418+ err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
29419+ if (unlikely(err))
29420+ break;
29421+
29422+ br = au_sbr(sb, bindex);
29423+ brid = br->br_id;
29424+ BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
29425+ err = __put_user(brid, &arg->id);
29426+ if (unlikely(err))
29427+ break;
29428+
29429+ BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
29430+ err = __put_user(br->br_perm, &arg->perm);
29431+ if (unlikely(err))
29432+ break;
29433+
29434+ err = au_seq_path(seq, &br->br_path);
29435+ if (unlikely(err))
29436+ break;
29437+ seq_putc(seq, '\0');
29438+ if (!seq_has_overflowed(seq) && seq->count <= sz) { /* stay in entry */
29439+ err = copy_to_user(arg->path, seq->buf, seq->count);
29440+ seq->count = 0;
29441+ if (unlikely(err))
29442+ break;
29443+ } else {
29444+ err = -E2BIG;
29445+ goto out_seq;
29446+ }
29447+ }
29448+ if (unlikely(err))
29449+ err = -EFAULT;
29450+
29451+out_seq:
29452+ au_delayed_kfree(seq);
29453+out_buf:
29454+ au_delayed_free_page((unsigned long)buf);
29455+out:
29456+ si_read_unlock(sb);
29457+ return err;
29458+}
29459+
29460+long au_brinfo_ioctl(struct file *file, unsigned long arg)
29461+{
29462+ return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
29463+}
29464+
29465+#ifdef CONFIG_COMPAT
29466+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
29467+{
29468+ return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
29469+}
29470+#endif
29471+
29472+/* ---------------------------------------------------------------------- */
29473+
29474+void sysaufs_br_init(struct au_branch *br)
29475+{
29476+ int i;
29477+ struct au_brsysfs *br_sysfs;
29478+ struct attribute *attr;
29479+
29480+ br_sysfs = br->br_sysfs;
29481+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29482+ attr = &br_sysfs->attr;
29483+ sysfs_attr_init(attr);
29484+ attr->name = br_sysfs->name;
29485+ attr->mode = S_IRUGO;
29486+ br_sysfs++;
29487+ }
29488+}
29489+
29490+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
29491+{
29492+ struct au_branch *br;
29493+ struct kobject *kobj;
29494+ struct au_brsysfs *br_sysfs;
29495+ int i;
29496+ aufs_bindex_t bbot;
29497+
29498+ dbgaufs_brs_del(sb, bindex);
29499+
29500+ if (!sysaufs_brs)
29501+ return;
29502+
29503+ kobj = &au_sbi(sb)->si_kobj;
29504+ bbot = au_sbbot(sb);
29505+ for (; bindex <= bbot; bindex++) {
29506+ br = au_sbr(sb, bindex);
29507+ br_sysfs = br->br_sysfs;
29508+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29509+ sysfs_remove_file(kobj, &br_sysfs->attr);
29510+ br_sysfs++;
29511+ }
29512+ }
29513+}
29514+
29515+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
29516+{
29517+ int err, i;
29518+ aufs_bindex_t bbot;
29519+ struct kobject *kobj;
29520+ struct au_branch *br;
29521+ struct au_brsysfs *br_sysfs;
29522+
29523+ dbgaufs_brs_add(sb, bindex);
29524+
29525+ if (!sysaufs_brs)
29526+ return;
29527+
29528+ kobj = &au_sbi(sb)->si_kobj;
29529+ bbot = au_sbbot(sb);
29530+ for (; bindex <= bbot; bindex++) {
29531+ br = au_sbr(sb, bindex);
29532+ br_sysfs = br->br_sysfs;
29533+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
29534+ SysaufsBr_PREFIX "%d", bindex);
29535+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
29536+ SysaufsBrid_PREFIX "%d", bindex);
29537+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29538+ err = sysfs_create_file(kobj, &br_sysfs->attr);
29539+ if (unlikely(err))
29540+ pr_warn("failed %s under sysfs(%d)\n",
29541+ br_sysfs->name, err);
29542+ br_sysfs++;
29543+ }
29544+ }
29545+}
29546diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
29547--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
29548+++ linux/fs/aufs/sysrq.c 2016-10-09 16:55:36.496035060 +0200
29549@@ -0,0 +1,157 @@
29550+/*
29551+ * Copyright (C) 2005-2016 Junjiro R. Okajima
29552+ *
29553+ * This program, aufs is free software; you can redistribute it and/or modify
29554+ * it under the terms of the GNU General Public License as published by
29555+ * the Free Software Foundation; either version 2 of the License, or
29556+ * (at your option) any later version.
29557+ *
29558+ * This program is distributed in the hope that it will be useful,
29559+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29560+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29561+ * GNU General Public License for more details.
29562+ *
29563+ * You should have received a copy of the GNU General Public License
29564+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
29565+ */
29566+
29567+/*
29568+ * magic sysrq handler
29569+ */
29570+
29571+/* #include <linux/sysrq.h> */
29572+#include <linux/writeback.h>
29573+#include "aufs.h"
29574+
29575+/* ---------------------------------------------------------------------- */
29576+
29577+static void sysrq_sb(struct super_block *sb)
29578+{
29579+ char *plevel;
29580+ struct au_sbinfo *sbinfo;
29581+ struct file *file;
29582+ struct au_sphlhead *files;
29583+ struct au_finfo *finfo;
29584+
29585+ plevel = au_plevel;
29586+ au_plevel = KERN_WARNING;
29587+
29588+ /* since we define pr_fmt, call printk directly */
29589+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
29590+
29591+ sbinfo = au_sbi(sb);
29592+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
29593+ pr("superblock\n");
29594+ au_dpri_sb(sb);
29595+
29596+#if 0
29597+ pr("root dentry\n");
29598+ au_dpri_dentry(sb->s_root);
29599+ pr("root inode\n");
29600+ au_dpri_inode(d_inode(sb->s_root));
29601+#endif
29602+
29603+#if 0
29604+ do {
29605+ int err, i, j, ndentry;
29606+ struct au_dcsub_pages dpages;
29607+ struct au_dpage *dpage;
29608+
29609+ err = au_dpages_init(&dpages, GFP_ATOMIC);
29610+ if (unlikely(err))
29611+ break;
29612+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
29613+ if (!err)
29614+ for (i = 0; i < dpages.ndpage; i++) {
29615+ dpage = dpages.dpages + i;
29616+ ndentry = dpage->ndentry;
29617+ for (j = 0; j < ndentry; j++)
29618+ au_dpri_dentry(dpage->dentries[j]);
29619+ }
29620+ au_dpages_free(&dpages);
29621+ } while (0);
29622+#endif
29623+
29624+#if 1
29625+ {
29626+ struct inode *i;
29627+
29628+ pr("isolated inode\n");
29629+ spin_lock(&sb->s_inode_list_lock);
29630+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
29631+ spin_lock(&i->i_lock);
29632+ if (1 || hlist_empty(&i->i_dentry))
29633+ au_dpri_inode(i);
29634+ spin_unlock(&i->i_lock);
29635+ }
29636+ spin_unlock(&sb->s_inode_list_lock);
29637+ }
29638+#endif
29639+ pr("files\n");
29640+ files = &au_sbi(sb)->si_files;
29641+ spin_lock(&files->spin);
29642+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
29643+ umode_t mode;
29644+
29645+ file = finfo->fi_file;
29646+ mode = file_inode(file)->i_mode;
29647+ if (!special_file(mode))
29648+ au_dpri_file(file);
29649+ }
29650+ spin_unlock(&files->spin);
29651+ pr("done\n");
29652+
29653+#undef pr
29654+ au_plevel = plevel;
29655+}
29656+
29657+/* ---------------------------------------------------------------------- */
29658+
29659+/* module parameter */
29660+static char *aufs_sysrq_key = "a";
29661+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
29662+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
29663+
29664+static void au_sysrq(int key __maybe_unused)
29665+{
29666+ struct au_sbinfo *sbinfo;
29667+
29668+ lockdep_off();
29669+ au_sbilist_lock();
29670+ hlist_for_each_entry(sbinfo, &au_sbilist.head, si_list)
29671+ sysrq_sb(sbinfo->si_sb);
29672+ au_sbilist_unlock();
29673+ lockdep_on();
29674+}
29675+
29676+static struct sysrq_key_op au_sysrq_op = {
29677+ .handler = au_sysrq,
29678+ .help_msg = "Aufs",
29679+ .action_msg = "Aufs",
29680+ .enable_mask = SYSRQ_ENABLE_DUMP
29681+};
29682+
29683+/* ---------------------------------------------------------------------- */
29684+
29685+int __init au_sysrq_init(void)
29686+{
29687+ int err;
29688+ char key;
29689+
29690+ err = -1;
29691+ key = *aufs_sysrq_key;
29692+ if ('a' <= key && key <= 'z')
29693+ err = register_sysrq_key(key, &au_sysrq_op);
29694+ if (unlikely(err))
29695+ pr_err("err %d, sysrq=%c\n", err, key);
29696+ return err;
29697+}
29698+
29699+void au_sysrq_fin(void)
29700+{
29701+ int err;
29702+
29703+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
29704+ if (unlikely(err))
29705+ pr_err("err %d (ignored)\n", err);
29706+}
29707diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
29708--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
29709+++ linux/fs/aufs/vdir.c 2016-10-09 16:55:38.889431135 +0200
29710@@ -0,0 +1,900 @@
29711+/*
29712+ * Copyright (C) 2005-2016 Junjiro R. Okajima
29713+ *
29714+ * This program, aufs is free software; you can redistribute it and/or modify
29715+ * it under the terms of the GNU General Public License as published by
29716+ * the Free Software Foundation; either version 2 of the License, or
29717+ * (at your option) any later version.
29718+ *
29719+ * This program is distributed in the hope that it will be useful,
29720+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29721+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29722+ * GNU General Public License for more details.
29723+ *
29724+ * You should have received a copy of the GNU General Public License
29725+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
29726+ */
29727+
29728+/*
29729+ * virtual or vertical directory
29730+ */
29731+
29732+#include "aufs.h"
29733+
29734+static unsigned int calc_size(int nlen)
29735+{
29736+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
29737+}
29738+
29739+static int set_deblk_end(union au_vdir_deblk_p *p,
29740+ union au_vdir_deblk_p *deblk_end)
29741+{
29742+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
29743+ p->de->de_str.len = 0;
29744+ /* smp_mb(); */
29745+ return 0;
29746+ }
29747+ return -1; /* error */
29748+}
29749+
29750+/* returns true or false */
29751+static int is_deblk_end(union au_vdir_deblk_p *p,
29752+ union au_vdir_deblk_p *deblk_end)
29753+{
29754+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
29755+ return !p->de->de_str.len;
29756+ return 1;
29757+}
29758+
29759+static unsigned char *last_deblk(struct au_vdir *vdir)
29760+{
29761+ return vdir->vd_deblk[vdir->vd_nblk - 1];
29762+}
29763+
29764+/* ---------------------------------------------------------------------- */
29765+
29766+/* estimate the appropriate size for name hash table */
29767+unsigned int au_rdhash_est(loff_t sz)
29768+{
29769+ unsigned int n;
29770+
29771+ n = UINT_MAX;
29772+ sz >>= 10;
29773+ if (sz < n)
29774+ n = sz;
29775+ if (sz < AUFS_RDHASH_DEF)
29776+ n = AUFS_RDHASH_DEF;
29777+ /* pr_info("n %u\n", n); */
29778+ return n;
29779+}
29780+
29781+/*
29782+ * the allocated memory has to be freed by
29783+ * au_nhash_wh_free() or au_nhash_de_free().
29784+ */
29785+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
29786+{
29787+ struct hlist_head *head;
29788+ unsigned int u;
29789+ size_t sz;
29790+
29791+ sz = sizeof(*nhash->nh_head);
29792+ head = kmalloc_array(num_hash, sz, gfp); /* NULL if num_hash * sz wraps */
29793+ if (head) {
29794+ nhash->nh_num = num_hash;
29795+ nhash->nh_head = head;
29796+ for (u = 0; u < num_hash; u++)
29797+ INIT_HLIST_HEAD(head++);
29798+ return 0; /* success */
29799+ }
29800+
29801+ return -ENOMEM;
29802+}
29803+
29804+static void nhash_count(struct hlist_head *head)
29805+{
29806+#if 0
29807+ unsigned long n;
29808+ struct hlist_node *pos;
29809+
29810+ n = 0;
29811+ hlist_for_each(pos, head)
29812+ n++;
29813+ pr_info("%lu\n", n);
29814+#endif
29815+}
29816+
29817+static void au_nhash_wh_do_free(struct hlist_head *head)
29818+{
29819+ struct au_vdir_wh *pos;
29820+ struct hlist_node *node;
29821+
29822+ hlist_for_each_entry_safe(pos, node, head, wh_hash)
29823+ au_delayed_kfree(pos);
29824+}
29825+
29826+static void au_nhash_de_do_free(struct hlist_head *head)
29827+{
29828+ struct au_vdir_dehstr *pos;
29829+ struct hlist_node *node;
29830+
29831+ hlist_for_each_entry_safe(pos, node, head, hash)
29832+ au_cache_dfree_vdir_dehstr(pos);
29833+}
29834+
29835+static void au_nhash_do_free(struct au_nhash *nhash,
29836+ void (*free)(struct hlist_head *head))
29837+{
29838+ unsigned int n;
29839+ struct hlist_head *head;
29840+
29841+ n = nhash->nh_num;
29842+ if (!n)
29843+ return;
29844+
29845+ head = nhash->nh_head;
29846+ while (n-- > 0) {
29847+ nhash_count(head);
29848+ free(head++);
29849+ }
29850+ au_delayed_kfree(nhash->nh_head);
29851+}
29852+
29853+void au_nhash_wh_free(struct au_nhash *whlist)
29854+{
29855+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
29856+}
29857+
29858+static void au_nhash_de_free(struct au_nhash *delist)
29859+{
29860+ au_nhash_do_free(delist, au_nhash_de_do_free);
29861+}
29862+
29863+/* ---------------------------------------------------------------------- */
29864+
29865+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
29866+ int limit)
29867+{
29868+ int num;
29869+ unsigned int u, n;
29870+ struct hlist_head *head;
29871+ struct au_vdir_wh *pos;
29872+
29873+ num = 0;
29874+ n = whlist->nh_num;
29875+ head = whlist->nh_head;
29876+ for (u = 0; u < n; u++, head++)
29877+ hlist_for_each_entry(pos, head, wh_hash)
29878+ if (pos->wh_bindex == btgt && ++num > limit)
29879+ return 1;
29880+ return 0;
29881+}
29882+
29883+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
29884+ unsigned char *name,
29885+ unsigned int len)
29886+{
29887+ unsigned int v;
29888+ /* const unsigned int magic_bit = 12; */
29889+
29890+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
29891+
29892+ v = 0;
29893+ if (len > 8)
29894+ len = 8;
29895+ while (len--)
29896+ v += *name++;
29897+ /* v = hash_long(v, magic_bit); */
29898+ v %= nhash->nh_num;
29899+ return nhash->nh_head + v;
29900+}
29901+
29902+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
29903+ int nlen)
29904+{
29905+ return str->len == nlen && !memcmp(str->name, name, nlen);
29906+}
29907+
29908+/* returns found or not */
29909+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
29910+{
29911+ struct hlist_head *head;
29912+ struct au_vdir_wh *pos;
29913+ struct au_vdir_destr *str;
29914+
29915+ head = au_name_hash(whlist, name, nlen);
29916+ hlist_for_each_entry(pos, head, wh_hash) {
29917+ str = &pos->wh_str;
29918+ AuDbg("%.*s\n", str->len, str->name);
29919+ if (au_nhash_test_name(str, name, nlen))
29920+ return 1;
29921+ }
29922+ return 0;
29923+}
29924+
29925+/* returns found(true) or not */
29926+static int test_known(struct au_nhash *delist, char *name, int nlen)
29927+{
29928+ struct hlist_head *head;
29929+ struct au_vdir_dehstr *pos;
29930+ struct au_vdir_destr *str;
29931+
29932+ head = au_name_hash(delist, name, nlen);
29933+ hlist_for_each_entry(pos, head, hash) {
29934+ str = pos->str;
29935+ AuDbg("%.*s\n", str->len, str->name);
29936+ if (au_nhash_test_name(str, name, nlen))
29937+ return 1;
29938+ }
29939+ return 0;
29940+}
29941+
29942+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
29943+ unsigned char d_type)
29944+{
29945+#ifdef CONFIG_AUFS_SHWH
29946+ wh->wh_ino = ino;
29947+ wh->wh_type = d_type;
29948+#endif
29949+}
29950+
29951+/* ---------------------------------------------------------------------- */
29952+
29953+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
29954+ unsigned int d_type, aufs_bindex_t bindex,
29955+ unsigned char shwh)
29956+{
29957+ int err;
29958+ struct au_vdir_destr *str;
29959+ struct au_vdir_wh *wh;
29960+
29961+ AuDbg("%.*s\n", nlen, name);
29962+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
29963+
29964+ err = -ENOMEM;
29965+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
29966+ if (unlikely(!wh))
29967+ goto out;
29968+
29969+ err = 0;
29970+ wh->wh_bindex = bindex;
29971+ if (shwh)
29972+ au_shwh_init_wh(wh, ino, d_type);
29973+ str = &wh->wh_str;
29974+ str->len = nlen;
29975+ memcpy(str->name, name, nlen);
29976+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
29977+ /* smp_mb(); */
29978+
29979+out:
29980+ return err;
29981+}
29982+
29983+static int append_deblk(struct au_vdir *vdir)
29984+{
29985+ int err;
29986+ unsigned long ul;
29987+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29988+ union au_vdir_deblk_p p, deblk_end;
29989+ unsigned char **o;
29990+
29991+ err = -ENOMEM;
29992+ o = au_krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
29993+ GFP_NOFS, /*may_shrink*/0);
29994+ if (unlikely(!o))
29995+ goto out;
29996+
29997+ vdir->vd_deblk = o;
29998+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
29999+ if (p.deblk) {
30000+ ul = vdir->vd_nblk++;
30001+ vdir->vd_deblk[ul] = p.deblk;
30002+ vdir->vd_last.ul = ul;
30003+ vdir->vd_last.p.deblk = p.deblk;
30004+ deblk_end.deblk = p.deblk + deblk_sz;
30005+ err = set_deblk_end(&p, &deblk_end);
30006+ }
30007+
30008+out:
30009+ return err;
30010+}
30011+
30012+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
30013+ unsigned int d_type, struct au_nhash *delist)
30014+{
30015+ int err;
30016+ unsigned int sz;
30017+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
30018+ union au_vdir_deblk_p p, *room, deblk_end;
30019+ struct au_vdir_dehstr *dehstr;
30020+
30021+ p.deblk = last_deblk(vdir);
30022+ deblk_end.deblk = p.deblk + deblk_sz;
30023+ room = &vdir->vd_last.p;
30024+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
30025+ || !is_deblk_end(room, &deblk_end));
30026+
30027+ sz = calc_size(nlen);
30028+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
30029+ err = append_deblk(vdir);
30030+ if (unlikely(err))
30031+ goto out;
30032+
30033+ p.deblk = last_deblk(vdir);
30034+ deblk_end.deblk = p.deblk + deblk_sz;
30035+ /* smp_mb(); */
30036+ AuDebugOn(room->deblk != p.deblk);
30037+ }
30038+
30039+ err = -ENOMEM;
30040+ dehstr = au_cache_alloc_vdir_dehstr();
30041+ if (unlikely(!dehstr))
30042+ goto out;
30043+
30044+ dehstr->str = &room->de->de_str;
30045+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
30046+ room->de->de_ino = ino;
30047+ room->de->de_type = d_type;
30048+ room->de->de_str.len = nlen;
30049+ memcpy(room->de->de_str.name, name, nlen);
30050+
30051+ err = 0;
30052+ room->deblk += sz;
30053+ if (unlikely(set_deblk_end(room, &deblk_end)))
30054+ err = append_deblk(vdir);
30055+ /* smp_mb(); */
30056+
30057+out:
30058+ return err;
30059+}
30060+
30061+/* ---------------------------------------------------------------------- */
30062+
30063+void au_vdir_free(struct au_vdir *vdir, int atonce)
30064+{
30065+ unsigned char **deblk;
30066+
30067+ deblk = vdir->vd_deblk;
30068+ if (!atonce) {
30069+ while (vdir->vd_nblk--)
30070+ au_delayed_kfree(*deblk++);
30071+ au_delayed_kfree(vdir->vd_deblk);
30072+ au_cache_dfree_vdir(vdir);
30073+ } else {
30074+ /* not delayed */
30075+ while (vdir->vd_nblk--)
30076+ kfree(*deblk++);
30077+ kfree(vdir->vd_deblk);
30078+ au_cache_free_vdir(vdir);
30079+ }
30080+}
30081+/* allocate a vdir for @file, or return ERR_PTR(err) on failure */
30082+static struct au_vdir *alloc_vdir(struct file *file)
30083+{
30084+ struct au_vdir *vdir;
30085+ struct super_block *sb;
30086+ int err;
30087+
30088+ sb = file->f_path.dentry->d_sb;
30089+ SiMustAnyLock(sb);
30090+
30091+ err = -ENOMEM;
30092+ vdir = au_cache_alloc_vdir();
30093+ if (unlikely(!vdir))
30094+ goto out;
30095+ /* the pointer array starts with room for a single deblk pointer */
30096+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
30097+ if (unlikely(!vdir->vd_deblk))
30098+ goto out_free;
30099+
30100+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
30101+ if (!vdir->vd_deblk_sz) {
30102+ /* estimate the appropriate size for deblk */
30103+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
30104+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
30105+ }
30106+ vdir->vd_nblk = 0;
30107+ vdir->vd_version = 0;
30108+ vdir->vd_jiffy = 0;
30109+ err = append_deblk(vdir);
30110+ if (!err)
30111+ return vdir; /* success */
30112+ /* append_deblk() failed: drop the pointer array, then vdir itself */
30113+ au_delayed_kfree(vdir->vd_deblk);
30114+
30115+out_free:
30116+ au_cache_dfree_vdir(vdir);
30117+out:
30118+ vdir = ERR_PTR(err);
30119+ return vdir;
30120+}
30121+/* free all deblks but the first; reset the cursor, version and jiffy */
30122+static int reinit_vdir(struct au_vdir *vdir)
30123+{
30124+ int err;
30125+ union au_vdir_deblk_p p, deblk_end;
30126+
30127+ while (vdir->vd_nblk > 1) {
30128+ au_delayed_kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
30129+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
30130+ vdir->vd_nblk--;
30131+ }
30132+ p.deblk = vdir->vd_deblk[0];
30133+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
30134+ err = set_deblk_end(&p, &deblk_end);
30135+ /* keep vd_deblk_sz */
30136+ vdir->vd_last.ul = 0;
30137+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
30138+ vdir->vd_version = 0;
30139+ vdir->vd_jiffy = 0;
30140+ /* smp_mb(); */
30141+ return err;
30142+}
30143+
30144+/* ---------------------------------------------------------------------- */
30145+/* bits of fillvdir_arg.flags and their test/set/clear helpers */
30146+#define AuFillVdir_CALLED 1
30147+#define AuFillVdir_WHABLE (1 << 1)
30148+#define AuFillVdir_SHWH (1 << 2)
30149+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
30150+#define au_fset_fillvdir(flags, name) \
30151+ do { (flags) |= AuFillVdir_##name; } while (0)
30152+#define au_fclr_fillvdir(flags, name) \
30153+ do { (flags) &= ~AuFillVdir_##name; } while (0)
30154+/* without CONFIG_AUFS_SHWH the SHWH bit is 0, so its test is always false */
30155+#ifndef CONFIG_AUFS_SHWH
30156+#undef AuFillVdir_SHWH
30157+#define AuFillVdir_SHWH 0
30158+#endif
30159+/* state shared between au_do_read_vdir() and the fillvdir() actor */
30160+struct fillvdir_arg {
30161+ struct dir_context ctx; /* fillvdir() finds this struct via container_of() */
30162+ struct file *file;
30163+ struct au_vdir *vdir; /* destination of append_de() */
30164+ struct au_nhash delist; /* names already appended to vdir */
30165+ struct au_nhash whlist; /* whiteouts collected so far */
30166+ aufs_bindex_t bindex; /* branch index currently being read */
30167+ unsigned int flags; /* AuFillVdir_* bits */
30168+ int err; /* result of the latest fillvdir() call */
30169+};
30170+/* dir_context actor: store one lower entry as a dirent or as a whiteout */
30171+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
30172+ loff_t offset __maybe_unused, u64 h_ino,
30173+ unsigned int d_type)
30174+{
30175+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
30176+ char *name = (void *)__name;
30177+ struct super_block *sb;
30178+ ino_t ino;
30179+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
30180+
30181+ arg->err = 0;
30182+ sb = arg->file->f_path.dentry->d_sb;
30183+ au_fset_fillvdir(arg->flags, CALLED);
30184+ /* smp_mb(); */
30185+ if (nlen <= AUFS_WH_PFX_LEN
30186+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
30187+ if (test_known(&arg->delist, name, nlen)
30188+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
30189+ goto out; /* already exists or whiteouted */
30190+ /* new name: get its ino through au_ino() and append it to vdir */
30191+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
30192+ if (!arg->err) {
30193+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
30194+ d_type = DT_UNKNOWN;
30195+ arg->err = append_de(arg->vdir, name, nlen, ino,
30196+ d_type, &arg->delist);
30197+ }
30198+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
30199+ name += AUFS_WH_PFX_LEN;
30200+ nlen -= AUFS_WH_PFX_LEN;
30201+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
30202+ goto out; /* already whiteouted */
30203+ /* NOTE(review): ino is assigned only when shwh; confirm unused otherwise */
30204+ if (shwh)
30205+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
30206+ &ino);
30207+ if (!arg->err) {
30208+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
30209+ d_type = DT_UNKNOWN; /* NOTE(review): '<=' here, '>' above; confirm */
30210+ arg->err = au_nhash_append_wh
30211+ (&arg->whlist, name, nlen, ino, d_type,
30212+ arg->bindex, shwh);
30213+ }
30214+ }
30215+
30216+out:
30217+ if (!arg->err)
30218+ arg->vdir->vd_jiffy = jiffies;
30219+ /* smp_mb(); */
30220+ AuTraceErr(arg->err);
30221+ return arg->err;
30222+}
30223+/* SHWH: append every whiteout in whlist to vdir, whiteout prefix included */
30224+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
30225+ struct au_nhash *whlist, struct au_nhash *delist)
30226+{
30227+#ifdef CONFIG_AUFS_SHWH
30228+ int err;
30229+ unsigned int nh, u;
30230+ struct hlist_head *head;
30231+ struct au_vdir_wh *pos;
30232+ struct hlist_node *n;
30233+ char *p, *o;
30234+ struct au_vdir_destr *destr;
30235+
30236+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
30237+
30238+ err = -ENOMEM;
30239+ o = p = (void *)__get_free_page(GFP_NOFS);
30240+ if (unlikely(!p))
30241+ goto out;
30242+ /* o: start of the scratch page (prefix), p: where each name is copied */
30243+ err = 0;
30244+ nh = whlist->nh_num;
30245+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
30246+ p += AUFS_WH_PFX_LEN;
30247+ for (u = 0; !err && u < nh; u++) {
30248+ head = whlist->nh_head + u;
30249+ hlist_for_each_entry_safe(pos, n, head, wh_hash) {
30250+ destr = &pos->wh_str;
30251+ memcpy(p, destr->name, destr->len);
30252+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
30253+ pos->wh_ino, pos->wh_type, delist);
30254+ if (unlikely(err))
30255+ break; /* leaves this bucket only; !err ends the outer loop */
30256+ }
30257+ }
30258+
30259+ au_delayed_free_page((unsigned long)o);
30260+
30261+out:
30262+ AuTraceErr(err);
30263+ return err;
30264+#else
30265+ return 0;
30266+#endif
30267+}
30268+/* read every lower dir of arg->file into arg->vdir through fillvdir() */
30269+static int au_do_read_vdir(struct fillvdir_arg *arg)
30270+{
30271+ int err;
30272+ unsigned int rdhash;
30273+ loff_t offset;
30274+ aufs_bindex_t bbot, bindex, btop;
30275+ unsigned char shwh;
30276+ struct file *hf, *file;
30277+ struct super_block *sb;
30278+
30279+ file = arg->file;
30280+ sb = file->f_path.dentry->d_sb;
30281+ SiMustAnyLock(sb);
30282+ /* hash size: si_rdhash, or an estimate from the dir size when it is 0 */
30283+ rdhash = au_sbi(sb)->si_rdhash;
30284+ if (!rdhash)
30285+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
30286+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
30287+ if (unlikely(err))
30288+ goto out;
30289+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
30290+ if (unlikely(err))
30291+ goto out_delist;
30292+
30293+ err = 0;
30294+ arg->flags = 0;
30295+ shwh = 0;
30296+ if (au_opt_test(au_mntflags(sb), SHWH)) {
30297+ shwh = 1;
30298+ au_fset_fillvdir(arg->flags, SHWH);
30299+ }
30300+ btop = au_fbtop(file);
30301+ bbot = au_fbbot_dir(file);
30302+ for (bindex = btop; !err && bindex <= bbot; bindex++) {
30303+ hf = au_hf_dir(file, bindex);
30304+ if (!hf)
30305+ continue;
30306+
30307+ offset = vfsub_llseek(hf, 0, SEEK_SET);
30308+ err = offset;
30309+ if (unlikely(offset))
30310+ break;
30311+ /* WHABLE: always under shwh, else a non-bottom au_br_whable() branch */
30312+ arg->bindex = bindex;
30313+ au_fclr_fillvdir(arg->flags, WHABLE);
30314+ if (shwh
30315+ || (bindex != bbot
30316+ && au_br_whable(au_sbr_perm(sb, bindex))))
30317+ au_fset_fillvdir(arg->flags, WHABLE);
30318+ do { /* repeat until a pass in which fillvdir() is never called */
30319+ arg->err = 0;
30320+ au_fclr_fillvdir(arg->flags, CALLED);
30321+ /* smp_mb(); */
30322+ err = vfsub_iterate_dir(hf, &arg->ctx);
30323+ if (err >= 0)
30324+ err = arg->err;
30325+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
30326+
30327+ /*
30328+ * dir_relax() may be good for concurrency, but aufs should not
30329+ * use it since it will cause a lockdep problem.
30330+ */
30331+ }
30332+
30333+ if (!err && shwh)
30334+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
30335+
30336+ au_nhash_wh_free(&arg->whlist);
30337+
30338+out_delist:
30339+ au_nhash_de_free(&arg->delist);
30340+out:
30341+ return err;
30342+}
30343+/* build the inode's vdir when absent, or rebuild it if may_read and stale */
30344+static int read_vdir(struct file *file, int may_read)
30345+{
30346+ int err;
30347+ unsigned long expire;
30348+ unsigned char do_read;
30349+ struct fillvdir_arg arg = {
30350+ .ctx = {
30351+ .actor = fillvdir
30352+ }
30353+ };
30354+ struct inode *inode;
30355+ struct au_vdir *vdir, *allocated;
30356+
30357+ err = 0;
30358+ inode = file_inode(file);
30359+ IMustLock(inode);
30360+ IiMustWriteLock(inode);
30361+ SiMustAnyLock(inode->i_sb);
30362+ /* allocated != NULL marks a vdir not yet attached to the inode */
30363+ allocated = NULL;
30364+ do_read = 0;
30365+ expire = au_sbi(inode->i_sb)->si_rdcache;
30366+ vdir = au_ivdir(inode);
30367+ if (!vdir) {
30368+ do_read = 1;
30369+ vdir = alloc_vdir(file);
30370+ err = PTR_ERR(vdir);
30371+ if (IS_ERR(vdir))
30372+ goto out;
30373+ err = 0;
30374+ allocated = vdir;
30375+ } else if (may_read
30376+ && (inode->i_version != vdir->vd_version
30377+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
30378+ do_read = 1; /* stale: version changed or older than si_rdcache */
30379+ err = reinit_vdir(vdir);
30380+ if (unlikely(err))
30381+ goto out;
30382+ }
30383+
30384+ if (!do_read)
30385+ return 0; /* success */
30386+
30387+ arg.file = file;
30388+ arg.vdir = vdir;
30389+ err = au_do_read_vdir(&arg);
30390+ if (!err) {
30391+ /* file->f_pos = 0; */ /* todo: ctx->pos? */
30392+ vdir->vd_version = inode->i_version;
30393+ vdir->vd_last.ul = 0;
30394+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
30395+ if (allocated)
30396+ au_set_ivdir(inode, allocated);
30397+ } else if (allocated)
30398+ au_vdir_free(allocated, /*atonce*/0);
30399+
30400+out:
30401+ return err;
30402+}
30403+
30404+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
30405+{
30406+ int err, rerr;
30407+ unsigned long ul, n;
30408+ const unsigned int deblk_sz = src->vd_deblk_sz;
30409+
30410+ AuDebugOn(tgt->vd_nblk != 1);
30411+
30412+ err = -ENOMEM;
30413+ if (tgt->vd_nblk < src->vd_nblk) {
30414+ unsigned char **p;
30415+
30416+ p = au_krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
30417+ GFP_NOFS, /*may_shrink*/0);
30418+ if (unlikely(!p))
30419+ goto out;
30420+ tgt->vd_deblk = p;
30421+ }
30422+
30423+ if (tgt->vd_deblk_sz != deblk_sz) {
30424+ unsigned char *p;
30425+
30426+ tgt->vd_deblk_sz = deblk_sz;
30427+ p = au_krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS,
30428+ /*may_shrink*/1);
30429+ if (unlikely(!p))
30430+ goto out;
30431+ tgt->vd_deblk[0] = p;
30432+ }
30433+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
30434+ tgt->vd_version = src->vd_version;
30435+ tgt->vd_jiffy = src->vd_jiffy;
30436+
30437+ n = src->vd_nblk;
30438+ for (ul = 1; ul < n; ul++) {
30439+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
30440+ GFP_NOFS);
30441+ if (unlikely(!tgt->vd_deblk[ul]))
30442+ goto out;
30443+ tgt->vd_nblk++;
30444+ }
30445+ tgt->vd_nblk = n;
30446+ tgt->vd_last.ul = tgt->vd_last.ul;
30447+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
30448+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
30449+ - src->vd_deblk[src->vd_last.ul];
30450+ /* smp_mb(); */
30451+ return 0; /* success */
30452+
30453+out:
30454+ rerr = reinit_vdir(tgt);
30455+ BUG_ON(rerr);
30456+ return err;
30457+}
30458+/* refresh the inode's vdir, then (re)copy it to the file's vdir cache if needed */
30459+int au_vdir_init(struct file *file)
30460+{
30461+ int err;
30462+ struct inode *inode;
30463+ struct au_vdir *vdir_cache, *allocated;
30464+
30465+ /* test file->f_pos here instead of ctx->pos */
30466+ err = read_vdir(file, !file->f_pos);
30467+ if (unlikely(err))
30468+ goto out;
30469+ /* allocated != NULL marks a cache not yet attached to the file */
30470+ allocated = NULL;
30471+ vdir_cache = au_fvdir_cache(file);
30472+ if (!vdir_cache) {
30473+ vdir_cache = alloc_vdir(file);
30474+ err = PTR_ERR(vdir_cache);
30475+ if (IS_ERR(vdir_cache))
30476+ goto out;
30477+ allocated = vdir_cache;
30478+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
30479+ /* test file->f_pos here instead of ctx->pos */
30480+ err = reinit_vdir(vdir_cache);
30481+ if (unlikely(err))
30482+ goto out;
30483+ } else
30484+ return 0; /* success */
30485+
30486+ inode = file_inode(file);
30487+ err = copy_vdir(vdir_cache, au_ivdir(inode));
30488+ if (!err) {
30489+ file->f_version = inode->i_version;
30490+ if (allocated)
30491+ au_set_fvdir_cache(file, allocated);
30492+ } else if (allocated)
30493+ au_vdir_free(allocated, /*atonce*/0);
30494+
30495+out:
30496+ return err;
30497+}
30498+/* byte offset of the vd_last cursor, counting vd_deblk_sz per earlier block */
30499+static loff_t calc_offset(struct au_vdir *vdir)
30500+{
30501+ union au_vdir_deblk_p head;
30502+ loff_t pos;
30503+
30504+ head.deblk = vdir->vd_deblk[vdir->vd_last.ul];
30505+ pos = vdir->vd_deblk_sz * vdir->vd_last.ul;
30506+ pos += vdir->vd_last.p.deblk - head.deblk;
30507+ return pos;
30508+}
30509+/* move the cursor (vd_last) of the file's vdir cache to the entry at ctx->pos */
30510+/* returns true (1) when such a position exists, false (0) otherwise */
30511+static int seek_vdir(struct file *file, struct dir_context *ctx)
30512+{
30513+ int valid;
30514+ unsigned int deblk_sz;
30515+ unsigned long ul, n;
30516+ loff_t offset;
30517+ union au_vdir_deblk_p p, deblk_end;
30518+ struct au_vdir *vdir_cache;
30519+
30520+ valid = 1;
30521+ vdir_cache = au_fvdir_cache(file);
30522+ offset = calc_offset(vdir_cache);
30523+ AuDbg("offset %lld\n", offset);
30524+ if (ctx->pos == offset)
30525+ goto out;
30526+ /* otherwise rewind to the head; pos 0 needs nothing more */
30527+ vdir_cache->vd_last.ul = 0;
30528+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
30529+ if (!ctx->pos)
30530+ goto out;
30531+ /* start at block pos / deblk_sz and walk its entries up to ctx->pos */
30532+ valid = 0;
30533+ deblk_sz = vdir_cache->vd_deblk_sz;
30534+ ul = div64_u64(ctx->pos, deblk_sz);
30535+ AuDbg("ul %lu\n", ul);
30536+ if (ul >= vdir_cache->vd_nblk)
30537+ goto out;
30538+
30539+ n = vdir_cache->vd_nblk;
30540+ for (; ul < n; ul++) {
30541+ p.deblk = vdir_cache->vd_deblk[ul];
30542+ deblk_end.deblk = p.deblk + deblk_sz;
30543+ offset = ul;
30544+ offset *= deblk_sz;
30545+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
30546+ unsigned int l;
30547+
30548+ l = calc_size(p.de->de_str.len);
30549+ offset += l;
30550+ p.deblk += l;
30551+ }
30552+ if (!is_deblk_end(&p, &deblk_end)) {
30553+ valid = 1;
30554+ vdir_cache->vd_last.ul = ul;
30555+ vdir_cache->vd_last.p = p;
30556+ break;
30557+ }
30558+ }
30559+
30560+out:
30561+ /* smp_mb(); */
30562+ AuTraceErr(!valid);
30563+ return valid;
30564+}
30565+/* dir_emit() the entries of the file's vdir cache from ctx->pos; returns 0 */
30566+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
30567+{
30568+ unsigned int l, deblk_sz;
30569+ union au_vdir_deblk_p deblk_end;
30570+ struct au_vdir *vdir_cache;
30571+ struct au_vdir_de *de;
30572+
30573+ vdir_cache = au_fvdir_cache(file);
30574+ if (!seek_vdir(file, ctx))
30575+ return 0;
30576+ /* walk block by block; stop as soon as dir_emit() refuses an entry */
30577+ deblk_sz = vdir_cache->vd_deblk_sz;
30578+ while (1) {
30579+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
30580+ deblk_end.deblk += deblk_sz;
30581+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
30582+ de = vdir_cache->vd_last.p.de;
30583+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
30584+ de->de_str.len, de->de_str.name, ctx->pos,
30585+ (unsigned long)de->de_ino, de->de_type);
30586+ if (unlikely(!dir_emit(ctx, de->de_str.name,
30587+ de->de_str.len, de->de_ino,
30588+ de->de_type))) {
30589+ /* todo: ignore the error caused by udba? */
30590+ /* return err; */
30591+ return 0;
30592+ }
30593+
30594+ l = calc_size(de->de_str.len);
30595+ vdir_cache->vd_last.p.deblk += l;
30596+ ctx->pos += l;
30597+ }
30598+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
30599+ vdir_cache->vd_last.ul++;
30600+ vdir_cache->vd_last.p.deblk
30601+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
30602+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
30603+ continue;
30604+ }
30605+ break;
30606+ }
30607+
30608+ /* smp_mb(); */
30609+ return 0;
30610+}
30611diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
30612--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
30613+++ linux/fs/aufs/vfsub.c 2016-12-17 12:28:17.598545045 +0100
30614@@ -0,0 +1,886 @@
30615+/*
30616+ * Copyright (C) 2005-2016 Junjiro R. Okajima
30617+ *
30618+ * This program, aufs is free software; you can redistribute it and/or modify
30619+ * it under the terms of the GNU General Public License as published by
30620+ * the Free Software Foundation; either version 2 of the License, or
30621+ * (at your option) any later version.
30622+ *
30623+ * This program is distributed in the hope that it will be useful,
30624+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30625+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30626+ * GNU General Public License for more details.
30627+ *
30628+ * You should have received a copy of the GNU General Public License
30629+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
30630+ */
30631+
30632+/*
30633+ * sub-routines for VFS
30634+ */
30635+
30636+#include <linux/namei.h>
30637+#include <linux/nsproxy.h>
30638+#include <linux/security.h>
30639+#include <linux/splice.h>
30640+#ifdef CONFIG_AUFS_BR_FUSE
30641+#include "../fs/mount.h"
30642+#endif
30643+#include "aufs.h"
30644+/* fuse h_sb with au_userns set: -EACCES unless mnt is in current's mnt_ns */
30645+#ifdef CONFIG_AUFS_BR_FUSE
30646+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
30647+{
30648+ struct nsproxy *ns;
30649+
30650+ if (!au_test_fuse(h_sb) || !au_userns)
30651+ return 0;
30652+
30653+ ns = current->nsproxy;
30654+ /* no {get,put}_nsproxy(ns) */
30655+ return real_mount(mnt)->mnt_ns == ns->mnt_ns ? 0 : -EACCES;
30656+}
30657+#endif
30658+
30659+/* ---------------------------------------------------------------------- */
30660+/* vfs_getattr() h_path if its fs qualifies; *did (optional) reports that */
30661+int vfsub_update_h_iattr(struct path *h_path, int *did)
30662+{
30663+ int err;
30664+ struct kstat st;
30665+ struct super_block *h_sb;
30666+
30667+ /* for remote fs, leave work for its getattr or d_revalidate */
30668+ /* for bad i_attr fs, handle them in aufs_getattr() */
30669+ /* still some fs may acquire i_mutex. we need to skip them */
30670+ err = 0;
30671+ if (!did)
30672+ did = &err; /* err doubles as the flag: ends as 0 or the getattr result */
30673+ h_sb = h_path->dentry->d_sb;
30674+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
30675+ if (*did)
30676+ err = vfs_getattr(h_path, &st);
30677+
30678+ return err;
30679+}
30680+
30681+/* ---------------------------------------------------------------------- */
30682+/* dentry_open() with current_cred(); i_readcount_inc() for a read-only open */
30683+struct file *vfsub_dentry_open(struct path *path, int flags)
30684+{
30685+ struct file *file;
30686+
30687+ file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
30688+ current_cred());
30689+ if (!IS_ERR_OR_NULL(file)
30690+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
30691+ i_readcount_inc(d_inode(path->dentry));
30692+
30693+ return file;
30694+}
30695+/* filp_open() with lockdep off, then vfsub_update_h_iattr() on success */
30696+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
30697+{
30698+ struct file *file;
30699+
30700+ lockdep_off();
30701+ file = filp_open(path,
30702+ oflags /* | __FMODE_NONOTIFY */,
30703+ mode);
30704+ lockdep_on();
30705+ if (IS_ERR(file))
30706+ goto out;
30707+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30708+
30709+out:
30710+ return file;
30711+}
30712+
30713+/*
30714+ * Ideally this function should call VFS:do_last() in order to keep all its
30715+ * checks. But it is very hard for aufs to regenerate several VFS internal
30716+ * structures such as nameidata. This is a second (or third) best approach.
30717+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
30718+ */
30719+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
30720+ struct vfsub_aopen_args *args, struct au_branch *br)
30721+{
30722+ int err;
30723+ struct file *file = args->file;
30724+ /* copied from linux/fs/namei.c:atomic_open() */
30725+ struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
30726+
30727+ IMustLock(dir);
30728+ AuDebugOn(!dir->i_op->atomic_open);
30729+
30730+ err = au_br_test_oflag(args->open_flag, br);
30731+ if (unlikely(err))
30732+ goto out;
30733+
30734+ args->file->f_path.dentry = DENTRY_NOT_SET;
30735+ args->file->f_path.mnt = au_br_mnt(br);
30736+ err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
30737+ args->create_mode, args->opened);
30738+ if (err >= 0) {
30739+ /* some filesystems don't set FILE_CREATED while succeeded? */
30740+ if (*args->opened & FILE_CREATED)
30741+ fsnotify_create(dir, dentry);
30742+ } else
30743+ goto out;
30744+
30745+ /* the open checks below run only when ->atomic_open() returned exactly 0 */
30746+ if (!err) {
30747+ /* todo: call VFS:may_open() here */
30748+ err = open_check_o_direct(file);
30749+ /* todo: ima_file_check() too? */
30750+ if (!err && (args->open_flag & __FMODE_EXEC))
30751+ err = deny_write_access(file);
30752+ if (unlikely(err))
30753+ /* note that the file is created and still opened */
30754+ goto out;
30755+ }
30756+ /* NOTE(review): also reached when ->atomic_open() returned a positive value */
30757+ au_br_get(br);
30758+ fsnotify_open(file);
30759+
30760+out:
30761+ return err;
30762+}
30763+/* kern_path(), then vfsub_update_h_iattr() when the result is positive */
30764+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
30765+{
30766+ const int err = kern_path(name, flags, path);
30767+
30768+ if (err || !d_is_positive(path->dentry))
30769+ return err;
30770+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30771+ return 0;
30772+}
30773+/* lookup_one_len_unlocked(), then vfsub_update_h_iattr() on a positive result */
30774+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
30775+ struct dentry *parent, int len)
30776+{
30777+ struct path path = {
30778+ .mnt = NULL
30779+ };
30780+
30781+ path.dentry = lookup_one_len_unlocked(name, parent, len);
30782+ if (IS_ERR(path.dentry))
30783+ goto out;
30784+ if (d_is_positive(path.dentry))
30785+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30786+
30787+out:
30788+ AuTraceErrPtr(path.dentry);
30789+ return path.dentry;
30790+}
30791+/* lookup_one_len() under the parent's lock, then update attrs if positive */
30792+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
30793+ int len)
30794+{
30795+ struct path path = {
30796+ .mnt = NULL
30797+ };
30798+
30799+ /* VFS checks it too, but by WARN_ON_ONCE() */
30800+ IMustLock(d_inode(parent));
30801+
30802+ path.dentry = lookup_one_len(name, parent, len);
30803+ if (IS_ERR(path.dentry))
30804+ goto out;
30805+ if (d_is_positive(path.dentry))
30806+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30807+
30808+out:
30809+ AuTraceErrPtr(path.dentry);
30810+ return path.dentry;
30811+}
30812+/* void * wrapper of vfsub_lkup_one(); the result is stored through a->errp */
30813+void vfsub_call_lkup_one(void *args)
30814+{
30815+ struct vfsub_lkup_one_args *a = args;
30816+ *a->errp = vfsub_lkup_one(a->name, a->parent);
30817+}
30818+
30819+/* ---------------------------------------------------------------------- */
30820+/* lock_rename() with lockdep off, then au_hn_suspend() each distinct hdir */
30821+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
30822+ struct dentry *d2, struct au_hinode *hdir2)
30823+{
30824+ struct dentry *d;
30825+
30826+ lockdep_off();
30827+ d = lock_rename(d1, d2);
30828+ lockdep_on();
30829+ au_hn_suspend(hdir1);
30830+ if (hdir1 != hdir2)
30831+ au_hn_suspend(hdir2);
30832+
30833+ return d;
30834+}
30835+/* au_hn_resume() each distinct hdir, then unlock_rename() with lockdep off */
30836+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
30837+ struct dentry *d2, struct au_hinode *hdir2)
30838+{
30839+ au_hn_resume(hdir1);
30840+ if (hdir1 != hdir2)
30841+ au_hn_resume(hdir2);
30842+ lockdep_off();
30843+ unlock_rename(d1, d2);
30844+ lockdep_on();
30845+}
30846+
30847+/* ---------------------------------------------------------------------- */
30848+/* security_path_mknod() check, vfs_create(), then attr updates on success */
30849+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
30850+{
30851+ int err;
30852+ struct dentry *d;
30853+
30854+ IMustLock(dir);
30855+ /* the hook is given the parent as path and the child as dentry */
30856+ d = path->dentry;
30857+ path->dentry = d->d_parent;
30858+ err = security_path_mknod(path, d, mode, 0);
30859+ path->dentry = d;
30860+ if (unlikely(err))
30861+ goto out;
30862+
30863+ lockdep_off();
30864+ err = vfs_create(dir, path->dentry, mode, want_excl);
30865+ lockdep_on();
30866+ if (!err) {
30867+ struct path tmp = *path;
30868+ int did;
30869+ /* update the new entry first, the parent only if that was done */
30870+ vfsub_update_h_iattr(&tmp, &did);
30871+ if (did) {
30872+ tmp.dentry = path->dentry->d_parent;
30873+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30874+ }
30875+ /*ignore*/
30876+ }
30877+
30878+out:
30879+ return err;
30880+}
30881+/* security_path_symlink() check, vfs_symlink(), then attr updates on success */
30882+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
30883+{
30884+ int err;
30885+ struct dentry *d;
30886+
30887+ IMustLock(dir);
30888+ /* the hook is given the parent as path and the child as dentry */
30889+ d = path->dentry;
30890+ path->dentry = d->d_parent;
30891+ err = security_path_symlink(path, d, symname);
30892+ path->dentry = d;
30893+ if (unlikely(err))
30894+ goto out;
30895+
30896+ lockdep_off();
30897+ err = vfs_symlink(dir, path->dentry, symname);
30898+ lockdep_on();
30899+ if (!err) {
30900+ struct path tmp = *path;
30901+ int did;
30902+ /* update the new entry first, the parent only if that was done */
30903+ vfsub_update_h_iattr(&tmp, &did);
30904+ if (did) {
30905+ tmp.dentry = path->dentry->d_parent;
30906+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30907+ }
30908+ /*ignore*/
30909+ }
30910+
30911+out:
30912+ return err;
30913+}
30914+/* security_path_mknod() check, vfs_mknod(), then attr updates on success */
30915+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
30916+{
30917+ int err;
30918+ struct dentry *d;
30919+
30920+ IMustLock(dir);
30921+ /* the hook is given the parent as path and the child as dentry */
30922+ d = path->dentry;
30923+ path->dentry = d->d_parent;
30924+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
30925+ path->dentry = d;
30926+ if (unlikely(err))
30927+ goto out;
30928+
30929+ lockdep_off();
30930+ err = vfs_mknod(dir, path->dentry, mode, dev);
30931+ lockdep_on();
30932+ if (!err) {
30933+ struct path tmp = *path;
30934+ int did;
30935+ /* update the new entry first, the parent only if that was done */
30936+ vfsub_update_h_iattr(&tmp, &did);
30937+ if (did) {
30938+ tmp.dentry = path->dentry->d_parent;
30939+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30940+ }
30941+ /*ignore*/
30942+ }
30943+
30944+out:
30945+ return err;
30946+}
30947+/* -EMLINK if au_test_fs_no_limit_nlink() holds and i_nlink hit UINT_MAX / 2 */
30948+static int au_test_nlink(struct inode *inode)
30949+{
30950+ const unsigned int nlink_ceiling = UINT_MAX >> 1; /* rough margin */
30951+
30952+ if (au_test_fs_no_limit_nlink(inode->i_sb)
30953+ && inode->i_nlink >= nlink_ceiling)
30954+ return -EMLINK;
30955+ return 0;
30956+}
30957+/* nlink and security_path_link() checks, vfs_link(), then attr updates */
30958+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
30959+ struct inode **delegated_inode)
30960+{
30961+ int err;
30962+ struct dentry *d;
30963+
30964+ IMustLock(dir);
30965+
30966+ err = au_test_nlink(d_inode(src_dentry));
30967+ if (unlikely(err))
30968+ return err;
30969+
30970+ /* we don't call may_linkat() */
30971+ d = path->dentry;
30972+ path->dentry = d->d_parent;
30973+ err = security_path_link(src_dentry, path, d);
30974+ path->dentry = d;
30975+ if (unlikely(err))
30976+ goto out;
30977+
30978+ lockdep_off();
30979+ err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
30980+ lockdep_on();
30981+ if (!err) {
30982+ struct path tmp = *path;
30983+ int did;
30984+
30985+ /* fuse has different memory inode for the same inumber */
30986+ vfsub_update_h_iattr(&tmp, &did);
30987+ if (did) {
30988+ tmp.dentry = path->dentry->d_parent;
30989+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30990+ tmp.dentry = src_dentry;
30991+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30992+ }
30993+ /*ignore*/
30994+ }
30995+
30996+out:
30997+ return err;
30998+}
30999+/* security_path_rename() check, vfs_rename(), then attr updates on success */
31000+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
31001+ struct inode *dir, struct path *path,
31002+ struct inode **delegated_inode, unsigned int flags)
31003+{
31004+ int err;
31005+ struct path tmp = {
31006+ .mnt = path->mnt
31007+ };
31008+ struct dentry *d;
31009+
31010+ IMustLock(dir);
31011+ IMustLock(src_dir);
31012+ /* the hook gets both parents as paths; it is passed 0, not @flags */
31013+ d = path->dentry;
31014+ path->dentry = d->d_parent;
31015+ tmp.dentry = src_dentry->d_parent;
31016+ err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
31017+ path->dentry = d;
31018+ if (unlikely(err))
31019+ goto out;
31020+
31021+ lockdep_off();
31022+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
31023+ delegated_inode, flags);
31024+ lockdep_on();
31025+ if (!err) {
31026+ int did;
31027+ /* new parent first; the moved entry and old parent only if done */
31028+ tmp.dentry = d->d_parent;
31029+ vfsub_update_h_iattr(&tmp, &did);
31030+ if (did) {
31031+ tmp.dentry = src_dentry;
31032+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31033+ tmp.dentry = src_dentry->d_parent;
31034+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31035+ }
31036+ /*ignore*/
31037+ }
31038+
31039+out:
31040+ return err;
31041+}
31042+/* security_path_mkdir() check, vfs_mkdir(), then attr updates on success */
31043+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
31044+{
31045+ int err;
31046+ struct dentry *d;
31047+
31048+ IMustLock(dir);
31049+ /* the hook is given the parent as path and the child as dentry */
31050+ d = path->dentry;
31051+ path->dentry = d->d_parent;
31052+ err = security_path_mkdir(path, d, mode);
31053+ path->dentry = d;
31054+ if (unlikely(err))
31055+ goto out;
31056+
31057+ lockdep_off();
31058+ err = vfs_mkdir(dir, path->dentry, mode);
31059+ lockdep_on();
31060+ if (!err) {
31061+ struct path tmp = *path;
31062+ int did;
31063+ /* update the new dir first, the parent only if that was done */
31064+ vfsub_update_h_iattr(&tmp, &did);
31065+ if (did) {
31066+ tmp.dentry = path->dentry->d_parent;
31067+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31068+ }
31069+ /*ignore*/
31070+ }
31071+
31072+out:
31073+ return err;
31074+}
31075+/* security_path_rmdir() check, vfs_rmdir(), then update the parent's attrs */
31076+int vfsub_rmdir(struct inode *dir, struct path *path)
31077+{
31078+ int err;
31079+ struct dentry *d;
31080+
31081+ IMustLock(dir);
31082+ /* the hook is given the parent as path and the child as dentry */
31083+ d = path->dentry;
31084+ path->dentry = d->d_parent;
31085+ err = security_path_rmdir(path, d);
31086+ path->dentry = d;
31087+ if (unlikely(err))
31088+ goto out;
31089+
31090+ lockdep_off();
31091+ err = vfs_rmdir(dir, path->dentry);
31092+ lockdep_on();
31093+ if (!err) {
31094+ struct path tmp = {
31095+ .dentry = path->dentry->d_parent,
31096+ .mnt = path->mnt
31097+ };
31098+
31099+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
31100+ }
31101+
31102+out:
31103+ return err;
31104+}
31105+
31106+/* ---------------------------------------------------------------------- */
31107+/* vfs_read() into a user buffer with lockdep off; attrs updated on success */
31108+/* todo: support mmap_sem? */
31109+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
31110+ loff_t *ppos)
31111+{
31112+ ssize_t err;
31113+
31114+ lockdep_off();
31115+ err = vfs_read(file, ubuf, count, ppos);
31116+ lockdep_on();
31117+ if (err >= 0)
31118+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
31119+ return err;
31120+}
31121+/* vfsub_read_u() on a kernel buffer, run under set_fs(KERNEL_DS) */
31122+/* todo: kernel_read()? */
31123+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31124+ loff_t *ppos)
31125+{
31126+ ssize_t err;
31127+ mm_segment_t oldfs;
31128+ union { /* hands kbuf over as a __user pointer without a cast */
31129+ void *k;
31130+ char __user *u;
31131+ } buf;
31132+
31133+ buf.k = kbuf;
31134+ oldfs = get_fs();
31135+ set_fs(KERNEL_DS);
31136+ err = vfsub_read_u(file, buf.u, count, ppos);
31137+ set_fs(oldfs);
31138+ return err;
31139+}
31140+/* vfs_write() from a user buffer with lockdep off; attrs updated on success */
31141+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31142+ loff_t *ppos)
31143+{
31144+ ssize_t err;
31145+
31146+ lockdep_off();
31147+ err = vfs_write(file, ubuf, count, ppos);
31148+ lockdep_on();
31149+ if (err >= 0)
31150+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
31151+ return err;
31152+}
31153+/* vfsub_write_u() on a kernel buffer, run under set_fs(KERNEL_DS) */
31154+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
31155+{
31156+ ssize_t err;
31157+ mm_segment_t oldfs;
31158+ union { /* hands kbuf over as a __user pointer without a cast */
31159+ void *k;
31160+ const char __user *u;
31161+ } buf;
31162+
31163+ buf.k = kbuf;
31164+ oldfs = get_fs();
31165+ set_fs(KERNEL_DS);
31166+ err = vfsub_write_u(file, buf.u, count, ppos);
31167+ set_fs(oldfs);
31168+ return err;
31169+}
31170+/* call ->flush() if the file has one (lockdep off on nfs); 0 when it has none */
31171+int vfsub_flush(struct file *file, fl_owner_t id)
31172+{
31173+ int err;
31174+
31175+ err = 0;
31176+ if (file->f_op->flush) {
31177+ if (!au_test_nfs(file->f_path.dentry->d_sb))
31178+ err = file->f_op->flush(file, id);
31179+ else {
31180+ lockdep_off();
31181+ err = file->f_op->flush(file, id);
31182+ lockdep_on();
31183+ }
31184+ if (!err)
31185+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
31186+ /*ignore*/
31187+ }
31188+ return err;
31189+}
31190+/* iterate_dir() with lockdep off; attrs updated when it did not fail */
31191+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
31192+{
31193+ int err;
31194+
31195+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
31196+
31197+ lockdep_off();
31198+ err = iterate_dir(file, ctx);
31199+ lockdep_on();
31200+ if (err >= 0)
31201+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
31202+ return err;
31203+}
31204+/* do_splice_to() with lockdep off, file_accessed(), attrs updated on success */
31205+long vfsub_splice_to(struct file *in, loff_t *ppos,
31206+ struct pipe_inode_info *pipe, size_t len,
31207+ unsigned int flags)
31208+{
31209+ long err;
31210+
31211+ lockdep_off();
31212+ err = do_splice_to(in, ppos, pipe, len, flags);
31213+ lockdep_on();
31214+ file_accessed(in); /* called whatever do_splice_to() returned */
31215+ if (err >= 0)
31216+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
31217+ return err;
31218+}
31219+/* do_splice_from() with lockdep off; attrs of @out updated on success */
31220+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31221+ loff_t *ppos, size_t len, unsigned int flags)
31222+{
31223+ long err;
31224+
31225+ lockdep_off();
31226+ err = do_splice_from(pipe, out, ppos, len, flags);
31227+ lockdep_on();
31228+ if (err >= 0)
31229+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
31230+ return err;
31231+}
31232+/* vfs_fsync() with lockdep off; then update attrs of @path, or of the file */
31233+int vfsub_fsync(struct file *file, struct path *path, int datasync)
31234+{
31235+ int err;
31236+
31237+ /* file can be NULL */
31238+ lockdep_off();
31239+ err = vfs_fsync(file, datasync);
31240+ lockdep_on();
31241+ if (!err) {
31242+ if (!path) { /* no path given: fall back to the file's own path */
31243+ AuDebugOn(!file);
31244+ path = &file->f_path;
31245+ }
31246+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
31247+ }
31248+ return err;
31249+}
31250+/* truncate h_path: by vfsub_truncate() without h_file, else by do_truncate() */
31251+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
31252+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31253+ struct file *h_file)
31254+{
31255+ int err;
31256+ struct inode *h_inode;
31257+ struct super_block *h_sb;
31258+
31259+ if (!h_file) {
31260+ err = vfsub_truncate(h_path, length);
31261+ goto out;
31262+ }
31263+ /* lock and security checks run between sb_start_write()/sb_end_write() */
31264+ h_inode = d_inode(h_path->dentry);
31265+ h_sb = h_inode->i_sb;
31266+ lockdep_off();
31267+ sb_start_write(h_sb);
31268+ lockdep_on();
31269+ err = locks_verify_truncate(h_inode, h_file, length);
31270+ if (!err)
31271+ err = security_path_truncate(h_path);
31272+ if (!err) {
31273+ lockdep_off();
31274+ err = do_truncate(h_path->dentry, length, attr, h_file);
31275+ lockdep_on();
31276+ }
31277+ lockdep_off();
31278+ sb_end_write(h_sb);
31279+ lockdep_on();
31280+
31281+out:
31282+ return err;
31283+}
31284+
31285+/* ---------------------------------------------------------------------- */
31286+
31287+struct au_vfsub_mkdir_args {
31288+ int *errp;
31289+ struct inode *dir;
31290+ struct path *path;
31291+ int mode;
31292+};
31293+
31294+static void au_call_vfsub_mkdir(void *args)
31295+{
31296+ struct au_vfsub_mkdir_args *a = args;
31297+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
31298+}
31299+
31300+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
31301+{
31302+ int err, do_sio, wkq_err;
31303+
31304+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
31305+ if (!do_sio) {
31306+ lockdep_off();
31307+ err = vfsub_mkdir(dir, path, mode);
31308+ lockdep_on();
31309+ } else {
31310+ struct au_vfsub_mkdir_args args = {
31311+ .errp = &err,
31312+ .dir = dir,
31313+ .path = path,
31314+ .mode = mode
31315+ };
31316+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
31317+ if (unlikely(wkq_err))
31318+ err = wkq_err;
31319+ }
31320+
31321+ return err;
31322+}
31323+
31324+struct au_vfsub_rmdir_args {
31325+ int *errp;
31326+ struct inode *dir;
31327+ struct path *path;
31328+};
31329+
31330+static void au_call_vfsub_rmdir(void *args)
31331+{
31332+ struct au_vfsub_rmdir_args *a = args;
31333+ *a->errp = vfsub_rmdir(a->dir, a->path);
31334+}
31335+
31336+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
31337+{
31338+ int err, do_sio, wkq_err;
31339+
31340+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
31341+ if (!do_sio) {
31342+ lockdep_off();
31343+ err = vfsub_rmdir(dir, path);
31344+ lockdep_on();
31345+ } else {
31346+ struct au_vfsub_rmdir_args args = {
31347+ .errp = &err,
31348+ .dir = dir,
31349+ .path = path
31350+ };
31351+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
31352+ if (unlikely(wkq_err))
31353+ err = wkq_err;
31354+ }
31355+
31356+ return err;
31357+}
31358+
31359+/* ---------------------------------------------------------------------- */
31360+
31361+struct notify_change_args {
31362+ int *errp;
31363+ struct path *path;
31364+ struct iattr *ia;
31365+ struct inode **delegated_inode;
31366+};
31367+
31368+static void call_notify_change(void *args)
31369+{
31370+ struct notify_change_args *a = args;
31371+ struct inode *h_inode;
31372+
31373+ h_inode = d_inode(a->path->dentry);
31374+ IMustLock(h_inode);
31375+
31376+ *a->errp = -EPERM;
31377+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
31378+ lockdep_off();
31379+ *a->errp = notify_change(a->path->dentry, a->ia,
31380+ a->delegated_inode);
31381+ lockdep_on();
31382+ if (!*a->errp)
31383+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
31384+ }
31385+ AuTraceErr(*a->errp);
31386+}
31387+
31388+int vfsub_notify_change(struct path *path, struct iattr *ia,
31389+ struct inode **delegated_inode)
31390+{
31391+ int err;
31392+ struct notify_change_args args = {
31393+ .errp = &err,
31394+ .path = path,
31395+ .ia = ia,
31396+ .delegated_inode = delegated_inode
31397+ };
31398+
31399+ call_notify_change(&args);
31400+
31401+ return err;
31402+}
31403+
31404+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31405+ struct inode **delegated_inode)
31406+{
31407+ int err, wkq_err;
31408+ struct notify_change_args args = {
31409+ .errp = &err,
31410+ .path = path,
31411+ .ia = ia,
31412+ .delegated_inode = delegated_inode
31413+ };
31414+
31415+ wkq_err = au_wkq_wait(call_notify_change, &args);
31416+ if (unlikely(wkq_err))
31417+ err = wkq_err;
31418+
31419+ return err;
31420+}
31421+
31422+/* ---------------------------------------------------------------------- */
31423+
31424+struct unlink_args {
31425+ int *errp;
31426+ struct inode *dir;
31427+ struct path *path;
31428+ struct inode **delegated_inode;
31429+};
31430+
31431+static void call_unlink(void *args) /* args: struct unlink_args; result in *errp */
31432+{
31433+ struct unlink_args *a = args;
31434+ struct dentry *d = a->path->dentry;
31435+ struct inode *h_inode;
31436+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
31437+ && au_dcount(d) == 1);
31438+
31439+ IMustLock(a->dir);
31440+
31441+ a->path->dentry = d->d_parent; /* temporarily the parent, for the hook */
31442+ *a->errp = security_path_unlink(a->path, d);
31443+ a->path->dentry = d; /* restore the caller's path */
31444+ if (unlikely(*a->errp))
31445+ return;
31446+
31447+ if (!stop_sillyrename) /* presumably an extra ref makes nfs sillyrename */
31448+ dget(d);
31449+ h_inode = NULL;
31450+ if (d_is_positive(d)) {
31451+ h_inode = d_inode(d);
31452+ ihold(h_inode); /* held across vfs_unlink(), put below */
31453+ }
31454+
31455+ lockdep_off();
31456+ *a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
31457+ lockdep_on();
31458+ if (!*a->errp) {
31459+ struct path tmp = { /* the parent dir on the same mount */
31460+ .dentry = d->d_parent,
31461+ .mnt = a->path->mnt
31462+ };
31463+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
31464+ }
31465+
31466+ if (!stop_sillyrename) /* pairs with the dget() above */
31467+ dput(d);
31468+ if (h_inode)
31469+ iput(h_inode);
31470+
31471+ AuTraceErr(*a->errp);
31472+}
31473+
31474+/*
31475+ * @dir: must be locked.
31476+ * @dentry: target dentry.
31477+ */
31478+int vfsub_unlink(struct inode *dir, struct path *path,
31479+ struct inode **delegated_inode, int force)
31480+{
31481+ int err;
31482+ struct unlink_args args = {
31483+ .errp = &err,
31484+ .dir = dir,
31485+ .path = path,
31486+ .delegated_inode = delegated_inode
31487+ };
31488+
31489+ if (!force)
31490+ call_unlink(&args);
31491+ else {
31492+ int wkq_err;
31493+
31494+ wkq_err = au_wkq_wait(call_unlink, &args);
31495+ if (unlikely(wkq_err))
31496+ err = wkq_err;
31497+ }
31498+
31499+ return err;
31500+}
31501diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
31502--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
31503+++ linux/fs/aufs/vfsub.h 2016-12-17 12:28:17.598545045 +0100
31504@@ -0,0 +1,316 @@
31505+/*
31506+ * Copyright (C) 2005-2016 Junjiro R. Okajima
31507+ *
31508+ * This program, aufs is free software; you can redistribute it and/or modify
31509+ * it under the terms of the GNU General Public License as published by
31510+ * the Free Software Foundation; either version 2 of the License, or
31511+ * (at your option) any later version.
31512+ *
31513+ * This program is distributed in the hope that it will be useful,
31514+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31515+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31516+ * GNU General Public License for more details.
31517+ *
31518+ * You should have received a copy of the GNU General Public License
31519+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
31520+ */
31521+
31522+/*
31523+ * sub-routines for VFS
31524+ */
31525+
31526+#ifndef __AUFS_VFSUB_H__
31527+#define __AUFS_VFSUB_H__
31528+
31529+#ifdef __KERNEL__
31530+
31531+#include <linux/fs.h>
31532+#include <linux/mount.h>
31533+#include <linux/posix_acl.h>
31534+#include <linux/xattr.h>
31535+#include "debug.h"
31536+
31537+/* copied from linux/fs/internal.h */
31538+/* todo: BAD approach!! */
31539+extern void __mnt_drop_write(struct vfsmount *);
31540+extern int open_check_o_direct(struct file *f);
31541+
31542+/* ---------------------------------------------------------------------- */
31543+
31544+/* lock subclass for lower inode */
31545+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
31546+/* reduce? gave up. */
31547+enum {
31548+ AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
31549+ AuLsc_I_PARENT, /* lower inode, parent first */
31550+ AuLsc_I_PARENT2, /* copyup dirs */
31551+ AuLsc_I_PARENT3, /* copyup wh */
31552+ AuLsc_I_CHILD,
31553+ AuLsc_I_CHILD2,
31554+ AuLsc_I_End
31555+};
31556+
31557+/* to debug easier, do not make them inlined functions */
31558+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
31559+#define IMustLock(i) AuDebugOn(!inode_is_locked(i))
31560+
31561+/* ---------------------------------------------------------------------- */
31562+
31563+static inline void vfsub_drop_nlink(struct inode *inode)
31564+{
31565+ AuDebugOn(!inode->i_nlink);
31566+ drop_nlink(inode);
31567+}
31568+
31569+static inline void vfsub_dead_dir(struct inode *inode)
31570+{
31571+ AuDebugOn(!S_ISDIR(inode->i_mode));
31572+ inode->i_flags |= S_DEAD;
31573+ clear_nlink(inode);
31574+}
31575+
31576+static inline int vfsub_native_ro(struct inode *inode)
31577+{
31578+ return (inode->i_sb->s_flags & MS_RDONLY)
31579+ || IS_RDONLY(inode)
31580+ /* || IS_APPEND(inode) */
31581+ || IS_IMMUTABLE(inode);
31582+}
31583+
31584+#ifdef CONFIG_AUFS_BR_FUSE
31585+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
31586+#else
31587+AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
31588+#endif
31589+
31590+/* ---------------------------------------------------------------------- */
31591+
31592+int vfsub_update_h_iattr(struct path *h_path, int *did);
31593+struct file *vfsub_dentry_open(struct path *path, int flags);
31594+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
31595+struct vfsub_aopen_args {
31596+ struct file *file;
31597+ unsigned int open_flag;
31598+ umode_t create_mode;
31599+ int *opened;
31600+};
31601+struct au_branch;
31602+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
31603+ struct vfsub_aopen_args *args, struct au_branch *br);
31604+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
31605+
31606+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
31607+ struct dentry *parent, int len);
31608+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
31609+ int len);
31610+
31611+struct vfsub_lkup_one_args {
31612+ struct dentry **errp;
31613+ struct qstr *name;
31614+ struct dentry *parent;
31615+};
31616+
31617+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
31618+ struct dentry *parent)
31619+{
31620+ return vfsub_lookup_one_len(name->name, parent, name->len);
31621+}
31622+
31623+void vfsub_call_lkup_one(void *args);
31624+
31625+/* ---------------------------------------------------------------------- */
31626+
31627+static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
31628+{
31629+ int err;
31630+
31631+ lockdep_off();
31632+ err = mnt_want_write(mnt);
31633+ lockdep_on();
31634+ return err;
31635+}
31636+
31637+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
31638+{
31639+ lockdep_off();
31640+ mnt_drop_write(mnt);
31641+ lockdep_on();
31642+}
31643+
31644+#if 0 /* reserved */
31645+static inline void vfsub_mnt_drop_write_file(struct file *file)
31646+{
31647+ lockdep_off();
31648+ mnt_drop_write_file(file);
31649+ lockdep_on();
31650+}
31651+#endif
31652+
31653+/* ---------------------------------------------------------------------- */
31654+
31655+struct au_hinode;
31656+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
31657+ struct dentry *d2, struct au_hinode *hdir2);
31658+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
31659+ struct dentry *d2, struct au_hinode *hdir2);
31660+
31661+int vfsub_create(struct inode *dir, struct path *path, int mode,
31662+ bool want_excl);
31663+int vfsub_symlink(struct inode *dir, struct path *path,
31664+ const char *symname);
31665+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
31666+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
31667+ struct path *path, struct inode **delegated_inode);
31668+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
31669+ struct inode *hdir, struct path *path,
31670+ struct inode **delegated_inode, unsigned int flags);
31671+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
31672+int vfsub_rmdir(struct inode *dir, struct path *path);
31673+
31674+/* ---------------------------------------------------------------------- */
31675+
31676+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
31677+ loff_t *ppos);
31678+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31679+ loff_t *ppos);
31680+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31681+ loff_t *ppos);
31682+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
31683+ loff_t *ppos);
31684+int vfsub_flush(struct file *file, fl_owner_t id);
31685+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
31686+
31687+static inline loff_t vfsub_f_size_read(struct file *file)
31688+{
31689+ return i_size_read(file_inode(file));
31690+}
31691+
31692+static inline unsigned int vfsub_file_flags(struct file *file)
31693+{
31694+ unsigned int flags;
31695+
31696+ spin_lock(&file->f_lock);
31697+ flags = file->f_flags;
31698+ spin_unlock(&file->f_lock);
31699+
31700+ return flags;
31701+}
31702+
31703+static inline int vfsub_file_execed(struct file *file)
31704+{
31705+ /* todo: direct access f_flags */
31706+ return !!(vfsub_file_flags(file) & __FMODE_EXEC);
31707+}
31708+
31709+#if 0 /* reserved */
31710+static inline void vfsub_file_accessed(struct file *h_file)
31711+{
31712+ file_accessed(h_file);
31713+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
31714+}
31715+#endif
31716+
31717+#if 0 /* reserved */
31718+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
31719+ struct dentry *h_dentry)
31720+{
31721+ struct path h_path = {
31722+ .dentry = h_dentry,
31723+ .mnt = h_mnt
31724+ };
31725+ touch_atime(&h_path);
31726+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
31727+}
31728+#endif
31729+
31730+static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
31731+ int flags)
31732+{
31733+ return update_time(h_inode, ts, flags);
31734+ /* no vfsub_update_h_iattr() since we don't have struct path */
31735+}
31736+
31737+#ifdef CONFIG_FS_POSIX_ACL
31738+static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
31739+{
31740+ int err;
31741+
31742+ err = posix_acl_chmod(h_inode, h_mode);
31743+ if (err == -EOPNOTSUPP)
31744+ err = 0;
31745+ return err;
31746+}
31747+#else
31748+AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
31749+#endif
31750+
31751+long vfsub_splice_to(struct file *in, loff_t *ppos,
31752+ struct pipe_inode_info *pipe, size_t len,
31753+ unsigned int flags);
31754+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31755+ loff_t *ppos, size_t len, unsigned int flags);
31756+
31757+static inline long vfsub_truncate(struct path *path, loff_t length)
31758+{
31759+ long err;
31760+
31761+ lockdep_off();
31762+ err = vfs_truncate(path, length);
31763+ lockdep_on();
31764+ return err;
31765+}
31766+
31767+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31768+ struct file *h_file);
31769+int vfsub_fsync(struct file *file, struct path *path, int datasync);
31770+
31771+/* ---------------------------------------------------------------------- */
31772+
31773+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
31774+{
31775+ loff_t err;
31776+
31777+ lockdep_off();
31778+ err = vfs_llseek(file, offset, origin);
31779+ lockdep_on();
31780+ return err;
31781+}
31782+
31783+/* ---------------------------------------------------------------------- */
31784+
31785+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
31786+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
31787+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31788+ struct inode **delegated_inode);
31789+int vfsub_notify_change(struct path *path, struct iattr *ia,
31790+ struct inode **delegated_inode);
31791+int vfsub_unlink(struct inode *dir, struct path *path,
31792+ struct inode **delegated_inode, int force);
31793+
31794+/* ---------------------------------------------------------------------- */
31795+
31796+static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
31797+ const void *value, size_t size, int flags)
31798+{
31799+ int err;
31800+
31801+ lockdep_off();
31802+ err = vfs_setxattr(dentry, name, value, size, flags);
31803+ lockdep_on();
31804+
31805+ return err;
31806+}
31807+
31808+static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
31809+{
31810+ int err;
31811+
31812+ lockdep_off();
31813+ err = vfs_removexattr(dentry, name);
31814+ lockdep_on();
31815+
31816+ return err;
31817+}
31818+
31819+#endif /* __KERNEL__ */
31820+#endif /* __AUFS_VFSUB_H__ */
31821diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
31822--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
31823+++ linux/fs/aufs/wbr_policy.c 2016-12-17 12:28:17.598545045 +0100
31824@@ -0,0 +1,830 @@
31825+/*
31826+ * Copyright (C) 2005-2016 Junjiro R. Okajima
31827+ *
31828+ * This program, aufs is free software; you can redistribute it and/or modify
31829+ * it under the terms of the GNU General Public License as published by
31830+ * the Free Software Foundation; either version 2 of the License, or
31831+ * (at your option) any later version.
31832+ *
31833+ * This program is distributed in the hope that it will be useful,
31834+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31835+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31836+ * GNU General Public License for more details.
31837+ *
31838+ * You should have received a copy of the GNU General Public License
31839+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
31840+ */
31841+
31842+/*
31843+ * policies for selecting one among multiple writable branches
31844+ */
31845+
31846+#include <linux/statfs.h>
31847+#include "aufs.h"
31848+
31849+/* subset of cpup_attr() */
31850+static noinline_for_stack
31851+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
31852+{
31853+ int err, sbits;
31854+ struct iattr ia;
31855+ struct inode *h_isrc;
31856+
31857+ h_isrc = d_inode(h_src);
31858+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
31859+ ia.ia_mode = h_isrc->i_mode;
31860+ ia.ia_uid = h_isrc->i_uid;
31861+ ia.ia_gid = h_isrc->i_gid;
31862+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
31863+ au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
31864+ /* no delegation since it is just created */
31865+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
31866+
31867+ /* is this nfs only? */
31868+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
31869+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
31870+ ia.ia_mode = h_isrc->i_mode;
31871+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
31872+ }
31873+
31874+ return err;
31875+}
31876+
31877+#define AuCpdown_PARENT_OPQ 1
31878+#define AuCpdown_WHED (1 << 1)
31879+#define AuCpdown_MADE_DIR (1 << 2)
31880+#define AuCpdown_DIROPQ (1 << 3)
31881+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
31882+#define au_fset_cpdown(flags, name) \
31883+ do { (flags) |= AuCpdown_##name; } while (0)
31884+#define au_fclr_cpdown(flags, name) \
31885+ do { (flags) &= ~AuCpdown_##name; } while (0)
31886+
31887+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
31888+ unsigned int *flags)
31889+{
31890+ int err;
31891+ struct dentry *opq_dentry;
31892+
31893+ opq_dentry = au_diropq_create(dentry, bdst);
31894+ err = PTR_ERR(opq_dentry);
31895+ if (IS_ERR(opq_dentry))
31896+ goto out;
31897+ dput(opq_dentry);
31898+ au_fset_cpdown(*flags, DIROPQ);
31899+
31900+out:
31901+ return err;
31902+}
31903+
31904+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
31905+ struct inode *dir, aufs_bindex_t bdst)
31906+{
31907+ int err;
31908+ struct path h_path;
31909+ struct au_branch *br;
31910+
31911+ br = au_sbr(dentry->d_sb, bdst);
31912+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
31913+ err = PTR_ERR(h_path.dentry);
31914+ if (IS_ERR(h_path.dentry))
31915+ goto out;
31916+
31917+ err = 0;
31918+ if (d_is_positive(h_path.dentry)) {
31919+ h_path.mnt = au_br_mnt(br);
31920+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
31921+ dentry);
31922+ }
31923+ dput(h_path.dentry);
31924+
31925+out:
31926+ return err;
31927+}
31928+
31929+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
31930+ struct au_pin *pin,
31931+ struct dentry *h_parent, void *arg)
31932+{
31933+ int err, rerr;
31934+ aufs_bindex_t bopq, btop;
31935+ struct path h_path;
31936+ struct dentry *parent;
31937+ struct inode *h_dir, *h_inode, *inode, *dir;
31938+ unsigned int *flags = arg;
31939+
31940+ btop = au_dbtop(dentry);
31941+ /* dentry is di-locked */
31942+ parent = dget_parent(dentry);
31943+ dir = d_inode(parent);
31944+ h_dir = d_inode(h_parent);
31945+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
31946+ IMustLock(h_dir);
31947+
31948+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
31949+ if (unlikely(err < 0))
31950+ goto out;
31951+ h_path.dentry = au_h_dptr(dentry, bdst);
31952+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
31953+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
31954+ S_IRWXU | S_IRUGO | S_IXUGO);
31955+ if (unlikely(err))
31956+ goto out_put;
31957+ au_fset_cpdown(*flags, MADE_DIR);
31958+
31959+ bopq = au_dbdiropq(dentry);
31960+ au_fclr_cpdown(*flags, WHED);
31961+ au_fclr_cpdown(*flags, DIROPQ);
31962+ if (au_dbwh(dentry) == bdst)
31963+ au_fset_cpdown(*flags, WHED);
31964+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
31965+ au_fset_cpdown(*flags, PARENT_OPQ);
31966+ h_inode = d_inode(h_path.dentry);
31967+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
31968+ if (au_ftest_cpdown(*flags, WHED)) {
31969+ err = au_cpdown_dir_opq(dentry, bdst, flags);
31970+ if (unlikely(err)) {
31971+ inode_unlock(h_inode);
31972+ goto out_dir;
31973+ }
31974+ }
31975+
31976+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop));
31977+ inode_unlock(h_inode);
31978+ if (unlikely(err))
31979+ goto out_opq;
31980+
31981+ if (au_ftest_cpdown(*flags, WHED)) {
31982+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
31983+ if (unlikely(err))
31984+ goto out_opq;
31985+ }
31986+
31987+ inode = d_inode(dentry);
31988+ if (au_ibbot(inode) < bdst)
31989+ au_set_ibbot(inode, bdst);
31990+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
31991+ au_hi_flags(inode, /*isdir*/1));
31992+ au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
31993+ goto out; /* success */
31994+
31995+ /* revert */
31996+out_opq:
31997+ if (au_ftest_cpdown(*flags, DIROPQ)) {
31998+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
31999+ rerr = au_diropq_remove(dentry, bdst);
32000+ inode_unlock(h_inode);
32001+ if (unlikely(rerr)) {
32002+ AuIOErr("failed removing diropq for %pd b%d (%d)\n",
32003+ dentry, bdst, rerr);
32004+ err = -EIO;
32005+ goto out;
32006+ }
32007+ }
32008+out_dir:
32009+ if (au_ftest_cpdown(*flags, MADE_DIR)) {
32010+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
32011+ if (unlikely(rerr)) {
32012+ AuIOErr("failed removing %pd b%d (%d)\n",
32013+ dentry, bdst, rerr);
32014+ err = -EIO;
32015+ }
32016+ }
32017+out_put:
32018+ au_set_h_dptr(dentry, bdst, NULL);
32019+ if (au_dbbot(dentry) == bdst)
32020+ au_update_dbbot(dentry);
32021+out:
32022+ dput(parent);
32023+ return err;
32024+}
32025+
32026+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
32027+{
32028+ int err;
32029+ unsigned int flags;
32030+
32031+ flags = 0;
32032+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
32033+
32034+ return err;
32035+}
32036+
32037+/* ---------------------------------------------------------------------- */
32038+
32039+/* policies for create */
32040+
32041+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
32042+{
32043+ int err, i, j, ndentry;
32044+ aufs_bindex_t bopq;
32045+ struct au_dcsub_pages dpages;
32046+ struct au_dpage *dpage;
32047+ struct dentry **dentries, *parent, *d;
32048+
32049+ err = au_dpages_init(&dpages, GFP_NOFS);
32050+ if (unlikely(err))
32051+ goto out;
32052+ parent = dget_parent(dentry);
32053+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
32054+ if (unlikely(err))
32055+ goto out_free;
32056+
32057+ err = bindex;
32058+ for (i = 0; i < dpages.ndpage; i++) {
32059+ dpage = dpages.dpages + i;
32060+ dentries = dpage->dentries;
32061+ ndentry = dpage->ndentry;
32062+ for (j = 0; j < ndentry; j++) {
32063+ d = dentries[j];
32064+ di_read_lock_parent2(d, !AuLock_IR);
32065+ bopq = au_dbdiropq(d);
32066+ di_read_unlock(d, !AuLock_IR);
32067+ if (bopq >= 0 && bopq < err)
32068+ err = bopq;
32069+ }
32070+ }
32071+
32072+out_free:
32073+ dput(parent);
32074+ au_dpages_free(&dpages);
32075+out:
32076+ return err;
32077+}
32078+
32079+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
32080+{
32081+ for (; bindex >= 0; bindex--)
32082+ if (!au_br_rdonly(au_sbr(sb, bindex)))
32083+ return bindex;
32084+ return -EROFS;
32085+}
32086+
32087+/* top down parent */
32088+static int au_wbr_create_tdp(struct dentry *dentry,
32089+ unsigned int flags __maybe_unused)
32090+{
32091+ int err;
32092+ aufs_bindex_t btop, bindex;
32093+ struct super_block *sb;
32094+ struct dentry *parent, *h_parent;
32095+
32096+ sb = dentry->d_sb;
32097+ btop = au_dbtop(dentry);
32098+ err = btop;
32099+ if (!au_br_rdonly(au_sbr(sb, btop)))
32100+ goto out;
32101+
32102+ err = -EROFS;
32103+ parent = dget_parent(dentry);
32104+ for (bindex = au_dbtop(parent); bindex < btop; bindex++) {
32105+ h_parent = au_h_dptr(parent, bindex);
32106+ if (!h_parent || d_is_negative(h_parent))
32107+ continue;
32108+
32109+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
32110+ err = bindex;
32111+ break;
32112+ }
32113+ }
32114+ dput(parent);
32115+
32116+ /* bottom up here */
32117+ if (unlikely(err < 0)) {
32118+ err = au_wbr_bu(sb, btop - 1);
32119+ if (err >= 0)
32120+ err = au_wbr_nonopq(dentry, err);
32121+ }
32122+
32123+out:
32124+ AuDbg("b%d\n", err);
32125+ return err;
32126+}
32127+
32128+/* ---------------------------------------------------------------------- */
32129+
32130+/* an exception for the policies other than tdp */
32131+static int au_wbr_create_exp(struct dentry *dentry)
32132+{
32133+ int err;
32134+ aufs_bindex_t bwh, bdiropq;
32135+ struct dentry *parent;
32136+
32137+ err = -1;
32138+ bwh = au_dbwh(dentry);
32139+ parent = dget_parent(dentry);
32140+ bdiropq = au_dbdiropq(parent);
32141+ if (bwh >= 0) {
32142+ if (bdiropq >= 0)
32143+ err = min(bdiropq, bwh);
32144+ else
32145+ err = bwh;
32146+ AuDbg("%d\n", err);
32147+ } else if (bdiropq >= 0) {
32148+ err = bdiropq;
32149+ AuDbg("%d\n", err);
32150+ }
32151+ dput(parent);
32152+
32153+ if (err >= 0)
32154+ err = au_wbr_nonopq(dentry, err);
32155+
32156+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
32157+ err = -1;
32158+
32159+ AuDbg("%d\n", err);
32160+ return err;
32161+}
32162+
32163+/* ---------------------------------------------------------------------- */
32164+
32165+/* round robin */
32166+static int au_wbr_create_init_rr(struct super_block *sb)
32167+{
32168+ int err;
32169+
32170+ err = au_wbr_bu(sb, au_sbbot(sb));
32171+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
32172+ /* smp_mb(); */
32173+
32174+ AuDbg("b%d\n", err);
32175+ return err;
32176+}
32177+
32178+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
32179+{
32180+ int err, nbr;
32181+ unsigned int u;
32182+ aufs_bindex_t bindex, bbot;
32183+ struct super_block *sb;
32184+ atomic_t *next;
32185+
32186+ err = au_wbr_create_exp(dentry);
32187+ if (err >= 0)
32188+ goto out;
32189+
32190+ sb = dentry->d_sb;
32191+ next = &au_sbi(sb)->si_wbr_rr_next;
32192+ bbot = au_sbbot(sb);
32193+ nbr = bbot + 1;
32194+ for (bindex = 0; bindex <= bbot; bindex++) {
32195+ if (!au_ftest_wbr(flags, DIR)) {
32196+ err = atomic_dec_return(next) + 1;
32197+ /* modulo for 0 is meaningless */
32198+ if (unlikely(!err))
32199+ err = atomic_dec_return(next) + 1;
32200+ } else
32201+ err = atomic_read(next);
32202+ AuDbg("%d\n", err);
32203+ u = err;
32204+ err = u % nbr;
32205+ AuDbg("%d\n", err);
32206+ if (!au_br_rdonly(au_sbr(sb, err)))
32207+ break;
32208+ err = -EROFS;
32209+ }
32210+
32211+ if (err >= 0)
32212+ err = au_wbr_nonopq(dentry, err);
32213+
32214+out:
32215+ AuDbg("%d\n", err);
32216+ return err;
32217+}
32218+
32219+/* ---------------------------------------------------------------------- */
32220+
32221+/* most free space: record the writable branch with the most free bytes */
32222+static void au_mfs(struct dentry *dentry, struct dentry *parent)
32223+{
32224+ struct super_block *sb;
32225+ struct au_branch *br;
32226+ struct au_wbr_mfs *mfs;
32227+ struct dentry *h_parent;
32228+ aufs_bindex_t bindex, bbot;
32229+ int err;
32230+ unsigned long long b, bavail;
32231+ struct path h_path;
32232+ /* reduce the stack usage */
32233+ struct kstatfs *st;
32234+
32235+ st = kmalloc(sizeof(*st), GFP_NOFS);
32236+ if (unlikely(!st)) {
32237+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
32238+ return; /* mfs is left as it was */
32239+ }
32240+
32241+ bavail = 0;
32242+ sb = dentry->d_sb;
32243+ mfs = &au_sbi(sb)->si_wbr_mfs;
32244+ MtxMustLock(&mfs->mfs_lock); /* the caller holds mfs_lock */
32245+ mfs->mfs_bindex = -EROFS; /* stays so when no branch qualifies */
32246+ mfs->mfsrr_bytes = 0;
32247+ if (!parent) { /* no parent: scan every branch of sb */
32248+ bindex = 0;
32249+ bbot = au_sbbot(sb);
32250+ } else {
32251+ bindex = au_dbtop(parent);
32252+ bbot = au_dbtaildir(parent);
32253+ }
32254+
32255+ for (; bindex <= bbot; bindex++) {
32256+ if (parent) {
32257+ h_parent = au_h_dptr(parent, bindex);
32258+ if (!h_parent || d_is_negative(h_parent))
32259+ continue; /* parent is absent on this branch */
32260+ }
32261+ br = au_sbr(sb, bindex);
32262+ if (au_br_rdonly(br))
32263+ continue;
32264+
32265+ /* sb->s_root for NFS is unreliable */
32266+ h_path.mnt = au_br_mnt(br);
32267+ h_path.dentry = h_path.mnt->mnt_root;
32268+ err = vfs_statfs(&h_path, st);
32269+ if (unlikely(err)) {
32270+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
32271+ continue; /* this branch is not a candidate */
32272+ }
32273+
32274+ /* when the available size is equal, the later (larger) bindex wins */
32275+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
32276+ || sizeof(b) < sizeof(st->f_bsize));
32277+ b = st->f_bavail * st->f_bsize;
32278+ br->br_wbr->wbr_bytes = b; /* also read by au_wbr_create_tdmfs() */
32279+ if (b >= bavail) {
32280+ bavail = b;
32281+ mfs->mfs_bindex = bindex;
32282+ mfs->mfs_jiffy = jiffies; /* start of the expire period */
32283+ }
32284+ }
32285+
32286+ mfs->mfsrr_bytes = bavail;
32287+ AuDbg("b%d\n", mfs->mfs_bindex);
32288+ au_delayed_kfree(st);
32289+}
32290+
32291+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
32292+{
32293+ int err;
32294+ struct dentry *parent;
32295+ struct super_block *sb;
32296+ struct au_wbr_mfs *mfs;
32297+
32298+ err = au_wbr_create_exp(dentry); /* a branch index here wins over mfs */
32299+ if (err >= 0)
32300+ goto out;
32301+
32302+ sb = dentry->d_sb;
32303+ parent = NULL; /* NULL lets au_mfs() scan every branch */
32304+ if (au_ftest_wbr(flags, PARENT))
32305+ parent = dget_parent(dentry);
32306+ mfs = &au_sbi(sb)->si_wbr_mfs;
32307+ mutex_lock(&mfs->mfs_lock);
32308+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
32309+ || mfs->mfs_bindex < 0
32310+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
32311+ au_mfs(dentry, parent); /* cache is stale or unusable, rescan */
32312+ err = mfs->mfs_bindex; /* read under mfs_lock, au_mfs() resets it */
32313+ mutex_unlock(&mfs->mfs_lock);
32314+ dput(parent);
32315+
32316+ if (err >= 0)
32317+ err = au_wbr_nonopq(dentry, err);
32318+
32319+out:
32320+ AuDbg("b%d\n", err);
32321+ return err; /* branch index, or a negative value */
32322+}
32323+
32324+static int au_wbr_create_init_mfs(struct super_block *sb)
32325+{
32326+ struct au_wbr_mfs *mfs;
32327+
32328+ mfs = &au_sbi(sb)->si_wbr_mfs;
32329+ mutex_init(&mfs->mfs_lock);
32330+ mfs->mfs_jiffy = 0;
32331+ mfs->mfs_bindex = -EROFS;
32332+
32333+ return 0;
32334+}
32335+
32336+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
32337+{
32338+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
32339+ return 0;
32340+}
32341+
32342+/* ---------------------------------------------------------------------- */
32343+
32344+/* top down regardless of the parent, and then mfs */
32345+static int au_wbr_create_tdmfs(struct dentry *dentry,
32346+ unsigned int flags __maybe_unused)
32347+{
32348+ int err;
32349+ aufs_bindex_t bwh, btail, bindex, bfound, bmfs;
32350+ unsigned long long watermark;
32351+ struct super_block *sb;
32352+ struct au_wbr_mfs *mfs;
32353+ struct au_branch *br;
32354+ struct dentry *parent;
32355+
32356+ sb = dentry->d_sb;
32357+ mfs = &au_sbi(sb)->si_wbr_mfs;
32358+ mutex_lock(&mfs->mfs_lock);
32359+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
32360+ || mfs->mfs_bindex < 0)
32361+ au_mfs(dentry, /*parent*/NULL);
32362+ watermark = mfs->mfsrr_watermark;
32363+ bmfs = mfs->mfs_bindex;
32364+ mutex_unlock(&mfs->mfs_lock);
32365+
32366+ /* another style of au_wbr_create_exp() */
32367+ bwh = au_dbwh(dentry);
32368+ parent = dget_parent(dentry);
32369+ btail = au_dbtaildir(parent);
32370+ if (bwh >= 0 && bwh < btail)
32371+ btail = bwh;
32372+
32373+ err = au_wbr_nonopq(dentry, btail);
32374+ if (unlikely(err < 0))
32375+ goto out;
32376+ btail = err;
32377+ bfound = -1;
32378+ for (bindex = 0; bindex <= btail; bindex++) {
32379+ br = au_sbr(sb, bindex);
32380+ if (au_br_rdonly(br))
32381+ continue;
32382+ if (br->br_wbr->wbr_bytes > watermark) {
32383+ bfound = bindex;
32384+ break;
32385+ }
32386+ }
32387+ err = bfound;
32388+ if (err < 0)
32389+ err = bmfs;
32390+
32391+out:
32392+ dput(parent);
32393+ AuDbg("b%d\n", err);
32394+ return err;
32395+}
32396+
32397+/* ---------------------------------------------------------------------- */
32398+
32399+/* most free space and then round robin */
32400+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
32401+{
32402+ int err;
32403+ struct au_wbr_mfs *mfs;
32404+
32405+ err = au_wbr_create_mfs(dentry, flags);
32406+ if (err >= 0) {
32407+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
32408+ mutex_lock(&mfs->mfs_lock);
32409+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
32410+ err = au_wbr_create_rr(dentry, flags);
32411+ mutex_unlock(&mfs->mfs_lock);
32412+ }
32413+
32414+ AuDbg("b%d\n", err);
32415+ return err;
32416+}
32417+
32418+static int au_wbr_create_init_mfsrr(struct super_block *sb)
32419+{
32420+ int err;
32421+
32422+ au_wbr_create_init_mfs(sb); /* ignore */
32423+ err = au_wbr_create_init_rr(sb);
32424+
32425+ return err;
32426+}
32427+
32428+/* ---------------------------------------------------------------------- */
32429+
32430+/* top down parent and most free space */
32431+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
32432+{
32433+ int err, e2;
32434+ unsigned long long b;
32435+ aufs_bindex_t bindex, btop, bbot;
32436+ struct super_block *sb;
32437+ struct dentry *parent, *h_parent;
32438+ struct au_branch *br;
32439+
32440+ err = au_wbr_create_tdp(dentry, flags);
32441+ if (unlikely(err < 0))
32442+ goto out;
32443+ parent = dget_parent(dentry);
32444+ btop = au_dbtop(parent);
32445+ bbot = au_dbtaildir(parent);
32446+ if (btop == bbot)
32447+ goto out_parent; /* success */
32448+
32449+ e2 = au_wbr_create_mfs(dentry, flags);
32450+ if (e2 < 0)
32451+ goto out_parent; /* success */
32452+
32453+ /* when the available size is equal, select upper one */
32454+ sb = dentry->d_sb;
32455+ br = au_sbr(sb, err);
32456+ b = br->br_wbr->wbr_bytes;
32457+ AuDbg("b%d, %llu\n", err, b);
32458+
32459+ for (bindex = btop; bindex <= bbot; bindex++) {
32460+ h_parent = au_h_dptr(parent, bindex);
32461+ if (!h_parent || d_is_negative(h_parent))
32462+ continue;
32463+
32464+ br = au_sbr(sb, bindex);
32465+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
32466+ b = br->br_wbr->wbr_bytes;
32467+ err = bindex;
32468+ AuDbg("b%d, %llu\n", err, b);
32469+ }
32470+ }
32471+
32472+ if (err >= 0)
32473+ err = au_wbr_nonopq(dentry, err);
32474+
32475+out_parent:
32476+ dput(parent);
32477+out:
32478+ AuDbg("b%d\n", err);
32479+ return err;
32480+}
32481+
32482+/* ---------------------------------------------------------------------- */
32483+
32484+/*
32485+ * - top down parent
32486+ * - most free space with parent
32487+ * - most free space round-robin regardless of the parent
32488+ */
32489+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
32490+{
32491+ int err;
32492+ unsigned long long watermark;
32493+ struct super_block *sb;
32494+ struct au_branch *br;
32495+ struct au_wbr_mfs *mfs;
32496+
32497+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
32498+ if (unlikely(err < 0))
32499+ goto out;
32500+
32501+ sb = dentry->d_sb;
32502+ br = au_sbr(sb, err);
32503+ mfs = &au_sbi(sb)->si_wbr_mfs;
32504+ mutex_lock(&mfs->mfs_lock);
32505+ watermark = mfs->mfsrr_watermark;
32506+ mutex_unlock(&mfs->mfs_lock);
32507+ if (br->br_wbr->wbr_bytes < watermark)
32508+ /* regardless the parent dir */
32509+ err = au_wbr_create_mfsrr(dentry, flags);
32510+
32511+out:
32512+ AuDbg("b%d\n", err);
32513+ return err;
32514+}
32515+
32516+/* ---------------------------------------------------------------------- */
32517+
32518+/* policies for copyup */
32519+
32520+/* top down parent */
32521+static int au_wbr_copyup_tdp(struct dentry *dentry)
32522+{
32523+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
32524+}
32525+
32526+/* bottom up parent */
32527+static int au_wbr_copyup_bup(struct dentry *dentry)
32528+{
32529+ int err;
32530+ aufs_bindex_t bindex, btop;
32531+ struct dentry *parent, *h_parent;
32532+ struct super_block *sb;
32533+
32534+ err = -EROFS;
32535+ sb = dentry->d_sb;
32536+ parent = dget_parent(dentry);
32537+ btop = au_dbtop(parent);
32538+ for (bindex = au_dbtop(dentry); bindex >= btop; bindex--) {
32539+ h_parent = au_h_dptr(parent, bindex);
32540+ if (!h_parent || d_is_negative(h_parent))
32541+ continue;
32542+
32543+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
32544+ err = bindex;
32545+ break;
32546+ }
32547+ }
32548+ dput(parent);
32549+
32550+ /* bottom up here */
32551+ if (unlikely(err < 0))
32552+ err = au_wbr_bu(sb, btop - 1);
32553+
32554+ AuDbg("b%d\n", err);
32555+ return err;
32556+}
32557+
32558+/* bottom up */
32559+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop)
32560+{
32561+ int err;
32562+
32563+ err = au_wbr_bu(dentry->d_sb, btop);
32564+ AuDbg("b%d\n", err);
32565+ if (err > btop)
32566+ err = au_wbr_nonopq(dentry, err);
32567+
32568+ AuDbg("b%d\n", err);
32569+ return err;
32570+}
32571+
32572+static int au_wbr_copyup_bu(struct dentry *dentry)
32573+{
32574+ int err;
32575+ aufs_bindex_t btop;
32576+
32577+ btop = au_dbtop(dentry);
32578+ err = au_wbr_do_copyup_bu(dentry, btop);
32579+ return err;
32580+}
32581+
32582+/* ---------------------------------------------------------------------- */
32583+
32584+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
32585+ [AuWbrCopyup_TDP] = {
32586+ .copyup = au_wbr_copyup_tdp
32587+ },
32588+ [AuWbrCopyup_BUP] = {
32589+ .copyup = au_wbr_copyup_bup
32590+ },
32591+ [AuWbrCopyup_BU] = {
32592+ .copyup = au_wbr_copyup_bu
32593+ }
32594+};
32595+
32596+struct au_wbr_create_operations au_wbr_create_ops[] = {
32597+ [AuWbrCreate_TDP] = {
32598+ .create = au_wbr_create_tdp
32599+ },
32600+ [AuWbrCreate_RR] = {
32601+ .create = au_wbr_create_rr,
32602+ .init = au_wbr_create_init_rr
32603+ },
32604+ [AuWbrCreate_MFS] = {
32605+ .create = au_wbr_create_mfs,
32606+ .init = au_wbr_create_init_mfs,
32607+ .fin = au_wbr_create_fin_mfs
32608+ },
32609+ [AuWbrCreate_MFSV] = {
32610+ .create = au_wbr_create_mfs,
32611+ .init = au_wbr_create_init_mfs,
32612+ .fin = au_wbr_create_fin_mfs
32613+ },
32614+ [AuWbrCreate_MFSRR] = {
32615+ .create = au_wbr_create_mfsrr,
32616+ .init = au_wbr_create_init_mfsrr,
32617+ .fin = au_wbr_create_fin_mfs
32618+ },
32619+ [AuWbrCreate_MFSRRV] = {
32620+ .create = au_wbr_create_mfsrr,
32621+ .init = au_wbr_create_init_mfsrr,
32622+ .fin = au_wbr_create_fin_mfs
32623+ },
32624+ [AuWbrCreate_TDMFS] = {
32625+ .create = au_wbr_create_tdmfs,
32626+ .init = au_wbr_create_init_mfs,
32627+ .fin = au_wbr_create_fin_mfs
32628+ },
32629+ [AuWbrCreate_TDMFSV] = {
32630+ .create = au_wbr_create_tdmfs,
32631+ .init = au_wbr_create_init_mfs,
32632+ .fin = au_wbr_create_fin_mfs
32633+ },
32634+ [AuWbrCreate_PMFS] = {
32635+ .create = au_wbr_create_pmfs,
32636+ .init = au_wbr_create_init_mfs,
32637+ .fin = au_wbr_create_fin_mfs
32638+ },
32639+ [AuWbrCreate_PMFSV] = {
32640+ .create = au_wbr_create_pmfs,
32641+ .init = au_wbr_create_init_mfs,
32642+ .fin = au_wbr_create_fin_mfs
32643+ },
32644+ [AuWbrCreate_PMFSRR] = {
32645+ .create = au_wbr_create_pmfsrr,
32646+ .init = au_wbr_create_init_mfsrr,
32647+ .fin = au_wbr_create_fin_mfs
32648+ },
32649+ [AuWbrCreate_PMFSRRV] = {
32650+ .create = au_wbr_create_pmfsrr,
32651+ .init = au_wbr_create_init_mfsrr,
32652+ .fin = au_wbr_create_fin_mfs
32653+ }
32654+};
32655diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
32656--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
32657+++ linux/fs/aufs/whout.c 2016-12-17 12:28:17.598545045 +0100
32658@@ -0,0 +1,1061 @@
32659+/*
32660+ * Copyright (C) 2005-2016 Junjiro R. Okajima
32661+ *
32662+ * This program, aufs is free software; you can redistribute it and/or modify
32663+ * it under the terms of the GNU General Public License as published by
32664+ * the Free Software Foundation; either version 2 of the License, or
32665+ * (at your option) any later version.
32666+ *
32667+ * This program is distributed in the hope that it will be useful,
32668+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32669+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32670+ * GNU General Public License for more details.
32671+ *
32672+ * You should have received a copy of the GNU General Public License
32673+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
32674+ */
32675+
32676+/*
32677+ * whiteout for logical deletion and opaque directory
32678+ */
32679+
32680+#include "aufs.h"
32681+
32682+#define WH_MASK S_IRUGO
32683+
32684+/*
32685+ * If a directory contains this file, then it is opaque. We start with the
32686+ * .wh. flag so that it is blocked by lookup.
32687+ */
32688+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
32689+ sizeof(AUFS_WH_DIROPQ) - 1);
32690+
32691+/*
32692+ * generate whiteout name, which is NOT terminated by NUL.
32693+ * @wh: whiteout qstr, whose name and len are set here
32694+ * @name: original d_name (both its name and len are used)
32695+ * returns zero when succeeds, otherwise a negative error
32696+ * (-ENAMETOOLONG or -ENOMEM).
32697+ * succeeded value as wh->name should be freed by kfree().
32698+ */
32699+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
32700+{
32701+ char *p;
32702+
32703+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
32704+ return -ENAMETOOLONG;
32705+
32706+ wh->len = name->len + AUFS_WH_PFX_LEN;
32707+ p = kmalloc(wh->len, GFP_NOFS);
32708+ wh->name = p;
32709+ if (p) {
32710+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32711+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
32712+ /* smp_mb(); */
32713+ return 0;
32714+ }
32715+ return -ENOMEM;
32716+}
32717+
32718+/* ---------------------------------------------------------------------- */
32719+
32720+/*
32721+ * test if the @wh_name exists under @h_parent.
32722+ * @try_sio specifies the necessity of super-io.
32723+ */
32724+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
32725+{
32726+ int err;
32727+ struct dentry *wh_dentry;
32728+
32729+ if (!try_sio)
32730+ wh_dentry = vfsub_lkup_one(wh_name, h_parent);
32731+ else
32732+ wh_dentry = au_sio_lkup_one(wh_name, h_parent);
32733+ err = PTR_ERR(wh_dentry);
32734+ if (IS_ERR(wh_dentry)) {
32735+ if (err == -ENAMETOOLONG)
32736+ err = 0;
32737+ goto out;
32738+ }
32739+
32740+ err = 0;
32741+ if (d_is_negative(wh_dentry))
32742+ goto out_wh; /* success */
32743+
32744+ err = 1;
32745+ if (d_is_reg(wh_dentry))
32746+ goto out_wh; /* success */
32747+
32748+ err = -EIO;
32749+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
32750+ wh_dentry, d_inode(wh_dentry)->i_mode);
32751+
32752+out_wh:
32753+ dput(wh_dentry);
32754+out:
32755+ return err;
32756+}
32757+
32758+/*
32759+ * test if the @h_dentry sets opaque or not.
32760+ */
32761+int au_diropq_test(struct dentry *h_dentry)
32762+{
32763+ int err;
32764+ struct inode *h_dir;
32765+
32766+ h_dir = d_inode(h_dentry);
32767+ err = au_wh_test(h_dentry, &diropq_name,
32768+ au_test_h_perm_sio(h_dir, MAY_EXEC));
32769+ return err;
32770+}
32771+
32772+/*
32773+ * returns a negative dentry whose name is unique and temporary.
32774+ */
32775+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
32776+ struct qstr *prefix)
32777+{
32778+ struct dentry *dentry;
32779+ int i;
32780+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
32781+ *name, *p;
32782+ /* strict atomic_t is unnecessary here */
32783+ static unsigned short cnt;
32784+ struct qstr qs;
32785+
32786+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
32787+
32788+ name = defname;
32789+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
32790+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
32791+ dentry = ERR_PTR(-ENAMETOOLONG);
32792+ if (unlikely(qs.len > NAME_MAX))
32793+ goto out;
32794+ dentry = ERR_PTR(-ENOMEM);
32795+ name = kmalloc(qs.len + 1, GFP_NOFS);
32796+ if (unlikely(!name))
32797+ goto out;
32798+ }
32799+
32800+ /* doubly whiteout-ed */
32801+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
32802+ p = name + AUFS_WH_PFX_LEN * 2;
32803+ memcpy(p, prefix->name, prefix->len);
32804+ p += prefix->len;
32805+ *p++ = '.';
32806+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
32807+
32808+ qs.name = name;
32809+ for (i = 0; i < 3; i++) {
32810+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
32811+ dentry = au_sio_lkup_one(&qs, h_parent);
32812+ if (IS_ERR(dentry) || d_is_negative(dentry))
32813+ goto out_name;
32814+ dput(dentry);
32815+ }
32816+ /* pr_warn("could not get random name\n"); */
32817+ dentry = ERR_PTR(-EEXIST);
32818+ AuDbg("%.*s\n", AuLNPair(&qs));
32819+ BUG();
32820+
32821+out_name:
32822+ if (name != defname)
32823+ au_delayed_kfree(name);
32824+out:
32825+ AuTraceErrPtr(dentry);
32826+ return dentry;
32827+}
32828+
32829+/*
32830+ * rename the @h_dentry on @br to the whiteouted temporary name.
32831+ */
32832+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
32833+{
32834+ int err;
32835+ struct path h_path = {
32836+ .mnt = au_br_mnt(br)
32837+ };
32838+ struct inode *h_dir, *delegated;
32839+ struct dentry *h_parent;
32840+
32841+ h_parent = h_dentry->d_parent; /* dir inode is locked */
32842+ h_dir = d_inode(h_parent);
32843+ IMustLock(h_dir);
32844+
32845+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
32846+ err = PTR_ERR(h_path.dentry);
32847+ if (IS_ERR(h_path.dentry))
32848+ goto out;
32849+
32850+ /* under the same dir, no need to lock_rename() */
32851+ delegated = NULL;
32852+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated,
32853+ /*flags*/0);
32854+ AuTraceErr(err);
32855+ if (unlikely(err == -EWOULDBLOCK)) {
32856+ pr_warn("cannot retry for NFSv4 delegation"
32857+ " for an internal rename\n");
32858+ iput(delegated);
32859+ }
32860+ dput(h_path.dentry);
32861+
32862+out:
32863+ AuTraceErr(err);
32864+ return err;
32865+}
32866+
32867+/* ---------------------------------------------------------------------- */
32868+/*
32869+ * functions for removing a whiteout
32870+ */
32871+
32872+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
32873+{
32874+ int err, force;
32875+ struct inode *delegated;
32876+
32877+ /*
32878+ * forces superio when the dir has a sticky bit.
32879+ * this may be a violation of unix fs semantics.
32880+ */
32881+ force = (h_dir->i_mode & S_ISVTX)
32882+ && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
32883+ delegated = NULL;
32884+ err = vfsub_unlink(h_dir, h_path, &delegated, force);
32885+ if (unlikely(err == -EWOULDBLOCK)) {
32886+ pr_warn("cannot retry for NFSv4 delegation"
32887+ " for an internal unlink\n");
32888+ iput(delegated);
32889+ }
32890+ return err;
32891+}
32892+
32893+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
32894+ struct dentry *dentry)
32895+{
32896+ int err;
32897+
32898+ err = do_unlink_wh(h_dir, h_path);
32899+ if (!err && dentry)
32900+ au_set_dbwh(dentry, -1);
32901+
32902+ return err;
32903+}
32904+
32905+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
32906+ struct au_branch *br)
32907+{
32908+ int err;
32909+ struct path h_path = {
32910+ .mnt = au_br_mnt(br)
32911+ };
32912+
32913+ err = 0;
32914+ h_path.dentry = vfsub_lkup_one(wh, h_parent);
32915+ if (IS_ERR(h_path.dentry))
32916+ err = PTR_ERR(h_path.dentry);
32917+ else {
32918+ if (d_is_reg(h_path.dentry))
32919+ err = do_unlink_wh(d_inode(h_parent), &h_path);
32920+ dput(h_path.dentry);
32921+ }
32922+
32923+ return err;
32924+}
32925+
32926+/* ---------------------------------------------------------------------- */
32927+/*
32928+ * initialize/clean whiteout for a branch
32929+ */
32930+
32931+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
32932+ const int isdir)
32933+{
32934+ int err;
32935+ struct inode *delegated;
32936+
32937+ if (d_is_negative(whpath->dentry))
32938+ return;
32939+
32940+ if (isdir)
32941+ err = vfsub_rmdir(h_dir, whpath);
32942+ else {
32943+ delegated = NULL;
32944+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
32945+ if (unlikely(err == -EWOULDBLOCK)) {
32946+ pr_warn("cannot retry for NFSv4 delegation"
32947+ " for an internal unlink\n");
32948+ iput(delegated);
32949+ }
32950+ }
32951+ if (unlikely(err))
32952+ pr_warn("failed removing %pd (%d), ignored.\n",
32953+ whpath->dentry, err);
32954+}
32955+
32956+static int test_linkable(struct dentry *h_root)
32957+{
32958+ struct inode *h_dir = d_inode(h_root);
32959+
32960+ if (h_dir->i_op->link)
32961+ return 0;
32962+
32963+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
32964+ h_root, au_sbtype(h_root->d_sb));
32965+ return -ENOSYS;
32966+}
32967+
32968+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
32969+static int au_whdir(struct inode *h_dir, struct path *path)
32970+{
32971+ int err;
32972+
32973+ err = -EEXIST;
32974+ if (d_is_negative(path->dentry)) {
32975+ int mode = S_IRWXU;
32976+
32977+ if (au_test_nfs(path->dentry->d_sb))
32978+ mode |= S_IXUGO;
32979+ err = vfsub_mkdir(h_dir, path, mode);
32980+ } else if (d_is_dir(path->dentry))
32981+ err = 0;
32982+ else
32983+ pr_err("unknown %pd exists\n", path->dentry);
32984+
32985+ return err;
32986+}
32987+
32988+struct au_wh_base {
32989+ const struct qstr *name;
32990+ struct dentry *dentry;
32991+};
32992+
32993+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
32994+ struct path *h_path)
32995+{
32996+ h_path->dentry = base[AuBrWh_BASE].dentry;
32997+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32998+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32999+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33000+ h_path->dentry = base[AuBrWh_ORPH].dentry;
33001+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33002+}
33003+
33004+/*
33005+ * returns tri-state,
33006+ * minus: error, caller should print the message
33007+ * zero: success
33008+ * plus: error, caller should NOT print the message
33009+ */
33010+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
33011+ int do_plink, struct au_wh_base base[],
33012+ struct path *h_path)
33013+{
33014+ int err;
33015+ struct inode *h_dir;
33016+
33017+ h_dir = d_inode(h_root);
33018+ h_path->dentry = base[AuBrWh_BASE].dentry;
33019+ au_wh_clean(h_dir, h_path, /*isdir*/0);
33020+ h_path->dentry = base[AuBrWh_PLINK].dentry;
33021+ if (do_plink) {
33022+ err = test_linkable(h_root);
33023+ if (unlikely(err)) {
33024+ err = 1;
33025+ goto out;
33026+ }
33027+
33028+ err = au_whdir(h_dir, h_path);
33029+ if (unlikely(err))
33030+ goto out;
33031+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
33032+ } else
33033+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33034+ h_path->dentry = base[AuBrWh_ORPH].dentry;
33035+ err = au_whdir(h_dir, h_path);
33036+ if (unlikely(err))
33037+ goto out;
33038+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
33039+
33040+out:
33041+ return err;
33042+}
33043+
33044+/*
33045+ * for the moment, aufs supports the branch filesystem which does not support
33046+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
33047+ * copyup failed. finally, such filesystem will not be used as the writable
33048+ * branch.
33049+ *
33050+ * returns tri-state, see above.
33051+ */
33052+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
33053+ int do_plink, struct au_wh_base base[],
33054+ struct path *h_path)
33055+{
33056+ int err;
33057+ struct inode *h_dir;
33058+
33059+ WbrWhMustWriteLock(wbr);
33060+
33061+ err = test_linkable(h_root);
33062+ if (unlikely(err)) {
33063+ err = 1;
33064+ goto out;
33065+ }
33066+
33067+ /*
33068+ * todo: should this create be done in /sbin/mount.aufs helper?
33069+ */
33070+ err = -EEXIST;
33071+ h_dir = d_inode(h_root);
33072+ if (d_is_negative(base[AuBrWh_BASE].dentry)) {
33073+ h_path->dentry = base[AuBrWh_BASE].dentry;
33074+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
33075+ } else if (d_is_reg(base[AuBrWh_BASE].dentry))
33076+ err = 0;
33077+ else
33078+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
33079+ if (unlikely(err))
33080+ goto out;
33081+
33082+ h_path->dentry = base[AuBrWh_PLINK].dentry;
33083+ if (do_plink) {
33084+ err = au_whdir(h_dir, h_path);
33085+ if (unlikely(err))
33086+ goto out;
33087+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
33088+ } else
33089+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33090+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
33091+
33092+ h_path->dentry = base[AuBrWh_ORPH].dentry;
33093+ err = au_whdir(h_dir, h_path);
33094+ if (unlikely(err))
33095+ goto out;
33096+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
33097+
33098+out:
33099+ return err;
33100+}
33101+
33102+/*
33103+ * initialize the whiteout base file/dir for @br.
33104+ */
33105+int au_wh_init(struct au_branch *br, struct super_block *sb)
33106+{
33107+ int err, i;
33108+ const unsigned char do_plink
33109+ = !!au_opt_test(au_mntflags(sb), PLINK);
33110+ struct inode *h_dir;
33111+ struct path path = br->br_path;
33112+ struct dentry *h_root = path.dentry;
33113+ struct au_wbr *wbr = br->br_wbr;
33114+ static const struct qstr base_name[] = {
33115+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
33116+ sizeof(AUFS_BASE_NAME) - 1),
33117+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
33118+ sizeof(AUFS_PLINKDIR_NAME) - 1),
33119+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
33120+ sizeof(AUFS_ORPHDIR_NAME) - 1)
33121+ };
33122+ struct au_wh_base base[] = {
33123+ [AuBrWh_BASE] = {
33124+ .name = base_name + AuBrWh_BASE,
33125+ .dentry = NULL
33126+ },
33127+ [AuBrWh_PLINK] = {
33128+ .name = base_name + AuBrWh_PLINK,
33129+ .dentry = NULL
33130+ },
33131+ [AuBrWh_ORPH] = {
33132+ .name = base_name + AuBrWh_ORPH,
33133+ .dentry = NULL
33134+ }
33135+ };
33136+
33137+ if (wbr)
33138+ WbrWhMustWriteLock(wbr);
33139+
33140+ for (i = 0; i < AuBrWh_Last; i++) {
33141+ /* doubly whiteouted */
33142+ struct dentry *d;
33143+
33144+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
33145+ err = PTR_ERR(d);
33146+ if (IS_ERR(d))
33147+ goto out;
33148+
33149+ base[i].dentry = d;
33150+ AuDebugOn(wbr
33151+ && wbr->wbr_wh[i]
33152+ && wbr->wbr_wh[i] != base[i].dentry);
33153+ }
33154+
33155+ if (wbr)
33156+ for (i = 0; i < AuBrWh_Last; i++) {
33157+ dput(wbr->wbr_wh[i]);
33158+ wbr->wbr_wh[i] = NULL;
33159+ }
33160+
33161+ err = 0;
33162+ if (!au_br_writable(br->br_perm)) {
33163+ h_dir = d_inode(h_root);
33164+ au_wh_init_ro(h_dir, base, &path);
33165+ } else if (!au_br_wh_linkable(br->br_perm)) {
33166+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
33167+ if (err > 0)
33168+ goto out;
33169+ else if (err)
33170+ goto out_err;
33171+ } else {
33172+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
33173+ if (err > 0)
33174+ goto out;
33175+ else if (err)
33176+ goto out_err;
33177+ }
33178+ goto out; /* success */
33179+
33180+out_err:
33181+ pr_err("an error(%d) on the writable branch %pd(%s)\n",
33182+ err, h_root, au_sbtype(h_root->d_sb));
33183+out:
33184+ for (i = 0; i < AuBrWh_Last; i++)
33185+ dput(base[i].dentry);
33186+ return err;
33187+}
33188+
33189+/* ---------------------------------------------------------------------- */
33190+/*
33191+ * whiteouts are all hard-linked usually.
33192+ * when its link count reaches a ceiling, we create a new whiteout base
33193+ * asynchronously.
33194+ */
33195+
33196+struct reinit_br_wh {
33197+ struct super_block *sb;
33198+ struct au_branch *br;
33199+};
33200+
33201+static void reinit_br_wh(void *arg)
33202+{
33203+ int err;
33204+ aufs_bindex_t bindex;
33205+ struct path h_path;
33206+ struct reinit_br_wh *a = arg;
33207+ struct au_wbr *wbr;
33208+ struct inode *dir, *delegated;
33209+ struct dentry *h_root;
33210+ struct au_hinode *hdir;
33211+
33212+ err = 0;
33213+ wbr = a->br->br_wbr;
33214+ /* big aufs lock */
33215+ si_noflush_write_lock(a->sb);
33216+ if (!au_br_writable(a->br->br_perm))
33217+ goto out;
33218+ bindex = au_br_index(a->sb, a->br->br_id);
33219+ if (unlikely(bindex < 0))
33220+ goto out;
33221+
33222+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
33223+ dir = d_inode(a->sb->s_root);
33224+ hdir = au_hi(dir, bindex);
33225+ h_root = au_h_dptr(a->sb->s_root, bindex);
33226+ AuDebugOn(h_root != au_br_dentry(a->br));
33227+
33228+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
33229+ wbr_wh_write_lock(wbr);
33230+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
33231+ h_root, a->br);
33232+ if (!err) {
33233+ h_path.dentry = wbr->wbr_whbase;
33234+ h_path.mnt = au_br_mnt(a->br);
33235+ delegated = NULL;
33236+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
33237+ /*force*/0);
33238+ if (unlikely(err == -EWOULDBLOCK)) {
33239+ pr_warn("cannot retry for NFSv4 delegation"
33240+ " for an internal unlink\n");
33241+ iput(delegated);
33242+ }
33243+ } else {
33244+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
33245+ err = 0;
33246+ }
33247+ dput(wbr->wbr_whbase);
33248+ wbr->wbr_whbase = NULL;
33249+ if (!err)
33250+ err = au_wh_init(a->br, a->sb);
33251+ wbr_wh_write_unlock(wbr);
33252+ au_hn_inode_unlock(hdir);
33253+ di_read_unlock(a->sb->s_root, AuLock_IR);
33254+ if (!err)
33255+ au_fhsm_wrote(a->sb, bindex, /*force*/0);
33256+
33257+out:
33258+ if (wbr)
33259+ atomic_dec(&wbr->wbr_wh_running);
33260+ au_br_put(a->br);
33261+ si_write_unlock(a->sb);
33262+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
33263+ au_delayed_kfree(arg);
33264+ if (unlikely(err))
33265+ AuIOErr("err %d\n", err);
33266+}
33267+
33268+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
33269+{
33270+ int do_dec, wkq_err;
33271+ struct reinit_br_wh *arg;
33272+
33273+ do_dec = 1;
33274+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
33275+ goto out;
33276+
33277+ /* ignore ENOMEM */
33278+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
33279+ if (arg) {
33280+ /*
33281+ * dec(wh_running), kfree(arg) and dec(br_count)
33282+ * in reinit function
33283+ */
33284+ arg->sb = sb;
33285+ arg->br = br;
33286+ au_br_get(br);
33287+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
33288+ if (unlikely(wkq_err)) {
33289+ atomic_dec(&br->br_wbr->wbr_wh_running);
33290+ au_br_put(br);
33291+ au_delayed_kfree(arg);
33292+ }
33293+ do_dec = 0;
33294+ }
33295+
33296+out:
33297+ if (do_dec)
33298+ atomic_dec(&br->br_wbr->wbr_wh_running);
33299+}
33300+
33301+/* ---------------------------------------------------------------------- */
33302+
33303+/*
33304+ * create the whiteout @wh.
33305+ */
33306+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
33307+ struct dentry *wh)
33308+{
33309+ int err;
33310+ struct path h_path = {
33311+ .dentry = wh
33312+ };
33313+ struct au_branch *br;
33314+ struct au_wbr *wbr;
33315+ struct dentry *h_parent;
33316+ struct inode *h_dir, *delegated;
33317+
33318+ h_parent = wh->d_parent; /* dir inode is locked */
33319+ h_dir = d_inode(h_parent);
33320+ IMustLock(h_dir);
33321+
33322+ br = au_sbr(sb, bindex);
33323+ h_path.mnt = au_br_mnt(br);
33324+ wbr = br->br_wbr;
33325+ wbr_wh_read_lock(wbr);
33326+ if (wbr->wbr_whbase) {
33327+ delegated = NULL;
33328+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
33329+ if (unlikely(err == -EWOULDBLOCK)) {
33330+ pr_warn("cannot retry for NFSv4 delegation"
33331+ " for an internal link\n");
33332+ iput(delegated);
33333+ }
33334+ if (!err || err != -EMLINK)
33335+ goto out;
33336+
33337+ /* link count full. re-initialize br_whbase. */
33338+ kick_reinit_br_wh(sb, br);
33339+ }
33340+
33341+ /* return this error in this context */
33342+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
33343+ if (!err)
33344+ au_fhsm_wrote(sb, bindex, /*force*/0);
33345+
33346+out:
33347+ wbr_wh_read_unlock(wbr);
33348+ return err;
33349+}
33350+
33351+/* ---------------------------------------------------------------------- */
33352+
33353+/*
33354+ * create or remove the diropq.
33355+ */
33356+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
33357+ unsigned int flags)
33358+{
33359+ struct dentry *opq_dentry, *h_dentry;
33360+ struct super_block *sb;
33361+ struct au_branch *br;
33362+ int err;
33363+
33364+ sb = dentry->d_sb;
33365+ br = au_sbr(sb, bindex);
33366+ h_dentry = au_h_dptr(dentry, bindex);
33367+ opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
33368+ if (IS_ERR(opq_dentry))
33369+ goto out;
33370+
33371+ if (au_ftest_diropq(flags, CREATE)) {
33372+ err = link_or_create_wh(sb, bindex, opq_dentry);
33373+ if (!err) {
33374+ au_set_dbdiropq(dentry, bindex);
33375+ goto out; /* success */
33376+ }
33377+ } else {
33378+ struct path tmp = {
33379+ .dentry = opq_dentry,
33380+ .mnt = au_br_mnt(br)
33381+ };
33382+ err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
33383+ if (!err)
33384+ au_set_dbdiropq(dentry, -1);
33385+ }
33386+ dput(opq_dentry);
33387+ opq_dentry = ERR_PTR(err);
33388+
33389+out:
33390+ return opq_dentry;
33391+}
33392+
33393+struct do_diropq_args {
33394+ struct dentry **errp;
33395+ struct dentry *dentry;
33396+ aufs_bindex_t bindex;
33397+ unsigned int flags;
33398+};
33399+
33400+static void call_do_diropq(void *args)
33401+{
33402+ struct do_diropq_args *a = args;
33403+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
33404+}
33405+
33406+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33407+ unsigned int flags)
33408+{
33409+ struct dentry *diropq, *h_dentry;
33410+
33411+ h_dentry = au_h_dptr(dentry, bindex);
33412+ if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
33413+ diropq = do_diropq(dentry, bindex, flags);
33414+ else {
33415+ int wkq_err;
33416+ struct do_diropq_args args = {
33417+ .errp = &diropq,
33418+ .dentry = dentry,
33419+ .bindex = bindex,
33420+ .flags = flags
33421+ };
33422+
33423+ wkq_err = au_wkq_wait(call_do_diropq, &args);
33424+ if (unlikely(wkq_err))
33425+ diropq = ERR_PTR(wkq_err);
33426+ }
33427+
33428+ return diropq;
33429+}
33430+
33431+/* ---------------------------------------------------------------------- */
33432+
33433+/*
33434+ * look up the whiteout dentry for @base_name under @h_parent.
33435+ * @h_parent: lower parent dentry which must exist and be locked
33436+ * @base_name: name of the dentry to be whited out (@br is not referenced here)
33437+ * returns the whiteout dentry, or ERR_PTR() if au_wh_name_alloc() fails.
33438+ */
33439+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33440+ struct au_branch *br)
33441+{
33442+ int err;
33443+ struct qstr wh_name;
33444+ struct dentry *wh_dentry;
33445+
33446+ err = au_wh_name_alloc(&wh_name, base_name);
33447+ wh_dentry = ERR_PTR(err); /* returned as-is when the allocation failed */
33448+ if (!err) {
33449+ wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
33450+ au_delayed_kfree(wh_name.name); /* the allocated name is only needed for the lookup */
33451+ }
33452+ return wh_dentry;
33453+}
33454+
33455+/*
33456+ * link/create a whiteout for @dentry on @bindex; returns the whiteout dentry or ERR_PTR().
33457+ */
33458+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33459+ struct dentry *h_parent)
33460+{
33461+ struct dentry *wh_dentry;
33462+ struct super_block *sb;
33463+ int err;
33464+
33465+ sb = dentry->d_sb;
33466+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
33467+ if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) { /* only create when no whiteout exists yet */
33468+ err = link_or_create_wh(sb, bindex, wh_dentry);
33469+ if (!err) {
33470+ au_set_dbwh(dentry, bindex);
33471+ au_fhsm_wrote(sb, bindex, /*force*/0);
33472+ } else {
33473+ dput(wh_dentry); /* drop the looked-up dentry before replacing it with the error */
33474+ wh_dentry = ERR_PTR(err);
33475+ }
33476+ }
33477+
33478+ return wh_dentry; /* a lookup error or an already-positive dentry is returned unchanged */
33479+}
33480+
33481+/* ---------------------------------------------------------------------- */
33482+
33483+/* Delete all whiteouts in this directory on branch bindex; returns 0 or the first error hit. */
33484+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
33485+ aufs_bindex_t bindex, struct au_branch *br)
33486+{
33487+ int err;
33488+ unsigned long ul, n;
33489+ struct qstr wh_name;
33490+ char *p;
33491+ struct hlist_head *head;
33492+ struct au_vdir_wh *pos;
33493+ struct au_vdir_destr *str;
33494+
33495+ err = -ENOMEM;
33496+ p = (void *)__get_free_page(GFP_NOFS); /* one page serves as the scratch buffer for the name */
33497+ wh_name.name = p;
33498+ if (unlikely(!wh_name.name))
33499+ goto out;
33500+
33501+ err = 0;
33502+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); /* the prefix is written once; names are appended after it */
33503+ p += AUFS_WH_PFX_LEN;
33504+ n = whlist->nh_num;
33505+ head = whlist->nh_head;
33506+ for (ul = 0; !err && ul < n; ul++, head++) { /* walk every hash bucket; !err ends the walk after a failure */
33507+ hlist_for_each_entry(pos, head, wh_hash) {
33508+ if (pos->wh_bindex != bindex) /* skip entries recorded for other branches */
33509+ continue;
33510+
33511+ str = &pos->wh_str;
33512+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) { /* NOTE(review): bound is PATH_MAX while the buffer is one page - confirm they agree */
33513+ memcpy(p, str->name, str->len);
33514+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
33515+ err = unlink_wh_name(h_dentry, &wh_name, br);
33516+ if (!err)
33517+ continue;
33518+ break; /* leaves only the inner loop; the outer loop stops via !err */
33519+ }
33520+ AuIOErr("whiteout name too long %.*s\n",
33521+ str->len, str->name);
33522+ err = -EIO;
33523+ break;
33524+ }
33525+ }
33526+ au_delayed_free_page((unsigned long)wh_name.name); /* wh_name.name still points at the page start */
33527+
33528+out:
33529+ return err;
33530+}
33531+
33532+struct del_wh_children_args { /* argument bundle handed to call_del_wh_children() */
33533+ int *errp; /* where the return value of del_wh_children() is stored */
33534+ struct dentry *h_dentry;
33535+ struct au_nhash *whlist;
33536+ aufs_bindex_t bindex;
33537+ struct au_branch *br;
33538+};
33539+
33540+static void call_del_wh_children(void *args)
33541+{ /* callback passed to au_wkq_wait(): unpack @args and run del_wh_children() */
33542+ struct del_wh_children_args *a = args;
33543+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br); /* result goes back through errp */
33544+}
33545+
33546+/* ---------------------------------------------------------------------- */
33547+
33548+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
33549+{ /* allocate a zeroed au_whtmp_rmdir and its whiteout hash list; returns it or ERR_PTR() */
33550+ struct au_whtmp_rmdir *whtmp;
33551+ int err;
33552+ unsigned int rdhash;
33553+
33554+ SiMustAnyLock(sb);
33555+
33556+ whtmp = kzalloc(sizeof(*whtmp), gfp);
33557+ if (unlikely(!whtmp)) {
33558+ whtmp = ERR_PTR(-ENOMEM);
33559+ goto out;
33560+ }
33561+
33562+ /* no estimation for dir size */
33563+ rdhash = au_sbi(sb)->si_rdhash;
33564+ if (!rdhash) /* fall back to the default when the superblock value is zero */
33565+ rdhash = AUFS_RDHASH_DEF;
33566+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
33567+ if (unlikely(err)) {
33568+ au_delayed_kfree(whtmp); /* undo the kzalloc before reporting the error */
33569+ whtmp = ERR_PTR(err);
33570+ }
33571+
33572+out:
33573+ return whtmp;
33574+}
33575+
33576+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
33577+{ /* release everything held by @whtmp, then @whtmp itself */
33578+ if (whtmp->br) /* br may still be unset (struct is kzalloc'ed) */
33579+ au_br_put(whtmp->br);
33580+ dput(whtmp->wh_dentry);
33581+ iput(whtmp->dir);
33582+ au_nhash_wh_free(&whtmp->whlist);
33583+ au_delayed_kfree(whtmp);
33584+}
33585+
33586+/*
33587+ * rmdir the whiteouted temporary named dir @wh_dentry on branch @bindex.
33588+ * @whlist: whiteouted children, unlinked first; returns 0 or a negative error.
33589+ */
33590+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33591+ struct dentry *wh_dentry, struct au_nhash *whlist)
33592+{
33593+ int err;
33594+ unsigned int h_nlink;
33595+ struct path h_tmp;
33596+ struct inode *wh_inode, *h_dir;
33597+ struct au_branch *br;
33598+
33599+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
33600+ IMustLock(h_dir);
33601+
33602+ br = au_sbr(dir->i_sb, bindex);
33603+ wh_inode = d_inode(wh_dentry);
33604+ inode_lock_nested(wh_inode, AuLsc_I_CHILD);
33605+
33606+ /*
33607+ * someone else might change some whiteouts while we were sleeping.
33608+ * it means this whlist may have an obsoleted entry.
33609+ */
33610+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
33611+ err = del_wh_children(wh_dentry, whlist, bindex, br);
33612+ else {
33613+ int wkq_err;
33614+ struct del_wh_children_args args = {
33615+ .errp = &err, /* call_del_wh_children() stores its result here */
33616+ .h_dentry = wh_dentry,
33617+ .whlist = whlist,
33618+ .bindex = bindex,
33619+ .br = br
33620+ };
33621+
33622+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
33623+ if (unlikely(wkq_err)) /* au_wkq_wait() itself failed: report that error instead */
33624+ err = wkq_err;
33625+ }
33626+ inode_unlock(wh_inode);
33627+
33628+ if (!err) {
33629+ h_tmp.dentry = wh_dentry;
33630+ h_tmp.mnt = au_br_mnt(br);
33631+ h_nlink = h_dir->i_nlink; /* parent link count before the rmdir */
33632+ err = vfsub_rmdir(h_dir, &h_tmp);
33633+ /* some fs doesn't change the parent nlink in some cases */
33634+ h_nlink -= h_dir->i_nlink; /* now the difference; non-zero means the parent's nlink changed */
33635+ }
33636+
33637+ if (!err) {
33638+ if (au_ibtop(dir) == bindex) {
33639+ /* todo: dir->i_mutex is necessary */
33640+ au_cpup_attr_timesizes(dir);
33641+ if (h_nlink)
33642+ vfsub_drop_nlink(dir);
33643+ }
33644+ return 0; /* success */
33645+ }
33646+
33647+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
33648+ return err; /* the message says "ignored", but err is still returned to the caller */
33649+}
33650+
33651+static void call_rmdir_whtmp(void *args)
33652+{ /* callback queued by au_whtmp_kick_rmdir(): remove the dir described by @args, then free @args */
33653+ int err;
33654+ aufs_bindex_t bindex;
33655+ struct au_whtmp_rmdir *a = args;
33656+ struct super_block *sb;
33657+ struct dentry *h_parent;
33658+ struct inode *h_dir;
33659+ struct au_hinode *hdir;
33660+
33661+ /* rmdir by nfsd may cause deadlock with this i_mutex */
33662+ /* inode_lock(a->dir); */
33663+ err = -EROFS; /* reported by both early exits below */
33664+ sb = a->dir->i_sb; /* saved now because @a is freed before sb is last used */
33665+ si_read_lock(sb, !AuLock_FLUSH);
33666+ if (!au_br_writable(a->br->br_perm))
33667+ goto out;
33668+ bindex = au_br_index(sb, a->br->br_id); /* re-resolve the index from the branch id */
33669+ if (unlikely(bindex < 0))
33670+ goto out;
33671+
33672+ err = -EIO; /* overwritten by vfsub_mnt_want_write() below before it is read */
33673+ ii_write_lock_parent(a->dir);
33674+ h_parent = dget_parent(a->wh_dentry);
33675+ h_dir = d_inode(h_parent);
33676+ hdir = au_hi(a->dir, bindex);
33677+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
33678+ if (unlikely(err))
33679+ goto out_mnt;
33680+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
33681+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
33682+ a->br);
33683+ if (!err)
33684+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
33685+ au_hn_inode_unlock(hdir);
33686+ vfsub_mnt_drop_write(au_br_mnt(a->br));
33687+
33688+out_mnt:
33689+ dput(h_parent); /* pairs with dget_parent() above */
33690+ ii_write_unlock(a->dir);
33691+out:
33692+ /* inode_unlock(a->dir); */
33693+ au_whtmp_rmdir_free(a); /* @a must not be touched after this point */
33694+ si_read_unlock(sb);
33695+ au_nwt_done(&au_sbi(sb)->si_nowait); /* decrements the nw_len counter that au_wkq_nowait() incremented */
33696+ if (unlikely(err))
33697+ AuIOErr("err %d\n", err);
33698+}
33699+
33700+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33701+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
33702+{ /* fill @args with referenced objects and queue call_rmdir_whtmp() without waiting */
33703+ int wkq_err;
33704+ struct super_block *sb;
33705+
33706+ IMustLock(dir);
33707+
33708+ /* all post-process will be done in call_rmdir_whtmp(). */
33709+ sb = dir->i_sb;
33710+ args->dir = au_igrab(dir);
33711+ args->br = au_sbr(sb, bindex);
33712+ au_br_get(args->br);
33713+ args->wh_dentry = dget(wh_dentry);
33714+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
33715+ if (unlikely(wkq_err)) { /* not queued: the callback will never run, so release @args here */
33716+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
33717+ au_whtmp_rmdir_free(args);
33718+ }
33719+}
33720diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
33721--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
33722+++ linux/fs/aufs/whout.h 2016-10-09 16:55:36.496035060 +0200
33723@@ -0,0 +1,85 @@
33724+/*
33725+ * Copyright (C) 2005-2016 Junjiro R. Okajima
33726+ *
33727+ * This program, aufs is free software; you can redistribute it and/or modify
33728+ * it under the terms of the GNU General Public License as published by
33729+ * the Free Software Foundation; either version 2 of the License, or
33730+ * (at your option) any later version.
33731+ *
33732+ * This program is distributed in the hope that it will be useful,
33733+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33734+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33735+ * GNU General Public License for more details.
33736+ *
33737+ * You should have received a copy of the GNU General Public License
33738+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
33739+ */
33740+
33741+/*
33742+ * whiteout for logical deletion and opaque directory
33743+ */
33744+
33745+#ifndef __AUFS_WHOUT_H__
33746+#define __AUFS_WHOUT_H__
33747+
33748+#ifdef __KERNEL__
33749+
33750+#include "dir.h"
33751+
33752+/* whout.c */
33753+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
33754+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
33755+int au_diropq_test(struct dentry *h_dentry);
33756+struct au_branch;
33757+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
33758+ struct qstr *prefix);
33759+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
33760+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
33761+ struct dentry *dentry);
33762+int au_wh_init(struct au_branch *br, struct super_block *sb);
33763+
33764+/* diropq flags for au_diropq_sio(); the helpers paste the short name onto AuDiropq_ */
33765+#define AuDiropq_CREATE 1
33766+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
33767+#define au_fset_diropq(flags, name) \
33768+ do { (flags) |= AuDiropq_##name; } while (0)
33769+#define au_fclr_diropq(flags, name) \
33770+ do { (flags) &= ~AuDiropq_##name; } while (0)
33771+
33772+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33773+ unsigned int flags);
33774+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33775+ struct au_branch *br);
33776+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33777+ struct dentry *h_parent);
33778+
33779+/* real rmdir for the whiteout-ed dir: state carried to the queued call_rmdir_whtmp() */
33780+struct au_whtmp_rmdir {
33781+ struct inode *dir; /* set via au_igrab() in au_whtmp_kick_rmdir(), released with iput() */
33782+ struct au_branch *br; /* may be NULL until au_whtmp_kick_rmdir() sets it */
33783+ struct dentry *wh_dentry; /* the dir to remove; referenced with dget() */
33784+ struct au_nhash whlist; /* whiteouted children to unlink before the rmdir */
33785+};
33786+
33787+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
33788+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
33789+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33790+ struct dentry *wh_dentry, struct au_nhash *whlist);
33791+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33792+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
33793+
33794+/* ---------------------------------------------------------------------- */
33795+
33796+static inline struct dentry *au_diropq_create(struct dentry *dentry,
33797+ aufs_bindex_t bindex)
33798+{ /* convenience wrapper: au_diropq_sio() with the CREATE flag set */
33799+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
33800+}
33801+
33802+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
33803+{ /* !AuDiropq_CREATE evaluates to 0, i.e. no flags; the returned pointer is converted with PTR_ERR() */
33804+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
33805+}
33806+
33807+#endif /* __KERNEL__ */
33808+#endif /* __AUFS_WHOUT_H__ */
33809diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
33810--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
33811+++ linux/fs/aufs/wkq.c 2016-10-09 16:55:36.496035060 +0200
33812@@ -0,0 +1,213 @@
33813+/*
33814+ * Copyright (C) 2005-2016 Junjiro R. Okajima
33815+ *
33816+ * This program, aufs is free software; you can redistribute it and/or modify
33817+ * it under the terms of the GNU General Public License as published by
33818+ * the Free Software Foundation; either version 2 of the License, or
33819+ * (at your option) any later version.
33820+ *
33821+ * This program is distributed in the hope that it will be useful,
33822+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33823+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33824+ * GNU General Public License for more details.
33825+ *
33826+ * You should have received a copy of the GNU General Public License
33827+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
33828+ */
33829+
33830+/*
33831+ * workqueue for asynchronous/super-io operations
33832+ * todo: try new credentials management scheme
33833+ */
33834+
33835+#include <linux/module.h>
33836+#include "aufs.h"
33837+
33838+/* internal workqueue named AUFS_WKQ_NAME */
33839+
33840+static struct workqueue_struct *au_wkq;
33841+
33842+struct au_wkinfo { /* one queued job: on-stack in au_wkq_do_wait(), kmalloc'ed in au_wkq_nowait() */
33843+ struct work_struct wk;
33844+ struct kobject *kobj; /* only assigned by au_wkq_nowait(); put in wkq_func() */
33845+
33846+ unsigned int flags; /* see wkq.h */
33847+
33848+ au_wkq_func_t func; /* the job itself */
33849+ void *args; /* passed to func unchanged */
33850+
33851+ struct completion *comp; /* signalled by wkq_func() for WAIT jobs; NULL for nowait jobs */
33852+};
33853+
33854+/* ---------------------------------------------------------------------- */
33855+
33856+static void wkq_func(struct work_struct *wk)
33857+{ /* work handler: run the job, then either signal the waiter or clean up the nowait job */
33858+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
33859+
33860+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
33861+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
33862+
33863+ wkinfo->func(wkinfo->args);
33864+ if (au_ftest_wkq(wkinfo->flags, WAIT))
33865+ complete(wkinfo->comp); /* wakes the wait_for_completion() in au_wkq_do_wait() */
33866+ else {
33867+ kobject_put(wkinfo->kobj); /* pairs with kobject_get() in au_wkq_nowait() */
33868+ module_put(THIS_MODULE); /* todo: ?? */
33869+ au_delayed_kfree(wkinfo); /* nowait jobs own their wkinfo */
33870+ }
33871+}
33872+
33873+/*
33874+ * Since struct completion is large, try allocating it dynamically ("#if 1" always picks this variant).
33875+ */
33876+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
33877+#define AuWkqCompDeclare(name) struct completion *comp = NULL
33878+
33879+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33880+{ /* allocate and initialise a completion; returns 0 or -ENOMEM */
33881+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
33882+ if (*comp) {
33883+ init_completion(*comp);
33884+ wkinfo->comp = *comp; /* the same object is also returned through @comp */
33885+ return 0;
33886+ }
33887+ return -ENOMEM;
33888+}
33889+
33890+static void au_wkq_comp_free(struct completion *comp)
33891+{
33892+ au_delayed_kfree(comp);
33893+}
33894+
33895+#else
33896+
33897+/* no braces: the macro expands to two declarations (this branch is not compiled while the #if is 1) */
33898+#define AuWkqCompDeclare(name) \
33899+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
33900+ struct completion *comp = &_ ## name
33901+
33902+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33903+{ /* on-stack variant: nothing to allocate, just record the pointer */
33904+ wkinfo->comp = *comp;
33905+ return 0;
33906+}
33907+
33908+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
33909+{
33910+ /* empty */
33911+}
33912+#endif /* 4KSTACKS */
33913+
33914+static void au_wkq_run(struct au_wkinfo *wkinfo)
33915+{ /* queue @wkinfo: WAIT jobs go to au_wkq, all others to schedule_work() */
33916+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
33917+ if (au_wkq_test()) { /* we are already running inside a workqueue worker */
33918+ AuWarn1("wkq from wkq, unless silly-rename on NFS,"
33919+ " due to a dead dir by UDBA?\n");
33920+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
33921+ }
33922+ } else
33923+ au_dbg_verify_kthread();
33924+
33925+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
33926+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func); /* paired with destroy_work_on_stack() in au_wkq_do_wait() */
33927+ queue_work(au_wkq, &wkinfo->wk);
33928+ } else {
33929+ INIT_WORK(&wkinfo->wk, wkq_func);
33930+ schedule_work(&wkinfo->wk);
33931+ }
33932+}
33933+
33934+/*
33935+ * Run @func(@args) on the workqueue and block until it finishes. Be careful. It is easy to make deadlock happen.
33936+ * processA: lock, wkq and wait
33937+ * processB: wkq and wait, lock in wkq
33938+ * --> deadlock
33939+ */
33940+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
33941+{
33942+ int err;
33943+ AuWkqCompDeclare(comp);
33944+ struct au_wkinfo wkinfo = { /* lives on this stack frame for the whole wait */
33945+ .flags = flags,
33946+ .func = func,
33947+ .args = args
33948+ };
33949+
33950+ err = au_wkq_comp_alloc(&wkinfo, &comp);
33951+ if (!err) {
33952+ au_wkq_run(&wkinfo);
33953+ /* no timeout, no interrupt */
33954+ wait_for_completion(wkinfo.comp);
33955+ au_wkq_comp_free(comp);
33956+ destroy_work_on_stack(&wkinfo.wk);
33957+ }
33958+
33959+ return err; /* only reflects the completion setup; the job's own result travels via @args */
33960+
33961+}
33962+
33963+/*
33964+ * Queue @func(@args) without waiting. Note: dget/dput() in func for aufs dentries are not supported. It will be a
33965+ * problem in a concurrent umounting.
33966+ */
33967+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33968+ unsigned int flags)
33969+{
33970+ int err;
33971+ struct au_wkinfo *wkinfo;
33972+
33973+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len); /* counts the pending job; decremented by au_nwt_done() */
33974+
33975+ /*
33976+ * wkq_func() must free this wkinfo.
33977+ * it highly depends upon the implementation of workqueue.
33978+ */
33979+ err = 0;
33980+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
33981+ if (wkinfo) {
33982+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
33983+ wkinfo->flags = flags & ~AuWkq_WAIT; /* WAIT is always masked off on this path */
33984+ wkinfo->func = func;
33985+ wkinfo->args = args;
33986+ wkinfo->comp = NULL;
33987+ kobject_get(wkinfo->kobj); /* dropped again in wkq_func() */
33988+ __module_get(THIS_MODULE); /* todo: ?? */
33989+
33990+ au_wkq_run(wkinfo);
33991+ } else {
33992+ err = -ENOMEM;
33993+ au_nwt_done(&au_sbi(sb)->si_nowait); /* nothing was queued: undo the increment above */
33994+ }
33995+
33996+ return err;
33997+}
33998+
33999+/* ---------------------------------------------------------------------- */
34000+
34001+void au_nwt_init(struct au_nowait_tasks *nwt)
34002+{ /* start with zero pending nowait jobs and an empty wait queue */
34003+ atomic_set(&nwt->nw_len, 0);
34004+ /* smp_mb(); */ /* atomic_set */
34005+ init_waitqueue_head(&nwt->nw_wq);
34006+}
34007+
34008+void au_wkq_fin(void)
34009+{ /* tear down the workqueue created by au_wkq_init() */
34010+ destroy_workqueue(au_wkq);
34011+}
34012+
34013+int __init au_wkq_init(void)
34014+{ /* create the internal workqueue; returns 0 or a negative error */
34015+ int err;
34016+
34017+ err = 0;
34018+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
34019+ if (IS_ERR(au_wkq)) /* both an ERR_PTR and a NULL result are treated as failure */
34020+ err = PTR_ERR(au_wkq);
34021+ else if (!au_wkq)
34022+ err = -ENOMEM;
34023+
34024+ return err;
34025+}
34026diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
34027--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
34028+++ linux/fs/aufs/wkq.h 2016-10-09 16:55:36.496035060 +0200
34029@@ -0,0 +1,93 @@
34030+/*
34031+ * Copyright (C) 2005-2016 Junjiro R. Okajima
34032+ *
34033+ * This program, aufs is free software; you can redistribute it and/or modify
34034+ * it under the terms of the GNU General Public License as published by
34035+ * the Free Software Foundation; either version 2 of the License, or
34036+ * (at your option) any later version.
34037+ *
34038+ * This program is distributed in the hope that it will be useful,
34039+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34040+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34041+ * GNU General Public License for more details.
34042+ *
34043+ * You should have received a copy of the GNU General Public License
34044+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
34045+ */
34046+
34047+/*
34048+ * workqueue for asynchronous/super-io operations
34049+ * todo: try new credentials management scheme
34050+ */
34051+
34052+#ifndef __AUFS_WKQ_H__
34053+#define __AUFS_WKQ_H__
34054+
34055+#ifdef __KERNEL__
34056+
34057+#include <linux/percpu_counter.h>
34058+
34059+struct super_block;
34060+
34061+/* ---------------------------------------------------------------------- */
34062+
34063+/*
34064+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
34065+ */
34066+struct au_nowait_tasks {
34067+ atomic_t nw_len; /* pending nowait jobs: incremented in au_wkq_nowait(), decremented in au_nwt_done() */
34068+ wait_queue_head_t nw_wq; /* au_nwt_flush() sleeps here until nw_len reads zero */
34069+};
34070+
34071+/* ---------------------------------------------------------------------- */
34072+
34073+typedef void (*au_wkq_func_t)(void *args);
34074+
34075+/* wkq flags; the helpers paste the short name onto AuWkq_ */
34076+#define AuWkq_WAIT 1
34077+#define AuWkq_NEST (1 << 1)
34078+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
34079+#define au_fset_wkq(flags, name) \
34080+ do { (flags) |= AuWkq_##name; } while (0)
34081+#define au_fclr_wkq(flags, name) \
34082+ do { (flags) &= ~AuWkq_##name; } while (0)
34083+
34084+#ifndef CONFIG_AUFS_HNOTIFY
34085+#undef AuWkq_NEST
34086+#define AuWkq_NEST 0 /* without CONFIG_AUFS_HNOTIFY every NEST test evaluates to 0 */
34087+#endif
34088+
34089+/* wkq.c */
34090+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
34091+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
34092+ unsigned int flags);
34093+void au_nwt_init(struct au_nowait_tasks *nwt);
34094+int __init au_wkq_init(void);
34095+void au_wkq_fin(void);
34096+
34097+/* ---------------------------------------------------------------------- */
34098+
34099+static inline int au_wkq_test(void)
34100+{ /* non-zero when the current task has PF_WQ_WORKER set */
34101+ return current->flags & PF_WQ_WORKER;
34102+}
34103+
34104+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
34105+{ /* synchronous form: au_wkq_do_wait() with only the WAIT flag */
34106+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
34107+}
34108+
34109+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
34110+{ /* one nowait job finished; wake all waiters when the count reaches zero */
34111+ if (atomic_dec_and_test(&nwt->nw_len))
34112+ wake_up_all(&nwt->nw_wq);
34113+}
34114+
34115+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
34116+{ /* block until no nowait job is pending; always returns 0 */
34117+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
34118+ return 0;
34119+}
34120+
34121+#endif /* __KERNEL__ */
34122+#endif /* __AUFS_WKQ_H__ */
34123diff -urN /usr/share/empty/fs/aufs/xattr.c linux/fs/aufs/xattr.c
34124--- /usr/share/empty/fs/aufs/xattr.c 1970-01-01 01:00:00.000000000 +0100
34125+++ linux/fs/aufs/xattr.c 2016-12-17 12:28:17.598545045 +0100
34126@@ -0,0 +1,332 @@
34127+/*
34128+ * Copyright (C) 2014-2016 Junjiro R. Okajima
34129+ *
34130+ * This program, aufs is free software; you can redistribute it and/or modify
34131+ * it under the terms of the GNU General Public License as published by
34132+ * the Free Software Foundation; either version 2 of the License, or
34133+ * (at your option) any later version.
34134+ *
34135+ * This program is distributed in the hope that it will be useful,
34136+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34137+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34138+ * GNU General Public License for more details.
34139+ *
34140+ * You should have received a copy of the GNU General Public License
34141+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
34142+ */
34143+
34144+/*
34145+ * handling xattr functions
34146+ */
34147+
34148+#include <linux/xattr.h>
34149+#include "aufs.h"
34150+
34151+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
34152+{ /* returns 0 when @err for xattr @name is covered by @ignore_flags, otherwise @err unchanged */
34153+ if (!ignore_flags)
34154+ goto out;
34155+ switch (err) { /* these two errors are never ignored */
34156+ case -ENOMEM:
34157+ case -EDQUOT:
34158+ goto out;
34159+ }
34160+
34161+ if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) { /* every bit of AuBrAttr_ICEX set: ignore regardless of name */
34162+ err = 0;
34163+ goto out;
34164+ }
34165+
34166+#define cmp(brattr, prefix) do { \
34167+ if (!strncmp(name, XATTR_##prefix##_PREFIX, \
34168+ XATTR_##prefix##_PREFIX_LEN)) { \
34169+ if (ignore_flags & AuBrAttr_ICEX_##brattr) \
34170+ err = 0; \
34171+ goto out; \
34172+ } \
34173+ } while (0)
34174+
34175+ cmp(SEC, SECURITY); /* each cmp() jumps to out as soon as its prefix matches @name */
34176+ cmp(SYS, SYSTEM);
34177+ cmp(TR, TRUSTED);
34178+ cmp(USR, USER);
34179+#undef cmp
34180+
34181+ if (ignore_flags & AuBrAttr_ICEX_OTH) /* reached only when none of the four prefixes matched */
34182+ err = 0;
34183+
34184+out:
34185+ return err;
34186+}
34187+
34188+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1; /* extra flag bit; au_cpup_xattr() ORs it in before the explicit ACL copies */
34189+
34190+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
34191+ char *name, char **buf, unsigned int ignore_flags,
34192+ unsigned int verbose)
34193+{ /* copy the single xattr @name from @h_src to @h_dst; @buf is a buffer reused across calls */
34194+ int err;
34195+ ssize_t ssz;
34196+ struct inode *h_idst;
34197+
34198+ ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
34199+ err = ssz;
34200+ if (unlikely(err <= 0)) { /* error or empty value: nothing is written to @h_dst */
34201+ if (err == -ENODATA
34202+ || (err == -EOPNOTSUPP
34203+ && ((ignore_flags & au_xattr_out_of_list)
34204+ || (au_test_nfs_noacl(d_inode(h_src))
34205+ && (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
34206+ || !strcmp(name,
34207+ XATTR_NAME_POSIX_ACL_DEFAULT))))
34208+ ))
34209+ err = 0; /* these read failures are treated as success */
34210+ if (err && (verbose || au_debug_test()))
34211+ pr_err("%s, err %d\n", name, err);
34212+ goto out;
34213+ }
34214+
34215+ /* unlock it temporary */
34216+ h_idst = d_inode(h_dst);
34217+ inode_unlock(h_idst);
34218+ err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
34219+ inode_lock_nested(h_idst, AuLsc_I_CHILD2); /* re-take the lock released just above */
34220+ if (unlikely(err)) {
34221+ if (verbose || au_debug_test())
34222+ pr_err("%s, err %d\n", name, err);
34223+ err = au_xattr_ignore(err, name, ignore_flags); /* may turn the write error into 0 */
34224+ }
34225+
34226+out:
34227+ return err;
34228+}
34229+
34230+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
34231+ unsigned int verbose)
34232+{ /* copy every listed xattr, plus the POSIX ACL names if unlisted, from @h_src to @h_dst */
34233+ int err, unlocked, acl_access, acl_default;
34234+ ssize_t ssz;
34235+ struct inode *h_isrc, *h_idst;
34236+ char *value, *p, *o, *e;
34237+
34238+ /* try stopping to update the source inode while we are referencing */
34239+ /* there should not be the parent-child relationship between them */
34240+ h_isrc = d_inode(h_src);
34241+ h_idst = d_inode(h_dst);
34242+ inode_unlock(h_idst); /* dropped so it can be re-taken after the source inode lock */
34243+ inode_lock_nested(h_isrc, AuLsc_I_CHILD);
34244+ inode_lock_nested(h_idst, AuLsc_I_CHILD2);
34245+ unlocked = 0; /* becomes 1 once h_isrc has been unlocked */
34246+
34247+ /* some filesystems don't list POSIX ACL, for example tmpfs */
34248+ ssz = vfs_listxattr(h_src, NULL, 0); /* first call only queries the list size */
34249+ err = ssz;
34250+ if (unlikely(err < 0)) {
34251+ AuTraceErr(err);
34252+ if (err == -ENODATA
34253+ || err == -EOPNOTSUPP)
34254+ err = 0; /* ignore */
34255+ goto out;
34256+ }
34257+
34258+ err = 0;
34259+ p = NULL;
34260+ o = NULL;
34261+ if (ssz) {
34262+ err = -ENOMEM;
34263+ p = kmalloc(ssz, GFP_NOFS);
34264+ o = p; /* o keeps the buffer start for freeing while p advances */
34265+ if (unlikely(!p))
34266+ goto out;
34267+ err = vfs_listxattr(h_src, p, ssz);
34268+ }
34269+ inode_unlock(h_isrc);
34270+ unlocked = 1;
34271+ AuDbg("err %d, ssz %zd\n", err, ssz);
34272+ if (unlikely(err < 0))
34273+ goto out_free;
34274+
34275+ err = 0;
34276+ e = p + ssz;
34277+ value = NULL;
34278+ acl_access = 0;
34279+ acl_default = 0;
34280+ while (!err && p < e) { /* the list is a sequence of NUL-terminated names */
34281+ acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
34282+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
34283+ acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
34284+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
34285+ - 1);
34286+ err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
34287+ verbose);
34288+ p += strlen(p) + 1; /* step over the name and its terminator */
34289+ }
34290+ AuTraceErr(err);
34291+ ignore_flags |= au_xattr_out_of_list; /* lets au_do_cpup_xattr() tolerate -EOPNOTSUPP for the unlisted ACL names */
34292+ if (!err && !acl_access) {
34293+ err = au_do_cpup_xattr(h_dst, h_src,
34294+ XATTR_NAME_POSIX_ACL_ACCESS, &value,
34295+ ignore_flags, verbose);
34296+ AuTraceErr(err);
34297+ }
34298+ if (!err && !acl_default) {
34299+ err = au_do_cpup_xattr(h_dst, h_src,
34300+ XATTR_NAME_POSIX_ACL_DEFAULT, &value,
34301+ ignore_flags, verbose);
34302+ AuTraceErr(err);
34303+ }
34304+
34305+ if (value)
34306+ au_delayed_kfree(value);
34307+
34308+out_free:
34309+ if (o)
34310+ au_delayed_kfree(o);
34311+out:
34312+ if (!unlocked) /* early exits arrive here with h_isrc still locked */
34313+ inode_unlock(h_isrc);
34314+ AuTraceErr(err);
34315+ return err;
34316+}
34317+
34318+/* ---------------------------------------------------------------------- */
34319+
34320+enum { /* operation selector stored in struct au_lgxattr.type */
34321+ AU_XATTR_LIST,
34322+ AU_XATTR_GET
34323+};
34324+
34325+struct au_lgxattr { /* request descriptor for au_lgxattr() */
34326+ int type; /* AU_XATTR_LIST or AU_XATTR_GET; selects the union member */
34327+ union {
34328+ struct {
34329+ char *list;
34330+ size_t size;
34331+ } list; /* arguments forwarded to vfs_listxattr() */
34332+ struct {
34333+ const char *name;
34334+ void *value;
34335+ size_t size;
34336+ } get; /* arguments forwarded to vfs_getxattr() */
34337+ } u;
34338+};
34339+
34340+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
34341+{ /* shared list/get path: resolve the lower dentry and forward the request to the VFS */
34342+ ssize_t err;
34343+ struct path h_path;
34344+ struct super_block *sb;
34345+
34346+ sb = dentry->d_sb;
34347+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
34348+ if (unlikely(err))
34349+ goto out;
34350+ err = au_h_path_getattr(dentry, /*force*/1, &h_path); /* NOTE(review): presumably returns with di read-locked on success - confirm */
34351+ if (unlikely(err))
34352+ goto out_si;
34353+ if (unlikely(!h_path.dentry))
34354+ /* illegally overlapped or something */
34355+ goto out_di; /* pretending success */
34356+
34357+ /* always topmost entry only */
34358+ switch (arg->type) {
34359+ case AU_XATTR_LIST:
34360+ err = vfs_listxattr(h_path.dentry,
34361+ arg->u.list.list, arg->u.list.size);
34362+ break;
34363+ case AU_XATTR_GET:
34364+ AuDebugOn(d_is_negative(h_path.dentry));
34365+ err = vfs_getxattr(h_path.dentry,
34366+ arg->u.get.name, arg->u.get.value,
34367+ arg->u.get.size);
34368+ break;
34369+ }
34370+
34371+out_di:
34372+ di_read_unlock(dentry, AuLock_IR);
34373+out_si:
34374+ si_read_unlock(sb);
34375+out:
34376+ AuTraceErr(err);
34377+ return err; /* the VFS result, 0 for the "pretending success" case, or an earlier error */
34378+}
34379+
34380+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
34381+{ /* wrap the arguments as an AU_XATTR_LIST request for au_lgxattr() */
34382+ struct au_lgxattr arg = {
34383+ .type = AU_XATTR_LIST,
34384+ .u.list = {
34385+ .list = list,
34386+ .size = size
34387+ },
34388+ };
34389+
34390+ return au_lgxattr(dentry, &arg);
34391+}
34392+
34393+static ssize_t au_getxattr(struct dentry *dentry,
34394+ struct inode *inode __maybe_unused,
34395+ const char *name, void *value, size_t size)
34396+{ /* wrap the arguments as an AU_XATTR_GET request for au_lgxattr(); @inode is not used */
34397+ struct au_lgxattr arg = {
34398+ .type = AU_XATTR_GET,
34399+ .u.get = {
34400+ .name = name,
34401+ .value = value,
34402+ .size = size
34403+ },
34404+ };
34405+
34406+ return au_lgxattr(dentry, &arg);
34407+}
34408+
34409+static int au_setxattr(struct dentry *dentry, struct inode *inode,
34410+ const char *name, const void *value, size_t size,
34411+ int flags)
34412+{ /* wrap the arguments as an AU_XATTR_SET request; au_sxattr() is defined outside this file section */
34413+ struct au_sxattr arg = {
34414+ .type = AU_XATTR_SET,
34415+ .u.set = {
34416+ .name = name,
34417+ .value = value,
34418+ .size = size,
34419+ .flags = flags
34420+ },
34421+ };
34422+
34423+ return au_sxattr(dentry, inode, &arg);
34424+}
34425+
34426+/* ---------------------------------------------------------------------- */
34427+
34428+static int au_xattr_get(const struct xattr_handler *handler,
34429+ struct dentry *dentry, struct inode *inode,
34430+ const char *name, void *buffer, size_t size)
34431+{ /* xattr_handler .get hook: @handler is unused, the rest is forwarded to au_getxattr() */
34432+ return au_getxattr(dentry, inode, name, buffer, size);
34433+}
34434+
34435+static int au_xattr_set(const struct xattr_handler *handler,
34436+ struct dentry *dentry, struct inode *inode,
34437+ const char *name, const void *value, size_t size,
34438+ int flags)
34439+{ /* xattr_handler .set hook: @handler is unused, the rest is forwarded to au_setxattr() */
34440+ return au_setxattr(dentry, inode, name, value, size, flags);
34441+}
34442+
34443+static const struct xattr_handler au_xattr_handler = { /* single handler registered with empty name and prefix strings */
34444+ .name = "",
34445+ .prefix = "",
34446+ .get = au_xattr_get,
34447+ .set = au_xattr_set
34448+};
34449+
34450+static const struct xattr_handler *au_xattr_handlers[] = { /* NULL-terminated table installed by au_xattr_init() */
34451+ &au_xattr_handler,
34452+ NULL
34453+};
34454+
34455+void au_xattr_init(struct super_block *sb)
34456+{ /* point the superblock's xattr handler table at au_xattr_handlers */
34457+ sb->s_xattr = au_xattr_handlers;
34458+}
34459diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
34460--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
34461+++ linux/fs/aufs/xino.c 2016-10-09 16:55:36.496035060 +0200
34462@@ -0,0 +1,1318 @@
34463+/*
34464+ * Copyright (C) 2005-2016 Junjiro R. Okajima
34465+ *
34466+ * This program, aufs is free software; you can redistribute it and/or modify
34467+ * it under the terms of the GNU General Public License as published by
34468+ * the Free Software Foundation; either version 2 of the License, or
34469+ * (at your option) any later version.
34470+ *
34471+ * This program is distributed in the hope that it will be useful,
34472+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34473+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34474+ * GNU General Public License for more details.
34475+ *
34476+ * You should have received a copy of the GNU General Public License
34477+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
34478+ */
34479+
34480+/*
34481+ * external inode number translation table and bitmap
34482+ */
34483+
34484+#include <linux/seq_file.h>
34485+#include <linux/statfs.h>
34486+#include "aufs.h"
34487+
34488+/* read into kernel buffer @kbuf through @func, retrying on -EAGAIN/-EINTR. todo: unnecessary to support mmap_sem since kernel-space? */
34489+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
34490+ loff_t *pos)
34491+{
34492+ ssize_t err;
34493+ mm_segment_t oldfs;
34494+ union { /* lets the kernel pointer be passed where a __user pointer is expected */
34495+ void *k;
34496+ char __user *u;
34497+ } buf;
34498+
34499+ buf.k = kbuf;
34500+ oldfs = get_fs();
34501+ set_fs(KERNEL_DS); /* restored with set_fs(oldfs) after the loop */
34502+ do {
34503+ /* todo: signal_pending? */
34504+ err = func(file, buf.u, size, pos);
34505+ } while (err == -EAGAIN || err == -EINTR); /* no retry limit here, unlike do_xino_fwrite() */
34506+ set_fs(oldfs);
34507+
34508+#if 0 /* reserved for future use */
34509+ if (err > 0)
34510+ fsnotify_access(file->f_path.dentry);
34511+#endif
34512+
34513+ return err;
34514+}
34515+
34516+/* ---------------------------------------------------------------------- */
34517+
34518+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
34519+ size_t size, loff_t *pos);
34520+
34521+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
34522+ size_t size, loff_t *pos)
34523+{
34524+ ssize_t err;
34525+ mm_segment_t oldfs;
34526+ union {
34527+ void *k;
34528+ const char __user *u;
34529+ } buf;
34530+ int i;
34531+ const int prevent_endless = 10;
34532+
34533+ i = 0;
34534+ buf.k = kbuf;
34535+ oldfs = get_fs();
34536+ set_fs(KERNEL_DS);
34537+ do {
34538+ err = func(file, buf.u, size, pos);
34539+ if (err == -EINTR
34540+ && !au_wkq_test()
34541+ && fatal_signal_pending(current)) {
34542+ set_fs(oldfs);
34543+ err = xino_fwrite_wkq(func, file, kbuf, size, pos);
34544+ BUG_ON(err == -EINTR);
34545+ oldfs = get_fs();
34546+ set_fs(KERNEL_DS);
34547+ }
34548+ } while (i++ < prevent_endless
34549+ && (err == -EAGAIN || err == -EINTR));
34550+ set_fs(oldfs);
34551+
34552+#if 0 /* reserved for future use */
34553+ if (err > 0)
34554+ fsnotify_modify(file->f_path.dentry);
34555+#endif
34556+
34557+ return err;
34558+}
34559+
34560+struct do_xino_fwrite_args {
34561+ ssize_t *errp;
34562+ vfs_writef_t func;
34563+ struct file *file;
34564+ void *buf;
34565+ size_t size;
34566+ loff_t *pos;
34567+};
34568+
34569+static void call_do_xino_fwrite(void *args)
34570+{
34571+ struct do_xino_fwrite_args *a = args;
34572+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
34573+}
34574+
34575+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
34576+ size_t size, loff_t *pos)
34577+{
34578+ ssize_t err;
34579+ int wkq_err;
34580+ struct do_xino_fwrite_args args = {
34581+ .errp = &err,
34582+ .func = func,
34583+ .file = file,
34584+ .buf = buf,
34585+ .size = size,
34586+ .pos = pos
34587+ };
34588+
34589+ /*
34590+ * it breaks RLIMIT_FSIZE and normal user's limit,
34591+ * users should care about quota and real 'filesystem full.'
34592+ */
34593+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
34594+ if (unlikely(wkq_err))
34595+ err = wkq_err;
34596+
34597+ return err;
34598+}
34599+
34600+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
34601+ size_t size, loff_t *pos)
34602+{
34603+ ssize_t err;
34604+
34605+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
34606+ lockdep_off();
34607+ err = do_xino_fwrite(func, file, buf, size, pos);
34608+ lockdep_on();
34609+ } else
34610+ err = xino_fwrite_wkq(func, file, buf, size, pos);
34611+
34612+ return err;
34613+}
34614+
34615+/* ---------------------------------------------------------------------- */
34616+
34617+/*
34618+ * create a new xinofile at the same place/path as @base_file.
34619+ */
34620+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
34621+{
34622+ struct file *file;
34623+ struct dentry *base, *parent;
34624+ struct inode *dir, *delegated;
34625+ struct qstr *name;
34626+ struct path path;
34627+ int err;
34628+
34629+ base = base_file->f_path.dentry;
34630+ parent = base->d_parent; /* dir inode is locked */
34631+ dir = d_inode(parent);
34632+ IMustLock(dir);
34633+
34634+ file = ERR_PTR(-EINVAL);
34635+ name = &base->d_name;
34636+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
34637+ if (IS_ERR(path.dentry)) {
34638+ file = (void *)path.dentry;
34639+ pr_err("%pd lookup err %ld\n",
34640+ base, PTR_ERR(path.dentry));
34641+ goto out;
34642+ }
34643+
34644+ /* no need to mnt_want_write() since we call dentry_open() later */
34645+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
34646+ if (unlikely(err)) {
34647+ file = ERR_PTR(err);
34648+ pr_err("%pd create err %d\n", base, err);
34649+ goto out_dput;
34650+ }
34651+
34652+ path.mnt = base_file->f_path.mnt;
34653+ file = vfsub_dentry_open(&path,
34654+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
34655+ /* | __FMODE_NONOTIFY */);
34656+ if (IS_ERR(file)) {
34657+ pr_err("%pd open err %ld\n", base, PTR_ERR(file));
34658+ goto out_dput;
34659+ }
34660+
34661+ delegated = NULL;
34662+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
34663+ if (unlikely(err == -EWOULDBLOCK)) {
34664+ pr_warn("cannot retry for NFSv4 delegation"
34665+ " for an internal unlink\n");
34666+ iput(delegated);
34667+ }
34668+ if (unlikely(err)) {
34669+ pr_err("%pd unlink err %d\n", base, err);
34670+ goto out_fput;
34671+ }
34672+
34673+ if (copy_src) {
34674+ /* no one can touch copy_src xino */
34675+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
34676+ if (unlikely(err)) {
34677+ pr_err("%pd copy err %d\n", base, err);
34678+ goto out_fput;
34679+ }
34680+ }
34681+ goto out_dput; /* success */
34682+
34683+out_fput:
34684+ fput(file);
34685+ file = ERR_PTR(err);
34686+out_dput:
34687+ dput(path.dentry);
34688+out:
34689+ return file;
34690+}
34691+
34692+struct au_xino_lock_dir {
34693+ struct au_hinode *hdir;
34694+ struct dentry *parent;
34695+ struct inode *dir;
34696+};
34697+
34698+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
34699+ struct au_xino_lock_dir *ldir)
34700+{
34701+ aufs_bindex_t brid, bindex;
34702+
34703+ ldir->hdir = NULL;
34704+ bindex = -1;
34705+ brid = au_xino_brid(sb);
34706+ if (brid >= 0)
34707+ bindex = au_br_index(sb, brid);
34708+ if (bindex >= 0) {
34709+ ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
34710+ au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
34711+ } else {
34712+ ldir->parent = dget_parent(xino->f_path.dentry);
34713+ ldir->dir = d_inode(ldir->parent);
34714+ inode_lock_nested(ldir->dir, AuLsc_I_PARENT);
34715+ }
34716+}
34717+
34718+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
34719+{
34720+ if (ldir->hdir)
34721+ au_hn_inode_unlock(ldir->hdir);
34722+ else {
34723+ inode_unlock(ldir->dir);
34724+ dput(ldir->parent);
34725+ }
34726+}
34727+
34728+/* ---------------------------------------------------------------------- */
34729+
34730+/* truncate xino files asynchronously */
34731+
34732+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
34733+{
34734+ int err;
34735+ unsigned long jiffy;
34736+ blkcnt_t blocks;
34737+ aufs_bindex_t bi, bbot;
34738+ struct kstatfs *st;
34739+ struct au_branch *br;
34740+ struct file *new_xino, *file;
34741+ struct super_block *h_sb;
34742+ struct au_xino_lock_dir ldir;
34743+
34744+ err = -ENOMEM;
34745+ st = kzalloc(sizeof(*st), GFP_NOFS); /* zeroed: *st is printed even when statfs fails */
34746+ if (unlikely(!st))
34747+ goto out;
34748+
34749+ err = -EINVAL;
34750+ bbot = au_sbbot(sb);
34751+ if (unlikely(bindex < 0 || bbot < bindex))
34752+ goto out_st;
34753+ br = au_sbr(sb, bindex);
34754+ file = br->br_xino.xi_file;
34755+ if (!file)
34756+ goto out_st;
34757+
34758+ err = vfs_statfs(&file->f_path, st);
34759+ if (unlikely(err))
34760+ AuErr1("statfs err %d, ignored\n", err);
34761+ jiffy = jiffies;
34762+ blocks = file_inode(file)->i_blocks;
34763+ pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34764+ bindex, (u64)blocks, st->f_bfree, st->f_blocks);
34765+
34766+ au_xino_lock_dir(sb, file, &ldir);
34767+ /* mnt_want_write() is unnecessary here */
34768+ new_xino = au_xino_create2(file, file);
34769+ au_xino_unlock_dir(&ldir);
34770+ err = PTR_ERR(new_xino);
34771+ if (IS_ERR(new_xino)) {
34772+ pr_err("err %d, ignored\n", err);
34773+ goto out_st;
34774+ }
34775+ err = 0;
34776+ fput(file);
34777+ br->br_xino.xi_file = new_xino;
34778+
34779+ h_sb = au_br_sb(br);
34780+ for (bi = 0; bi <= bbot; bi++) {
34781+ if (unlikely(bi == bindex))
34782+ continue;
34783+ br = au_sbr(sb, bi);
34784+ if (au_br_sb(br) != h_sb)
34785+ continue;
34786+
34787+ fput(br->br_xino.xi_file);
34788+ br->br_xino.xi_file = new_xino;
34789+ get_file(new_xino);
34790+ }
34791+
34792+ err = vfs_statfs(&new_xino->f_path, st);
34793+ if (!err) {
34794+ pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34795+ bindex, (u64)file_inode(new_xino)->i_blocks,
34796+ st->f_bfree, st->f_blocks);
34797+ if (file_inode(new_xino)->i_blocks < blocks)
34798+ au_sbi(sb)->si_xino_jiffy = jiffy;
34799+ } else
34800+ AuErr1("statfs err %d, ignored\n", err);
34801+
34802+out_st:
34803+ au_delayed_kfree(st);
34804+out:
34805+ return err;
34806+}
34807+
34808+struct xino_do_trunc_args {
34809+ struct super_block *sb;
34810+ struct au_branch *br;
34811+};
34812+
34813+static void xino_do_trunc(void *_args)
34814+{
34815+ struct xino_do_trunc_args *args = _args;
34816+ struct super_block *sb;
34817+ struct au_branch *br;
34818+ struct inode *dir;
34819+ int err;
34820+ aufs_bindex_t bindex;
34821+
34822+ err = 0;
34823+ sb = args->sb;
34824+ dir = d_inode(sb->s_root);
34825+ br = args->br;
34826+
34827+ si_noflush_write_lock(sb);
34828+ ii_read_lock_parent(dir);
34829+ bindex = au_br_index(sb, br->br_id);
34830+ err = au_xino_trunc(sb, bindex);
34831+ ii_read_unlock(dir);
34832+ if (unlikely(err))
34833+ pr_warn("err b%d, (%d)\n", bindex, err);
34834+ atomic_dec(&br->br_xino_running);
34835+ au_br_put(br);
34836+ si_write_unlock(sb);
34837+ au_nwt_done(&au_sbi(sb)->si_nowait);
34838+ au_delayed_kfree(args);
34839+}
34840+
34841+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
34842+{
34843+ int err;
34844+ struct kstatfs st;
34845+ struct au_sbinfo *sbinfo;
34846+
34847+ /* todo: si_xino_expire and the ratio should be customizable */
34848+ sbinfo = au_sbi(sb);
34849+ if (time_before(jiffies,
34850+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
34851+ return 0;
34852+
34853+ /* truncation border */
34854+ err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
34855+ if (unlikely(err)) {
34856+ AuErr1("statfs err %d, ignored\n", err);
34857+ return 0;
34858+ }
34859+ if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
34860+ return 0;
34861+
34862+ return 1;
34863+}
34864+
34865+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
34866+{
34867+ struct xino_do_trunc_args *args;
34868+ int wkq_err;
34869+
34870+ if (!xino_trunc_test(sb, br))
34871+ return;
34872+
34873+ if (atomic_inc_return(&br->br_xino_running) > 1)
34874+ goto out;
34875+
34876+ /* the lock is taken and args is freed in xino_do_trunc() */
34877+ args = kmalloc(sizeof(*args), GFP_NOFS);
34878+ if (unlikely(!args)) {
34879+ AuErr1("no memory\n");
34880+ goto out;
34881+ }
34882+
34883+ au_br_get(br);
34884+ args->sb = sb;
34885+ args->br = br;
34886+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
34887+ if (!wkq_err)
34888+ return; /* success */
34889+
34890+ pr_err("wkq %d\n", wkq_err);
34891+ au_br_put(br);
34892+ au_delayed_kfree(args);
34893+
34894+out:
34895+ atomic_dec(&br->br_xino_running);
34896+}
34897+
34898+/* ---------------------------------------------------------------------- */
34899+
34900+static int au_xino_do_write(vfs_writef_t write, struct file *file,
34901+ ino_t h_ino, ino_t ino)
34902+{
34903+ loff_t pos;
34904+ ssize_t sz;
34905+
34906+ pos = h_ino;
34907+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
34908+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34909+ return -EFBIG;
34910+ }
34911+ pos *= sizeof(ino);
34912+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
34913+ if (sz == sizeof(ino))
34914+ return 0; /* success */
34915+
34916+ AuIOErr("write failed (%zd)\n", sz);
34917+ return -EIO;
34918+}
34919+
34920+/*
34921+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
34922+ * at the position of @h_ino.
34923+ * even if @ino is zero, it is written to the xinofile and means no entry.
34924+ * if the size of the xino file on a specific filesystem exceeds the watermark,
34925+ * try truncating it.
34926+ */
34927+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34928+ ino_t ino)
34929+{
34930+ int err;
34931+ unsigned int mnt_flags;
34932+ struct au_branch *br;
34933+
34934+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
34935+ || ((loff_t)-1) > 0);
34936+ SiMustAnyLock(sb);
34937+
34938+ mnt_flags = au_mntflags(sb);
34939+ if (!au_opt_test(mnt_flags, XINO))
34940+ return 0;
34941+
34942+ br = au_sbr(sb, bindex);
34943+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34944+ h_ino, ino);
34945+ if (!err) {
34946+ if (au_opt_test(mnt_flags, TRUNC_XINO)
34947+ && au_test_fs_trunc_xino(au_br_sb(br)))
34948+ xino_try_trunc(sb, br);
34949+ return 0; /* success */
34950+ }
34951+
34952+ AuIOErr("write failed (%d)\n", err);
34953+ return -EIO;
34954+}
34955+
34956+/* ---------------------------------------------------------------------- */
34957+
34958+/* aufs inode number bitmap */
34959+
34960+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
34961+static ino_t xib_calc_ino(unsigned long pindex, int bit)
34962+{
34963+ ino_t ino;
34964+
34965+ AuDebugOn(bit < 0 || page_bits <= bit);
34966+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
34967+ return ino;
34968+}
34969+
34970+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
34971+{
34972+ AuDebugOn(ino < AUFS_FIRST_INO);
34973+ ino -= AUFS_FIRST_INO;
34974+ *pindex = ino / page_bits;
34975+ *bit = ino % page_bits;
34976+}
34977+
34978+static int xib_pindex(struct super_block *sb, unsigned long pindex)
34979+{
34980+ int err;
34981+ loff_t pos;
34982+ ssize_t sz;
34983+ struct au_sbinfo *sbinfo;
34984+ struct file *xib;
34985+ unsigned long *p;
34986+
34987+ sbinfo = au_sbi(sb);
34988+ MtxMustLock(&sbinfo->si_xib_mtx);
34989+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
34990+ || !au_opt_test(sbinfo->si_mntflags, XINO));
34991+
34992+ if (pindex == sbinfo->si_xib_last_pindex)
34993+ return 0;
34994+
34995+ xib = sbinfo->si_xib;
34996+ p = sbinfo->si_xib_buf;
34997+ pos = sbinfo->si_xib_last_pindex;
34998+ pos *= PAGE_SIZE;
34999+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
35000+ if (unlikely(sz != PAGE_SIZE))
35001+ goto out;
35002+
35003+ pos = pindex;
35004+ pos *= PAGE_SIZE;
35005+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
35006+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
35007+ else {
35008+ memset(p, 0, PAGE_SIZE);
35009+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
35010+ }
35011+ if (sz == PAGE_SIZE) {
35012+ sbinfo->si_xib_last_pindex = pindex;
35013+ return 0; /* success */
35014+ }
35015+
35016+out:
35017+ AuIOErr1("write failed (%zd)\n", sz);
35018+ err = sz;
35019+ if (sz >= 0)
35020+ err = -EIO;
35021+ return err;
35022+}
35023+
35024+/* ---------------------------------------------------------------------- */
35025+
35026+static void au_xib_clear_bit(struct inode *inode)
35027+{
35028+ int err, bit;
35029+ unsigned long pindex;
35030+ struct super_block *sb;
35031+ struct au_sbinfo *sbinfo;
35032+
35033+ AuDebugOn(inode->i_nlink);
35034+
35035+ sb = inode->i_sb;
35036+ xib_calc_bit(inode->i_ino, &pindex, &bit);
35037+ AuDebugOn(page_bits <= bit);
35038+ sbinfo = au_sbi(sb);
35039+ mutex_lock(&sbinfo->si_xib_mtx);
35040+ err = xib_pindex(sb, pindex);
35041+ if (!err) {
35042+ clear_bit(bit, sbinfo->si_xib_buf);
35043+ sbinfo->si_xib_next_bit = bit;
35044+ }
35045+ mutex_unlock(&sbinfo->si_xib_mtx);
35046+}
35047+
35048+/* for s_op->delete_inode() */
35049+void au_xino_delete_inode(struct inode *inode, const int unlinked)
35050+{
35051+ int err;
35052+ unsigned int mnt_flags;
35053+ aufs_bindex_t bindex, bbot, bi;
35054+ unsigned char try_trunc;
35055+ struct au_iinfo *iinfo;
35056+ struct super_block *sb;
35057+ struct au_hinode *hi;
35058+ struct inode *h_inode;
35059+ struct au_branch *br;
35060+ vfs_writef_t xwrite;
35061+
35062+ AuDebugOn(au_is_bad_inode(inode));
35063+
35064+ sb = inode->i_sb;
35065+ mnt_flags = au_mntflags(sb);
35066+ if (!au_opt_test(mnt_flags, XINO)
35067+ || inode->i_ino == AUFS_ROOT_INO)
35068+ return;
35069+
35070+ if (unlinked) {
35071+ au_xigen_inc(inode);
35072+ au_xib_clear_bit(inode);
35073+ }
35074+
35075+ iinfo = au_ii(inode);
35076+ bindex = iinfo->ii_btop;
35077+ if (bindex < 0)
35078+ return;
35079+
35080+ xwrite = au_sbi(sb)->si_xwrite;
35081+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
35082+ hi = au_hinode(iinfo, bindex);
35083+ bbot = iinfo->ii_bbot;
35084+ for (; bindex <= bbot; bindex++, hi++) {
35085+ h_inode = hi->hi_inode;
35086+ if (!h_inode
35087+ || (!unlinked && h_inode->i_nlink))
35088+ continue;
35089+
35090+ /* inode may not be revalidated */
35091+ bi = au_br_index(sb, hi->hi_id);
35092+ if (bi < 0)
35093+ continue;
35094+
35095+ br = au_sbr(sb, bi);
35096+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
35097+ h_inode->i_ino, /*ino*/0);
35098+ if (!err && try_trunc
35099+ && au_test_fs_trunc_xino(au_br_sb(br)))
35100+ xino_try_trunc(sb, br);
35101+ }
35102+}
35103+
35104+/* get an unused inode number from bitmap */
35105+ino_t au_xino_new_ino(struct super_block *sb)
35106+{
35107+ ino_t ino;
35108+ unsigned long *p, pindex, ul, pend;
35109+ struct au_sbinfo *sbinfo;
35110+ struct file *file;
35111+ int free_bit, err;
35112+
35113+ if (!au_opt_test(au_mntflags(sb), XINO))
35114+ return iunique(sb, AUFS_FIRST_INO);
35115+
35116+ sbinfo = au_sbi(sb);
35117+ mutex_lock(&sbinfo->si_xib_mtx);
35118+ p = sbinfo->si_xib_buf;
35119+ free_bit = sbinfo->si_xib_next_bit;
35120+ if (free_bit < page_bits && !test_bit(free_bit, p))
35121+ goto out; /* success */
35122+ free_bit = find_first_zero_bit(p, page_bits);
35123+ if (free_bit < page_bits)
35124+ goto out; /* success */
35125+
35126+ pindex = sbinfo->si_xib_last_pindex;
35127+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
35128+ err = xib_pindex(sb, ul);
35129+ if (unlikely(err))
35130+ goto out_err;
35131+ free_bit = find_first_zero_bit(p, page_bits);
35132+ if (free_bit < page_bits)
35133+ goto out; /* success */
35134+ }
35135+
35136+ file = sbinfo->si_xib;
35137+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
35138+ for (ul = pindex + 1; ul <= pend; ul++) {
35139+ err = xib_pindex(sb, ul);
35140+ if (unlikely(err))
35141+ goto out_err;
35142+ free_bit = find_first_zero_bit(p, page_bits);
35143+ if (free_bit < page_bits)
35144+ goto out; /* success */
35145+ }
35146+ BUG();
35147+
35148+out:
35149+ set_bit(free_bit, p);
35150+ sbinfo->si_xib_next_bit = free_bit + 1;
35151+ pindex = sbinfo->si_xib_last_pindex;
35152+ mutex_unlock(&sbinfo->si_xib_mtx);
35153+ ino = xib_calc_ino(pindex, free_bit);
35154+ AuDbg("i%lu\n", (unsigned long)ino);
35155+ return ino;
35156+out_err:
35157+ mutex_unlock(&sbinfo->si_xib_mtx);
35158+ AuDbg("i0\n");
35159+ return 0;
35160+}
35161+
35162+/*
35163+ * read @ino from xinofile for the specified branch{@sb, @bindex}
35164+ * at the position of @h_ino.
35165+ * if no entry exists for @h_ino, *@ino is set to zero and zero is returned.
35166+ */
35167+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
35168+ ino_t *ino)
35169+{
35170+ int err;
35171+ ssize_t sz;
35172+ loff_t pos;
35173+ struct file *file;
35174+ struct au_sbinfo *sbinfo;
35175+
35176+ *ino = 0;
35177+ if (!au_opt_test(au_mntflags(sb), XINO))
35178+ return 0; /* no xino */
35179+
35180+ err = 0;
35181+ sbinfo = au_sbi(sb);
35182+ pos = h_ino;
35183+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
35184+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
35185+ return -EFBIG;
35186+ }
35187+ pos *= sizeof(*ino);
35188+
35189+ file = au_sbr(sb, bindex)->br_xino.xi_file;
35190+ if (vfsub_f_size_read(file) < pos + sizeof(*ino))
35191+ return 0; /* no ino */
35192+
35193+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
35194+ if (sz == sizeof(*ino))
35195+ return 0; /* success */
35196+
35197+ err = sz;
35198+ if (unlikely(sz >= 0)) {
35199+ err = -EIO;
35200+ AuIOErr("xino read error (%zd)\n", sz);
35201+ }
35202+
35203+ return err;
35204+}
35205+
35206+/* ---------------------------------------------------------------------- */
35207+
35208+/* create and set a new xino file */
35209+
35210+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
35211+{
35212+ struct file *file;
35213+ struct dentry *h_parent, *d;
35214+ struct inode *h_dir, *inode;
35215+ int err;
35216+
35217+ /*
35218+ * at mount-time, and the xino file is the default path,
35219+ * hnotify is disabled so we have no notify events to ignore.
35220+ * when a user specified the xino, we cannot get au_hdir to be ignored.
35221+ */
35222+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
35223+ /* | __FMODE_NONOTIFY */,
35224+ S_IRUGO | S_IWUGO);
35225+ if (IS_ERR(file)) {
35226+ if (!silent)
35227+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
35228+ return file;
35229+ }
35230+
35231+ /* keep file count */
35232+ err = 0;
35233+ inode = file_inode(file);
35234+ h_parent = dget_parent(file->f_path.dentry);
35235+ h_dir = d_inode(h_parent);
35236+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
35237+ /* mnt_want_write() is unnecessary here */
35238+ /* no delegation since it is just created */
35239+ if (inode->i_nlink)
35240+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
35241+ /*force*/0);
35242+ inode_unlock(h_dir);
35243+ dput(h_parent);
35244+ if (unlikely(err)) {
35245+ if (!silent)
35246+ pr_err("unlink %s(%d)\n", fname, err);
35247+ goto out;
35248+ }
35249+
35250+ err = -EINVAL;
35251+ d = file->f_path.dentry;
35252+ if (unlikely(sb == d->d_sb)) {
35253+ if (!silent)
35254+ pr_err("%s must be outside\n", fname);
35255+ goto out;
35256+ }
35257+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
35258+ if (!silent)
35259+ pr_err("xino doesn't support %s(%s)\n",
35260+ fname, au_sbtype(d->d_sb));
35261+ goto out;
35262+ }
35263+ return file; /* success */
35264+
35265+out:
35266+ fput(file);
35267+ file = ERR_PTR(err);
35268+ return file;
35269+}
35270+
35271+/*
35272+ * find another branch who is on the same filesystem of the specified
35273+ * branch{@btgt}. search until @bbot.
35274+ */
35275+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
35276+ aufs_bindex_t bbot)
35277+{
35278+ aufs_bindex_t bindex;
35279+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
35280+
35281+ for (bindex = 0; bindex < btgt; bindex++)
35282+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
35283+ return bindex;
35284+ for (bindex++; bindex <= bbot; bindex++)
35285+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
35286+ return bindex;
35287+ return -1;
35288+}
35289+
35290+/* ---------------------------------------------------------------------- */
35291+
35292+/*
35293+ * initialize the xinofile for the specified branch @br
35294+ * at the place/path where @base_file indicates.
35295+ * test whether another branch is on the same filesystem or not,
35296+ * if @do_test is true.
35297+ */
35298+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
35299+ struct file *base_file, int do_test)
35300+{
35301+ int err;
35302+ ino_t ino;
35303+ aufs_bindex_t bbot, bindex;
35304+ struct au_branch *shared_br, *b;
35305+ struct file *file;
35306+ struct super_block *tgt_sb;
35307+
35308+ shared_br = NULL;
35309+ bbot = au_sbbot(sb);
35310+ if (do_test) {
35311+ tgt_sb = au_br_sb(br);
35312+ for (bindex = 0; bindex <= bbot; bindex++) {
35313+ b = au_sbr(sb, bindex);
35314+ if (tgt_sb == au_br_sb(b)) {
35315+ shared_br = b;
35316+ break;
35317+ }
35318+ }
35319+ }
35320+
35321+ if (!shared_br || !shared_br->br_xino.xi_file) {
35322+ struct au_xino_lock_dir ldir;
35323+
35324+ au_xino_lock_dir(sb, base_file, &ldir);
35325+ /* mnt_want_write() is unnecessary here */
35326+ file = au_xino_create2(base_file, NULL);
35327+ au_xino_unlock_dir(&ldir);
35328+ err = PTR_ERR(file);
35329+ if (IS_ERR(file))
35330+ goto out;
35331+ br->br_xino.xi_file = file;
35332+ } else {
35333+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
35334+ get_file(br->br_xino.xi_file);
35335+ }
35336+
35337+ ino = AUFS_ROOT_INO;
35338+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
35339+ h_ino, ino);
35340+ if (unlikely(err)) {
35341+ fput(br->br_xino.xi_file);
35342+ br->br_xino.xi_file = NULL;
35343+ }
35344+
35345+out:
35346+ return err;
35347+}
35348+
35349+/* ---------------------------------------------------------------------- */
35350+
35351+/* truncate a xino bitmap file */
35352+
35353+/* todo: slow */
35354+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
35355+{
35356+ int err, bit;
35357+ ssize_t sz;
35358+ unsigned long pindex;
35359+ loff_t pos, pend;
35360+ struct au_sbinfo *sbinfo;
35361+ vfs_readf_t func;
35362+ ino_t *ino;
35363+ unsigned long *p;
35364+
35365+ err = 0;
35366+ sbinfo = au_sbi(sb);
35367+ MtxMustLock(&sbinfo->si_xib_mtx);
35368+ p = sbinfo->si_xib_buf;
35369+ func = sbinfo->si_xread;
35370+ pend = vfsub_f_size_read(file);
35371+ pos = 0;
35372+ while (pos < pend) {
35373+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
35374+ err = sz;
35375+ if (unlikely(sz <= 0))
35376+ goto out;
35377+
35378+ err = 0;
35379+ for (ino = page; sz > 0; ino++, sz -= sizeof(*ino)) { /* one ino_t entry per step */
35380+ if (unlikely(*ino < AUFS_FIRST_INO))
35381+ continue;
35382+
35383+ xib_calc_bit(*ino, &pindex, &bit);
35384+ AuDebugOn(page_bits <= bit);
35385+ err = xib_pindex(sb, pindex);
35386+ if (!err)
35387+ set_bit(bit, p);
35388+ else
35389+ goto out;
35390+ }
35391+ }
35392+
35393+out:
35394+ return err;
35395+}
35396+
35397+static int xib_restore(struct super_block *sb)
35398+{
35399+ int err;
35400+ aufs_bindex_t bindex, bbot;
35401+ void *page;
35402+
35403+ err = -ENOMEM;
35404+ page = (void *)__get_free_page(GFP_NOFS);
35405+ if (unlikely(!page))
35406+ goto out;
35407+
35408+ err = 0;
35409+ bbot = au_sbbot(sb);
35410+ for (bindex = 0; !err && bindex <= bbot; bindex++)
35411+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
35412+ err = do_xib_restore
35413+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
35414+ else
35415+ AuDbg("b%d\n", bindex);
35416+ au_delayed_free_page((unsigned long)page);
35417+
35418+out:
35419+ return err;
35420+}
35421+
35422+int au_xib_trunc(struct super_block *sb)
35423+{
35424+ int err;
35425+ ssize_t sz;
35426+ loff_t pos;
35427+ struct au_xino_lock_dir ldir;
35428+ struct au_sbinfo *sbinfo;
35429+ unsigned long *p;
35430+ struct file *file;
35431+
35432+ SiMustWriteLock(sb);
35433+
35434+ err = 0;
35435+ sbinfo = au_sbi(sb);
35436+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
35437+ goto out;
35438+
35439+ file = sbinfo->si_xib;
35440+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
35441+ goto out;
35442+
35443+ au_xino_lock_dir(sb, file, &ldir);
35444+ /* mnt_want_write() is unnecessary here */
35445+ file = au_xino_create2(sbinfo->si_xib, NULL);
35446+ au_xino_unlock_dir(&ldir);
35447+ err = PTR_ERR(file);
35448+ if (IS_ERR(file))
35449+ goto out;
35450+ fput(sbinfo->si_xib);
35451+ sbinfo->si_xib = file;
35452+
35453+ p = sbinfo->si_xib_buf;
35454+ memset(p, 0, PAGE_SIZE);
35455+ pos = 0;
35456+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
35457+ if (unlikely(sz != PAGE_SIZE)) {
35458+ err = sz;
35459+ AuIOErr("err %d\n", err);
35460+ if (sz >= 0)
35461+ err = -EIO;
35462+ goto out;
35463+ }
35464+
35465+ mutex_lock(&sbinfo->si_xib_mtx);
35466+ /* mnt_want_write() is unnecessary here */
35467+ err = xib_restore(sb);
35468+ mutex_unlock(&sbinfo->si_xib_mtx);
35469+
35470+out:
35471+ return err;
35472+}
35473+
35474+/* ---------------------------------------------------------------------- */
35475+
35476+/*
35477+ * xino mount option handlers
35478+ */
35479+
35480+/* xino bitmap */
35481+static void xino_clear_xib(struct super_block *sb)
35482+{
35483+ struct au_sbinfo *sbinfo;
35484+
35485+ SiMustWriteLock(sb);
35486+
35487+ sbinfo = au_sbi(sb);
35488+ sbinfo->si_xread = NULL;
35489+ sbinfo->si_xwrite = NULL;
35490+ if (sbinfo->si_xib)
35491+ fput(sbinfo->si_xib);
35492+ sbinfo->si_xib = NULL;
35493+ if (sbinfo->si_xib_buf)
35494+ au_delayed_free_page((unsigned long)sbinfo->si_xib_buf);
35495+ sbinfo->si_xib_buf = NULL;
35496+}
35497+
35498+static int au_xino_set_xib(struct super_block *sb, struct file *base)
35499+{
35500+ int err;
35501+ loff_t pos;
35502+ struct au_sbinfo *sbinfo;
35503+ struct file *file;
35504+
35505+ SiMustWriteLock(sb);
35506+
35507+ sbinfo = au_sbi(sb);
35508+ file = au_xino_create2(base, sbinfo->si_xib);
35509+ err = PTR_ERR(file);
35510+ if (IS_ERR(file))
35511+ goto out;
35512+ if (sbinfo->si_xib)
35513+ fput(sbinfo->si_xib);
35514+ sbinfo->si_xib = file;
35515+ sbinfo->si_xread = vfs_readf(file);
35516+ sbinfo->si_xwrite = vfs_writef(file);
35517+
35518+ err = -ENOMEM;
35519+ if (!sbinfo->si_xib_buf)
35520+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
35521+ if (unlikely(!sbinfo->si_xib_buf))
35522+ goto out_unset;
35523+
35524+ sbinfo->si_xib_last_pindex = 0;
35525+ sbinfo->si_xib_next_bit = 0;
35526+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
35527+ pos = 0;
35528+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
35529+ PAGE_SIZE, &pos);
35530+ if (unlikely(err != PAGE_SIZE))
35531+ goto out_free;
35532+ }
35533+ err = 0;
35534+ goto out; /* success */
35535+
35536+out_free:
35537+ if (sbinfo->si_xib_buf)
35538+ au_delayed_free_page((unsigned long)sbinfo->si_xib_buf);
35539+ sbinfo->si_xib_buf = NULL;
35540+ if (err >= 0)
35541+ err = -EIO;
35542+out_unset:
35543+ fput(sbinfo->si_xib);
35544+ sbinfo->si_xib = NULL;
35545+ sbinfo->si_xread = NULL;
35546+ sbinfo->si_xwrite = NULL;
35547+out:
35548+ return err;
35549+}
35550+
35551+/* xino for each branch */
35552+static void xino_clear_br(struct super_block *sb)
35553+{
35554+ aufs_bindex_t bindex, bbot;
35555+ struct au_branch *br;
35556+
35557+ bbot = au_sbbot(sb);
35558+ for (bindex = 0; bindex <= bbot; bindex++) {
35559+ br = au_sbr(sb, bindex);
35560+ if (!br || !br->br_xino.xi_file)
35561+ continue;
35562+
35563+ fput(br->br_xino.xi_file);
35564+ br->br_xino.xi_file = NULL;
35565+ }
35566+}
35567+
35568+static int au_xino_set_br(struct super_block *sb, struct file *base)
35569+{
35570+ int err;
35571+ ino_t ino;
35572+ aufs_bindex_t bindex, bbot, bshared;
35573+ struct {
35574+ struct file *old, *new;
35575+ } *fpair, *p;
35576+ struct au_branch *br;
35577+ struct inode *inode;
35578+ vfs_writef_t writef;
35579+
35580+ SiMustWriteLock(sb);
35581+
35582+ err = -ENOMEM;
35583+ bbot = au_sbbot(sb);
35584+ fpair = kcalloc(bbot + 1, sizeof(*fpair), GFP_NOFS);
35585+ if (unlikely(!fpair))
35586+ goto out;
35587+
35588+ inode = d_inode(sb->s_root);
35589+ ino = AUFS_ROOT_INO;
35590+ writef = au_sbi(sb)->si_xwrite;
35591+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
35592+ bshared = is_sb_shared(sb, bindex, bindex - 1);
35593+ if (bshared >= 0) {
35594+ /* shared xino */
35595+ *p = fpair[bshared];
35596+ get_file(p->new);
35597+ }
35598+
35599+ if (!p->new) {
35600+ /* new xino */
35601+ br = au_sbr(sb, bindex);
35602+ p->old = br->br_xino.xi_file;
35603+ p->new = au_xino_create2(base, br->br_xino.xi_file);
35604+ err = PTR_ERR(p->new);
35605+ if (IS_ERR(p->new)) {
35606+ p->new = NULL;
35607+ goto out_pair;
35608+ }
35609+ }
35610+
35611+ err = au_xino_do_write(writef, p->new,
35612+ au_h_iptr(inode, bindex)->i_ino, ino);
35613+ if (unlikely(err))
35614+ goto out_pair;
35615+ }
35616+
35617+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
35618+ br = au_sbr(sb, bindex);
35619+ if (br->br_xino.xi_file)
35620+ fput(br->br_xino.xi_file);
35621+ get_file(p->new);
35622+ br->br_xino.xi_file = p->new;
35623+ }
35624+
35625+out_pair:
35626+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++)
35627+ if (p->new)
35628+ fput(p->new);
35629+ else
35630+ break;
35631+ au_delayed_kfree(fpair);
35632+out:
35633+ return err;
35634+}
35635+
35636+void au_xino_clr(struct super_block *sb)
35637+{
35638+ struct au_sbinfo *sbinfo;
35639+
35640+ au_xigen_clr(sb);
35641+ xino_clear_xib(sb);
35642+ xino_clear_br(sb);
35643+ sbinfo = au_sbi(sb);
35644+ /* lvalue, do not call au_mntflags() */
35645+ au_opt_clr(sbinfo->si_mntflags, XINO);
35646+}
35647+
35648+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
35649+{
35650+ int err, skip;
35651+ struct dentry *parent, *cur_parent;
35652+ struct qstr *dname, *cur_name;
35653+ struct file *cur_xino;
35654+ struct inode *dir;
35655+ struct au_sbinfo *sbinfo;
35656+
35657+ SiMustWriteLock(sb);
35658+
35659+ err = 0;
35660+ sbinfo = au_sbi(sb);
35661+ parent = dget_parent(xino->file->f_path.dentry);
35662+ if (remount) {
35663+ skip = 0;
35664+ dname = &xino->file->f_path.dentry->d_name;
35665+ cur_xino = sbinfo->si_xib;
35666+ if (cur_xino) {
35667+ cur_parent = dget_parent(cur_xino->f_path.dentry);
35668+ cur_name = &cur_xino->f_path.dentry->d_name;
35669+ skip = (cur_parent == parent
35670+ && au_qstreq(dname, cur_name));
35671+ dput(cur_parent);
35672+ }
35673+ if (skip)
35674+ goto out;
35675+ }
35676+
35677+ au_opt_set(sbinfo->si_mntflags, XINO);
35678+ dir = d_inode(parent);
35679+ inode_lock_nested(dir, AuLsc_I_PARENT);
35680+ /* mnt_want_write() is unnecessary here */
35681+ err = au_xino_set_xib(sb, xino->file);
35682+ if (!err)
35683+ err = au_xigen_set(sb, xino->file);
35684+ if (!err)
35685+ err = au_xino_set_br(sb, xino->file);
35686+ inode_unlock(dir);
35687+ if (!err)
35688+ goto out; /* success */
35689+
35690+ /* reset all */
35691+ AuIOErr("failed creating xino(%d).\n", err);
35692+ au_xigen_clr(sb);
35693+ xino_clear_xib(sb);
35694+
35695+out:
35696+ dput(parent);
35697+ return err;
35698+}
35699+
35700+/* ---------------------------------------------------------------------- */
35701+
35702+/*
35703+ * create a xinofile at the default place/path.
35704+ */
35705+struct file *au_xino_def(struct super_block *sb)
35706+{
35707+ struct file *file;
35708+ char *page, *p;
35709+ struct au_branch *br;
35710+ struct super_block *h_sb;
35711+ struct path path;
35712+ aufs_bindex_t bbot, bindex, bwr;
35713+
35714+ br = NULL;
35715+ bbot = au_sbbot(sb);
35716+ bwr = -1;
35717+ for (bindex = 0; bindex <= bbot; bindex++) {
35718+ br = au_sbr(sb, bindex);
35719+ if (au_br_writable(br->br_perm)
35720+ && !au_test_fs_bad_xino(au_br_sb(br))) {
35721+ bwr = bindex;
35722+ break;
35723+ }
35724+ }
35725+
35726+ if (bwr >= 0) {
35727+ file = ERR_PTR(-ENOMEM);
35728+ page = (void *)__get_free_page(GFP_NOFS);
35729+ if (unlikely(!page))
35730+ goto out;
35731+ path.mnt = au_br_mnt(br);
35732+ path.dentry = au_h_dptr(sb->s_root, bwr);
35733+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
35734+ file = (void *)p;
35735+ if (!IS_ERR(p)) {
35736+ strcat(p, "/" AUFS_XINO_FNAME);
35737+ AuDbg("%s\n", p);
35738+ file = au_xino_create(sb, p, /*silent*/0);
35739+ if (!IS_ERR(file))
35740+ au_xino_brid_set(sb, br->br_id);
35741+ }
35742+ au_delayed_free_page((unsigned long)page);
35743+ } else {
35744+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
35745+ if (IS_ERR(file))
35746+ goto out;
35747+ h_sb = file->f_path.dentry->d_sb;
35748+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
35749+ pr_err("xino doesn't support %s(%s)\n",
35750+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
35751+ fput(file);
35752+ file = ERR_PTR(-EINVAL);
35753+ }
35754+ if (!IS_ERR(file))
35755+ au_xino_brid_set(sb, -1);
35756+ }
35757+
35758+out:
35759+ return file;
35760+}
35761+
35762+/* ---------------------------------------------------------------------- */
35763+
35764+int au_xino_path(struct seq_file *seq, struct file *file)
35765+{
35766+ int err;
35767+
35768+ err = au_seq_path(seq, &file->f_path);
35769+ if (unlikely(err))
35770+ goto out;
35771+
35772+#define Deleted "\\040(deleted)"
35773+ seq->count -= sizeof(Deleted) - 1;
35774+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
35775+ sizeof(Deleted) - 1));
35776+#undef Deleted
35777+
35778+out:
35779+ return err;
35780+}
35781diff -urN /usr/share/empty/include/uapi/linux/aufs_type.h linux/include/uapi/linux/aufs_type.h
35782--- /usr/share/empty/include/uapi/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
35783+++ linux/include/uapi/linux/aufs_type.h 2016-12-17 12:28:38.769494865 +0100
35784@@ -0,0 +1,419 @@
35785+/*
35786+ * Copyright (C) 2005-2016 Junjiro R. Okajima
35787+ *
35788+ * This program, aufs is free software; you can redistribute it and/or modify
35789+ * it under the terms of the GNU General Public License as published by
35790+ * the Free Software Foundation; either version 2 of the License, or
35791+ * (at your option) any later version.
35792+ *
35793+ * This program is distributed in the hope that it will be useful,
35794+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35795+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35796+ * GNU General Public License for more details.
35797+ *
35798+ * You should have received a copy of the GNU General Public License
35799+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
35800+ */
35801+
35802+#ifndef __AUFS_TYPE_H__
35803+#define __AUFS_TYPE_H__
35804+
35805+#define AUFS_NAME "aufs"
35806+
35807+#ifdef __KERNEL__
35808+/*
35809+ * define it before including all other headers.
35810+ * sched.h may use pr_* macros before defining "current", so define the
35811+ * no-current version first, and re-define later.
35812+ */
35813+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
35814+#include <linux/sched.h>
35815+#undef pr_fmt
35816+#define pr_fmt(fmt) \
35817+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
35818+ (int)sizeof(current->comm), current->comm, current->pid
35819+#else
35820+#include <stdint.h>
35821+#include <sys/types.h>
35822+#endif /* __KERNEL__ */
35823+
35824+#include <linux/limits.h>
35825+
35826+#define AUFS_VERSION "4.9-20161219"
35827+
35828+/* todo? move this to linux-2.6.19/include/magic.h */
35829+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
35830+
35831+/* ---------------------------------------------------------------------- */
35832+
35833+#ifdef CONFIG_AUFS_BRANCH_MAX_127
35834+typedef int8_t aufs_bindex_t;
35835+#define AUFS_BRANCH_MAX 127
35836+#else
35837+typedef int16_t aufs_bindex_t;
35838+#ifdef CONFIG_AUFS_BRANCH_MAX_511
35839+#define AUFS_BRANCH_MAX 511
35840+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
35841+#define AUFS_BRANCH_MAX 1023
35842+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
35843+#define AUFS_BRANCH_MAX 32767
35844+#endif
35845+#endif
35846+
35847+#ifdef __KERNEL__
35848+#ifndef AUFS_BRANCH_MAX
35849+#error unknown CONFIG_AUFS_BRANCH_MAX value
35850+#endif
35851+#endif /* __KERNEL__ */
35852+
35853+/* ---------------------------------------------------------------------- */
35854+
35855+#define AUFS_FSTYPE AUFS_NAME
35856+
35857+#define AUFS_ROOT_INO 2
35858+#define AUFS_FIRST_INO 11
35859+
35860+#define AUFS_WH_PFX ".wh."
35861+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
35862+#define AUFS_WH_TMP_LEN 4
35863+/* a limit for rmdir/rename a dir and copyup */
35864+#define AUFS_MAX_NAMELEN (NAME_MAX \
35865+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
35866+ - 1 /* dot */\
35867+ - AUFS_WH_TMP_LEN) /* hex */
35868+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
35869+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
35870+#define AUFS_XINO_DEF_SEC 30 /* seconds */
35871+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
35872+#define AUFS_DIRWH_DEF 3
35873+#define AUFS_RDCACHE_DEF 10 /* seconds */
35874+#define AUFS_RDCACHE_MAX 3600 /* seconds */
35875+#define AUFS_RDBLK_DEF 512 /* bytes */
35876+#define AUFS_RDHASH_DEF 32
35877+#define AUFS_WKQ_NAME AUFS_NAME "d"
35878+#define AUFS_MFS_DEF_SEC 30 /* seconds */
35879+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
35880+#define AUFS_FHSM_CACHE_DEF_SEC 30 /* seconds */
35881+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
35882+
35883+/* pseudo-link maintenance under /proc */
35884+#define AUFS_PLINK_MAINT_NAME "plink_maint"
35885+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
35886+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
35887+
35888+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
35889+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
35890+
35891+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
35892+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
35893+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
35894+
35895+/* doubly whiteouted */
35896+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
35897+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
35898+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
35899+
35900+/* branch permissions and attributes */
35901+#define AUFS_BRPERM_RW "rw"
35902+#define AUFS_BRPERM_RO "ro"
35903+#define AUFS_BRPERM_RR "rr"
35904+#define AUFS_BRATTR_COO_REG "coo_reg"
35905+#define AUFS_BRATTR_COO_ALL "coo_all"
35906+#define AUFS_BRATTR_FHSM "fhsm"
35907+#define AUFS_BRATTR_UNPIN "unpin"
35908+#define AUFS_BRATTR_ICEX "icex"
35909+#define AUFS_BRATTR_ICEX_SEC "icexsec"
35910+#define AUFS_BRATTR_ICEX_SYS "icexsys"
35911+#define AUFS_BRATTR_ICEX_TR "icextr"
35912+#define AUFS_BRATTR_ICEX_USR "icexusr"
35913+#define AUFS_BRATTR_ICEX_OTH "icexoth"
35914+#define AUFS_BRRATTR_WH "wh"
35915+#define AUFS_BRWATTR_NLWH "nolwh"
35916+#define AUFS_BRWATTR_MOO "moo"
35917+
35918+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
35919+#define AuBrPerm_RO (1 << 1) /* readonly */
35920+#define AuBrPerm_RR (1 << 2) /* natively readonly */
35921+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
35922+
35923+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */
35924+#define AuBrAttr_COO_ALL (1 << 4)
35925+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
35926+
35927+#define AuBrAttr_FHSM (1 << 5) /* file-based hsm */
35928+#define AuBrAttr_UNPIN (1 << 6) /* rename-able top dir of
35929+ branch. meaningless since
35930+ linux-3.18-rc1 */
35931+
35932+/* ignore error in copying XATTR */
35933+#define AuBrAttr_ICEX_SEC (1 << 7)
35934+#define AuBrAttr_ICEX_SYS (1 << 8)
35935+#define AuBrAttr_ICEX_TR (1 << 9)
35936+#define AuBrAttr_ICEX_USR (1 << 10)
35937+#define AuBrAttr_ICEX_OTH (1 << 11)
35938+#define AuBrAttr_ICEX (AuBrAttr_ICEX_SEC \
35939+ | AuBrAttr_ICEX_SYS \
35940+ | AuBrAttr_ICEX_TR \
35941+ | AuBrAttr_ICEX_USR \
35942+ | AuBrAttr_ICEX_OTH)
35943+
35944+#define AuBrRAttr_WH (1 << 12) /* whiteout-able */
35945+#define AuBrRAttr_Mask AuBrRAttr_WH
35946+
35947+#define AuBrWAttr_NoLinkWH (1 << 13) /* un-hardlinkable whiteouts */
35948+#define AuBrWAttr_MOO (1 << 14) /* move-up on open */
35949+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
35950+
35951+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | AuBrWAttr_MOO)
35952+
35953+/* #warning test userspace */
35954+#ifdef __KERNEL__
35955+#ifndef CONFIG_AUFS_FHSM
35956+#undef AuBrAttr_FHSM
35957+#define AuBrAttr_FHSM 0
35958+#endif
35959+#ifndef CONFIG_AUFS_XATTR
35960+#undef AuBrAttr_ICEX
35961+#define AuBrAttr_ICEX 0
35962+#undef AuBrAttr_ICEX_SEC
35963+#define AuBrAttr_ICEX_SEC 0
35964+#undef AuBrAttr_ICEX_SYS
35965+#define AuBrAttr_ICEX_SYS 0
35966+#undef AuBrAttr_ICEX_TR
35967+#define AuBrAttr_ICEX_TR 0
35968+#undef AuBrAttr_ICEX_USR
35969+#define AuBrAttr_ICEX_USR 0
35970+#undef AuBrAttr_ICEX_OTH
35971+#define AuBrAttr_ICEX_OTH 0
35972+#endif
35973+#endif
35974+
35975+/* the longest combination */
35976+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR don't affect here */
35977+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \
35978+ "+" AUFS_BRATTR_COO_REG \
35979+ "+" AUFS_BRATTR_FHSM \
35980+ "+" AUFS_BRATTR_UNPIN \
35981+ "+" AUFS_BRATTR_ICEX_SEC \
35982+ "+" AUFS_BRATTR_ICEX_SYS \
35983+ "+" AUFS_BRATTR_ICEX_USR \
35984+ "+" AUFS_BRATTR_ICEX_OTH \
35985+ "+" AUFS_BRWATTR_NLWH)
35986+
35987+typedef struct {
35988+ char a[AuBrPermStrSz];
35989+} au_br_perm_str_t;
35990+
35991+static inline int au_br_writable(int brperm)
35992+{
35993+ return brperm & AuBrPerm_RW;
35994+}
35995+
35996+static inline int au_br_whable(int brperm)
35997+{
35998+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
35999+}
36000+
36001+static inline int au_br_wh_linkable(int brperm)
36002+{
36003+ return !(brperm & AuBrWAttr_NoLinkWH);
36004+}
36005+
36006+static inline int au_br_cmoo(int brperm)
36007+{
36008+ return brperm & AuBrAttr_CMOO_Mask;
36009+}
36010+
36011+static inline int au_br_fhsm(int brperm)
36012+{
36013+ return brperm & AuBrAttr_FHSM;
36014+}
36015+
36016+/* ---------------------------------------------------------------------- */
36017+
36018+/* ioctl */
36019+enum {
36020+ /* readdir in userspace */
36021+ AuCtl_RDU,
36022+ AuCtl_RDU_INO,
36023+
36024+ AuCtl_WBR_FD, /* pathconf wrapper */
36025+ AuCtl_IBUSY, /* busy inode */
36026+ AuCtl_MVDOWN, /* move-down */
36027+ AuCtl_BR, /* info about branches */
36028+ AuCtl_FHSM_FD /* connection for fhsm */
36029+};
36030+
36031+/* borrowed from linux/include/linux/kernel.h */
36032+#ifndef ALIGN
36033+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
36034+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
36035+#endif
36036+
36037+/* borrowed from linux/include/linux/compiler-gcc3.h */
36038+#ifndef __aligned
36039+#define __aligned(x) __attribute__((aligned(x)))
36040+#endif
36041+
36042+#ifdef __KERNEL__
36043+#ifndef __packed
36044+#define __packed __attribute__((packed))
36045+#endif
36046+#endif
36047+
36048+struct au_rdu_cookie {
36049+ uint64_t h_pos;
36050+ int16_t bindex;
36051+ uint8_t flags;
36052+ uint8_t pad;
36053+ uint32_t generation;
36054+} __aligned(8);
36055+
36056+struct au_rdu_ent {
36057+ uint64_t ino;
36058+ int16_t bindex;
36059+ uint8_t type;
36060+ uint8_t nlen;
36061+ uint8_t wh;
36062+ char name[0];
36063+} __aligned(8);
36064+
36065+static inline int au_rdu_len(int nlen)
36066+{
36067+ /* include the terminating NUL */
36068+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
36069+ sizeof(uint64_t));
36070+}
36071+
36072+union au_rdu_ent_ul {
36073+ struct au_rdu_ent __user *e;
36074+ uint64_t ul;
36075+};
36076+
36077+enum {
36078+ AufsCtlRduV_SZ,
36079+ AufsCtlRduV_End
36080+};
36081+
36082+struct aufs_rdu {
36083+ /* input */
36084+ union {
36085+ uint64_t sz; /* AuCtl_RDU */
36086+ uint64_t nent; /* AuCtl_RDU_INO */
36087+ };
36088+ union au_rdu_ent_ul ent;
36089+ uint16_t verify[AufsCtlRduV_End];
36090+
36091+ /* input/output */
36092+ uint32_t blk;
36093+
36094+ /* output */
36095+ union au_rdu_ent_ul tail;
36096+ /* number of entries which were added in a single call */
36097+ uint64_t rent;
36098+ uint8_t full;
36099+ uint8_t shwh;
36100+
36101+ struct au_rdu_cookie cookie;
36102+} __aligned(8);
36103+
36104+/* ---------------------------------------------------------------------- */
36105+
36106+struct aufs_wbr_fd {
36107+ uint32_t oflags;
36108+ int16_t brid;
36109+} __aligned(8);
36110+
36111+/* ---------------------------------------------------------------------- */
36112+
36113+struct aufs_ibusy {
36114+ uint64_t ino, h_ino;
36115+ int16_t bindex;
36116+} __aligned(8);
36117+
36118+/* ---------------------------------------------------------------------- */
36119+
36120+/* error code for move-down */
36121+/* the actual message strings are implemented in aufs-util.git */
36122+enum {
36123+ EAU_MVDOWN_OPAQUE = 1,
36124+ EAU_MVDOWN_WHITEOUT,
36125+ EAU_MVDOWN_UPPER,
36126+ EAU_MVDOWN_BOTTOM,
36127+ EAU_MVDOWN_NOUPPER,
36128+ EAU_MVDOWN_NOLOWERBR,
36129+ EAU_Last
36130+};
36131+
36132+/* flags for move-down */
36133+#define AUFS_MVDOWN_DMSG 1
36134+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */
36135+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */
36136+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */
36137+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */
36138+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */
36139+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */
36140+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */
36141+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */
36142+#define AUFS_MVDOWN_FHSM_LOWER (1 << 9) /* find fhsm attr for lower */
36143+#define AUFS_MVDOWN_STFS (1 << 10) /* req. stfs */
36144+#define AUFS_MVDOWN_STFS_FAILED (1 << 11) /* output: stfs is unusable */
36145+#define AUFS_MVDOWN_BOTTOM (1 << 12) /* output: no more lowers */
36146+
36147+/* index for move-down */
36148+enum {
36149+ AUFS_MVDOWN_UPPER,
36150+ AUFS_MVDOWN_LOWER,
36151+ AUFS_MVDOWN_NARRAY
36152+};
36153+
36154+/*
36155+ * additional info of move-down
36156+ * number of free blocks and inodes.
36157+ * subset of struct kstatfs, but smaller and always 64bit.
36158+ */
36159+struct aufs_stfs {
36160+ uint64_t f_blocks;
36161+ uint64_t f_bavail;
36162+ uint64_t f_files;
36163+ uint64_t f_ffree;
36164+};
36165+
36166+struct aufs_stbr {
36167+ int16_t brid; /* optional input */
36168+ int16_t bindex; /* output */
36169+ struct aufs_stfs stfs; /* output when AUFS_MVDOWN_STFS set */
36170+} __aligned(8);
36171+
36172+struct aufs_mvdown {
36173+ uint32_t flags; /* input/output */
36174+ struct aufs_stbr stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
36175+ int8_t au_errno; /* output */
36176+} __aligned(8);
36177+
36178+/* ---------------------------------------------------------------------- */
36179+
36180+union aufs_brinfo {
36181+ /* PATH_MAX may differ between kernel-space and user-space */
36182+ char _spacer[4096];
36183+ struct {
36184+ int16_t id;
36185+ int perm;
36186+ char path[0];
36187+ };
36188+} __aligned(8);
36189+
36190+/* ---------------------------------------------------------------------- */
36191+
36192+#define AuCtlType 'A'
36193+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
36194+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
36195+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
36196+ struct aufs_wbr_fd)
36197+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
36198+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \
36199+ struct aufs_mvdown)
36200+#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
36201+#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int)
36202+
36203+#endif /* __AUFS_TYPE_H__ */
36204aufs4.9 loopback patch
36205
36206diff --git a/drivers/block/loop.c b/drivers/block/loop.c
36207index 6ee9235..f64161f 100644
36208--- a/drivers/block/loop.c
36209+++ b/drivers/block/loop.c
36210@@ -551,7 +551,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
36211 }
36212
36213 struct switch_request {
36214- struct file *file;
36215+ struct file *file, *virt_file;
36216 struct completion wait;
36217 };
36218
36219@@ -577,6 +577,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
36220 mapping = file->f_mapping;
36221 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
36222 lo->lo_backing_file = file;
36223+ lo->lo_backing_virt_file = p->virt_file;
36224 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
36225 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
36226 lo->old_gfp_mask = mapping_gfp_mask(mapping);
36227@@ -589,11 +590,13 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
36228 * First it needs to flush existing IO, it does this by sending a magic
36229 * BIO down the pipe. The completion of this BIO does the actual switch.
36230 */
36231-static int loop_switch(struct loop_device *lo, struct file *file)
36232+static int loop_switch(struct loop_device *lo, struct file *file,
36233+ struct file *virt_file)
36234 {
36235 struct switch_request w;
36236
36237 w.file = file;
36238+ w.virt_file = virt_file;
36239
36240 /* freeze queue and wait for completion of scheduled requests */
36241 blk_mq_freeze_queue(lo->lo_queue);
36242@@ -612,7 +615,16 @@ static int loop_switch(struct loop_device *lo, struct file *file)
36243 */
36244 static int loop_flush(struct loop_device *lo)
36245 {
36246- return loop_switch(lo, NULL);
36247+ return loop_switch(lo, NULL, NULL);
36248+}
36249+
36250+static struct file *loop_real_file(struct file *file)
36251+{
36252+ struct file *f = NULL;
36253+
36254+ if (file->f_path.dentry->d_sb->s_op->real_loop)
36255+ f = file->f_path.dentry->d_sb->s_op->real_loop(file);
36256+ return f;
36257 }
36258
36259 static void loop_reread_partitions(struct loop_device *lo,
36260@@ -649,6 +661,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
36261 unsigned int arg)
36262 {
36263 struct file *file, *old_file;
36264+ struct file *f, *virt_file = NULL, *old_virt_file;
36265 struct inode *inode;
36266 int error;
36267
36268@@ -665,9 +678,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
36269 file = fget(arg);
36270 if (!file)
36271 goto out;
36272+ f = loop_real_file(file);
36273+ if (f) {
36274+ virt_file = file;
36275+ file = f;
36276+ get_file(file);
36277+ }
36278
36279 inode = file->f_mapping->host;
36280 old_file = lo->lo_backing_file;
36281+ old_virt_file = lo->lo_backing_virt_file;
36282
36283 error = -EINVAL;
36284
36285@@ -679,17 +699,21 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
36286 goto out_putf;
36287
36288 /* and ... switch */
36289- error = loop_switch(lo, file);
36290+ error = loop_switch(lo, file, virt_file);
36291 if (error)
36292 goto out_putf;
36293
36294 fput(old_file);
36295+ if (old_virt_file)
36296+ fput(old_virt_file);
36297 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
36298 loop_reread_partitions(lo, bdev);
36299 return 0;
36300
36301 out_putf:
36302 fput(file);
36303+ if (virt_file)
36304+ fput(virt_file);
36305 out:
36306 return error;
36307 }
36308@@ -876,7 +900,7 @@ static int loop_prepare_queue(struct loop_device *lo)
36309 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
36310 struct block_device *bdev, unsigned int arg)
36311 {
36312- struct file *file, *f;
36313+ struct file *file, *f, *virt_file = NULL;
36314 struct inode *inode;
36315 struct address_space *mapping;
36316 unsigned lo_blocksize;
36317@@ -891,6 +915,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
36318 file = fget(arg);
36319 if (!file)
36320 goto out;
36321+ f = loop_real_file(file);
36322+ if (f) {
36323+ virt_file = file;
36324+ file = f;
36325+ get_file(file);
36326+ }
36327
36328 error = -EBUSY;
36329 if (lo->lo_state != Lo_unbound)
36330@@ -943,6 +973,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
36331 lo->lo_device = bdev;
36332 lo->lo_flags = lo_flags;
36333 lo->lo_backing_file = file;
36334+ lo->lo_backing_virt_file = virt_file;
36335 lo->transfer = NULL;
36336 lo->ioctl = NULL;
36337 lo->lo_sizelimit = 0;
36338@@ -975,6 +1006,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
36339
36340 out_putf:
36341 fput(file);
36342+ if (virt_file)
36343+ fput(virt_file);
36344 out:
36345 /* This is safe: open() is still holding a reference. */
36346 module_put(THIS_MODULE);
36347@@ -1021,6 +1054,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
36348 static int loop_clr_fd(struct loop_device *lo)
36349 {
36350 struct file *filp = lo->lo_backing_file;
36351+ struct file *virt_filp = lo->lo_backing_virt_file;
36352 gfp_t gfp = lo->old_gfp_mask;
36353 struct block_device *bdev = lo->lo_device;
36354
36355@@ -1052,6 +1086,7 @@ static int loop_clr_fd(struct loop_device *lo)
36356 spin_lock_irq(&lo->lo_lock);
36357 lo->lo_state = Lo_rundown;
36358 lo->lo_backing_file = NULL;
36359+ lo->lo_backing_virt_file = NULL;
36360 spin_unlock_irq(&lo->lo_lock);
36361
36362 loop_release_xfer(lo);
36363@@ -1096,6 +1131,8 @@ static int loop_clr_fd(struct loop_device *lo)
36364 * bd_mutex which is usually taken before lo_ctl_mutex.
36365 */
36366 fput(filp);
36367+ if (virt_filp)
36368+ fput(virt_filp);
36369 return 0;
36370 }
36371
36372diff --git a/drivers/block/loop.h b/drivers/block/loop.h
36373index fb2237c..c3888c5 100644
36374--- a/drivers/block/loop.h
36375+++ b/drivers/block/loop.h
36376@@ -46,7 +46,7 @@ struct loop_device {
36377 int (*ioctl)(struct loop_device *, int cmd,
36378 unsigned long arg);
36379
36380- struct file * lo_backing_file;
36381+ struct file * lo_backing_file, *lo_backing_virt_file;
36382 struct block_device *lo_device;
36383 unsigned lo_blocksize;
36384 void *key_data;
36385diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
36386index d2a9a1d..d7519d0 100644
36387--- a/fs/aufs/f_op.c
36388+++ b/fs/aufs/f_op.c
36389@@ -351,7 +351,7 @@ static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
36390 if (IS_ERR(h_file))
36391 goto out;
36392
36393- if (au_test_loopback_kthread()) {
36394+ if (0 && au_test_loopback_kthread()) {
36395 au_warn_loopback(h_file->f_path.dentry->d_sb);
36396 if (file->f_mapping != h_file->f_mapping) {
36397 file->f_mapping = h_file->f_mapping;
36398diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
36399index c3ca50f..a3dbdaf 100644
36400--- a/fs/aufs/loop.c
36401+++ b/fs/aufs/loop.c
36402@@ -132,3 +132,19 @@ void au_loopback_fin(void)
36403 symbol_put(loop_backing_file);
36404 au_delayed_kfree(au_warn_loopback_array);
36405 }
36406+
36407+/* ---------------------------------------------------------------------- */
36408+
36409+/* support the loopback block device inside aufs */
36410+
36411+struct file *aufs_real_loop(struct file *file)
36412+{
36413+ struct file *f;
36414+
36415+ BUG_ON(!au_test_aufs(file->f_path.dentry->d_sb));
36416+ fi_read_lock(file);
36417+ f = au_hf_top(file);
36418+ fi_read_unlock(file);
36419+ AuDebugOn(!f);
36420+ return f;
36421+}
36422diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
36423index 48bf070..66afec7 100644
36424--- a/fs/aufs/loop.h
36425+++ b/fs/aufs/loop.h
36426@@ -25,7 +25,11 @@
36427
36428 int au_loopback_init(void);
36429 void au_loopback_fin(void);
36430+
36431+struct file *aufs_real_loop(struct file *file);
36432 #else
36433+AuStub(struct file *, loop_backing_file, return NULL)
36434+
36435 AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
36436 struct dentry *h_adding)
36437 AuStubInt0(au_test_loopback_kthread, void)
36438@@ -33,6 +37,8 @@
36439
36440 AuStubInt0(au_loopback_init, void)
36441 AuStubVoid(au_loopback_fin, void)
36442+
36443+AuStub(struct file *, aufs_real_loop, return NULL, struct file *file)
36444 #endif /* BLK_DEV_LOOP */
36445
36446 #endif /* __KERNEL__ */
36447diff --git a/fs/aufs/super.c b/fs/aufs/super.c
36448index 0082ce4..5085378 100644
36449--- a/fs/aufs/super.c
36450+++ b/fs/aufs/super.c
36451@@ -839,7 +839,10 @@ static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
36452 .statfs = aufs_statfs,
36453 .put_super = aufs_put_super,
36454 .sync_fs = aufs_sync_fs,
36455- .remount_fs = aufs_remount_fs
36456+ .remount_fs = aufs_remount_fs,
36457+#ifdef CONFIG_AUFS_BDEV_LOOP
36458+ .real_loop = aufs_real_loop
36459+#endif
36460 };
36461
36462 /* ---------------------------------------------------------------------- */
36463diff --git a/include/linux/fs.h b/include/linux/fs.h
36464index a903bc3..db820e3 100644
36465--- a/include/linux/fs.h
36466+++ b/include/linux/fs.h
36467@@ -1823,6 +1823,10 @@ struct super_operations {
36468 struct shrink_control *);
36469 long (*free_cached_objects)(struct super_block *,
36470 struct shrink_control *);
36471+#if defined(CONFIG_BLK_DEV_LOOP) || defined(CONFIG_BLK_DEV_LOOP_MODULE)
36472+ /* and aufs */
36473+ struct file *(*real_loop)(struct file *);
36474+#endif
36475 };
36476
36477 /*
This page took 0.70666 seconds and 4 git commands to generate.