git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs4.patch
- up to 4.9.84
[packages/kernel.git] / kernel-aufs4.patch
Commit Line Data
f2c43d5f 1aufs4.9 kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
f2c43d5f 4index 4bd03a2..620e01b 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
f2c43d5f 7@@ -249,6 +249,7 @@ source "fs/pstore/Kconfig"
5527c038 8 source "fs/sysv/Kconfig"
7e9cd9fe 9 source "fs/ufs/Kconfig"
7f207e10
AM
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
e2f27e51 16index ed2b632..aa6d14b 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
e2f27e51 19@@ -129,3 +129,4 @@ obj-y += exofs/ # Multiple modules
7f207e10 20 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 21 obj-$(CONFIG_PSTORE) += pstore/
c06a8ce3 22 obj-$(CONFIG_EFIVAR_FS) += efivarfs/
86dc4139 23+obj-$(CONFIG_AUFS_FS) += aufs/
c06a8ce3 24diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
f2c43d5f 25index cd2be1c..78f3c68 100644
c06a8ce3
AM
26--- a/include/uapi/linux/Kbuild
27+++ b/include/uapi/linux/Kbuild
5527c038 28@@ -59,6 +59,7 @@ header-y += atmsvc.h
03673fb0
JR
29 header-y += atm_tcp.h
30 header-y += atm_zatm.h
c06a8ce3
AM
31 header-y += audit.h
32+header-y += aufs_type.h
c06a8ce3 33 header-y += auto_fs4.h
03673fb0 34 header-y += auto_fs.h
c06a8ce3 35 header-y += auxvec.h
f2c43d5f 36aufs4.9 base patch
7f207e10 37
c1595e42 38diff --git a/MAINTAINERS b/MAINTAINERS
f2c43d5f 39index 63cefa6..d78b954 100644
c1595e42
JR
40--- a/MAINTAINERS
41+++ b/MAINTAINERS
f2c43d5f 42@@ -2293,6 +2293,19 @@ F: include/linux/audit.h
c1595e42
JR
43 F: include/uapi/linux/audit.h
44 F: kernel/audit*
45
46+AUFS (advanced multi layered unification filesystem) FILESYSTEM
47+M: "J. R. Okajima" <hooanon05g@gmail.com>
48+L: linux-unionfs@vger.kernel.org
49+L: aufs-users@lists.sourceforge.net (members only)
50+W: http://aufs.sourceforge.net
5527c038 51+T: git://github.com/sfjro/aufs4-linux.git
c1595e42
JR
52+S: Supported
53+F: Documentation/filesystems/aufs/
54+F: Documentation/ABI/testing/debugfs-aufs
55+F: Documentation/ABI/testing/sysfs-aufs
56+F: fs/aufs/
57+F: include/uapi/linux/aufs_type.h
58+
59 AUXILIARY DISPLAY DRIVERS
60 M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
61 W: http://miguelojeda.es/auxdisplay.htm
392086de 62diff --git a/drivers/block/loop.c b/drivers/block/loop.c
f2c43d5f 63index fa1b7a9..6ee9235 100644
392086de
AM
64--- a/drivers/block/loop.c
65+++ b/drivers/block/loop.c
e2f27e51 66@@ -701,6 +701,24 @@ static inline int is_loop_device(struct file *file)
392086de
AM
67 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
68 }
69
70+/*
71+ * for AUFS
72+ * no get/put for file.
73+ */
74+struct file *loop_backing_file(struct super_block *sb)
75+{
76+ struct file *ret;
77+ struct loop_device *l;
78+
79+ ret = NULL;
80+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
81+ l = sb->s_bdev->bd_disk->private_data;
82+ ret = l->lo_backing_file;
83+ }
84+ return ret;
85+}
febd17d6 86+EXPORT_SYMBOL_GPL(loop_backing_file);
392086de
AM
87+
88 /* loop sysfs attributes */
89
90 static ssize_t loop_attr_show(struct device *dev, char *page,
c1595e42 91diff --git a/fs/dcache.c b/fs/dcache.c
e2f27e51 92index 5c7cc95..df0268c 100644
c1595e42
JR
93--- a/fs/dcache.c
94+++ b/fs/dcache.c
e2f27e51 95@@ -1164,7 +1164,7 @@ enum d_walk_ret {
c1595e42
JR
96 *
97 * The @enter() and @finish() callbacks are called with d_lock held.
98 */
99-static void d_walk(struct dentry *parent, void *data,
100+void d_walk(struct dentry *parent, void *data,
101 enum d_walk_ret (*enter)(void *, struct dentry *),
102 void (*finish)(void *))
103 {
febd17d6
JR
104diff --git a/fs/fcntl.c b/fs/fcntl.c
105index 350a2c8..6f42279 100644
106--- a/fs/fcntl.c
107+++ b/fs/fcntl.c
108@@ -29,7 +29,7 @@
109
110 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
111
112-static int setfl(int fd, struct file * filp, unsigned long arg)
113+int setfl(int fd, struct file * filp, unsigned long arg)
114 {
115 struct inode * inode = file_inode(filp);
116 int error = 0;
117@@ -60,6 +60,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
118
119 if (filp->f_op->check_flags)
120 error = filp->f_op->check_flags(arg);
121+ if (!error && filp->f_op->setfl)
122+ error = filp->f_op->setfl(filp, arg);
123 if (error)
124 return error;
125
5afbbe0d 126diff --git a/fs/inode.c b/fs/inode.c
f2c43d5f 127index 88110fd..9a9ba3a 100644
5afbbe0d
AM
128--- a/fs/inode.c
129+++ b/fs/inode.c
f2c43d5f 130@@ -1642,7 +1642,7 @@ int generic_update_time(struct inode *inode, struct timespec *time, int flags)
5afbbe0d
AM
131 * This does the actual work of updating an inodes time or version. Must have
132 * had called mnt_want_write() before calling this.
133 */
134-static int update_time(struct inode *inode, struct timespec *time, int flags)
135+int update_time(struct inode *inode, struct timespec *time, int flags)
136 {
137 int (*update_time)(struct inode *, struct timespec *, int);
138
5527c038 139diff --git a/fs/read_write.c b/fs/read_write.c
f2c43d5f 140index 190e0d36..4052813 100644
5527c038
JR
141--- a/fs/read_write.c
142+++ b/fs/read_write.c
5afbbe0d 143@@ -515,6 +515,28 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
5527c038
JR
144 }
145 EXPORT_SYMBOL(__vfs_write);
146
147+vfs_readf_t vfs_readf(struct file *file)
148+{
149+ const struct file_operations *fop = file->f_op;
150+
151+ if (fop->read)
152+ return fop->read;
153+ if (fop->read_iter)
154+ return new_sync_read;
155+ return ERR_PTR(-ENOSYS);
156+}
157+
158+vfs_writef_t vfs_writef(struct file *file)
159+{
160+ const struct file_operations *fop = file->f_op;
161+
162+ if (fop->write)
163+ return fop->write;
164+ if (fop->write_iter)
165+ return new_sync_write;
166+ return ERR_PTR(-ENOSYS);
167+}
168+
169 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
170 {
171 mm_segment_t old_fs;
7f207e10 172diff --git a/fs/splice.c b/fs/splice.c
f2c43d5f 173index 5a7750b..28160a7 100644
7f207e10
AM
174--- a/fs/splice.c
175+++ b/fs/splice.c
f2c43d5f 176@@ -855,8 +855,8 @@ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
177 /*
178 * Attempt to initiate a splice from pipe to file.
179 */
180-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
181- loff_t *ppos, size_t len, unsigned int flags)
182+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
183+ loff_t *ppos, size_t len, unsigned int flags)
184 {
185 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
186 loff_t *, size_t, unsigned int);
f2c43d5f 187@@ -872,9 +872,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
188 /*
189 * Attempt to initiate a splice from a file to a pipe.
190 */
191-static long do_splice_to(struct file *in, loff_t *ppos,
192- struct pipe_inode_info *pipe, size_t len,
193- unsigned int flags)
194+long do_splice_to(struct file *in, loff_t *ppos,
195+ struct pipe_inode_info *pipe, size_t len,
196+ unsigned int flags)
197 {
198 ssize_t (*splice_read)(struct file *, loff_t *,
199 struct pipe_inode_info *, size_t, unsigned int);
b912730e 200diff --git a/include/linux/file.h b/include/linux/file.h
5afbbe0d 201index 7444f5f..bdac0be 100644
b912730e
AM
202--- a/include/linux/file.h
203+++ b/include/linux/file.h
f2c43d5f 204@@ -19,6 +19,7 @@
b912730e
AM
205 struct path;
206 extern struct file *alloc_file(struct path *, fmode_t mode,
207 const struct file_operations *fop);
208+extern struct file *get_empty_filp(void);
209
210 static inline void fput_light(struct file *file, int fput_needed)
211 {
5527c038 212diff --git a/include/linux/fs.h b/include/linux/fs.h
f2c43d5f 213index dc0478c..27c05e7 100644
5527c038
JR
214--- a/include/linux/fs.h
215+++ b/include/linux/fs.h
f2c43d5f 216@@ -1291,6 +1291,7 @@ struct fasync_struct {
febd17d6
JR
217 /* can be called from interrupts */
218 extern void kill_fasync(struct fasync_struct **, int, int);
219
220+extern int setfl(int fd, struct file * filp, unsigned long arg);
221 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
222 extern void f_setown(struct file *filp, unsigned long arg, int force);
223 extern void f_delown(struct file *filp);
f2c43d5f 224@@ -1715,6 +1716,7 @@ struct file_operations {
febd17d6
JR
225 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
226 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
227 int (*check_flags)(int);
228+ int (*setfl)(struct file *, unsigned long);
229 int (*flock) (struct file *, int, struct file_lock *);
230 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
231 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
f2c43d5f 232@@ -1768,6 +1770,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
5527c038
JR
233 struct iovec *fast_pointer,
234 struct iovec **ret_pointer);
235
236+typedef ssize_t (*vfs_readf_t)(struct file *, char __user *, size_t, loff_t *);
237+typedef ssize_t (*vfs_writef_t)(struct file *, const char __user *, size_t,
238+ loff_t *);
239+vfs_readf_t vfs_readf(struct file *file);
240+vfs_writef_t vfs_writef(struct file *file);
241+
242 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
243 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
244 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
f2c43d5f 245@@ -2140,6 +2148,7 @@ extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
5afbbe0d
AM
246 extern void ihold(struct inode * inode);
247 extern void iput(struct inode *);
248 extern int generic_update_time(struct inode *, struct timespec *, int);
249+extern int update_time(struct inode *, struct timespec *, int);
250
251 /* /sys/fs */
252 extern struct kobject *fs_kobj;
1e00d052 253diff --git a/include/linux/splice.h b/include/linux/splice.h
f2c43d5f 254index 00a2116..1f0a4a2 100644
1e00d052
AM
255--- a/include/linux/splice.h
256+++ b/include/linux/splice.h
f2c43d5f 257@@ -86,4 +86,10 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
4b3da204
AM
258
259 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
106341ce 260 extern const struct pipe_buf_operations default_pipe_buf_ops;
1e00d052
AM
261+
262+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
263+ loff_t *ppos, size_t len, unsigned int flags);
264+extern long do_splice_to(struct file *in, loff_t *ppos,
265+ struct pipe_inode_info *pipe, size_t len,
266+ unsigned int flags);
267 #endif
f2c43d5f 268aufs4.9 mmap patch
fb47a38f 269
c1595e42 270diff --git a/fs/proc/base.c b/fs/proc/base.c
f2c43d5f 271index ca651ac..0e8551a 100644
c1595e42
JR
272--- a/fs/proc/base.c
273+++ b/fs/proc/base.c
f2c43d5f 274@@ -1953,7 +1953,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
c1595e42
JR
275 down_read(&mm->mmap_sem);
276 vma = find_exact_vma(mm, vm_start, vm_end);
277 if (vma && vma->vm_file) {
278- *path = vma->vm_file->f_path;
279+ *path = vma_pr_or_file(vma)->f_path;
280 path_get(path);
281 rc = 0;
282 }
fb47a38f 283diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
c2c0f25c 284index f8595e8..cb8eda0 100644
fb47a38f
JR
285--- a/fs/proc/nommu.c
286+++ b/fs/proc/nommu.c
076b876e 287@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
fb47a38f
JR
288 file = region->vm_file;
289
290 if (file) {
291- struct inode *inode = file_inode(region->vm_file);
292+ struct inode *inode;
076b876e 293+
fb47a38f
JR
294+ file = vmr_pr_or_file(region);
295+ inode = file_inode(file);
296 dev = inode->i_sb->s_dev;
297 ino = inode->i_ino;
298 }
299diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
f2c43d5f 300index 35b92d8..5b981db 100644
fb47a38f
JR
301--- a/fs/proc/task_mmu.c
302+++ b/fs/proc/task_mmu.c
f2c43d5f 303@@ -291,7 +291,10 @@ static int is_stack(struct proc_maps_private *priv,
fb47a38f
JR
304 const char *name = NULL;
305
306 if (file) {
307- struct inode *inode = file_inode(vma->vm_file);
308+ struct inode *inode;
076b876e 309+
fb47a38f
JR
310+ file = vma_pr_or_file(vma);
311+ inode = file_inode(file);
312 dev = inode->i_sb->s_dev;
313 ino = inode->i_ino;
314 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
f2c43d5f 315@@ -1627,7 +1630,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
076b876e
AM
316 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
317 struct vm_area_struct *vma = v;
318 struct numa_maps *md = &numa_priv->md;
319- struct file *file = vma->vm_file;
320+ struct file *file = vma_pr_or_file(vma);
076b876e 321 struct mm_struct *mm = vma->vm_mm;
7e9cd9fe
AM
322 struct mm_walk walk = {
323 .hugetlb_entry = gather_hugetlb_stats,
fb47a38f 324diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
f2c43d5f 325index 3717562..6a328f1 100644
fb47a38f
JR
326--- a/fs/proc/task_nommu.c
327+++ b/fs/proc/task_nommu.c
f2c43d5f 328@@ -155,7 +155,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
fb47a38f
JR
329 file = vma->vm_file;
330
331 if (file) {
332- struct inode *inode = file_inode(vma->vm_file);
333+ struct inode *inode;
076b876e 334+
b912730e 335+ file = vma_pr_or_file(vma);
fb47a38f
JR
336+ inode = file_inode(file);
337 dev = inode->i_sb->s_dev;
338 ino = inode->i_ino;
339 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
340diff --git a/include/linux/mm.h b/include/linux/mm.h
f2c43d5f 341index a92c8d7..1d83a2a 100644
fb47a38f
JR
342--- a/include/linux/mm.h
343+++ b/include/linux/mm.h
f2c43d5f 344@@ -1266,6 +1266,28 @@ static inline int fixup_user_fault(struct task_struct *tsk,
fb47a38f
JR
345 }
346 #endif
347
076b876e
AM
348+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
349+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
f2c43d5f 350+ int);
076b876e
AM
351+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
352+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
fb47a38f 353+
f2c43d5f
AM
354+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \
355+ __LINE__)
356+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \
357+ __LINE__)
358+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__)
359+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__)
b912730e
AM
360+
361+#ifndef CONFIG_MMU
076b876e
AM
362+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
363+extern void vmr_do_fput(struct vm_region *, const char[], int);
364+
f2c43d5f
AM
365+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \
366+ __LINE__)
367+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__)
b912730e 368+#endif /* !CONFIG_MMU */
fb47a38f 369+
106341ce
AM
370 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
371 unsigned int gup_flags);
fb47a38f 372 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
fb47a38f 373diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
f2c43d5f 374index 4a8aced..badd16b 100644
fb47a38f
JR
375--- a/include/linux/mm_types.h
376+++ b/include/linux/mm_types.h
e2f27e51 377@@ -275,6 +275,7 @@ struct vm_region {
fb47a38f
JR
378 unsigned long vm_top; /* region allocated to here */
379 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
380 struct file *vm_file; /* the backing file or NULL */
381+ struct file *vm_prfile; /* the virtual backing file or NULL */
382
383 int vm_usage; /* region usage count (access under nommu_region_sem) */
384 bool vm_icache_flushed : 1; /* true if the icache has been flushed for
e2f27e51 385@@ -349,6 +350,7 @@ struct vm_area_struct {
fb47a38f 386 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
5afbbe0d 387 units */
fb47a38f
JR
388 struct file * vm_file; /* File we map to (can be NULL). */
389+ struct file *vm_prfile; /* shadow of vm_file */
390 void * vm_private_data; /* was vm_pte (shared mem) */
391
392 #ifndef CONFIG_MMU
393diff --git a/kernel/fork.c b/kernel/fork.c
f2c43d5f 394index 997ac1d..4d0131b 100644
fb47a38f
JR
395--- a/kernel/fork.c
396+++ b/kernel/fork.c
f2c43d5f 397@@ -624,7 +624,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
fb47a38f
JR
398 struct inode *inode = file_inode(file);
399 struct address_space *mapping = file->f_mapping;
400
401- get_file(file);
402+ vma_get_file(tmp);
403 if (tmp->vm_flags & VM_DENYWRITE)
404 atomic_dec(&inode->i_writecount);
2000de60 405 i_mmap_lock_write(mapping);
076b876e 406diff --git a/mm/Makefile b/mm/Makefile
f2c43d5f 407index 295bd7a..14fa1c8 100644
076b876e
AM
408--- a/mm/Makefile
409+++ b/mm/Makefile
f2c43d5f 410@@ -37,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
076b876e 411 mm_init.o mmu_context.o percpu.o slab_common.o \
c1595e42 412 compaction.o vmacache.o \
076b876e 413 interval_tree.o list_lru.o workingset.o \
7e9cd9fe
AM
414- debug.o $(mmu-y)
415+ prfile.o debug.o $(mmu-y)
076b876e
AM
416
417 obj-y += init-mm.o
418
fb47a38f 419diff --git a/mm/filemap.c b/mm/filemap.c
f2c43d5f 420index 50b52fe..9e607f9 100644
fb47a38f
JR
421--- a/mm/filemap.c
422+++ b/mm/filemap.c
f2c43d5f 423@@ -2304,7 +2304,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
fb47a38f
JR
424 int ret = VM_FAULT_LOCKED;
425
426 sb_start_pagefault(inode->i_sb);
427- file_update_time(vma->vm_file);
428+ vma_file_update_time(vma);
429 lock_page(page);
430 if (page->mapping != inode->i_mapping) {
431 unlock_page(page);
fb47a38f 432diff --git a/mm/memory.c b/mm/memory.c
f2c43d5f 433index e18c57b..7be4a39 100644
fb47a38f
JR
434--- a/mm/memory.c
435+++ b/mm/memory.c
f2c43d5f 436@@ -2117,7 +2117,7 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
fb47a38f 437 }
7e9cd9fe 438
b912730e
AM
439 if (!page_mkwrite)
440- file_update_time(vma->vm_file);
441+ vma_file_update_time(vma);
442 }
443
444 return VM_FAULT_WRITE;
fb47a38f 445diff --git a/mm/mmap.c b/mm/mmap.c
f2c43d5f 446index 1af87c1..95b0ff4 100644
fb47a38f
JR
447--- a/mm/mmap.c
448+++ b/mm/mmap.c
f2c43d5f 449@@ -170,7 +170,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
fb47a38f
JR
450 if (vma->vm_ops && vma->vm_ops->close)
451 vma->vm_ops->close(vma);
452 if (vma->vm_file)
453- fput(vma->vm_file);
454+ vma_fput(vma);
455 mpol_put(vma_policy(vma));
456 kmem_cache_free(vm_area_cachep, vma);
457 return next;
f2c43d5f 458@@ -879,7 +879,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
fb47a38f
JR
459 if (remove_next) {
460 if (file) {
461 uprobe_munmap(next, next->vm_start, next->vm_end);
462- fput(file);
463+ vma_fput(vma);
464 }
465 if (next->anon_vma)
466 anon_vma_merge(vma, next);
f2c43d5f 467@@ -1727,8 +1727,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
35939ee7
JR
468 return addr;
469
fb47a38f 470 unmap_and_free_vma:
fb47a38f
JR
471+ vma_fput(vma);
472 vma->vm_file = NULL;
473- fput(file);
474
475 /* Undo any partial mapping done by a device driver. */
476 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
f2c43d5f 477@@ -2533,7 +2533,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
478 goto out_free_mpol;
479
480 if (new->vm_file)
481- get_file(new->vm_file);
482+ vma_get_file(new);
483
484 if (new->vm_ops && new->vm_ops->open)
485 new->vm_ops->open(new);
f2c43d5f 486@@ -2552,7 +2552,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
487 if (new->vm_ops && new->vm_ops->close)
488 new->vm_ops->close(new);
489 if (new->vm_file)
490- fput(new->vm_file);
491+ vma_fput(new);
492 unlink_anon_vmas(new);
493 out_free_mpol:
494 mpol_put(vma_policy(new));
f2c43d5f 495@@ -2703,7 +2703,7 @@ int vm_munmap(unsigned long start, size_t len)
7e9cd9fe
AM
496 struct vm_area_struct *vma;
497 unsigned long populate = 0;
498 unsigned long ret = -EINVAL;
499- struct file *file;
5afbbe0d 500+ struct file *file, *prfile;
7e9cd9fe 501
5afbbe0d
AM
502 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
503 current->comm, current->pid);
f2c43d5f 504@@ -2778,10 +2778,27 @@ int vm_munmap(unsigned long start, size_t len)
febd17d6 505 }
7e9cd9fe
AM
506 }
507
508- file = get_file(vma->vm_file);
509+ vma_get_file(vma);
5afbbe0d
AM
510+ file = vma->vm_file;
511+ prfile = vma->vm_prfile;
7e9cd9fe
AM
512 ret = do_mmap_pgoff(vma->vm_file, start, size,
513 prot, flags, pgoff, &populate);
5afbbe0d
AM
514+ if (!IS_ERR_VALUE(ret) && file && prfile) {
515+ struct vm_area_struct *new_vma;
516+
517+ new_vma = find_vma(mm, ret);
518+ if (!new_vma->vm_prfile)
519+ new_vma->vm_prfile = prfile;
520+ if (new_vma != vma)
521+ get_file(prfile);
522+ }
523+ /*
524+ * two fput()s instead of vma_fput(vma),
525+ * coz vma may not be available anymore.
526+ */
527 fput(file);
528+ if (prfile)
529+ fput(prfile);
7e9cd9fe
AM
530 out:
531 up_write(&mm->mmap_sem);
532 if (populate)
f2c43d5f 533@@ -3056,7 +3073,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
79b8bda9
AM
534 if (anon_vma_clone(new_vma, vma))
535 goto out_free_mempol;
536 if (new_vma->vm_file)
537- get_file(new_vma->vm_file);
538+ vma_get_file(new_vma);
539 if (new_vma->vm_ops && new_vma->vm_ops->open)
540 new_vma->vm_ops->open(new_vma);
541 vma_link(mm, new_vma, prev, rb_link, rb_parent);
fb47a38f 542diff --git a/mm/nommu.c b/mm/nommu.c
f2c43d5f 543index 8b8faaf..5d26ed94 100644
fb47a38f
JR
544--- a/mm/nommu.c
545+++ b/mm/nommu.c
f2c43d5f 546@@ -636,7 +636,7 @@ static void __put_nommu_region(struct vm_region *region)
fb47a38f
JR
547 up_write(&nommu_region_sem);
548
549 if (region->vm_file)
550- fput(region->vm_file);
551+ vmr_fput(region);
552
553 /* IO memory and memory shared directly out of the pagecache
554 * from ramfs/tmpfs mustn't be released here */
f2c43d5f 555@@ -794,7 +794,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
fb47a38f
JR
556 if (vma->vm_ops && vma->vm_ops->close)
557 vma->vm_ops->close(vma);
558 if (vma->vm_file)
559- fput(vma->vm_file);
560+ vma_fput(vma);
561 put_nommu_region(vma->vm_region);
562 kmem_cache_free(vm_area_cachep, vma);
563 }
f2c43d5f 564@@ -1320,7 +1320,7 @@ unsigned long do_mmap(struct file *file,
fb47a38f
JR
565 goto error_just_free;
566 }
567 }
568- fput(region->vm_file);
569+ vmr_fput(region);
570 kmem_cache_free(vm_region_jar, region);
571 region = pregion;
572 result = start;
f2c43d5f 573@@ -1395,10 +1395,10 @@ unsigned long do_mmap(struct file *file,
fb47a38f
JR
574 up_write(&nommu_region_sem);
575 error:
576 if (region->vm_file)
577- fput(region->vm_file);
578+ vmr_fput(region);
579 kmem_cache_free(vm_region_jar, region);
580 if (vma->vm_file)
581- fput(vma->vm_file);
582+ vma_fput(vma);
583 kmem_cache_free(vm_area_cachep, vma);
fb47a38f 584 return ret;
c2c0f25c 585
076b876e
AM
586diff --git a/mm/prfile.c b/mm/prfile.c
587new file mode 100644
c2c0f25c 588index 0000000..b323b8a
076b876e
AM
589--- /dev/null
590+++ b/mm/prfile.c
591@@ -0,0 +1,86 @@
592+/*
593+ * Mainly for aufs which mmap(2) different file and wants to print different
594+ * path in /proc/PID/maps.
595+ * Call these functions via macros defined in linux/mm.h.
596+ *
597+ * See Documentation/filesystems/aufs/design/06mmap.txt
598+ *
599+ * Copyright (c) 2014 Junjro R. Okajima
600+ * Copyright (c) 2014 Ian Campbell
601+ */
602+
603+#include <linux/mm.h>
604+#include <linux/file.h>
605+#include <linux/fs.h>
606+
607+/* #define PRFILE_TRACE */
608+static inline void prfile_trace(struct file *f, struct file *pr,
609+ const char func[], int line, const char func2[])
610+{
611+#ifdef PRFILE_TRACE
612+ if (pr)
c2c0f25c 613+ pr_info("%s:%d: %s, %s\n", func, line, func2,
7e9cd9fe 614+ f ? (char *)f->f_path.dentry->d_name.name : "(null)");
076b876e
AM
615+#endif
616+}
617+
076b876e
AM
618+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
619+ int line)
620+{
621+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
622+
623+ prfile_trace(f, pr, func, line, __func__);
624+ file_update_time(f);
625+ if (f && pr)
626+ file_update_time(pr);
627+}
628+
629+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
630+ int line)
631+{
632+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
633+
634+ prfile_trace(f, pr, func, line, __func__);
635+ return (f && pr) ? pr : f;
636+}
637+
638+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
639+{
640+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
641+
642+ prfile_trace(f, pr, func, line, __func__);
643+ get_file(f);
644+ if (f && pr)
645+ get_file(pr);
646+}
647+
648+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
649+{
650+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
651+
652+ prfile_trace(f, pr, func, line, __func__);
653+ fput(f);
654+ if (f && pr)
655+ fput(pr);
656+}
b912730e
AM
657+
658+#ifndef CONFIG_MMU
076b876e
AM
659+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
660+ int line)
661+{
662+ struct file *f = region->vm_file, *pr = region->vm_prfile;
663+
664+ prfile_trace(f, pr, func, line, __func__);
665+ return (f && pr) ? pr : f;
666+}
667+
668+void vmr_do_fput(struct vm_region *region, const char func[], int line)
669+{
670+ struct file *f = region->vm_file, *pr = region->vm_prfile;
671+
672+ prfile_trace(f, pr, func, line, __func__);
673+ fput(f);
674+ if (f && pr)
675+ fput(pr);
676+}
b912730e 677+#endif /* !CONFIG_MMU */
f2c43d5f 678aufs4.9 standalone patch
7f207e10 679
c1595e42 680diff --git a/fs/dcache.c b/fs/dcache.c
f2c43d5f 681index df0268c..755fea1 100644
c1595e42
JR
682--- a/fs/dcache.c
683+++ b/fs/dcache.c
f2c43d5f 684@@ -1272,6 +1272,7 @@ void d_walk(struct dentry *parent, void *data,
c1595e42
JR
685 seq = 1;
686 goto again;
687 }
febd17d6 688+EXPORT_SYMBOL_GPL(d_walk);
c1595e42
JR
689
690 /*
691 * Search for at least 1 mount point in the dentry's subdirs.
f2c43d5f
AM
692@@ -2855,6 +2856,7 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
693
694 write_sequnlock(&rename_lock);
695 }
696+EXPORT_SYMBOL_GPL(d_exchange);
697
698 /**
699 * d_ancestor - search for an ancestor
79b8bda9 700diff --git a/fs/exec.c b/fs/exec.c
f2c43d5f 701index 4e497b9..e27d323 100644
79b8bda9
AM
702--- a/fs/exec.c
703+++ b/fs/exec.c
5afbbe0d 704@@ -104,6 +104,7 @@ bool path_noexec(const struct path *path)
79b8bda9
AM
705 return (path->mnt->mnt_flags & MNT_NOEXEC) ||
706 (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
707 }
febd17d6 708+EXPORT_SYMBOL_GPL(path_noexec);
79b8bda9
AM
709
710 #ifdef CONFIG_USELIB
711 /*
febd17d6
JR
712diff --git a/fs/fcntl.c b/fs/fcntl.c
713index 6f42279..04fd33c 100644
714--- a/fs/fcntl.c
715+++ b/fs/fcntl.c
716@@ -82,6 +82,7 @@ int setfl(int fd, struct file * filp, unsigned long arg)
717 out:
718 return error;
719 }
720+EXPORT_SYMBOL_GPL(setfl);
721
722 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
723 int force)
b912730e 724diff --git a/fs/file_table.c b/fs/file_table.c
febd17d6 725index ad17e05..ae9f267 100644
b912730e
AM
726--- a/fs/file_table.c
727+++ b/fs/file_table.c
f2c43d5f 728@@ -147,6 +147,7 @@ struct file *get_empty_filp(void)
b912730e
AM
729 }
730 return ERR_PTR(-ENFILE);
731 }
febd17d6 732+EXPORT_SYMBOL_GPL(get_empty_filp);
b912730e
AM
733
734 /**
735 * alloc_file - allocate and initialize a 'struct file'
8cdd5066
JR
736@@ -258,6 +259,7 @@ void flush_delayed_fput(void)
737 {
738 delayed_fput(NULL);
739 }
febd17d6 740+EXPORT_SYMBOL_GPL(flush_delayed_fput);
8cdd5066
JR
741
742 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
743
744@@ -300,6 +302,7 @@ void __fput_sync(struct file *file)
745 }
746
747 EXPORT_SYMBOL(fput);
febd17d6 748+EXPORT_SYMBOL_GPL(__fput_sync);
8cdd5066
JR
749
750 void put_filp(struct file *file)
751 {
752@@ -308,6 +311,7 @@ void put_filp(struct file *file)
b912730e
AM
753 file_free(file);
754 }
755 }
febd17d6 756+EXPORT_SYMBOL_GPL(put_filp);
b912730e 757
79b8bda9 758 void __init files_init(void)
b912730e 759 {
5afbbe0d 760diff --git a/fs/inode.c b/fs/inode.c
f2c43d5f 761index 9a9ba3a..a3a18d8 100644
5afbbe0d
AM
762--- a/fs/inode.c
763+++ b/fs/inode.c
f2c43d5f 764@@ -1651,6 +1651,7 @@ int update_time(struct inode *inode, struct timespec *time, int flags)
5afbbe0d
AM
765
766 return update_time(inode, time, flags);
767 }
768+EXPORT_SYMBOL_GPL(update_time);
769
770 /**
771 * touch_atime - update the access time
7f207e10 772diff --git a/fs/namespace.c b/fs/namespace.c
f2c43d5f 773index e6c234b..8d13f7b 100644
7f207e10
AM
774--- a/fs/namespace.c
775+++ b/fs/namespace.c
f2c43d5f 776@@ -466,6 +466,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
c06a8ce3
AM
777 mnt_dec_writers(real_mount(mnt));
778 preempt_enable();
779 }
780+EXPORT_SYMBOL_GPL(__mnt_drop_write);
781
782 /**
783 * mnt_drop_write - give up write access to a mount
f2c43d5f 784@@ -1823,6 +1824,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
785 }
786 return 0;
787 }
febd17d6 788+EXPORT_SYMBOL_GPL(iterate_mounts);
7f207e10 789
7eafdf33 790 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
7f207e10
AM
791 {
792diff --git a/fs/notify/group.c b/fs/notify/group.c
f2c43d5f 793index fbe3cbe..bdfc61e 100644
7f207e10
AM
794--- a/fs/notify/group.c
795+++ b/fs/notify/group.c
796@@ -22,6 +22,7 @@
797 #include <linux/srcu.h>
798 #include <linux/rculist.h>
799 #include <linux/wait.h>
800+#include <linux/module.h>
801
802 #include <linux/fsnotify_backend.h>
803 #include "fsnotify.h"
e2f27e51 804@@ -100,6 +101,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
1716fcea
AM
805 {
806 atomic_inc(&group->refcnt);
807 }
febd17d6 808+EXPORT_SYMBOL_GPL(fsnotify_get_group);
1716fcea
AM
809
810 /*
811 * Drop a reference to a group. Free it if it's through.
e2f27e51 812@@ -109,6 +111,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
7f207e10 813 if (atomic_dec_and_test(&group->refcnt))
1716fcea 814 fsnotify_final_destroy_group(group);
7f207e10 815 }
febd17d6 816+EXPORT_SYMBOL_GPL(fsnotify_put_group);
7f207e10
AM
817
818 /*
819 * Create a new fsnotify_group and hold a reference for the group returned.
e2f27e51 820@@ -137,6 +140,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
7f207e10
AM
821
822 return group;
823 }
febd17d6 824+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
1716fcea
AM
825
826 int fsnotify_fasync(int fd, struct file *file, int on)
827 {
7f207e10 828diff --git a/fs/notify/mark.c b/fs/notify/mark.c
5afbbe0d 829index d3fea0b..5fc06ad 100644
7f207e10
AM
830--- a/fs/notify/mark.c
831+++ b/fs/notify/mark.c
febd17d6 832@@ -113,6 +113,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10 833 mark->free_mark(mark);
1716fcea 834 }
7f207e10 835 }
febd17d6 836+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
7f207e10 837
2000de60
JR
838 /* Calculate mask of events for a list of marks */
839 u32 fsnotify_recalc_mask(struct hlist_head *head)
5afbbe0d 840@@ -230,6 +231,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
1716fcea 841 mutex_unlock(&group->mark_mutex);
79b8bda9 842 fsnotify_free_mark(mark);
7f207e10 843 }
febd17d6 844+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
7f207e10 845
79b8bda9
AM
846 void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
847 {
f2c43d5f 848@@ -415,6 +417,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
7f207e10
AM
849
850 return ret;
851 }
febd17d6 852+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
7f207e10 853
1716fcea
AM
854 int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
855 struct inode *inode, struct vfsmount *mnt, int allow_dups)
5afbbe0d 856@@ -533,6 +536,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
857 atomic_set(&mark->refcnt, 1);
858 mark->free_mark = free_mark;
859 }
febd17d6 860+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
7f207e10 861
5afbbe0d
AM
862 /*
863 * Destroy all marks in destroy_list, waits for SRCU period to finish before
7f207e10 864diff --git a/fs/open.c b/fs/open.c
f2c43d5f 865index d3ed817..20d2494 100644
7f207e10
AM
866--- a/fs/open.c
867+++ b/fs/open.c
c2c0f25c 868@@ -64,6 +64,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
febd17d6 869 inode_unlock(dentry->d_inode);
7f207e10
AM
870 return ret;
871 }
febd17d6 872+EXPORT_SYMBOL_GPL(do_truncate);
7f207e10 873
5afbbe0d 874 long vfs_truncate(const struct path *path, loff_t length)
7f207e10 875 {
f2c43d5f 876@@ -695,6 +696,7 @@ int open_check_o_direct(struct file *f)
b912730e
AM
877 }
878 return 0;
879 }
febd17d6 880+EXPORT_SYMBOL_GPL(open_check_o_direct);
b912730e
AM
881
882 static int do_dentry_open(struct file *f,
c2c0f25c 883 struct inode *inode,
5527c038 884diff --git a/fs/read_write.c b/fs/read_write.c
f2c43d5f 885index 4052813..7dfd732 100644
5527c038
JR
886--- a/fs/read_write.c
887+++ b/fs/read_write.c
5afbbe0d 888@@ -525,6 +525,7 @@ vfs_readf_t vfs_readf(struct file *file)
5527c038
JR
889 return new_sync_read;
890 return ERR_PTR(-ENOSYS);
891 }
febd17d6 892+EXPORT_SYMBOL_GPL(vfs_readf);
5527c038
JR
893
894 vfs_writef_t vfs_writef(struct file *file)
895 {
5afbbe0d 896@@ -536,6 +537,7 @@ vfs_writef_t vfs_writef(struct file *file)
5527c038
JR
897 return new_sync_write;
898 return ERR_PTR(-ENOSYS);
899 }
febd17d6 900+EXPORT_SYMBOL_GPL(vfs_writef);
5527c038
JR
901
902 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
903 {
7f207e10 904diff --git a/fs/splice.c b/fs/splice.c
f2c43d5f 905index 28160a7..98c1902 100644
7f207e10
AM
906--- a/fs/splice.c
907+++ b/fs/splice.c
f2c43d5f 908@@ -868,6 +868,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
392086de
AM
909
910 return splice_write(pipe, out, ppos, len, flags);
7f207e10 911 }
febd17d6 912+EXPORT_SYMBOL_GPL(do_splice_from);
7f207e10
AM
913
914 /*
915 * Attempt to initiate a splice from a file to a pipe.
f2c43d5f 916@@ -897,6 +898,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
917
918 return splice_read(in, ppos, pipe, len, flags);
919 }
febd17d6 920+EXPORT_SYMBOL_GPL(do_splice_to);
7f207e10
AM
921
922 /**
923 * splice_direct_to_actor - splices data directly between two non-pipes
c1595e42 924diff --git a/fs/xattr.c b/fs/xattr.c
f2c43d5f 925index 2d13b4e..41c2bcd 100644
c1595e42
JR
926--- a/fs/xattr.c
927+++ b/fs/xattr.c
f2c43d5f 928@@ -296,6 +296,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
c1595e42
JR
929 *xattr_value = value;
930 return error;
931 }
febd17d6 932+EXPORT_SYMBOL_GPL(vfs_getxattr_alloc);
c1595e42 933
febd17d6 934 ssize_t
f2c43d5f 935 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
8cdd5066 936diff --git a/kernel/task_work.c b/kernel/task_work.c
e2f27e51 937index d513051..e056d54 100644
8cdd5066
JR
938--- a/kernel/task_work.c
939+++ b/kernel/task_work.c
e2f27e51 940@@ -119,3 +119,4 @@ void task_work_run(void)
8cdd5066
JR
941 } while (work);
942 }
943 }
febd17d6 944+EXPORT_SYMBOL_GPL(task_work_run);
7f207e10 945diff --git a/security/commoncap.c b/security/commoncap.c
f2c43d5f 946index 8df676f..6b5cc07 100644
7f207e10
AM
947--- a/security/commoncap.c
948+++ b/security/commoncap.c
f2c43d5f 949@@ -1061,12 +1061,14 @@ int cap_mmap_addr(unsigned long addr)
94337f0d 950 }
7f207e10
AM
951 return ret;
952 }
febd17d6 953+EXPORT_SYMBOL_GPL(cap_mmap_addr);
0c3ec466
AM
954
955 int cap_mmap_file(struct file *file, unsigned long reqprot,
956 unsigned long prot, unsigned long flags)
957 {
958 return 0;
959 }
febd17d6 960+EXPORT_SYMBOL_GPL(cap_mmap_file);
c2c0f25c
AM
961
962 #ifdef CONFIG_SECURITY
963
7f207e10 964diff --git a/security/device_cgroup.c b/security/device_cgroup.c
febd17d6 965index 03c1652..f88c84b 100644
7f207e10
AM
966--- a/security/device_cgroup.c
967+++ b/security/device_cgroup.c
f6c5ef8b
AM
968@@ -7,6 +7,7 @@
969 #include <linux/device_cgroup.h>
970 #include <linux/cgroup.h>
971 #include <linux/ctype.h>
972+#include <linux/export.h>
973 #include <linux/list.h>
974 #include <linux/uaccess.h>
975 #include <linux/seq_file.h>
076b876e 976@@ -849,6 +850,7 @@ int __devcgroup_inode_permission(struct inode *inode, int mask)
537831f9
AM
977 return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
978 access);
7f207e10 979 }
febd17d6 980+EXPORT_SYMBOL_GPL(__devcgroup_inode_permission);
7f207e10
AM
981
982 int devcgroup_inode_mknod(int mode, dev_t dev)
983 {
984diff --git a/security/security.c b/security/security.c
f2c43d5f 985index f825304..8dd441d 100644
7f207e10
AM
986--- a/security/security.c
987+++ b/security/security.c
f2c43d5f 988@@ -443,6 +443,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry)
7f207e10 989 return 0;
c2c0f25c 990 return call_int_hook(path_rmdir, 0, dir, dentry);
7f207e10 991 }
febd17d6 992+EXPORT_SYMBOL_GPL(security_path_rmdir);
7f207e10 993
5afbbe0d 994 int security_path_unlink(const struct path *dir, struct dentry *dentry)
7f207e10 995 {
f2c43d5f 996@@ -459,6 +460,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry,
7f207e10 997 return 0;
c2c0f25c 998 return call_int_hook(path_symlink, 0, dir, dentry, old_name);
7f207e10 999 }
febd17d6 1000+EXPORT_SYMBOL_GPL(security_path_symlink);
7f207e10 1001
5afbbe0d 1002 int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
7f207e10 1003 struct dentry *new_dentry)
f2c43d5f 1004@@ -467,6 +469,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
7f207e10 1005 return 0;
c2c0f25c 1006 return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
7f207e10 1007 }
febd17d6 1008+EXPORT_SYMBOL_GPL(security_path_link);
7f207e10 1009
5afbbe0d
AM
1010 int security_path_rename(const struct path *old_dir, struct dentry *old_dentry,
1011 const struct path *new_dir, struct dentry *new_dentry,
f2c43d5f 1012@@ -494,6 +497,7 @@ int security_path_truncate(const struct path *path)
7f207e10 1013 return 0;
c2c0f25c 1014 return call_int_hook(path_truncate, 0, path);
7f207e10 1015 }
febd17d6 1016+EXPORT_SYMBOL_GPL(security_path_truncate);
7f207e10 1017
5afbbe0d 1018 int security_path_chmod(const struct path *path, umode_t mode)
7eafdf33 1019 {
f2c43d5f 1020@@ -501,6 +505,7 @@ int security_path_chmod(const struct path *path, umode_t mode)
7f207e10 1021 return 0;
c2c0f25c 1022 return call_int_hook(path_chmod, 0, path, mode);
7f207e10 1023 }
febd17d6 1024+EXPORT_SYMBOL_GPL(security_path_chmod);
7f207e10 1025
5afbbe0d 1026 int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
7f207e10 1027 {
f2c43d5f 1028@@ -508,6 +513,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
7f207e10 1029 return 0;
c2c0f25c 1030 return call_int_hook(path_chown, 0, path, uid, gid);
7f207e10 1031 }
febd17d6 1032+EXPORT_SYMBOL_GPL(security_path_chown);
7f207e10 1033
5afbbe0d 1034 int security_path_chroot(const struct path *path)
7f207e10 1035 {
f2c43d5f 1036@@ -593,6 +599,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10 1037 return 0;
c2c0f25c 1038 return call_int_hook(inode_readlink, 0, dentry);
7f207e10 1039 }
febd17d6 1040+EXPORT_SYMBOL_GPL(security_inode_readlink);
7f207e10 1041
c2c0f25c
AM
1042 int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
1043 bool rcu)
f2c43d5f 1044@@ -608,6 +615,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 1045 return 0;
c2c0f25c 1046 return call_int_hook(inode_permission, 0, inode, mask);
7f207e10 1047 }
febd17d6 1048+EXPORT_SYMBOL_GPL(security_inode_permission);
7f207e10 1049
1e00d052 1050 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
7f207e10 1051 {
f2c43d5f 1052@@ -779,6 +787,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
1053
1054 return fsnotify_perm(file, mask);
1055 }
febd17d6 1056+EXPORT_SYMBOL_GPL(security_file_permission);
7f207e10
AM
1057
1058 int security_file_alloc(struct file *file)
1059 {
f2c43d5f 1060@@ -838,6 +847,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
7f207e10
AM
1061 return ret;
1062 return ima_file_mmap(file, prot);
1063 }
febd17d6 1064+EXPORT_SYMBOL_GPL(security_mmap_file);
7f207e10 1065
0c3ec466
AM
1066 int security_mmap_addr(unsigned long addr)
1067 {
7f207e10
AM
1068diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
1069--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1070+++ linux/Documentation/ABI/testing/debugfs-aufs 2016-10-09 16:55:36.476034536 +0200
86dc4139 1071@@ -0,0 +1,50 @@
7f207e10
AM
1072+What: /debug/aufs/si_<id>/
1073+Date: March 2009
f6b6e03d 1074+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1075+Description:
1076+ Under /debug/aufs, a directory named si_<id> is created
1077+ per aufs mount, where <id> is a unique id generated
1078+ internally.
1facf9fc 1079+
86dc4139
AM
1080+What: /debug/aufs/si_<id>/plink
1081+Date: Apr 2013
f6b6e03d 1082+Contact: J. R. Okajima <hooanon05g@gmail.com>
86dc4139
AM
1083+Description:
1084+ It has three lines and shows the information about the
1085+ pseudo-link. The first line is a single number
1086+ representing a number of buckets. The second line is a
1087+ number of pseudo-links per buckets (separated by a
1088+ blank). The last line is a single number representing a
1089+ total number of pseudo-links.
1090+ When the aufs mount option 'noplink' is specified, it
1091+ will show "1\n0\n0\n".
1092+
7f207e10
AM
1093+What: /debug/aufs/si_<id>/xib
1094+Date: March 2009
f6b6e03d 1095+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1096+Description:
1097+ It shows the consumed blocks by xib (External Inode Number
1098+ Bitmap), its block size and file size.
1099+ When the aufs mount option 'noxino' is specified, it
1100+ will be empty. About XINO files, see the aufs manual.
1101+
1102+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
1103+Date: March 2009
f6b6e03d 1104+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1105+Description:
1106+ It shows the consumed blocks by xino (External Inode Number
1107+ Translation Table), its link count, block size and file
1108+ size.
1109+ When the aufs mount option 'noxino' is specified, it
1110+ will be empty. About XINO files, see the aufs manual.
1111+
1112+What: /debug/aufs/si_<id>/xigen
1113+Date: March 2009
f6b6e03d 1114+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1115+Description:
1116+ It shows the consumed blocks by xigen (External Inode
1117+ Generation Table), its block size and file size.
1118+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
1119+ be created.
1120+ When the aufs mount option 'noxino' is specified, it
1121+ will be empty. About XINO files, see the aufs manual.
1122diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
1123--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1124+++ linux/Documentation/ABI/testing/sysfs-aufs 2016-10-09 16:55:36.476034536 +0200
392086de 1125@@ -0,0 +1,31 @@
7f207e10
AM
1126+What: /sys/fs/aufs/si_<id>/
1127+Date: March 2009
f6b6e03d 1128+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1129+Description:
1130+ Under /sys/fs/aufs, a directory named si_<id> is created
1131+ per aufs mount, where <id> is a unique id generated
1132+ internally.
1133+
1134+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
1135+Date: March 2009
f6b6e03d 1136+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1137+Description:
1138+ It shows the absolute path of a member directory (which
1139+ is called branch) in aufs, and its permission.
1140+
392086de
AM
1141+What: /sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
1142+Date: July 2013
f6b6e03d 1143+Contact: J. R. Okajima <hooanon05g@gmail.com>
392086de
AM
1144+Description:
1145+ It shows the id of a member directory (which is called
1146+ branch) in aufs.
1147+
7f207e10
AM
1148+What: /sys/fs/aufs/si_<id>/xi_path
1149+Date: March 2009
f6b6e03d 1150+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1151+Description:
1152+ It shows the absolute path of XINO (External Inode Number
1153+ Bitmap, Translation Table and Generation Table) file
1154+ even if it is the default path.
1155+ When the aufs mount option 'noxino' is specified, it
1156+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
1157diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
1158--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1159+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2016-10-09 16:55:36.479367956 +0200
7e9cd9fe 1160@@ -0,0 +1,170 @@
53392da6 1161+
8cdd5066 1162+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1163+#
1164+# This program is free software; you can redistribute it and/or modify
1165+# it under the terms of the GNU General Public License as published by
1166+# the Free Software Foundation; either version 2 of the License, or
1167+# (at your option) any later version.
1168+#
1169+# This program is distributed in the hope that it will be useful,
1170+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1171+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1172+# GNU General Public License for more details.
1173+#
1174+# You should have received a copy of the GNU General Public License
523b37e3 1175+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1176+
1177+Introduction
1178+----------------------------------------
1179+
1180+aufs [ei ju: ef es] | [a u f s]
1181+1. abbrev. for "advanced multi-layered unification filesystem".
1182+2. abbrev. for "another unionfs".
1183+3. abbrev. for "auf das" in German which means "on the" in English.
1184+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
1185+ But "Filesystem aufs Filesystem" is hard to understand.
1186+
1187+AUFS is a filesystem with features:
1188+- multi layered stackable unification filesystem, the member directory
1189+ is called as a branch.
1190+- branch permission and attribute, 'readonly', 'real-readonly',
7e9cd9fe 1191+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
53392da6
AM
1192+ combination.
1193+- internal "file copy-on-write".
1194+- logical deletion, whiteout.
1195+- dynamic branch manipulation, adding, deleting and changing permission.
1196+- allow bypassing aufs, user's direct branch access.
1197+- external inode number translation table and bitmap which maintains the
1198+ persistent aufs inode number.
1199+- seekable directory, including NFS readdir.
1200+- file mapping, mmap and sharing pages.
1201+- pseudo-link, hardlink over branches.
1202+- loopback mounted filesystem as a branch.
1203+- several policies to select one among multiple writable branches.
1204+- revert a single systemcall when an error occurs in aufs.
1205+- and more...
1206+
1207+
1208+Multi Layered Stackable Unification Filesystem
1209+----------------------------------------------------------------------
1210+Most people already know what it is.
1211+It is a filesystem which unifies several directories and provides a
1212+merged single directory. When users access a file, the access will be
1213+passed/re-directed/converted (sorry, I am not sure which English word is
1214+correct) to the real file on the member filesystem. The member
1215+filesystem is called 'lower filesystem' or 'branch' and has a mode
1216+'readonly' and 'readwrite.' And the deletion for a file on the lower
1217+readonly branch is handled by creating 'whiteout' on the upper writable
1218+branch.
1219+
1220+On LKML, there have been discussions about UnionMount (Jan Blunck,
1221+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
1222+different approaches to implement the merged-view.
1223+The former tries putting it into VFS, and the latter implements as a
1224+separate filesystem.
1225+(If I misunderstand about these implementations, please let me know and
1226+I shall correct it. Because it is a long time ago when I read their
1227+source files last time).
1228+
1229+UnionMount's approach will be able to be small, but may be hard to share
1230+branches between several UnionMount since the whiteout in it is
1231+implemented in the inode on branch filesystem and always
1232+shared. According to Bharata's post, readdir does not seem to be
1233+finished yet.
1234+There are several missing features known in this implementation such as
1235+- for users, the inode number may change silently. eg. copy-up.
1236+- link(2) may break by copy-up.
1237+- read(2) may get an obsoleted filedata (fstat(2) too).
1238+- fcntl(F_SETLK) may be broken by copy-up.
1239+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
1240+ open(O_RDWR).
1241+
7e9cd9fe
AM
1242+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
1243+merged into mainline. This is another implementation of UnionMount as a
1244+separated filesystem. All the limitations and known problems which
1245+UnionMount has are equally inherited by the "overlay" filesystem.
1246+
1247+Unionfs has a longer history. When I started implementing a stackable
1248+filesystem (Aug 2005), it already existed. It has virtual super_block,
1249+inode, dentry and file objects and they have an array pointing lower
1250+same kind objects. After contributing many patches for Unionfs, I
1251+re-started my project AUFS (Jun 2006).
53392da6
AM
1252+
1253+In AUFS, the structure of filesystem resembles to Unionfs, but I
1254+implemented my own ideas, approaches and enhancements and it became
1255+totally different one.
1256+
1257+Comparing DM snapshot and fs based implementation
1258+- the number of bytes to be copied between devices is much smaller.
1259+- the type of filesystem must be one and only.
1260+- the fs must be writable, no readonly fs, even for the lower original
1261+ device. so the compression fs will not be usable. but if we use
1262+ loopback mount, we may address this issue.
1263+ for instance,
1264+ mount /cdrom/squashfs.img /sq
1265+ losetup /sq/ext2.img
1266+ losetup /somewhere/cow
1267+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
1268+- it will be difficult (or needs more operations) to extract the
1269+ difference between the original device and COW.
1270+- DM snapshot-merge may help a lot when users try merging. in the
1271+ fs-layer union, users will use rsync(1).
1272+
7e9cd9fe
AM
1273+You may want to read my old paper "Filesystems in LiveCD"
1274+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
53392da6 1275+
7e9cd9fe
AM
1276+
1277+Several characters/aspects/persona of aufs
53392da6
AM
1278+----------------------------------------------------------------------
1279+
7e9cd9fe 1280+Aufs has several characters, aspects or persona.
53392da6
AM
1281+1. a filesystem, callee of VFS helper
1282+2. sub-VFS, caller of VFS helper for branches
1283+3. a virtual filesystem which maintains persistent inode number
1284+4. reader/writer of files on branches such like an application
1285+
1286+1. Callee of VFS Helper
1287+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
1288+unlink(2) from an application reaches sys_unlink() kernel function and
1289+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
1290+calls filesystem specific unlink operation. Actually aufs implements the
1291+unlink operation but it behaves like a redirector.
1292+
1293+2. Caller of VFS Helper for Branches
1294+aufs_unlink() passes the unlink request to the branch filesystem as if
1295+it were called from VFS. So the called unlink operation of the branch
1296+filesystem acts as usual. As a caller of VFS helper, aufs should handle
1297+every necessary pre/post operation for the branch filesystem.
1298+- acquire the lock for the parent dir on a branch
1299+- lookup in a branch
1300+- revalidate dentry on a branch
1301+- mnt_want_write() for a branch
1302+- vfs_unlink() for a branch
1303+- mnt_drop_write() for a branch
1304+- release the lock on a branch
1305+
1306+3. Persistent Inode Number
1307+One of the most important issues for a filesystem is to maintain inode
1308+numbers. This is particularly important to support exporting a
1309+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
1310+backend block device for its own. But some storage is necessary to
7e9cd9fe
AM
1311+keep and maintain the inode numbers. It may be a large space and may not
1312+suit to keep in memory. Aufs rents some space from its first writable
1313+branch filesystem (by default) and creates file(s) on it. These files
1314+are created by aufs internally and removed soon (currently) keeping
1315+opened.
53392da6
AM
1316+Note: Because these files are removed, they are totally gone after
1317+ unmounting aufs. It means the inode numbers are not persistent
1318+ across unmount or reboot. I have a plan to make them really
1319+ persistent which will be important for aufs on NFS server.
1320+
1321+4. Read/Write Files Internally (copy-on-write)
1322+Because a branch can be readonly, when you write a file on it, aufs will
1323+"copy-up" it to the upper writable branch internally. And then write the
1324+originally requested thing to the file. Generally kernel doesn't
1325+open/read/write file actively. In aufs, even a single write may cause an
1326+internal "file copy". This behaviour is very similar to cp(1) command.
1327+
1328+Some people may think it is better to pass such work to user space
1329+helper, instead of doing in kernel space. Actually I am still thinking
1330+about it. But currently I have implemented it in kernel space.
1331diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
1332--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1333+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2016-10-09 16:55:36.479367956 +0200
7e9cd9fe 1334@@ -0,0 +1,258 @@
53392da6 1335+
8cdd5066 1336+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1337+#
1338+# This program is free software; you can redistribute it and/or modify
1339+# it under the terms of the GNU General Public License as published by
1340+# the Free Software Foundation; either version 2 of the License, or
1341+# (at your option) any later version.
1342+#
1343+# This program is distributed in the hope that it will be useful,
1344+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1345+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1346+# GNU General Public License for more details.
1347+#
1348+# You should have received a copy of the GNU General Public License
523b37e3 1349+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1350+
1351+Basic Aufs Internal Structure
1352+
1353+Superblock/Inode/Dentry/File Objects
1354+----------------------------------------------------------------------
1355+As like an ordinary filesystem, aufs has its own
1356+superblock/inode/dentry/file objects. All these objects have a
1357+dynamically allocated array and store the same kind of pointers to the
1358+lower filesystem, branch.
1359+For example, when you build a union with one readwrite branch and one
1360+readonly, mounted /au, /rw and /ro respectively.
1361+- /au = /rw + /ro
1362+- /ro/fileA exists but /rw/fileA does not
1363+
1364+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
1365+pointers are stored in an aufs dentry. The array in aufs dentry will be,
7e9cd9fe 1366+- [0] = NULL (because /rw/fileA doesn't exist)
53392da6
AM
1367+- [1] = /ro/fileA
1368+
1369+This style of an array is essentially same to the aufs
1370+superblock/inode/dentry/file objects.
1371+
1372+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1373+branches dynamically, these objects has its own generation. When
1374+branches are changed, the generation in aufs superblock is
1375+incremented. And a generation in other object are compared when it is
1376+accessed. When a generation in other objects are obsoleted, aufs
1377+refreshes the internal array.
53392da6
AM
1378+
1379+
1380+Superblock
1381+----------------------------------------------------------------------
1382+Additionally aufs superblock has some data for policies to select one
1383+among multiple writable branches, XIB files, pseudo-links and kobject.
1384+See below in detail.
7e9cd9fe
AM
1385+About the policies which support copy-down a directory, see
1386+wbr_policy.txt too.
53392da6
AM
1387+
1388+
1389+Branch and XINO(External Inode Number Translation Table)
1390+----------------------------------------------------------------------
1391+Every branch has its own xino (external inode number translation table)
1392+file. The xino file is created and unlinked by aufs internally. When two
1393+members of a union exist on the same filesystem, they share the single
1394+xino file.
1395+The struct of a xino file is simple, just a sequence of aufs inode
1396+numbers which is indexed by the lower inode number.
1397+In the above sample, assume the inode number of /ro/fileA is i111 and
1398+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
1399+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
1400+
1401+When the inode numbers are not contiguous, the xino file will be sparse
1402+which has a hole in it and doesn't consume as much disk space as it
1403+might appear. If your branch filesystem consumes disk space for such
1404+holes, then you should specify 'xino=' option at mounting aufs.
1405+
7e9cd9fe
AM
1406+Aufs has a mount option to free the disk blocks for such holes in XINO
1407+files on tmpfs or ramdisk. But it is not so effective actually. If you
1408+meet a problem of disk shortage due to XINO files, then you should try
1409+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
1410+The patch localizes the assignment of inumbers per tmpfs-mount and avoids
1411+the holes in XINO files.
1412+
53392da6 1413+Also a writable branch has three kinds of "whiteout bases". All these
7e9cd9fe 1414+exist when the branch is joined to aufs, and their names are
53392da6
AM
1415+whiteout-ed doubly, so that users will never see their names in aufs
1416+hierarchy.
7e9cd9fe 1417+1. a regular file which will be hardlinked to all whiteouts.
53392da6 1418+2. a directory to store a pseudo-link.
7e9cd9fe 1419+3. a directory to store an "orphan"-ed file temporarily.
53392da6
AM
1420+
1421+1. Whiteout Base
1422+ When you remove a file on a readonly branch, aufs handles it as a
1423+ logical deletion and creates a whiteout on the upper writable branch
1424+ as a hardlink of this file in order not to consume inode on the
1425+ writable branch.
1426+2. Pseudo-link Dir
1427+ See below, Pseudo-link.
1428+3. Step-Parent Dir
1429+ When "fileC" exists on the lower readonly branch only and it is
1430+ opened and removed with its parent dir, and then user writes
1431+ something into it, then aufs copies-up fileC to this
1432+ directory. Because there is no other dir to store fileC. After
1433+ creating a file under this dir, the file is unlinked.
1434+
1435+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1436+dynamically, a branch has its own id. When the branch order changes,
1437+aufs finds the new index by searching the branch id.
53392da6
AM
1438+
1439+
1440+Pseudo-link
1441+----------------------------------------------------------------------
1442+Assume "fileA" exists on the lower readonly branch only and it is
1443+hardlinked to "fileB" on the branch. When you write something to fileA,
1444+aufs copies-up it to the upper writable branch. Additionally aufs
1445+creates a hardlink under the Pseudo-link Directory of the writable
1446+branch. The inode of a pseudo-link is kept in aufs super_block as a
1447+simple list. If fileB is read after unlinking fileA, aufs returns
1448+filedata from the pseudo-link instead of the lower readonly
1449+branch. Because the pseudo-link is based upon the inode, to keep the
7e9cd9fe 1450+inode number by xino (see above) is essentially necessary.
53392da6
AM
1451+
1452+All the hardlinks under the Pseudo-link Directory of the writable branch
1453+should be restored in a proper location later. Aufs provides a utility
1454+to do this. The userspace helpers are executed at remounting and unmounting
1455+aufs by default.
1456+While this utility is running, it puts aufs into the pseudo-link
1457+maintenance mode. In this mode, only the process which began the
1458+maintenance mode (and its child processes) is allowed to operate in
1459+aufs. Some other processes which are not related to the pseudo-link will
1460+be allowed to run too, but the rest have to return an error or wait
1461+until the maintenance mode ends. If a process already acquires an inode
1462+mutex (in VFS), it has to return an error.
1463+
1464+
1465+XIB(external inode number bitmap)
1466+----------------------------------------------------------------------
1467+In addition to the xino file per branch, aufs has an external inode number
7e9cd9fe
AM
1468+bitmap in a superblock object. It is also an internal file such like a
1469+xino file.
53392da6
AM
1470+It is a simple bitmap to mark whether the aufs inode number is in-use or
1471+not.
1472+To reduce the file I/O, aufs prepares a single memory page to cache xib.
1473+
7e9cd9fe 1474+As well as XINO files, aufs has a feature to truncate/refresh XIB to
53392da6
AM
1475+reduce the number of consumed disk blocks for these files.
1476+
1477+
1478+Virtual or Vertical Dir, and Readdir in Userspace
1479+----------------------------------------------------------------------
1480+In order to support multiple layers (branches), aufs readdir operation
1481+constructs a virtual dir block on memory. For readdir, aufs calls
1482+vfs_readdir() internally for each dir on branches, merges their entries
1483+with eliminating the whiteout-ed ones, and sets it to file (dir)
1484+object. So the file object has its entry list until it is closed. The
1485+entry list will be updated when the file position is zero and becomes
7e9cd9fe 1486+obsoleted. This decision is made in aufs automatically.
53392da6
AM
1487+
1488+The dynamically allocated memory block for the name of entries has a
1489+unit of 512 bytes (by default) and stores the names contiguously (no
1490+padding). Another block for each entry is handled by kmem_cache too.
1491+During building dir blocks, aufs creates a hash list and judges whether
1492+the entry is whiteouted by its upper branch or already listed.
1493+The merged result is cached in the corresponding inode object and
1494+maintained by a customizable life-time option.
1495+
1496+Some people may call it can be a security hole or invite DoS attack
1497+since the opened and once readdir-ed dir (file object) holds its entry
1498+list and becomes a pressure for system memory. But I'd say it is similar
1499+to files under /proc or /sys. The virtual files in them also hold a
1500+memory page (generally) while they are opened. When an idea to reduce
1501+memory for them is introduced, it will be applied to aufs too.
1502+For those who really hate this situation, I've developed readdir(3)
1503+library which operates this merging in userspace. You just need to set
1504+LD_PRELOAD environment variable, and aufs will not consume any memory in
1505+kernel space for readdir(3).
1506+
1507+
1508+Workqueue
1509+----------------------------------------------------------------------
1510+Aufs sometimes requires privilege access to a branch. For instance,
1511+in copy-up/down operation. When a user process is going to make changes
1512+to a file which exists in the lower readonly branch only, and the mode
1513+of one of ancestor directories may not be writable by a user
1514+process. Here aufs copy-up the file with its ancestors and they may
1515+require privilege to set its owner/group/mode/etc.
1516+This is a typical case of an application character of aufs (see
1517+Introduction).
1518+
1519+Aufs uses workqueue synchronously for this case. It creates its own
1520+workqueue. The workqueue is a kernel thread and has privilege. Aufs
1521+passes the request to call mkdir or write (for example), and wait for
1522+its completion. This approach solves a problem of a signal handler
1523+simply.
1524+If aufs didn't adopt the workqueue and changed the privilege of the
7e9cd9fe
AM
1525+process, then the process may receive the unexpected SIGXFSZ or other
1526+signals.
53392da6
AM
1527+
1528+Also aufs uses the system global workqueue ("events" kernel thread) too
1529+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
1530+whiteout base and etc. This is unrelated to a privilege.
1531+Most of aufs operation tries acquiring a rw_semaphore for aufs
1532+superblock at the beginning, at the same time waits for the completion
1533+of all queued asynchronous tasks.
1534+
1535+
1536+Whiteout
1537+----------------------------------------------------------------------
1538+The whiteout in aufs is very similar to Unionfs's. That is represented
1539+by its filename. UnionMount takes an approach of a file mode, but I am
1540+afraid several utilities (find(1) or something) will have to support it.
1541+
1542+Basically the whiteout represents "logical deletion" which stops aufs to
1543+lookup further, but also it represents "dir is opaque" which also stops
7e9cd9fe 1544+further lookup.
53392da6
AM
1545+
1546+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
1547+In order to make several functions in a single systemcall to be
1548+revertible, aufs adopts an approach to rename a directory to a temporary
1549+unique whiteouted name.
1550+For example, in rename(2) dir where the target dir already existed, aufs
1551+renames the target dir to a temporary unique whiteouted name before the
7e9cd9fe 1552+actual rename on a branch, and then handles other actions (make it opaque,
53392da6
AM
1553+update the attributes, etc). If an error happens in these actions, aufs
1554+simply renames the whiteouted name back and returns an error. If all are
1555+succeeded, aufs registers a function to remove the whiteouted unique
1556+temporary name completely and asynchronously to the system global
1557+workqueue.
1558+
1559+
1560+Copy-up
1561+----------------------------------------------------------------------
1562+It is a well-known feature or concept.
1563+When user modifies a file on a readonly branch, aufs operates "copy-up"
1564+internally and makes change to the new file on the upper writable branch.
1565+When the trigger systemcall does not update the timestamps of the parent
1566+dir, aufs reverts it after copy-up.
c2b27bf2
AM
1567+
1568+
1569+Move-down (aufs3.9 and later)
1570+----------------------------------------------------------------------
1571+"Copy-up" is one of the essential features in aufs. It copies a file from
1572+the lower readonly branch to the upper writable branch when a user
1573+changes something about the file.
1574+"Move-down" is an opposite action of copy-up. Basically this action is
1575+run manually instead of automatically and internally.
076b876e
AM
1576+For design and implementation, aufs has to consider these issues.
1577+- whiteout for the file may exist on the lower branch.
1578+- ancestor directories may not exist on the lower branch.
1579+- diropq for the ancestor directories may exist on the upper branch.
1580+- free space on the lower branch will reduce.
1581+- another access to the file may happen during moving-down, including
7e9cd9fe 1582+ UDBA (see "Revalidate Dentry and UDBA").
076b876e
AM
1583+- the file should not be hard-linked nor pseudo-linked. they should be
1584+ handled by auplink utility later.
c2b27bf2
AM
1585+
1586+Sometimes users want to move-down a file from the upper writable branch
1587+to the lower readonly or writable branch. For instance,
1588+- the free space of the upper writable branch is going to run out.
1589+- create a new intermediate branch between the upper and lower branch.
1590+- etc.
1591+
1592+For this purpose, use "aumvdown" command in aufs-util.git.
b912730e
AM
1593diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt linux/Documentation/filesystems/aufs/design/03atomic_open.txt
1594--- /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1595+++ linux/Documentation/filesystems/aufs/design/03atomic_open.txt 2016-10-09 16:55:36.479367956 +0200
b912730e
AM
1596@@ -0,0 +1,85 @@
1597+
8cdd5066 1598+# Copyright (C) 2015-2016 Junjiro R. Okajima
b912730e
AM
1599+#
1600+# This program is free software; you can redistribute it and/or modify
1601+# it under the terms of the GNU General Public License as published by
1602+# the Free Software Foundation; either version 2 of the License, or
1603+# (at your option) any later version.
1604+#
1605+# This program is distributed in the hope that it will be useful,
1606+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1607+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1608+# GNU General Public License for more details.
1609+#
1610+# You should have received a copy of the GNU General Public License
1611+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1612+
1613+Support for a branch who has its ->atomic_open()
1614+----------------------------------------------------------------------
1615+The filesystems who implement its ->atomic_open() are not majority. For
1616+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
1617+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
1618+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
1619+sure whether all filesystems who have ->atomic_open() behave like this,
1620+but NFSv4 surely returns the error.
1621+
1622+In order to support ->atomic_open() for aufs, there are a few
1623+approaches.
1624+
1625+A. Introduce aufs_atomic_open()
1626+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
1627+ branch fs.
1628+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
1629+ an aufs user Pip Cet's approach
1630+ - calls aufs_create(), VFS finish_open() and notify_change().
1631+ - pass fake-mode to finish_open(), and then correct the mode by
1632+ notify_change().
1633+C. Extend aufs_open() to call branch fs's ->atomic_open()
1634+ - no aufs_atomic_open().
1635+ - aufs_lookup() registers the TID to an aufs internal object.
1636+ - aufs_create() does nothing when the matching TID is registered, but
1637+ registers the mode.
1638+ - aufs_open() calls branch fs's ->atomic_open() when the matching
1639+ TID is registered.
1640+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
1641+ credential
1642+ - no aufs_atomic_open().
1643+ - aufs_create() registers the TID to an internal object. this info
1644+ represents "this process created this file just now."
1645+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
1646+ registered TID and re-try open() with superuser's credential.
1647+
1648+Pros and cons for each approach.
1649+
1650+A.
1651+ - straightforward but highly depends upon VFS internal.
1652+ - the atomic behaviour is kept.
1653+ - some of parameters such as nameidata are hard to reproduce for
1654+ branch fs.
1655+ - large overhead.
1656+B.
1657+ - easy to implement.
1658+ - the atomic behaviour is lost.
1659+C.
1660+ - the atomic behaviour is kept.
1661+ - dirty and tricky.
1662+ - VFS checks whether the file is created correctly after calling
1663+ ->create(), which means this approach doesn't work.
1664+D.
1665+ - easy to implement.
1666+ - the atomic behaviour is lost.
1667+ - to open a file with superuser's credential and give it to a user
1668+ process is a bad idea, since the file object keeps the credential
1669+ in it. It may affect LSM or something. This approach doesn't work
1670+ either.
1671+
1672+The approach A is ideal, but it is hard to implement. So here is a
1673+variation of A, which is to be implemented.
1674+
1675+A-1. Introduce aufs_atomic_open()
1676+ - calls branch fs ->atomic_open() if exists. otherwise calls
1677+ vfs_create() and finish_open().
1678+ - the demerit is that the several checks after branch fs
1679+ ->atomic_open() are lost. in the ordinary case, the checks are
1680+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
1681+ be implemented in aufs, but not all I am afraid.
53392da6
AM
1682diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
1683--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1684+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2016-10-09 16:55:36.479367956 +0200
7e9cd9fe 1685@@ -0,0 +1,113 @@
53392da6 1686+
8cdd5066 1687+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1688+#
1689+# This program is free software; you can redistribute it and/or modify
1690+# it under the terms of the GNU General Public License as published by
1691+# the Free Software Foundation; either version 2 of the License, or
1692+# (at your option) any later version.
1693+#
1694+# This program is distributed in the hope that it will be useful,
1695+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1696+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1697+# GNU General Public License for more details.
1698+#
1699+# You should have received a copy of the GNU General Public License
523b37e3 1700+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1701+
1702+Lookup in a Branch
1703+----------------------------------------------------------------------
1704+Since aufs has a character of sub-VFS (see Introduction), it operates
7e9cd9fe
AM
1705+lookup for branches as VFS does. It may be a heavy work. But almost all
1706+lookup operation in aufs is the simplest case, ie. lookup only an entry
1707+directly connected to its parent. Digging down the directory hierarchy
1708+is unnecessary. VFS has a function lookup_one_len() for that use, and
1709+aufs calls it.
1710+
1711+When a branch is a remote filesystem, aufs basically relies upon its
53392da6
AM
1712+->d_revalidate(), also aufs forces the hardest revalidate tests for
1713+them.
1714+For d_revalidate, aufs implements three levels of revalidate tests. See
1715+"Revalidate Dentry and UDBA" in detail.
1716+
1717+
076b876e
AM
1718+Test Only the Highest One for the Directory Permission (dirperm1 option)
1719+----------------------------------------------------------------------
1720+Let's try case study.
1721+- aufs has two branches, upper readwrite and lower readonly.
1722+ /au = /rw + /ro
1723+- "dirA" exists under /ro, but not under /rw. and its mode is 0700.
1724+- user invoked "chmod a+rx /au/dirA"
1725+- the internal copy-up is activated and "/rw/dirA" is created and its
7e9cd9fe 1726+ permission bits are set to world readable.
076b876e
AM
1727+- then "/au/dirA" becomes world readable?
1728+
1729+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1730+or it may be a natively readonly filesystem. If aufs respects the lower
1731+branch, it should not respond readdir request from other users. But user
1732+allowed it by chmod. Should aufs really reject showing the entries
1733+under /ro/dirA?
1734+
7e9cd9fe
AM
1735+To be honest, I don't have a good solution for this case. So aufs
1736+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
1737+users.
076b876e
AM
1738+When dirperm1 is specified, aufs checks only the highest one for the
1739+directory permission, and shows the entries. Otherwise, as usual, checks
1740+every dir existing on all branches and rejects the request.
1741+
1742+As a side effect, dirperm1 option improves the performance of aufs
1743+because the number of permission check is reduced when the number of
1744+branch is many.
1745+
1746+
53392da6
AM
1747+Revalidate Dentry and UDBA (User's Direct Branch Access)
1748+----------------------------------------------------------------------
1749+Generally VFS helpers re-validate a dentry as a part of lookup.
1750+0. digging down the directory hierarchy.
1751+1. lock the parent dir by its i_mutex.
1752+2. lookup the final (child) entry.
1753+3. revalidate it.
1754+4. call the actual operation (create, unlink, etc.)
1755+5. unlock the parent dir
1756+
1757+If the filesystem implements its ->d_revalidate() (step 3), then it is
1758+called. Actually aufs implements it and checks the dentry on a branch is
1759+still valid.
1760+But it is not enough. Because aufs has to release the lock for the
1761+parent dir on a branch at the end of ->lookup() (step 2) and
1762+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1763+held by VFS.
1764+If the file on a branch is changed directly, eg. bypassing aufs, after
1765+aufs released the lock, then the subsequent operation may cause
1766+something unpleasant result.
1767+
1768+This situation is a result of VFS architecture, ->lookup() and
1769+->d_revalidate() is separated. But I never say it is wrong. It is a good
1770+design from VFS's point of view. It is just not suitable for sub-VFS
1771+character in aufs.
1772+
1773+Aufs supports such case by three level of revalidation which is
1774+selectable by user.
1775+1. Simple Revalidate
1776+ Addition to the native flow in VFS's, confirm the child-parent
1777+ relationship on the branch just after locking the parent dir on the
1778+ branch in the "actual operation" (step 4). When this validation
1779+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1780+ checks the validation of the dentry on branches.
1781+2. Monitor Changes Internally by Inotify/Fsnotify
1782+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
1783+ the dentry on the branch, and returns EBUSY if it finds different
1784+ dentry.
1785+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
1786+ during it is in cache. When the event is notified, aufs registers a
1787+ function to kernel 'events' thread by schedule_work(). And the
1788+ function sets some special status to the cached aufs dentry and inode
1789+ private data. If they are not cached, then aufs has nothing to
1790+ do. When the same file is accessed through aufs (step 0-3) later,
1791+ aufs will detect the status and refresh all necessary data.
1792+ In this mode, aufs has to ignore the event which is fired by aufs
1793+ itself.
1794+3. No Extra Validation
1795+ This is the simplest test and doesn't add any additional revalidation
7e9cd9fe 1796+ test, and skip the revalidation in step 4. It is useful and improves
53392da6
AM
1797+ aufs performance when system surely hide the aufs branches from user,
1798+ by over-mounting something (or another method).
1799diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
1800--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1801+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2016-10-09 16:55:36.482701377 +0200
7e9cd9fe 1802@@ -0,0 +1,74 @@
53392da6 1803+
8cdd5066 1804+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1805+#
1806+# This program is free software; you can redistribute it and/or modify
1807+# it under the terms of the GNU General Public License as published by
1808+# the Free Software Foundation; either version 2 of the License, or
1809+# (at your option) any later version.
1810+#
1811+# This program is distributed in the hope that it will be useful,
1812+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1813+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1814+# GNU General Public License for more details.
1815+#
1816+# You should have received a copy of the GNU General Public License
523b37e3 1817+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1818+
1819+Branch Manipulation
1820+
1821+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1822+and changing its permission/attribute, there are a lot of works to do.
1823+
1824+
1825+Add a Branch
1826+----------------------------------------------------------------------
1827+o Confirm the adding dir exists outside of aufs, including loopback
7e9cd9fe 1828+ mount, and its various attributes.
53392da6
AM
1829+o Initialize the xino file and whiteout bases if necessary.
1830+ See struct.txt.
1831+
1832+o Check the owner/group/mode of the directory
1833+ When the owner/group/mode of the adding directory differs from the
1834+ existing branch, aufs issues a warning because it may impose a
1835+ security risk.
1836+ For example, when an upper writable branch has a world writable empty
1837+ top directory, a malicious user can create any files on the writable
1838+ branch directly, like copy-up and modify manually. If something like
1839+ /etc/{passwd,shadow} exists on the lower readonly branch but not the upper
1840+ writable branch, and the writable branch is world-writable, then a
1841+ malicious guy may create /etc/passwd on the writable branch directly
1842+ and the infected file will be valid in aufs.
7e9cd9fe 1843+ I am afraid it can be a security issue, but aufs can do nothing except
53392da6
AM
1844+ producing a warning.
1845+
1846+
1847+Delete a Branch
1848+----------------------------------------------------------------------
1849+o Confirm the deleting branch is not busy
1850+ To be general, there is one merit to adopt "remount" interface to
1851+ manipulate branches. It is to discard caches. At deleting a branch,
1852+ aufs checks the still cached (and connected) dentries and inodes. If
1853+ there are any, then they are all in-use. An inode without its
1854+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
1855+
1856+ For the cached one, aufs checks whether the same named entry exists on
1857+ other branches.
1858+ If the cached one is a directory, because aufs provides a merged view
1859+ to users, as long as one dir is left on any branch aufs can show the
1860+ dir to users. In this case, the branch can be removed from aufs.
1861+ Otherwise aufs rejects deleting the branch.
1862+
1863+ If any file on the deleting branch is opened by aufs, then aufs
1864+ rejects deleting.
1865+
1866+
1867+Modify the Permission of a Branch
1868+----------------------------------------------------------------------
1869+o Re-initialize or remove the xino file and whiteout bases if necessary.
1870+ See struct.txt.
1871+
1872+o rw --> ro: Confirm the modifying branch is not busy
1873+ Aufs rejects the request if any of these conditions are true.
1874+ - a file on the branch is mmap-ed.
1875+ - a regular file on the branch is opened for write and there is no
1876+ same named entry on the upper branch.
1877diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1878--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1879+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2016-10-09 16:55:36.482701377 +0200
523b37e3 1880@@ -0,0 +1,64 @@
53392da6 1881+
8cdd5066 1882+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1883+#
1884+# This program is free software; you can redistribute it and/or modify
1885+# it under the terms of the GNU General Public License as published by
1886+# the Free Software Foundation; either version 2 of the License, or
1887+# (at your option) any later version.
1888+#
1889+# This program is distributed in the hope that it will be useful,
1890+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1891+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1892+# GNU General Public License for more details.
1893+#
1894+# You should have received a copy of the GNU General Public License
523b37e3 1895+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1896+
1897+Policies to Select One among Multiple Writable Branches
1898+----------------------------------------------------------------------
1899+When the number of writable branch is more than one, aufs has to decide
1900+the target branch for file creation or copy-up. By default, the highest
1901+writable branch which has the parent (or ancestor) dir of the target
1902+file is chosen (top-down-parent policy).
1903+By user's request, aufs implements some other policies to select the
7e9cd9fe
AM
1904+writable branch, for file creation several policies, round-robin,
1905+most-free-space, and other policies. For copy-up, top-down-parent,
1906+bottom-up-parent, bottom-up and others.
53392da6
AM
1907+
1908+As expected, the round-robin policy selects the branch in circular. When
1909+you have two writable branches and creates 10 new files, 5 files will be
1910+created for each branch. mkdir(2) systemcall is an exception. When you
1911+create 10 new directories, all will be created on the same branch.
1912+And the most-free-space policy selects the one which has most free
1913+space among the writable branches. The amount of free space will be
1914+checked by aufs internally, and users can specify its time interval.
1915+
1916+The policies for copy-up is more simple,
1917+top-down-parent is equivalent to the same named one in create policy,
1918+bottom-up-parent selects the writable branch where the parent dir
1919+exists and the nearest upper one from the copyup-source,
1920+bottom-up selects the nearest upper writable branch from the
1921+copyup-source, regardless the existence of the parent dir.
1922+
1923+There are some rules or exceptions to apply these policies.
1924+- If there is a readonly branch above the policy-selected branch and
1925+ the parent dir is marked as opaque (a variation of whiteout), or the
1926+ target (creating) file is whiteout-ed on the upper readonly branch,
1927+ then the result of the policy is ignored and the target file will be
1928+ created on the nearest upper writable branch than the readonly branch.
1929+- If there is a writable branch above the policy-selected branch and
1930+ the parent dir is marked as opaque or the target file is whiteouted
1931+ on the branch, then the result of the policy is ignored and the target
1932+ file will be created on the highest one among the upper writable
1933+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1934+ it as usual.
1935+- link(2) and rename(2) systemcalls are exceptions in every policy.
1936+ They try selecting the branch where the source exists as possible
1937+ since copyup a large file will take long time. If it can't be,
1938+ ie. the branch where the source exists is readonly, then they will
1939+ follow the copyup policy.
1940+- There is an exception for rename(2) when the target exists.
1941+ If the rename target exists, aufs compares the index of the branches
1942+ where the source and the target exists and selects the higher
1943+ one. If the selected branch is readonly, then aufs follows the
1944+ copyup policy.
076b876e
AM
1945diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt linux/Documentation/filesystems/aufs/design/06fhsm.txt
1946--- /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 1947+++ linux/Documentation/filesystems/aufs/design/06fhsm.txt 2016-10-09 16:55:36.482701377 +0200
076b876e
AM
1948@@ -0,0 +1,120 @@
1949+
8cdd5066 1950+# Copyright (C) 2011-2016 Junjiro R. Okajima
076b876e
AM
1951+#
1952+# This program is free software; you can redistribute it and/or modify
1953+# it under the terms of the GNU General Public License as published by
1954+# the Free Software Foundation; either version 2 of the License, or
1955+# (at your option) any later version.
1956+#
1957+# This program is distributed in the hope that it will be useful,
1958+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1959+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1960+# GNU General Public License for more details.
1961+#
1962+# You should have received a copy of the GNU General Public License
1963+# along with this program; if not, write to the Free Software
1964+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1965+
1966+
1967+File-based Hierarchical Storage Management (FHSM)
1968+----------------------------------------------------------------------
1969+Hierarchical Storage Management (or HSM) is a well-known feature in the
1970+storage world. Aufs provides this feature as file-based with multiple
7e9cd9fe 1971+writable branches, based upon the principle of "Colder, the Lower".
076b876e 1972+Here the word "colder" means that the less used files, and "lower" means
7e9cd9fe 1973+that the position in the order of the stacked branches vertically.
076b876e
AM
1974+These multiple writable branches are prioritized, ie. the topmost one
1975+should be the fastest drive and be used heavily.
1976+
1977+o Characters in aufs FHSM story
1978+- aufs itself and a new branch attribute.
1979+- a new ioctl interface to move-down and to establish a connection with
1980+ the daemon ("move-down" is a converse of "copy-up").
1981+- userspace tool and daemon.
1982+
1983+The userspace daemon establishes a connection with aufs and waits for
1984+the notification. The notified information is very similar to struct
1985+statfs containing the number of consumed blocks and inodes.
1986+When the consumed blocks/inodes of a branch exceeds the user-specified
1987+upper watermark, the daemon activates its move-down process until the
1988+consumed blocks/inodes reaches the user-specified lower watermark.
1989+
1990+The actual move-down is done by aufs based upon the request from
1991+user-space since we need to maintain the inode number and the internal
1992+pointer arrays in aufs.
1993+
1994+Currently aufs FHSM handles the regular files only. Additionally they
1995+must not be hard-linked nor pseudo-linked.
1996+
1997+
1998+o Cowork of aufs and the user-space daemon
1999+ During the userspace daemon established the connection, aufs sends a
2000+ small notification to it whenever aufs writes something into the
2001+ writable branch. But it may cost high since aufs issues statfs(2)
2002+ internally. So user can specify a new option to cache the
2003+ info. Actually the notification is controlled by these factors.
2004+ + the specified cache time.
2005+ + classified as "force" by aufs internally.
2006+ Until the specified time expires, aufs doesn't send the info
2007+ except the forced cases. When aufs decides forcing, the info is always
2008+ notified to userspace.
2009+ For example, the number of free inodes is generally large enough and
2010+ the shortage of it happens rarely. So aufs doesn't force the
2011+ notification when creating a new file, directory and others. This is
2012+ the typical case which aufs doesn't force.
2013+ When aufs writes the actual filedata and the files consumes any of new
2014+ blocks, the aufs forces notifying.
2015+
2016+
2017+o Interfaces in aufs
2018+- New branch attribute.
2019+ + fhsm
2020+ Specifies that the branch is managed by FHSM feature. In other word,
2021+ participant in the FHSM.
2022+ When nofhsm is set to the branch, it will not be the source/target
2023+ branch of the move-down operation. This attribute is set
2024+ independently from coo and moo attributes, and if you want full
2025+ FHSM, you should specify them as well.
2026+- New mount option.
2027+ + fhsm_sec
2028+ Specifies a second to suppress many less important info to be
2029+ notified.
2030+- New ioctl.
2031+ + AUFS_CTL_FHSM_FD
2032+ create a new file descriptor which userspace can read the notification
2033+ (a subset of struct statfs) from aufs.
2034+- Module parameter 'brs'
2035+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
2036+ be set.
2037+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
2038+ When there are two or more branches with fhsm attributes,
2039+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
2040+ terminates it. As a result of remounting and branch-manipulation, the
2041+ number of branches with fhsm attribute can be one. In this case,
2042+ /sbin/mount.aufs will terminate the user-space daemon.
2043+
2044+
2045+Finally the operation is done as these steps in kernel-space.
2046+- make sure that,
2047+ + no one else is using the file.
2048+ + the file is not hard-linked.
2049+ + the file is not pseudo-linked.
2050+ + the file is a regular file.
2051+ + the parent dir is not opaqued.
2052+- find the target writable branch.
2053+- make sure the file is not whiteout-ed by the upper (than the target)
2054+ branch.
2055+- make the parent dir on the target branch.
2056+- mutex lock the inode on the branch.
2057+- unlink the whiteout on the target branch (if exists).
2058+- lookup and create the whiteout-ed temporary name on the target branch.
2059+- copy the file as the whiteout-ed temporary name on the target branch.
2060+- rename the whiteout-ed temporary name to the original name.
2061+- unlink the file on the source branch.
2062+- maintain the internal pointer array and the external inode number
2063+ table (XINO).
2064+- maintain the timestamps and other attributes of the parent dir and the
2065+ file.
2066+
2067+And of course, in every step, an error may happen. So the operation
2068+should restore the original file state after an error happens.
53392da6
AM
2069diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
2070--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2071+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2016-10-09 16:55:36.482701377 +0200
b912730e 2072@@ -0,0 +1,72 @@
53392da6 2073+
8cdd5066 2074+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2075+#
2076+# This program is free software; you can redistribute it and/or modify
2077+# it under the terms of the GNU General Public License as published by
2078+# the Free Software Foundation; either version 2 of the License, or
2079+# (at your option) any later version.
2080+#
2081+# This program is distributed in the hope that it will be useful,
2082+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2083+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2084+# GNU General Public License for more details.
2085+#
2086+# You should have received a copy of the GNU General Public License
523b37e3 2087+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2088+
2089+mmap(2) -- File Memory Mapping
2090+----------------------------------------------------------------------
2091+In aufs, the file-mapped pages are handled by a branch fs directly, no
2092+interaction with aufs. It means aufs_mmap() calls the branch fs's
2093+->mmap().
2094+This approach is simple and good, but there is one problem.
7e9cd9fe 2095+Under /proc, several entries show the mmapped files by its path (with
53392da6
AM
2096+device and inode number), and the printed path will be the path on the
2097+branch fs's instead of virtual aufs's.
2098+This is not a problem in most cases, but some utilities such as lsof(1)
2099+(and their users) may expect the path on aufs.
2100+
2101+To address this issue, aufs adds a new member called vm_prfile in struct
2102+vm_area_struct (and struct vm_region). The original vm_file points to
2103+the file on the branch fs in order to handle everything correctly as
2104+usual. The new vm_prfile points to a virtual file in aufs, and the
2105+show-functions in procfs refer to vm_prfile if it is set.
2106+Also we need to maintain several other places which touch vm_file,
2107+such as
2108+- fork()/clone() copies vma and the reference count of vm_file is
2109+ incremented.
2110+- merging vma maintains the ref count too.
2111+
7e9cd9fe 2112+This is not a good approach. It just fakes the printed path. But it
53392da6
AM
2113+leaves all behaviour around f_mapping unchanged. This is surely an
2114+advantage.
2115+Actually aufs had adopted another complicated approach which calls
2116+generic_file_mmap() and handles struct vm_operations_struct. In this
2117+approach, aufs met a hard problem and I could not solve it without
2118+switching the approach.
b912730e
AM
2119+
2120+There may be one more approach, which is
2121+- bind-mount the branch-root onto the aufs-root internally
2122+- grab the new vfsmount (ie. struct mount)
2123+- lazy-umount the branch-root internally
2124+- in open(2) the aufs-file, open the branch-file with the hidden
2125+ vfsmount (instead of the original branch's vfsmount)
2126+- ideally this "bind-mount and lazy-umount" should be done atomically,
2127+ but it may be possible from userspace by the mount helper.
2128+
2129+Adding the internal hidden vfsmount and using it in opening a file, the
2130+file path under /proc will be printed correctly. This approach looks
2131+smarter, but is not possible I am afraid.
2132+- aufs-root may be bind-mount later. when it happens, another hidden
2133+ vfsmount will be required.
2134+- it is hard to get the chance to bind-mount and lazy-umount
2135+ + in kernel-space, FS can have vfsmount in open(2) via
2136+ file->f_path, and aufs can know its vfsmount. But several locks are
2137+ already acquired, and if aufs tries to bind-mount and lazy-umount
2138+ here, then it may cause a deadlock.
2139+ + in user-space, bind-mount doesn't invoke the mount helper.
2140+- since /proc shows dev and ino, aufs has to give vma these info. it
2141+ means a new member vm_prinode will be necessary. this is essentially
2142+ equivalent to vm_prfile described above.
2143+
2144+I have to give up this "looks-smarter" approach.
c1595e42
JR
2145diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt linux/Documentation/filesystems/aufs/design/06xattr.txt
2146--- /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2147+++ linux/Documentation/filesystems/aufs/design/06xattr.txt 2016-10-09 16:55:36.482701377 +0200
c1595e42
JR
2148@@ -0,0 +1,96 @@
2149+
8cdd5066 2150+# Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
2151+#
2152+# This program is free software; you can redistribute it and/or modify
2153+# it under the terms of the GNU General Public License as published by
2154+# the Free Software Foundation; either version 2 of the License, or
2155+# (at your option) any later version.
2156+#
2157+# This program is distributed in the hope that it will be useful,
2158+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2159+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2160+# GNU General Public License for more details.
2161+#
2162+# You should have received a copy of the GNU General Public License
2163+# along with this program; if not, write to the Free Software
2164+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2165+
2166+
2167+Listing XATTR/EA and getting the value
2168+----------------------------------------------------------------------
2169+For the inode standard attributes (owner, group, timestamps, etc.), aufs
2170+shows the values from the topmost existing file. This behaviour is good
7e9cd9fe 2171+for the non-dir entries since the behaviour exactly matches the shown
c1595e42
JR
2172+information. But for the directories, aufs considers all the same named
2173+entries on the lower branches. Which means, if one of the lower entries
2174+rejects readdir call, then aufs returns an error even if the topmost
2175+entry allows it. This behaviour is necessary to respect the branch fs's
2176+security, but can make users confused since the user-visible standard
2177+attributes don't match the behaviour.
2178+To address this issue, aufs has a mount option called dirperm1 which
2179+checks the permission for the topmost entry only, and ignores the lower
2180+entry's permission.
2181+
2182+A similar issue can happen around XATTR.
2183+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
7e9cd9fe
AM
2184+always set. Otherwise these very unpleasant situations would happen.
2185+- listxattr(2) may return the duplicated entries.
c1595e42
JR
2186+- users may not be able to remove or reset the XATTR forever.
2187+
2188+
2189+XATTR/EA support in the internal (copy,move)-(up,down)
2190+----------------------------------------------------------------------
7e9cd9fe 2191+Generally the extended attributes of inode are categorized as these.
c1595e42
JR
2192+- "security" for LSM and capability.
2193+- "system" for posix ACL, 'acl' mount option is required for the branch
2194+ fs generally.
2195+- "trusted" for userspace, CAP_SYS_ADMIN is required.
2196+- "user" for userspace, 'user_xattr' mount option is required for the
2197+ branch fs generally.
2198+
2199+Moreover there are some other categories. Aufs handles these rather
2200+unpopular categories as the ordinary ones, ie. there is no special
2201+condition nor exception.
2202+
2203+In copy-up, the support for XATTR on the dst branch may differ from the
2204+src branch. In this case, the copy-up operation will get an error and
7e9cd9fe
AM
2205+the original user operation which triggered the copy-up will fail. It
2206+can happen that even all copy-up will fail.
c1595e42
JR
2207+When both of src and dst branches support XATTR and if an error occurs
2208+during copying XATTR, then the copy-up should fail obviously. That is a
2209+good reason and aufs should return an error to userspace. But when only
7e9cd9fe 2210+the src branch supports that XATTR, aufs should not return an error.
c1595e42
JR
2211+For example, the src branch supports ACL but the dst branch doesn't
2212+because the dst branch may natively un-support it or temporarily
2213+un-support it due to "noacl" mount option. Of course, the dst branch fs
2214+may NOT return an error even if the XATTR is not supported. It is
2215+totally up to the branch fs.
2216+
2217+Anyway when the aufs internal copy-up gets an error from the dst branch
2218+fs, then aufs tries removing the just copied entry and returns the error
2219+to the userspace. The worst case of this situation will be all copy-up
2220+will fail.
2221+
2222+For the copy-up operation, there are two basic approaches.
2223+- copy the specified XATTR only (by category above), and return the
7e9cd9fe 2224+ error unconditionally if it happens.
c1595e42
JR
2225+- copy all XATTR, and ignore the error on the specified category only.
2226+
2227+In order to support XATTR and to implement the correct behaviour, aufs
7e9cd9fe
AM
2228+chooses the latter approach and introduces some new branch attributes,
2229+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
c1595e42 2230+They correspond to the XATTR namespaces (see above). Additionally, to be
7e9cd9fe
AM
2231+convenient, "icex" is also provided which means all "icex*" attributes
2232+are set (here the word "icex" stands for "ignore copy-error on XATTR").
c1595e42
JR
2233+
2234+The meaning of these attributes is to ignore the error from setting
2235+XATTR on that branch.
2236+Note that aufs tries copying all XATTR unconditionally, and ignores the
2237+error from the dst branch according to the specified attributes.
2238+
2239+Some XATTR may have its default value. The default value may come from
2240+the parent dir or the environment. If the default value is set at the
2241+file creating-time, it will be overwritten by copy-up.
2242+Some contradiction may happen I am afraid.
2243+Do we need another attribute to stop copying XATTR? I am unsure. For
2244+now, aufs implements the branch attributes to ignore the error.
53392da6
AM
2245diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
2246--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2247+++ linux/Documentation/filesystems/aufs/design/07export.txt 2016-10-09 16:55:36.482701377 +0200
523b37e3 2248@@ -0,0 +1,58 @@
53392da6 2249+
8cdd5066 2250+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2251+#
2252+# This program is free software; you can redistribute it and/or modify
2253+# it under the terms of the GNU General Public License as published by
2254+# the Free Software Foundation; either version 2 of the License, or
2255+# (at your option) any later version.
2256+#
2257+# This program is distributed in the hope that it will be useful,
2258+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2259+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2260+# GNU General Public License for more details.
2261+#
2262+# You should have received a copy of the GNU General Public License
523b37e3 2263+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2264+
2265+Export Aufs via NFS
2266+----------------------------------------------------------------------
2267+Here is an approach.
2268+- like xino/xib, add a new file 'xigen' which stores aufs inode
2269+ generation.
2270+- iget_locked(): initialize aufs inode generation for a new inode, and
2271+ store it in xigen file.
2272+- destroy_inode(): increment aufs inode generation and store it in xigen
2273+ file. it is necessary even if it is not unlinked, because any data of
2274+ inode may be changed by UDBA.
2275+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
2276+ build file handle by
2277+ + branch id (4 bytes)
2278+ + superblock generation (4 bytes)
2279+ + inode number (4 or 8 bytes)
2280+ + parent dir inode number (4 or 8 bytes)
2281+ + inode generation (4 bytes)
2282+ + return value of exportfs_encode_fh() for the parent on a branch (4
2283+ bytes)
2284+ + file handle for a branch (by exportfs_encode_fh())
2285+- fh_to_dentry():
2286+ + find the index of a branch from its id in handle, and check it
2287+ still exists in aufs.
2288+ + 1st level: get the inode number from handle and search it in cache.
7e9cd9fe
AM
2289+ + 2nd level: if not found in cache, get the parent inode number from
2290+ the handle and search it in cache. and then open the found parent
2291+ dir, find the matching inode number by vfs_readdir() and get its
2292+ name, and call lookup_one_len() for the target dentry.
53392da6
AM
2293+ + 3rd level: if the parent dir is not cached, call
2294+ exportfs_decode_fh() for a branch and get the parent on a branch,
2295+ build a pathname of it, convert it into a pathname in aufs, call
2296+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
2297+ the 2nd level.
2298+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
2299+ for every branch, but not itself. to get this, (currently) aufs
2300+ searches in current->nsproxy->mnt_ns list. it may not be a good
2301+ idea, but I didn't get other approach.
2302+ + test the generation of the gotten inode.
2303+- every inode operation: they may get EBUSY due to UDBA. in this case,
2304+ convert it into ESTALE for NFSD.
2305+- readdir(): call lockdep_on/off() because filldir in NFSD calls
2306+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
2307diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
2308--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2309+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2016-10-09 16:55:36.482701377 +0200
523b37e3 2310@@ -0,0 +1,52 @@
53392da6 2311+
8cdd5066 2312+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2313+#
2314+# This program is free software; you can redistribute it and/or modify
2315+# it under the terms of the GNU General Public License as published by
2316+# the Free Software Foundation; either version 2 of the License, or
2317+# (at your option) any later version.
2318+#
2319+# This program is distributed in the hope that it will be useful,
2320+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2321+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2322+# GNU General Public License for more details.
2323+#
2324+# You should have received a copy of the GNU General Public License
523b37e3 2325+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2326+
2327+Show Whiteout Mode (shwh)
2328+----------------------------------------------------------------------
2329+Generally aufs hides the name of whiteouts. But in some cases, to show
2330+them is very useful for users. For instance, creating a new middle layer
2331+(branch) by merging existing layers.
2332+
2333+(borrowing aufs1 HOW-TO from a user, Michael Towers)
2334+When you have three branches,
2335+- Bottom: 'system', squashfs (underlying base system), read-only
2336+- Middle: 'mods', squashfs, read-only
2337+- Top: 'overlay', ram (tmpfs), read-write
2338+
2339+The top layer is loaded at boot time and saved at shutdown, to preserve
2340+the changes made to the system during the session.
2341+When larger changes have been made, or smaller changes have accumulated,
2342+the size of the saved top layer data grows. At this point, it would be
2343+nice to be able to merge the two overlay branches ('mods' and 'overlay')
2344+and rewrite the 'mods' squashfs, clearing the top layer and thus
2345+restoring save and load speed.
2346+
2347+This merging is simplified by the use of another aufs mount, of just the
2348+two overlay branches using the 'shwh' option.
2349+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
2350+ aufs /livesys/merge_union
2351+
2352+A merged view of these two branches is then available at
2353+/livesys/merge_union, and the new feature is that the whiteouts are
2354+visible!
2355+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
2356+writing to all branches. Also the default mode for all branches is 'ro'.
2357+It is now possible to save the combined contents of the two overlay
2358+branches to a new squashfs, e.g.:
2359+# mksquashfs /livesys/merge_union /path/to/newmods.squash
2360+
2361+This new squashfs archive can be stored on the boot device and the
2362+initramfs will use it to replace the old one at the next boot.
2363diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
2364--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2365+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2016-10-09 16:55:36.482701377 +0200
7e9cd9fe 2366@@ -0,0 +1,47 @@
53392da6 2367+
8cdd5066 2368+# Copyright (C) 2010-2016 Junjiro R. Okajima
53392da6
AM
2369+#
2370+# This program is free software; you can redistribute it and/or modify
2371+# it under the terms of the GNU General Public License as published by
2372+# the Free Software Foundation; either version 2 of the License, or
2373+# (at your option) any later version.
2374+#
2375+# This program is distributed in the hope that it will be useful,
2376+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2377+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2378+# GNU General Public License for more details.
2379+#
2380+# You should have received a copy of the GNU General Public License
523b37e3 2381+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2382+
2383+Dynamically customizable FS operations
2384+----------------------------------------------------------------------
2385+Generally FS operations (struct inode_operations, struct
2386+address_space_operations, struct file_operations, etc.) are defined as
2387+"static const", but it never means that FS have only one set of
2388+operation. Some FS have multiple sets of them. For instance, ext2 has
2389+three sets, one for XIP, for NOBH, and for normal.
2390+Since aufs overrides and redirects these operations, sometimes aufs has
7e9cd9fe 2391+to change its behaviour according to the branch FS type. More importantly
53392da6
AM
2392+VFS acts differently if a function (member in the struct) is set or
2393+not. It means aufs should have several sets of operations and select one
2394+among them according to the branch FS definition.
2395+
7e9cd9fe 2396+In order to solve this problem and not to affect the behaviour of VFS,
53392da6 2397+aufs defines these operations dynamically. For instance, aufs defines
7e9cd9fe
AM
2398+dummy direct_IO function for struct address_space_operations, but it may
2399+not be set to the address_space_operations actually. When the branch FS
2400+doesn't have it, aufs doesn't set it to its address_space_operations
2401+while the function definition itself is still alive. So the behaviour
2402+itself will not change, and it will return an error when direct_IO is
2403+not set.
53392da6
AM
2404+
2405+The lifetime of these dynamically generated operation objects is
2406+maintained by aufs branch object. When the branch is removed from aufs,
2407+the reference counter of the object is decremented. When it reaches
2408+zero, the dynamically generated operation object will be freed.
2409+
7e9cd9fe
AM
2410+This approach is designed to support AIO (io_submit), Direct I/O and
2411+XIP (DAX) mainly.
2412+Currently this approach is applied to address_space_operations for
2413+regular files only.
53392da6
AM
2414diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
2415--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
2416+++ linux/Documentation/filesystems/aufs/README 2016-12-17 12:28:17.595211562 +0100
2417@@ -0,0 +1,393 @@
53392da6 2418+
5527c038 2419+Aufs4 -- advanced multi layered unification filesystem version 4.x
53392da6
AM
2420+http://aufs.sf.net
2421+Junjiro R. Okajima
2422+
2423+
2424+0. Introduction
2425+----------------------------------------
2426+In the early days, aufs was entirely re-designed and re-implemented
7e9cd9fe 2427+Unionfs Version 1.x series. Adding many original ideas, approaches,
53392da6
AM
2428+improvements and implementations, it becomes totally different from
2429+Unionfs while keeping the basic features.
2430+Recently, Unionfs Version 2.x series begin taking some of the same
2431+approaches to aufs1's.
2432+Unionfs is being developed by Professor Erez Zadok at Stony Brook
2433+University and his team.
2434+
5527c038 2435+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
53392da6
AM
2436+If you want older kernel version support, try aufs2-2.6.git or
2437+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
2438+
2439+Note: it becomes clear that "Aufs was rejected. Let's give it up."
38d290e6
JR
2440+ According to Christoph Hellwig, linux rejects all union-type
2441+ filesystems but UnionMount.
53392da6
AM
2442+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
2443+
38d290e6
JR
2444+PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
2445+ UnionMount, and he pointed out an issue around a directory mutex
2446+ lock and aufs addressed it. But it is still unsure whether aufs will
2447+ be merged (or any other union solution).
076b876e 2448+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
38d290e6 2449+
53392da6
AM
2450+
2451+1. Features
2452+----------------------------------------
2453+- unite several directories into a single virtual filesystem. The member
2454+ directory is called as a branch.
2455+- you can specify the permission flags to the branch, which are 'readonly',
2456+ 'readwrite' and 'whiteout-able.'
2457+- by upper writable branch, internal copyup and whiteout, files/dirs on
2458+ readonly branch are modifiable logically.
2459+- dynamic branch manipulation, add, del.
2460+- etc...
2461+
7e9cd9fe
AM
2462+Also there are many enhancements in aufs, such as:
2463+- test only the highest one for the directory permission (dirperm1)
2464+- copyup on open (coo=)
2465+- 'move' policy for copy-up between two writable branches, after
2466+ checking free space.
2467+- xattr, acl
53392da6
AM
2468+- readdir(3) in userspace.
2469+- keep inode number by external inode number table
2470+- keep the timestamps of file/dir in internal copyup operation
2471+- seekable directory, supporting NFS readdir.
2472+- whiteout is hardlinked in order to reduce the consumption of inodes
2473+ on branch
2474+- do not copyup, nor create a whiteout when it is unnecessary
2475+- revert a single systemcall when an error occurs in aufs
2476+- remount interface instead of ioctl
2477+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
2478+- loopback mounted filesystem as a branch
2479+- kernel thread for removing the dir which has plenty of whiteouts
2480+- support copyup sparse file (a file which has a 'hole' in it)
2481+- default permission flags for branches
2482+- selectable permission flags for ro branch, whether whiteout can
2483+ exist or not
2484+- export via NFS.
2485+- support <sysfs>/fs/aufs and <debugfs>/aufs.
2486+- support multiple writable branches, some policies to select one
2487+ among multiple writable branches.
2488+- a new semantics for link(2) and rename(2) to support multiple
2489+ writable branches.
2490+- no glibc changes are required.
2491+- pseudo hardlink (hardlink over branches)
2492+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
2493+ including NFS or remote filesystem branch.
2494+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
2495+- and more...
2496+
5527c038 2497+Currently these features are temporarily dropped from aufs4.
53392da6 2498+See design/08plan.txt in detail.
53392da6
AM
2499+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
2500+ (robr)
2501+- statistics of aufs thread (/sys/fs/aufs/stat)
53392da6
AM
2502+
2503+Features or just an idea in the future (see also design/*.txt),
2504+- reorder the branch index without del/re-add.
2505+- permanent xino files for NFSD
2506+- an option for refreshing the opened files after add/del branches
53392da6
AM
2507+- light version, without branch manipulation. (unnecessary?)
2508+- copyup in userspace
2509+- inotify in userspace
2510+- readv/writev
53392da6
AM
2511+
2512+
2513+2. Download
2514+----------------------------------------
5527c038
JR
2515+There are three GIT trees for aufs4, aufs4-linux.git,
2516+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
1e00d052 2517+"aufs-util.git."
5527c038
JR
2518+While the aufs-util is always necessary, you need either of aufs4-linux
2519+or aufs4-standalone.
1e00d052 2520+
5527c038 2521+The aufs4-linux tree includes the whole linux mainline GIT tree,
1e00d052
AM
2522+git://git.kernel.org/.../torvalds/linux.git.
2523+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
5527c038 2524+build aufs4 as an external kernel module.
2000de60 2525+Several extra patches are not included in this tree. Only
be52b249 2526+aufs4-standalone tree contains them. They are described in the later
2000de60 2527+section "Configuration and Compilation."
1e00d052 2528+
5527c038 2529+On the other hand, the aufs4-standalone tree has only aufs source files
53392da6 2530+and necessary patches, and you can select CONFIG_AUFS_FS=m.
2000de60 2531+But you need to apply all aufs patches manually.
53392da6 2532+
5527c038
JR
2533+You will find GIT branches whose name is in form of "aufs4.x" where "x"
2534+represents the linux kernel version, "linux-4.x". For instance,
2535+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
2536+"aufs4.x-rcN" branch.
1e00d052 2537+
5527c038 2538+o aufs4-linux tree
1e00d052 2539+$ git clone --reference /your/linux/git/tree \
5527c038 2540+ git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
1e00d052 2541+- if you don't have linux GIT tree, then remove "--reference ..."
5527c038
JR
2542+$ cd aufs4-linux.git
2543+$ git checkout origin/aufs4.0
53392da6 2544+
2000de60
JR
2545+Or you may want to directly git-pull aufs into your linux GIT tree, and
2546+leave the patch-work to GIT.
2547+$ cd /your/linux/git/tree
5527c038
JR
2548+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
2549+$ git fetch aufs4
2550+$ git checkout -b my4.0 v4.0
2551+$ (add your local change...)
2552+$ git pull aufs4 aufs4.0
2553+- now you have v4.0 + your_changes + aufs4.0 in your my4.0 branch.
2000de60 2554+- you may need to solve some conflicts between your_changes and
5527c038
JR
2555+ aufs4.0. in this case, git-rerere is recommended so that you can
2556+ solve the similar conflicts automatically when you upgrade to 4.1 or
2000de60
JR
2557+ later in the future.
2558+
5527c038
JR
2559+o aufs4-standalone tree
2560+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
2561+$ cd aufs4-standalone.git
2562+$ git checkout origin/aufs4.0
53392da6
AM
2563+
2564+o aufs-util tree
5527c038
JR
2565+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
2566+- note that the public aufs-util.git is on SourceForge instead of
2567+ GitHUB.
53392da6 2568+$ cd aufs-util.git
5527c038 2569+$ git checkout origin/aufs4.0
53392da6 2570+
5527c038
JR
2571+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
2572+The minor version number, 'x' in '4.x', of aufs may not always
9dbd164d
AM
2573+follow the minor version number of the kernel.
2574+Because changes in the kernel that cause the use of a new
2575+minor version number do not always require changes to aufs-util.
2576+
2577+Since aufs-util has its own minor version number, you may not be
2578+able to find a GIT branch in aufs-util for your kernel's
2579+exact minor version number.
2580+In this case, you should git-checkout the branch for the
53392da6 2581+nearest lower number.
9dbd164d
AM
2582+
2583+For (an unreleased) example:
5527c038
JR
2584+If you are using "linux-4.10" and the "aufs4.10" branch
2585+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
9dbd164d
AM
2586+or something numerically smaller is the branch for your kernel.
2587+
53392da6
AM
2588+Also you can view all branches by
2589+ $ git branch -a
2590+
2591+
2592+3. Configuration and Compilation
2593+----------------------------------------
2594+Make sure you have git-checkout'ed the correct branch.
2595+
5527c038 2596+For aufs4-linux tree,
c06a8ce3 2597+- enable CONFIG_AUFS_FS.
1e00d052
AM
2598+- set other aufs configurations if necessary.
2599+
5527c038 2600+For aufs4-standalone tree,
53392da6
AM
2601+There are several ways to build.
2602+
2603+1.
5527c038
JR
2604+- apply ./aufs4-kbuild.patch to your kernel source files.
2605+- apply ./aufs4-base.patch too.
2606+- apply ./aufs4-mmap.patch too.
2607+- apply ./aufs4-standalone.patch too, if you have a plan to set
2608+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
537831f9
AM
2609+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
2610+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
c06a8ce3 2611+- enable CONFIG_AUFS_FS, you can select either
53392da6
AM
2612+ =m or =y.
2613+- and build your kernel as usual.
2614+- install the built kernel.
c06a8ce3
AM
2615+ Note: Since linux-3.9, every filesystem module requires an alias
2616+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2617+ modules.aliases file if you set CONFIG_AUFS_FS=m.
7eafdf33
AM
2618+- install the header files too by "make headers_install" to the
2619+ directory where you specify. By default, it is $PWD/usr.
b4510431 2620+ "make help" shows a brief note for headers_install.
53392da6
AM
2621+- and reboot your system.
2622+
2623+2.
2624+- module only (CONFIG_AUFS_FS=m).
5527c038
JR
2625+- apply ./aufs4-base.patch to your kernel source files.
2626+- apply ./aufs4-mmap.patch too.
2627+- apply ./aufs4-standalone.patch too.
53392da6
AM
2628+- build your kernel, don't forget "make headers_install", and reboot.
2629+- edit ./config.mk and set other aufs configurations if necessary.
b4510431 2630+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
53392da6
AM
2631+ every aufs configuration.
2632+- build the module by simple "make".
c06a8ce3
AM
2633+ Note: Since linux-3.9, every filesystem module requires an alias
2634+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2635+ modules.aliases file.
53392da6
AM
2636+- you can specify ${KDIR} make variable which points to your kernel
2637+ source tree.
2638+- install the files
2639+ + run "make install" to install the aufs module, or copy the built
b4510431
AM
2640+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
2641+ + run "make install_headers" (instead of headers_install) to install
2642+ the modified aufs header file (you can specify DESTDIR which is
2643+ available in aufs standalone version's Makefile only), or copy
2644+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
2645+ you like manually. By default, the target directory is $PWD/usr.
5527c038 2646+- no need to apply aufs4-kbuild.patch, nor copying source files to your
53392da6
AM
2647+ kernel source tree.
2648+
b4510431 2649+Note: The header file aufs_type.h is necessary to build aufs-util
53392da6
AM
2650+ as well as "make headers_install" in the kernel source tree.
2651+ headers_install is subject to be forgotten, but it is essentially
2652+ necessary, not only for building aufs-util.
2653+ You may not meet problems without headers_install in some older
2654+ version though.
2655+
2656+And then,
2657+- read README in aufs-util, build and install it
9dbd164d
AM
2658+- note that your distribution may contain an obsoleted version of
2659+ aufs_type.h in /usr/include/linux or something. When you build aufs
2660+ utilities, make sure that your compiler refers the correct aufs header
2661+ file which is built by "make headers_install."
53392da6
AM
2662+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
2663+ then run "make install_ulib" too. And refer to the aufs manual in
2664+ detail.
2665+
5527c038 2666+There are several other patches in aufs4-standalone.git. They are all
38d290e6 2667+optional. When you meet some problems, they will help you.
5527c038 2668+- aufs4-loopback.patch
38d290e6
JR
2669+ Supports a nested loopback mount in a branch-fs. This patch is
2670+ unnecessary until aufs produces a message like "you may want to try
2671+ another patch for loopback file".
2672+- vfs-ino.patch
2673+ Modifies a system global kernel internal function get_next_ino() in
2674+ order to stop assigning 0 for an inode-number. Not directly related to
2675+ aufs, but recommended generally.
2676+- tmpfs-idr.patch
2677+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
2678+ the size of aufs XINO files for tmpfs branch. Also it prevents the
2679+ duplication of inode number, which is important for backup tools and
2680+ other utilities. When you find aufs XINO files for tmpfs branch
2681+ growing too much, try this patch.
be52b249
AM
2682+- lockdep-debug.patch
2683+ Because aufs is not only an ordinary filesystem (callee of VFS), but
2684+ also a caller of VFS functions for branch filesystems, subclassing of
2685+ the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging
2686+ feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will
2687+ need to apply this debug patch to expand several constant values.
2688+ If you don't know what LOCKDEP is, then you don't have to apply this patch.
38d290e6 2689+
53392da6
AM
2690+
2691+4. Usage
2692+----------------------------------------
2693+First, make sure aufs-util is installed, and please read the aufs
2694+manual, aufs.5 in aufs-util.git tree.
2695+$ man -l aufs.5
2696+
2697+And then,
2698+$ mkdir /tmp/rw /tmp/aufs
2699+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
2700+
2701+Here is another example. The result is equivalent.
2702+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
2703+ Or
2704+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
2705+# mount -o remount,append:${HOME} /tmp/aufs
2706+
2707+Then, you can see the whole tree of your home dir through /tmp/aufs. If
2708+you modify a file under /tmp/aufs, the one in your home directory is
2709+not affected; instead, a file with the same name will be newly created
2710+under /tmp/rw. And all of your modifications to a file will be applied to
2711+the one under /tmp/rw. This is called the file based Copy on Write
2712+(COW) method.
2713+Aufs mount options are described in aufs.5.
2714+If you run chroot or something and make your aufs as a root directory,
2715+then you need to customize the shutdown script. See the aufs manual for
2716+details.
2717+
2718+Additionally, there are some sample usages of aufs, such as a
2719+diskless system with network booting, and LiveCD over NFS.
2720+See sample dir in CVS tree on SourceForge.
2721+
2722+
2723+5. Contact
2724+----------------------------------------
2725+When you have any problems or strange behaviour in aufs, please let me
2726+know with:
2727+- /proc/mounts (instead of the output of mount(8))
2728+- /sys/module/aufs/*
2729+- /sys/fs/aufs/* (if you have them)
2730+- /debug/aufs/* (if you have them)
2731+- linux kernel version
2732+ if your kernel is not plain, for example modified by a distributor,
2733+ the URL where I can download its source is necessary too.
2734+- aufs version which was printed at loading the module or booting the
2735+ system, instead of the date you downloaded.
2736+- configuration (define/undefine CONFIG_AUFS_xxx)
2737+- kernel configuration or /proc/config.gz (if you have it)
2738+- behaviour which you think to be incorrect
2739+- actual operation, reproducible one is better
2740+- mailto: aufs-users at lists.sourceforge.net
2741+
2742+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
2743+and Feature Requests) on SourceForge. Please join and write to
2744+aufs-users ML.
2745+
2746+
2747+6. Acknowledgements
2748+----------------------------------------
2749+Thanks to everyone who has tried and is using aufs, and to whoever
2750+has reported a bug or given any feedback.
2751+
2752+Especially donators:
2753+Tomas Matejicek(slax.org) made a donation (much more than once).
2754+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
2755+ scripts) is making "doubling" donations.
2756+ Unfortunately I cannot list all of the donators, but I really
b4510431 2757+ appreciate them.
53392da6
AM
2758+ It ends Aug 2010, but the ordinary donation URL is still available.
2759+ <http://sourceforge.net/donate/index.php?group_id=167503>
2760+Dai Itasaka made a donation (2007/8).
2761+Chuck Smith made a donation (2008/4, 10 and 12).
2762+Henk Schoneveld made a donation (2008/9).
2763+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
2764+Francois Dupoux made a donation (2008/11).
2765+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
2766+ aufs2 GIT tree (2009/2).
2767+William Grant made a donation (2009/3).
2768+Patrick Lane made a donation (2009/4).
2769+The Mail Archive (mail-archive.com) made donations (2009/5).
2770+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
2771+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
2772+Pavel Pronskiy made a donation (2011/2).
2773+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
2774+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
537831f9
AM
2775+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
2776+11).
1e00d052 2777+Sam Liddicott made a donation (2011/9).
86dc4139
AM
2778+Era Scarecrow made a donation (2013/4).
2779+Bor Ratajc made a donation (2013/4).
2780+Alessandro Gorreta made a donation (2013/4).
2781+POIRETTE Marc made a donation (2013/4).
2782+Alessandro Gorreta made a donation (2013/4).
2783+lauri kasvandik made a donation (2013/5).
392086de 2784+"pemasu from Finland" made a donation (2013/7).
523b37e3
AM
2785+The Parted Magic Project made a donation (2013/9 and 11).
2786+Pavel Barta made a donation (2013/10).
38d290e6 2787+Nikolay Pertsev made a donation (2014/5).
c2c0f25c 2788+James B made a donation (2014/7 and 2015/7).
076b876e 2789+Stefano Di Biase made a donation (2014/8).
2000de60 2790+Daniel Epellei made a donation (2015/1).
8cdd5066 2791+OmegaPhil made a donation (2016/1).
5afbbe0d 2792+Tomasz Szewczyk made a donation (2016/4).
f2c43d5f 2793+James Burry made a donation (2016/12).
53392da6
AM
2794+
2795+Thank you very much.
2796+Donations are always, including future donations, very important and
2797+helpful for me to keep on developing aufs.
2798+
2799+
2800+7.
2801+----------------------------------------
2802+If you are an experienced user, no explanation is needed. Aufs is
2803+just a linux filesystem.
2804+
2805+
2806+Enjoy!
2807+
2808+# Local variables: ;
2809+# mode: text;
2810+# End: ;
7f207e10
AM
2811diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
2812--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 2813+++ linux/fs/aufs/aufs.h 2016-10-09 16:55:36.486034798 +0200
523b37e3 2814@@ -0,0 +1,59 @@
7f207e10 2815+/*
8cdd5066 2816+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
2817+ *
2818+ * This program, aufs is free software; you can redistribute it and/or modify
2819+ * it under the terms of the GNU General Public License as published by
2820+ * the Free Software Foundation; either version 2 of the License, or
2821+ * (at your option) any later version.
2822+ *
2823+ * This program is distributed in the hope that it will be useful,
2824+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2825+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2826+ * GNU General Public License for more details.
2827+ *
2828+ * You should have received a copy of the GNU General Public License
523b37e3 2829+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2830+ */
2831+
2832+/*
2833+ * all header files
2834+ */
2835+
2836+#ifndef __AUFS_H__
2837+#define __AUFS_H__
2838+
2839+#ifdef __KERNEL__
2840+
2841+#define AuStub(type, name, body, ...) \
2842+ static inline type name(__VA_ARGS__) { body; }
2843+
2844+#define AuStubVoid(name, ...) \
2845+ AuStub(void, name, , __VA_ARGS__)
2846+#define AuStubInt0(name, ...) \
2847+ AuStub(int, name, return 0, __VA_ARGS__)
2848+
2849+#include "debug.h"
2850+
2851+#include "branch.h"
2852+#include "cpup.h"
2853+#include "dcsub.h"
2854+#include "dbgaufs.h"
2855+#include "dentry.h"
2856+#include "dir.h"
2857+#include "dynop.h"
2858+#include "file.h"
2859+#include "fstype.h"
2860+#include "inode.h"
2861+#include "loop.h"
2862+#include "module.h"
7f207e10
AM
2863+#include "opts.h"
2864+#include "rwsem.h"
2865+#include "spl.h"
2866+#include "super.h"
2867+#include "sysaufs.h"
2868+#include "vfsub.h"
2869+#include "whout.h"
2870+#include "wkq.h"
2871+
2872+#endif /* __KERNEL__ */
2873+#endif /* __AUFS_H__ */
2874diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
2875--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
2876+++ linux/fs/aufs/branch.c 2016-10-09 16:55:38.886097714 +0200
2877@@ -0,0 +1,1412 @@
7f207e10 2878+/*
8cdd5066 2879+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
2880+ *
2881+ * This program, aufs is free software; you can redistribute it and/or modify
2882+ * it under the terms of the GNU General Public License as published by
2883+ * the Free Software Foundation; either version 2 of the License, or
2884+ * (at your option) any later version.
2885+ *
2886+ * This program is distributed in the hope that it will be useful,
2887+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2888+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2889+ * GNU General Public License for more details.
2890+ *
2891+ * You should have received a copy of the GNU General Public License
523b37e3 2892+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2893+ */
2894+
2895+/*
2896+ * branch management
2897+ */
2898+
027c5e7a 2899+#include <linux/compat.h>
7f207e10
AM
2900+#include <linux/statfs.h>
2901+#include "aufs.h"
2902+
2903+/*
2904+ * free a single branch
1facf9fc 2905+ */
2906+static void au_br_do_free(struct au_branch *br)
2907+{
2908+ int i;
2909+ struct au_wbr *wbr;
4a4d8108 2910+ struct au_dykey **key;
1facf9fc 2911+
027c5e7a
AM
2912+ au_hnotify_fin_br(br);
2913+
1facf9fc 2914+ if (br->br_xino.xi_file)
2915+ fput(br->br_xino.xi_file);
2916+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
2917+
5afbbe0d
AM
2918+ AuDebugOn(au_br_count(br));
2919+ au_br_count_fin(br);
1facf9fc 2920+
2921+ wbr = br->br_wbr;
2922+ if (wbr) {
2923+ for (i = 0; i < AuBrWh_Last; i++)
2924+ dput(wbr->wbr_wh[i]);
2925+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 2926+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 2927+ }
2928+
076b876e
AM
2929+ if (br->br_fhsm) {
2930+ au_br_fhsm_fin(br->br_fhsm);
f0c0a007 2931+ au_delayed_kfree(br->br_fhsm);
076b876e
AM
2932+ }
2933+
4a4d8108
AM
2934+ key = br->br_dykey;
2935+ for (i = 0; i < AuBrDynOp; i++, key++)
2936+ if (*key)
2937+ au_dy_put(*key);
2938+ else
2939+ break;
2940+
537831f9
AM
2941+ /* recursive lock, s_umount of branch's */
2942+ lockdep_off();
86dc4139 2943+ path_put(&br->br_path);
537831f9 2944+ lockdep_on();
f0c0a007
AM
2945+ if (wbr)
2946+ au_delayed_kfree(wbr);
2947+ au_delayed_kfree(br);
1facf9fc 2948+}
2949+
2950+/*
2951+ * frees all branches
2952+ */
2953+void au_br_free(struct au_sbinfo *sbinfo)
2954+{
2955+ aufs_bindex_t bmax;
2956+ struct au_branch **br;
2957+
dece6358
AM
2958+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2959+
5afbbe0d 2960+ bmax = sbinfo->si_bbot + 1;
1facf9fc 2961+ br = sbinfo->si_branch;
2962+ while (bmax--)
2963+ au_br_do_free(*br++);
2964+}
2965+
2966+/*
2967+ * find the index of a branch which is specified by @br_id.
2968+ */
2969+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
2970+{
5afbbe0d 2971+ aufs_bindex_t bindex, bbot;
1facf9fc 2972+
5afbbe0d
AM
2973+ bbot = au_sbbot(sb);
2974+ for (bindex = 0; bindex <= bbot; bindex++)
1facf9fc 2975+ if (au_sbr_id(sb, bindex) == br_id)
2976+ return bindex;
2977+ return -1;
2978+}
2979+
2980+/* ---------------------------------------------------------------------- */
2981+
2982+/*
2983+ * add a branch
2984+ */
2985+
b752ccd1
AM
2986+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
2987+ struct dentry *h_root)
1facf9fc 2988+{
b752ccd1
AM
2989+ if (unlikely(h_adding == h_root
2990+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 2991+ return 1;
b752ccd1
AM
2992+ if (h_adding->d_sb != h_root->d_sb)
2993+ return 0;
2994+ return au_test_subdir(h_adding, h_root)
2995+ || au_test_subdir(h_root, h_adding);
1facf9fc 2996+}
2997+
2998+/*
2999+ * returns a newly allocated branch. @new_nbranch is a number of branches
3000+ * after adding a branch.
3001+ */
3002+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
3003+ int perm)
3004+{
3005+ struct au_branch *add_branch;
3006+ struct dentry *root;
5527c038 3007+ struct inode *inode;
4a4d8108 3008+ int err;
1facf9fc 3009+
4a4d8108 3010+ err = -ENOMEM;
1facf9fc 3011+ root = sb->s_root;
be52b249 3012+ add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
1facf9fc 3013+ if (unlikely(!add_branch))
3014+ goto out;
3015+
027c5e7a
AM
3016+ err = au_hnotify_init_br(add_branch, perm);
3017+ if (unlikely(err))
3018+ goto out_br;
3019+
1facf9fc 3020+ if (au_br_writable(perm)) {
3021+ /* may be freed separately at changing the branch permission */
be52b249 3022+ add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
1facf9fc 3023+ GFP_NOFS);
3024+ if (unlikely(!add_branch->br_wbr))
027c5e7a 3025+ goto out_hnotify;
1facf9fc 3026+ }
3027+
076b876e
AM
3028+ if (au_br_fhsm(perm)) {
3029+ err = au_fhsm_br_alloc(add_branch);
3030+ if (unlikely(err))
3031+ goto out_wbr;
3032+ }
3033+
e2f27e51 3034+ err = au_sbr_realloc(au_sbi(sb), new_nbranch, /*may_shrink*/0);
4a4d8108 3035+ if (!err)
e2f27e51 3036+ err = au_di_realloc(au_di(root), new_nbranch, /*may_shrink*/0);
5527c038
JR
3037+ if (!err) {
3038+ inode = d_inode(root);
e2f27e51 3039+ err = au_hinode_realloc(au_ii(inode), new_nbranch, /*may_shrink*/0);
5527c038 3040+ }
4a4d8108
AM
3041+ if (!err)
3042+ return add_branch; /* success */
1facf9fc 3043+
076b876e 3044+out_wbr:
f0c0a007
AM
3045+ if (add_branch->br_wbr)
3046+ au_delayed_kfree(add_branch->br_wbr);
027c5e7a
AM
3047+out_hnotify:
3048+ au_hnotify_fin_br(add_branch);
4f0767ce 3049+out_br:
f0c0a007 3050+ au_delayed_kfree(add_branch);
4f0767ce 3051+out:
4a4d8108 3052+ return ERR_PTR(err);
1facf9fc 3053+}
3054+
3055+/*
3056+ * test if the branch permission is legal or not.
3057+ */
3058+static int test_br(struct inode *inode, int brperm, char *path)
3059+{
3060+ int err;
3061+
4a4d8108
AM
3062+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
3063+ if (!err)
3064+ goto out;
1facf9fc 3065+
4a4d8108
AM
3066+ err = -EINVAL;
3067+ pr_err("write permission for readonly mount or inode, %s\n", path);
3068+
4f0767ce 3069+out:
1facf9fc 3070+ return err;
3071+}
3072+
3073+/*
3074+ * returns:
3075+ * 0: success, the caller will add it
3076+ * plus: success, it is already unified, the caller should ignore it
3077+ * minus: error
3078+ */
3079+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
3080+{
3081+ int err;
5afbbe0d 3082+ aufs_bindex_t bbot, bindex;
5527c038 3083+ struct dentry *root, *h_dentry;
1facf9fc 3084+ struct inode *inode, *h_inode;
3085+
3086+ root = sb->s_root;
5afbbe0d
AM
3087+ bbot = au_sbbot(sb);
3088+ if (unlikely(bbot >= 0
1facf9fc 3089+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
3090+ err = 1;
3091+ if (!remount) {
3092+ err = -EINVAL;
4a4d8108 3093+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 3094+ }
3095+ goto out;
3096+ }
3097+
3098+ err = -ENOSPC; /* -E2BIG; */
3099+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
5afbbe0d 3100+ || AUFS_BRANCH_MAX - 1 <= bbot)) {
4a4d8108 3101+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 3102+ goto out;
3103+ }
3104+
3105+ err = -EDOM;
5afbbe0d 3106+ if (unlikely(add->bindex < 0 || bbot + 1 < add->bindex)) {
4a4d8108 3107+ pr_err("bad index %d\n", add->bindex);
1facf9fc 3108+ goto out;
3109+ }
3110+
5527c038 3111+ inode = d_inode(add->path.dentry);
1facf9fc 3112+ err = -ENOENT;
3113+ if (unlikely(!inode->i_nlink)) {
4a4d8108 3114+ pr_err("no existence %s\n", add->pathname);
1facf9fc 3115+ goto out;
3116+ }
3117+
3118+ err = -EINVAL;
3119+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 3120+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 3121+ goto out;
3122+ }
3123+
3124+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
3125+ pr_err("unsupported filesystem, %s (%s)\n",
3126+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 3127+ goto out;
3128+ }
3129+
c1595e42
JR
3130+ if (unlikely(inode->i_sb->s_stack_depth)) {
3131+ pr_err("already stacked, %s (%s)\n",
3132+ add->pathname, au_sbtype(inode->i_sb));
3133+ goto out;
3134+ }
3135+
5527c038 3136+ err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
1facf9fc 3137+ if (unlikely(err))
3138+ goto out;
3139+
5afbbe0d 3140+ if (bbot < 0)
1facf9fc 3141+ return 0; /* success */
3142+
3143+ err = -EINVAL;
5afbbe0d 3144+ for (bindex = 0; bindex <= bbot; bindex++)
1facf9fc 3145+ if (unlikely(test_overlap(sb, add->path.dentry,
3146+ au_h_dptr(root, bindex)))) {
4a4d8108 3147+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 3148+ goto out;
3149+ }
3150+
3151+ err = 0;
3152+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
5527c038
JR
3153+ h_dentry = au_h_dptr(root, 0);
3154+ h_inode = d_inode(h_dentry);
1facf9fc 3155+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
0c3ec466
AM
3156+ || !uid_eq(h_inode->i_uid, inode->i_uid)
3157+ || !gid_eq(h_inode->i_gid, inode->i_gid))
3158+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
3159+ add->pathname,
3160+ i_uid_read(inode), i_gid_read(inode),
3161+ (inode->i_mode & S_IALLUGO),
3162+ i_uid_read(h_inode), i_gid_read(h_inode),
3163+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 3164+ }
3165+
4f0767ce 3166+out:
1facf9fc 3167+ return err;
3168+}
3169+
3170+/*
3171+ * initialize or clean the whiteouts for an adding branch
3172+ */
3173+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
86dc4139 3174+ int new_perm)
1facf9fc 3175+{
3176+ int err, old_perm;
3177+ aufs_bindex_t bindex;
febd17d6 3178+ struct inode *h_inode;
1facf9fc 3179+ struct au_wbr *wbr;
3180+ struct au_hinode *hdir;
5527c038 3181+ struct dentry *h_dentry;
1facf9fc 3182+
86dc4139
AM
3183+ err = vfsub_mnt_want_write(au_br_mnt(br));
3184+ if (unlikely(err))
3185+ goto out;
3186+
1facf9fc 3187+ wbr = br->br_wbr;
3188+ old_perm = br->br_perm;
3189+ br->br_perm = new_perm;
3190+ hdir = NULL;
febd17d6 3191+ h_inode = NULL;
1facf9fc 3192+ bindex = au_br_index(sb, br->br_id);
3193+ if (0 <= bindex) {
5527c038 3194+ hdir = au_hi(d_inode(sb->s_root), bindex);
5afbbe0d 3195+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 3196+ } else {
5527c038 3197+ h_dentry = au_br_dentry(br);
febd17d6
JR
3198+ h_inode = d_inode(h_dentry);
3199+ inode_lock_nested(h_inode, AuLsc_I_PARENT);
1facf9fc 3200+ }
3201+ if (!wbr)
86dc4139 3202+ err = au_wh_init(br, sb);
1facf9fc 3203+ else {
3204+ wbr_wh_write_lock(wbr);
86dc4139 3205+ err = au_wh_init(br, sb);
1facf9fc 3206+ wbr_wh_write_unlock(wbr);
3207+ }
3208+ if (hdir)
5afbbe0d 3209+ au_hn_inode_unlock(hdir);
1facf9fc 3210+ else
febd17d6 3211+ inode_unlock(h_inode);
86dc4139 3212+ vfsub_mnt_drop_write(au_br_mnt(br));
1facf9fc 3213+ br->br_perm = old_perm;
3214+
3215+ if (!err && wbr && !au_br_writable(new_perm)) {
f0c0a007 3216+ au_delayed_kfree(wbr);
1facf9fc 3217+ br->br_wbr = NULL;
3218+ }
3219+
86dc4139 3220+out:
1facf9fc 3221+ return err;
3222+}
3223+
3224+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
86dc4139 3225+ int perm)
1facf9fc 3226+{
3227+ int err;
4a4d8108 3228+ struct kstatfs kst;
1facf9fc 3229+ struct au_wbr *wbr;
3230+
3231+ wbr = br->br_wbr;
dece6358 3232+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 3233+ atomic_set(&wbr->wbr_wh_running, 0);
1facf9fc 3234+
4a4d8108
AM
3235+ /*
3236+ * a limit for rmdir/rename a dir
523b37e3 3237+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
4a4d8108 3238+ */
86dc4139 3239+ err = vfs_statfs(&br->br_path, &kst);
4a4d8108
AM
3240+ if (unlikely(err))
3241+ goto out;
3242+ err = -EINVAL;
3243+ if (kst.f_namelen >= NAME_MAX)
86dc4139 3244+ err = au_br_init_wh(sb, br, perm);
4a4d8108 3245+ else
523b37e3
AM
3246+ pr_err("%pd(%s), unsupported namelen %ld\n",
3247+ au_br_dentry(br),
86dc4139 3248+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
1facf9fc 3249+
4f0767ce 3250+out:
1facf9fc 3251+ return err;
3252+}
3253+
c1595e42 3254+/* initialize a new branch */
1facf9fc 3255+static int au_br_init(struct au_branch *br, struct super_block *sb,
3256+ struct au_opt_add *add)
3257+{
3258+ int err;
5527c038 3259+ struct inode *h_inode;
1facf9fc 3260+
3261+ err = 0;
1facf9fc 3262+ mutex_init(&br->br_xino.xi_nondir_mtx);
3263+ br->br_perm = add->perm;
86dc4139 3264+ br->br_path = add->path; /* set first, path_get() later */
4a4d8108 3265+ spin_lock_init(&br->br_dykey_lock);
5afbbe0d 3266+ au_br_count_init(br);
1facf9fc 3267+ atomic_set(&br->br_xino_running, 0);
3268+ br->br_id = au_new_br_id(sb);
7f207e10 3269+ AuDebugOn(br->br_id < 0);
1facf9fc 3270+
3271+ if (au_br_writable(add->perm)) {
86dc4139 3272+ err = au_wbr_init(br, sb, add->perm);
1facf9fc 3273+ if (unlikely(err))
b752ccd1 3274+ goto out_err;
1facf9fc 3275+ }
3276+
3277+ if (au_opt_test(au_mntflags(sb), XINO)) {
5527c038
JR
3278+ h_inode = d_inode(add->path.dentry);
3279+ err = au_xino_br(sb, br, h_inode->i_ino,
1facf9fc 3280+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
3281+ if (unlikely(err)) {
3282+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 3283+ goto out_err;
1facf9fc 3284+ }
3285+ }
3286+
3287+ sysaufs_br_init(br);
86dc4139 3288+ path_get(&br->br_path);
b752ccd1 3289+ goto out; /* success */
1facf9fc 3290+
4f0767ce 3291+out_err:
86dc4139 3292+ memset(&br->br_path, 0, sizeof(br->br_path));
4f0767ce 3293+out:
1facf9fc 3294+ return err;
3295+}
3296+
3297+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
5afbbe0d 3298+ struct au_branch *br, aufs_bindex_t bbot,
1facf9fc 3299+ aufs_bindex_t amount)
3300+{
3301+ struct au_branch **brp;
3302+
dece6358
AM
3303+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3304+
1facf9fc 3305+ brp = sbinfo->si_branch + bindex;
3306+ memmove(brp + 1, brp, sizeof(*brp) * amount);
3307+ *brp = br;
5afbbe0d
AM
3308+ sbinfo->si_bbot++;
3309+ if (unlikely(bbot < 0))
3310+ sbinfo->si_bbot = 0;
1facf9fc 3311+}
3312+
3313+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
5afbbe0d 3314+ aufs_bindex_t bbot, aufs_bindex_t amount)
1facf9fc 3315+{
3316+ struct au_hdentry *hdp;
3317+
1308ab2a 3318+ AuRwMustWriteLock(&dinfo->di_rwsem);
3319+
5afbbe0d 3320+ hdp = au_hdentry(dinfo, bindex);
1facf9fc 3321+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
3322+ au_h_dentry_init(hdp);
5afbbe0d
AM
3323+ dinfo->di_bbot++;
3324+ if (unlikely(bbot < 0))
3325+ dinfo->di_btop = 0;
1facf9fc 3326+}
3327+
3328+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
5afbbe0d 3329+ aufs_bindex_t bbot, aufs_bindex_t amount)
1facf9fc 3330+{
3331+ struct au_hinode *hip;
3332+
1308ab2a 3333+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3334+
5afbbe0d 3335+ hip = au_hinode(iinfo, bindex);
1facf9fc 3336+ memmove(hip + 1, hip, sizeof(*hip) * amount);
5afbbe0d
AM
3337+ au_hinode_init(hip);
3338+ iinfo->ii_bbot++;
3339+ if (unlikely(bbot < 0))
3340+ iinfo->ii_btop = 0;
1facf9fc 3341+}
3342+
86dc4139
AM
3343+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
3344+ aufs_bindex_t bindex)
1facf9fc 3345+{
86dc4139 3346+ struct dentry *root, *h_dentry;
5527c038 3347+ struct inode *root_inode, *h_inode;
5afbbe0d 3348+ aufs_bindex_t bbot, amount;
1facf9fc 3349+
3350+ root = sb->s_root;
5527c038 3351+ root_inode = d_inode(root);
5afbbe0d
AM
3352+ bbot = au_sbbot(sb);
3353+ amount = bbot + 1 - bindex;
86dc4139 3354+ h_dentry = au_br_dentry(br);
53392da6 3355+ au_sbilist_lock();
5afbbe0d
AM
3356+ au_br_do_add_brp(au_sbi(sb), bindex, br, bbot, amount);
3357+ au_br_do_add_hdp(au_di(root), bindex, bbot, amount);
3358+ au_br_do_add_hip(au_ii(root_inode), bindex, bbot, amount);
1facf9fc 3359+ au_set_h_dptr(root, bindex, dget(h_dentry));
5527c038
JR
3360+ h_inode = d_inode(h_dentry);
3361+ au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
53392da6 3362+ au_sbilist_unlock();
1facf9fc 3363+}
3364+
3365+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
3366+{
3367+ int err;
5afbbe0d 3368+ aufs_bindex_t bbot, add_bindex;
1facf9fc 3369+ struct dentry *root, *h_dentry;
3370+ struct inode *root_inode;
3371+ struct au_branch *add_branch;
3372+
3373+ root = sb->s_root;
5527c038 3374+ root_inode = d_inode(root);
1facf9fc 3375+ IMustLock(root_inode);
5afbbe0d 3376+ IiMustWriteLock(root_inode);
1facf9fc 3377+ err = test_add(sb, add, remount);
3378+ if (unlikely(err < 0))
3379+ goto out;
3380+ if (err) {
3381+ err = 0;
3382+ goto out; /* success */
3383+ }
3384+
5afbbe0d
AM
3385+ bbot = au_sbbot(sb);
3386+ add_branch = au_br_alloc(sb, bbot + 2, add->perm);
1facf9fc 3387+ err = PTR_ERR(add_branch);
3388+ if (IS_ERR(add_branch))
3389+ goto out;
3390+
3391+ err = au_br_init(add_branch, sb, add);
3392+ if (unlikely(err)) {
3393+ au_br_do_free(add_branch);
3394+ goto out;
3395+ }
3396+
3397+ add_bindex = add->bindex;
1facf9fc 3398+ if (!remount)
86dc4139 3399+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3400+ else {
3401+ sysaufs_brs_del(sb, add_bindex);
86dc4139 3402+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3403+ sysaufs_brs_add(sb, add_bindex);
3404+ }
3405+
86dc4139 3406+ h_dentry = add->path.dentry;
1308ab2a 3407+ if (!add_bindex) {
1facf9fc 3408+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 3409+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
3410+ } else
5527c038 3411+ au_add_nlink(root_inode, d_inode(h_dentry));
1facf9fc 3412+
3413+ /*
4a4d8108 3414+ * this test/set prevents aufs from handling unnecesary notify events
027c5e7a 3415+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 3416+ * once detached from aufs.
3417+ */
3418+ if (au_xino_brid(sb) < 0
3419+ && au_br_writable(add_branch->br_perm)
3420+ && !au_test_fs_bad_xino(h_dentry->d_sb)
3421+ && add_branch->br_xino.xi_file
2000de60 3422+ && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
1facf9fc 3423+ au_xino_brid_set(sb, add_branch->br_id);
3424+
4f0767ce 3425+out:
1facf9fc 3426+ return err;
3427+}
3428+
3429+/* ---------------------------------------------------------------------- */
3430+
79b8bda9 3431+static unsigned long long au_farray_cb(struct super_block *sb, void *a,
076b876e
AM
3432+ unsigned long long max __maybe_unused,
3433+ void *arg)
3434+{
3435+ unsigned long long n;
3436+ struct file **p, *f;
3437+ struct au_sphlhead *files;
3438+ struct au_finfo *finfo;
076b876e
AM
3439+
3440+ n = 0;
3441+ p = a;
3442+ files = &au_sbi(sb)->si_files;
3443+ spin_lock(&files->spin);
3444+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
3445+ f = finfo->fi_file;
3446+ if (file_count(f)
3447+ && !special_file(file_inode(f)->i_mode)) {
3448+ get_file(f);
3449+ *p++ = f;
3450+ n++;
3451+ AuDebugOn(n > max);
3452+ }
3453+ }
3454+ spin_unlock(&files->spin);
3455+
3456+ return n;
3457+}
3458+
3459+static struct file **au_farray_alloc(struct super_block *sb,
3460+ unsigned long long *max)
3461+{
5afbbe0d 3462+ *max = au_nfiles(sb);
79b8bda9 3463+ return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
076b876e
AM
3464+}
3465+
3466+static void au_farray_free(struct file **a, unsigned long long max)
3467+{
3468+ unsigned long long ull;
3469+
3470+ for (ull = 0; ull < max; ull++)
3471+ if (a[ull])
3472+ fput(a[ull]);
be52b249 3473+ kvfree(a);
076b876e
AM
3474+}
3475+
3476+/* ---------------------------------------------------------------------- */
3477+
1facf9fc 3478+/*
3479+ * delete a branch
3480+ */
3481+
3482+/* to show the line number, do not make it inlined function */
4a4d8108 3483+#define AuVerbose(do_info, fmt, ...) do { \
1facf9fc 3484+ if (do_info) \
4a4d8108 3485+ pr_info(fmt, ##__VA_ARGS__); \
1facf9fc 3486+} while (0)
3487+
5afbbe0d
AM
3488+static int au_test_ibusy(struct inode *inode, aufs_bindex_t btop,
3489+ aufs_bindex_t bbot)
027c5e7a 3490+{
5afbbe0d 3491+ return (inode && !S_ISDIR(inode->i_mode)) || btop == bbot;
027c5e7a
AM
3492+}
3493+
5afbbe0d
AM
3494+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t btop,
3495+ aufs_bindex_t bbot)
027c5e7a 3496+{
5afbbe0d 3497+ return au_test_ibusy(d_inode(dentry), btop, bbot);
027c5e7a
AM
3498+}
3499+
1facf9fc 3500+/*
3501+ * test if the branch is deletable or not.
3502+ */
3503+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 3504+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3505+{
3506+ int err, i, j, ndentry;
5afbbe0d 3507+ aufs_bindex_t btop, bbot;
1facf9fc 3508+ struct au_dcsub_pages dpages;
3509+ struct au_dpage *dpage;
3510+ struct dentry *d;
1facf9fc 3511+
3512+ err = au_dpages_init(&dpages, GFP_NOFS);
3513+ if (unlikely(err))
3514+ goto out;
3515+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
3516+ if (unlikely(err))
3517+ goto out_dpages;
3518+
1facf9fc 3519+ for (i = 0; !err && i < dpages.ndpage; i++) {
3520+ dpage = dpages.dpages + i;
3521+ ndentry = dpage->ndentry;
3522+ for (j = 0; !err && j < ndentry; j++) {
3523+ d = dpage->dentries[j];
c1595e42 3524+ AuDebugOn(au_dcount(d) <= 0);
027c5e7a 3525+ if (!au_digen_test(d, sigen)) {
1facf9fc 3526+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
3527+ if (unlikely(au_dbrange_test(d))) {
3528+ di_read_unlock(d, AuLock_IR);
3529+ continue;
3530+ }
3531+ } else {
1facf9fc 3532+ di_write_lock_child(d);
027c5e7a
AM
3533+ if (unlikely(au_dbrange_test(d))) {
3534+ di_write_unlock(d);
3535+ continue;
3536+ }
1facf9fc 3537+ err = au_reval_dpath(d, sigen);
3538+ if (!err)
3539+ di_downgrade_lock(d, AuLock_IR);
3540+ else {
3541+ di_write_unlock(d);
3542+ break;
3543+ }
3544+ }
3545+
027c5e7a 3546+ /* AuDbgDentry(d); */
5afbbe0d
AM
3547+ btop = au_dbtop(d);
3548+ bbot = au_dbbot(d);
3549+ if (btop <= bindex
3550+ && bindex <= bbot
1facf9fc 3551+ && au_h_dptr(d, bindex)
5afbbe0d 3552+ && au_test_dbusy(d, btop, bbot)) {
1facf9fc 3553+ err = -EBUSY;
523b37e3 3554+ AuVerbose(verbose, "busy %pd\n", d);
027c5e7a 3555+ AuDbgDentry(d);
1facf9fc 3556+ }
3557+ di_read_unlock(d, AuLock_IR);
3558+ }
3559+ }
3560+
4f0767ce 3561+out_dpages:
1facf9fc 3562+ au_dpages_free(&dpages);
4f0767ce 3563+out:
1facf9fc 3564+ return err;
3565+}
3566+
3567+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 3568+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3569+{
3570+ int err;
7f207e10
AM
3571+ unsigned long long max, ull;
3572+ struct inode *i, **array;
5afbbe0d 3573+ aufs_bindex_t btop, bbot;
1facf9fc 3574+
7f207e10
AM
3575+ array = au_iarray_alloc(sb, &max);
3576+ err = PTR_ERR(array);
3577+ if (IS_ERR(array))
3578+ goto out;
3579+
1facf9fc 3580+ err = 0;
7f207e10
AM
3581+ AuDbg("b%d\n", bindex);
3582+ for (ull = 0; !err && ull < max; ull++) {
3583+ i = array[ull];
076b876e
AM
3584+ if (unlikely(!i))
3585+ break;
7f207e10 3586+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 3587+ continue;
3588+
7f207e10 3589+ /* AuDbgInode(i); */
537831f9 3590+ if (au_iigen(i, NULL) == sigen)
1facf9fc 3591+ ii_read_lock_child(i);
3592+ else {
3593+ ii_write_lock_child(i);
027c5e7a
AM
3594+ err = au_refresh_hinode_self(i);
3595+ au_iigen_dec(i);
1facf9fc 3596+ if (!err)
3597+ ii_downgrade_lock(i);
3598+ else {
3599+ ii_write_unlock(i);
3600+ break;
3601+ }
3602+ }
3603+
5afbbe0d
AM
3604+ btop = au_ibtop(i);
3605+ bbot = au_ibbot(i);
3606+ if (btop <= bindex
3607+ && bindex <= bbot
1facf9fc 3608+ && au_h_iptr(i, bindex)
5afbbe0d 3609+ && au_test_ibusy(i, btop, bbot)) {
1facf9fc 3610+ err = -EBUSY;
3611+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 3612+ AuDbgInode(i);
1facf9fc 3613+ }
3614+ ii_read_unlock(i);
3615+ }
7f207e10 3616+ au_iarray_free(array, max);
1facf9fc 3617+
7f207e10 3618+out:
1facf9fc 3619+ return err;
3620+}
3621+
b752ccd1
AM
3622+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
3623+ const unsigned int verbose)
1facf9fc 3624+{
3625+ int err;
3626+ unsigned int sigen;
3627+
3628+ sigen = au_sigen(root->d_sb);
3629+ DiMustNoWaiters(root);
5527c038 3630+ IiMustNoWaiters(d_inode(root));
1facf9fc 3631+ di_write_unlock(root);
b752ccd1 3632+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 3633+ if (!err)
b752ccd1 3634+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 3635+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
3636+
3637+ return err;
3638+}
3639+
076b876e
AM
3640+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
3641+ struct file **to_free, int *idx)
3642+{
3643+ int err;
c1595e42 3644+ unsigned char matched, root;
5afbbe0d 3645+ aufs_bindex_t bindex, bbot;
076b876e
AM
3646+ struct au_fidir *fidir;
3647+ struct au_hfile *hfile;
3648+
3649+ err = 0;
2000de60 3650+ root = IS_ROOT(file->f_path.dentry);
c1595e42
JR
3651+ if (root) {
3652+ get_file(file);
3653+ to_free[*idx] = file;
3654+ (*idx)++;
3655+ goto out;
3656+ }
3657+
076b876e 3658+ matched = 0;
076b876e
AM
3659+ fidir = au_fi(file)->fi_hdir;
3660+ AuDebugOn(!fidir);
5afbbe0d
AM
3661+ bbot = au_fbbot_dir(file);
3662+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++) {
076b876e
AM
3663+ hfile = fidir->fd_hfile + bindex;
3664+ if (!hfile->hf_file)
3665+ continue;
3666+
c1595e42 3667+ if (hfile->hf_br->br_id == br_id) {
076b876e 3668+ matched = 1;
076b876e 3669+ break;
c1595e42 3670+ }
076b876e 3671+ }
c1595e42 3672+ if (matched)
076b876e
AM
3673+ err = -EBUSY;
3674+
3675+out:
3676+ return err;
3677+}
3678+
3679+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
3680+ struct file **to_free, int opened)
3681+{
3682+ int err, idx;
3683+ unsigned long long ull, max;
5afbbe0d 3684+ aufs_bindex_t btop;
076b876e 3685+ struct file *file, **array;
076b876e
AM
3686+ struct dentry *root;
3687+ struct au_hfile *hfile;
3688+
3689+ array = au_farray_alloc(sb, &max);
3690+ err = PTR_ERR(array);
3691+ if (IS_ERR(array))
3692+ goto out;
3693+
3694+ err = 0;
3695+ idx = 0;
3696+ root = sb->s_root;
3697+ di_write_unlock(root);
3698+ for (ull = 0; ull < max; ull++) {
3699+ file = array[ull];
3700+ if (unlikely(!file))
3701+ break;
3702+
3703+ /* AuDbg("%pD\n", file); */
3704+ fi_read_lock(file);
5afbbe0d 3705+ btop = au_fbtop(file);
2000de60 3706+ if (!d_is_dir(file->f_path.dentry)) {
076b876e
AM
3707+ hfile = &au_fi(file)->fi_htop;
3708+ if (hfile->hf_br->br_id == br_id)
3709+ err = -EBUSY;
3710+ } else
3711+ err = test_dir_busy(file, br_id, to_free, &idx);
3712+ fi_read_unlock(file);
3713+ if (unlikely(err))
3714+ break;
3715+ }
3716+ di_write_lock_child(root);
3717+ au_farray_free(array, max);
3718+ AuDebugOn(idx > opened);
3719+
3720+out:
3721+ return err;
3722+}
3723+
3724+static void br_del_file(struct file **to_free, unsigned long long opened,
3725+ aufs_bindex_t br_id)
3726+{
3727+ unsigned long long ull;
5afbbe0d 3728+ aufs_bindex_t bindex, btop, bbot, bfound;
076b876e
AM
3729+ struct file *file;
3730+ struct au_fidir *fidir;
3731+ struct au_hfile *hfile;
3732+
3733+ for (ull = 0; ull < opened; ull++) {
3734+ file = to_free[ull];
3735+ if (unlikely(!file))
3736+ break;
3737+
3738+ /* AuDbg("%pD\n", file); */
2000de60 3739+ AuDebugOn(!d_is_dir(file->f_path.dentry));
076b876e
AM
3740+ bfound = -1;
3741+ fidir = au_fi(file)->fi_hdir;
3742+ AuDebugOn(!fidir);
3743+ fi_write_lock(file);
5afbbe0d
AM
3744+ btop = au_fbtop(file);
3745+ bbot = au_fbbot_dir(file);
3746+ for (bindex = btop; bindex <= bbot; bindex++) {
076b876e
AM
3747+ hfile = fidir->fd_hfile + bindex;
3748+ if (!hfile->hf_file)
3749+ continue;
3750+
3751+ if (hfile->hf_br->br_id == br_id) {
3752+ bfound = bindex;
3753+ break;
3754+ }
3755+ }
3756+ AuDebugOn(bfound < 0);
3757+ au_set_h_fptr(file, bfound, NULL);
5afbbe0d
AM
3758+ if (bfound == btop) {
3759+ for (btop++; btop <= bbot; btop++)
3760+ if (au_hf_dir(file, btop)) {
3761+ au_set_fbtop(file, btop);
076b876e
AM
3762+ break;
3763+ }
3764+ }
3765+ fi_write_unlock(file);
3766+ }
3767+}
3768+
1facf9fc 3769+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
3770+ const aufs_bindex_t bindex,
5afbbe0d 3771+ const aufs_bindex_t bbot)
1facf9fc 3772+{
3773+ struct au_branch **brp, **p;
3774+
dece6358
AM
3775+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3776+
1facf9fc 3777+ brp = sbinfo->si_branch + bindex;
5afbbe0d
AM
3778+ if (bindex < bbot)
3779+ memmove(brp, brp + 1, sizeof(*brp) * (bbot - bindex));
3780+ sbinfo->si_branch[0 + bbot] = NULL;
3781+ sbinfo->si_bbot--;
1facf9fc 3782+
e2f27e51
AM
3783+ p = au_krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST,
3784+ /*may_shrink*/1);
1facf9fc 3785+ if (p)
3786+ sbinfo->si_branch = p;
4a4d8108 3787+ /* harmless error */
1facf9fc 3788+}
3789+
3790+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
5afbbe0d 3791+ const aufs_bindex_t bbot)
1facf9fc 3792+{
3793+ struct au_hdentry *hdp, *p;
3794+
1308ab2a 3795+ AuRwMustWriteLock(&dinfo->di_rwsem);
3796+
5afbbe0d
AM
3797+ hdp = au_hdentry(dinfo, bindex);
3798+ if (bindex < bbot)
3799+ memmove(hdp, hdp + 1, sizeof(*hdp) * (bbot - bindex));
3800+ /* au_h_dentry_init(au_hdentry(dinfo, bbot); */
3801+ dinfo->di_bbot--;
1facf9fc 3802+
e2f27e51
AM
3803+ p = au_krealloc(dinfo->di_hdentry, sizeof(*p) * bbot, AuGFP_SBILIST,
3804+ /*may_shrink*/1);
1facf9fc 3805+ if (p)
3806+ dinfo->di_hdentry = p;
4a4d8108 3807+ /* harmless error */
1facf9fc 3808+}
3809+
3810+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
5afbbe0d 3811+ const aufs_bindex_t bbot)
1facf9fc 3812+{
3813+ struct au_hinode *hip, *p;
3814+
1308ab2a 3815+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3816+
5afbbe0d
AM
3817+ hip = au_hinode(iinfo, bindex);
3818+ if (bindex < bbot)
3819+ memmove(hip, hip + 1, sizeof(*hip) * (bbot - bindex));
3820+ /* au_hinode_init(au_hinode(iinfo, bbot)); */
3821+ iinfo->ii_bbot--;
1facf9fc 3822+
e2f27e51
AM
3823+ p = au_krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST,
3824+ /*may_shrink*/1);
1facf9fc 3825+ if (p)
3826+ iinfo->ii_hinode = p;
4a4d8108 3827+ /* harmless error */
1facf9fc 3828+}
3829+
3830+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
3831+ struct au_branch *br)
3832+{
5afbbe0d 3833+ aufs_bindex_t bbot;
1facf9fc 3834+ struct au_sbinfo *sbinfo;
53392da6
AM
3835+ struct dentry *root, *h_root;
3836+ struct inode *inode, *h_inode;
3837+ struct au_hinode *hinode;
1facf9fc 3838+
dece6358
AM
3839+ SiMustWriteLock(sb);
3840+
1facf9fc 3841+ root = sb->s_root;
5527c038 3842+ inode = d_inode(root);
1facf9fc 3843+ sbinfo = au_sbi(sb);
5afbbe0d 3844+ bbot = sbinfo->si_bbot;
1facf9fc 3845+
53392da6
AM
3846+ h_root = au_h_dptr(root, bindex);
3847+ hinode = au_hi(inode, bindex);
3848+ h_inode = au_igrab(hinode->hi_inode);
3849+ au_hiput(hinode);
1facf9fc 3850+
53392da6 3851+ au_sbilist_lock();
5afbbe0d
AM
3852+ au_br_do_del_brp(sbinfo, bindex, bbot);
3853+ au_br_do_del_hdp(au_di(root), bindex, bbot);
3854+ au_br_do_del_hip(au_ii(inode), bindex, bbot);
53392da6
AM
3855+ au_sbilist_unlock();
3856+
3857+ dput(h_root);
3858+ iput(h_inode);
3859+ au_br_do_free(br);
1facf9fc 3860+}
3861+
79b8bda9
AM
3862+static unsigned long long empty_cb(struct super_block *sb, void *array,
3863+ unsigned long long max, void *arg)
076b876e
AM
3864+{
3865+ return max;
3866+}
3867+
1facf9fc 3868+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
3869+{
3870+ int err, rerr, i;
076b876e 3871+ unsigned long long opened;
1facf9fc 3872+ unsigned int mnt_flags;
5afbbe0d 3873+ aufs_bindex_t bindex, bbot, br_id;
1facf9fc 3874+ unsigned char do_wh, verbose;
3875+ struct au_branch *br;
3876+ struct au_wbr *wbr;
076b876e
AM
3877+ struct dentry *root;
3878+ struct file **to_free;
1facf9fc 3879+
3880+ err = 0;
076b876e
AM
3881+ opened = 0;
3882+ to_free = NULL;
3883+ root = sb->s_root;
3884+ bindex = au_find_dbindex(root, del->h_path.dentry);
1facf9fc 3885+ if (bindex < 0) {
3886+ if (remount)
3887+ goto out; /* success */
3888+ err = -ENOENT;
4a4d8108 3889+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 3890+ goto out;
3891+ }
3892+ AuDbg("bindex b%d\n", bindex);
3893+
3894+ err = -EBUSY;
3895+ mnt_flags = au_mntflags(sb);
3896+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
5afbbe0d
AM
3897+ bbot = au_sbbot(sb);
3898+ if (unlikely(!bbot)) {
1facf9fc 3899+ AuVerbose(verbose, "no more branches left\n");
3900+ goto out;
3901+ }
3902+ br = au_sbr(sb, bindex);
86dc4139 3903+ AuDebugOn(!path_equal(&br->br_path, &del->h_path));
076b876e
AM
3904+
3905+ br_id = br->br_id;
5afbbe0d 3906+ opened = au_br_count(br);
076b876e 3907+ if (unlikely(opened)) {
79b8bda9 3908+ to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
076b876e
AM
3909+ err = PTR_ERR(to_free);
3910+ if (IS_ERR(to_free))
3911+ goto out;
3912+
3913+ err = test_file_busy(sb, br_id, to_free, opened);
3914+ if (unlikely(err)) {
3915+ AuVerbose(verbose, "%llu file(s) opened\n", opened);
3916+ goto out;
3917+ }
1facf9fc 3918+ }
3919+
3920+ wbr = br->br_wbr;
3921+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
3922+ if (do_wh) {
1308ab2a 3923+ /* instead of WbrWhMustWriteLock(wbr) */
3924+ SiMustWriteLock(sb);
1facf9fc 3925+ for (i = 0; i < AuBrWh_Last; i++) {
3926+ dput(wbr->wbr_wh[i]);
3927+ wbr->wbr_wh[i] = NULL;
3928+ }
3929+ }
3930+
076b876e 3931+ err = test_children_busy(root, bindex, verbose);
1facf9fc 3932+ if (unlikely(err)) {
3933+ if (do_wh)
3934+ goto out_wh;
3935+ goto out;
3936+ }
3937+
3938+ err = 0;
076b876e
AM
3939+ if (to_free) {
3940+ /*
3941+ * now we confirmed the branch is deletable.
3942+ * let's free the remaining opened dirs on the branch.
3943+ */
3944+ di_write_unlock(root);
3945+ br_del_file(to_free, opened, br_id);
3946+ di_write_lock_child(root);
3947+ }
3948+
1facf9fc 3949+ if (!remount)
3950+ au_br_do_del(sb, bindex, br);
3951+ else {
3952+ sysaufs_brs_del(sb, bindex);
3953+ au_br_do_del(sb, bindex, br);
3954+ sysaufs_brs_add(sb, bindex);
3955+ }
3956+
1308ab2a 3957+ if (!bindex) {
5527c038 3958+ au_cpup_attr_all(d_inode(root), /*force*/1);
1308ab2a 3959+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
3960+ } else
5527c038 3961+ au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
1facf9fc 3962+ if (au_opt_test(mnt_flags, PLINK))
3963+ au_plink_half_refresh(sb, br_id);
3964+
b752ccd1 3965+ if (au_xino_brid(sb) == br_id)
1facf9fc 3966+ au_xino_brid_set(sb, -1);
3967+ goto out; /* success */
3968+
4f0767ce 3969+out_wh:
1facf9fc 3970+ /* revert */
86dc4139 3971+ rerr = au_br_init_wh(sb, br, br->br_perm);
1facf9fc 3972+ if (rerr)
0c3ec466
AM
3973+ pr_warn("failed re-creating base whiteout, %s. (%d)\n",
3974+ del->pathname, rerr);
4f0767ce 3975+out:
076b876e
AM
3976+ if (to_free)
3977+ au_farray_free(to_free, opened);
1facf9fc 3978+ return err;
3979+}
3980+
3981+/* ---------------------------------------------------------------------- */
3982+
027c5e7a
AM
3983+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
3984+{
3985+ int err;
5afbbe0d 3986+ aufs_bindex_t btop, bbot;
027c5e7a
AM
3987+ struct aufs_ibusy ibusy;
3988+ struct inode *inode, *h_inode;
3989+
3990+ err = -EPERM;
3991+ if (unlikely(!capable(CAP_SYS_ADMIN)))
3992+ goto out;
3993+
3994+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
3995+ if (!err)
3996+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
3997+ if (unlikely(err)) {
3998+ err = -EFAULT;
3999+ AuTraceErr(err);
4000+ goto out;
4001+ }
4002+
4003+ err = -EINVAL;
4004+ si_read_lock(sb, AuLock_FLUSH);
5afbbe0d 4005+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbbot(sb)))
027c5e7a
AM
4006+ goto out_unlock;
4007+
4008+ err = 0;
4009+ ibusy.h_ino = 0; /* invalid */
4010+ inode = ilookup(sb, ibusy.ino);
4011+ if (!inode
4012+ || inode->i_ino == AUFS_ROOT_INO
5afbbe0d 4013+ || au_is_bad_inode(inode))
027c5e7a
AM
4014+ goto out_unlock;
4015+
4016+ ii_read_lock_child(inode);
5afbbe0d
AM
4017+ btop = au_ibtop(inode);
4018+ bbot = au_ibbot(inode);
4019+ if (btop <= ibusy.bindex && ibusy.bindex <= bbot) {
027c5e7a 4020+ h_inode = au_h_iptr(inode, ibusy.bindex);
5afbbe0d 4021+ if (h_inode && au_test_ibusy(inode, btop, bbot))
027c5e7a
AM
4022+ ibusy.h_ino = h_inode->i_ino;
4023+ }
4024+ ii_read_unlock(inode);
4025+ iput(inode);
4026+
4027+out_unlock:
4028+ si_read_unlock(sb);
4029+ if (!err) {
4030+ err = __put_user(ibusy.h_ino, &arg->h_ino);
4031+ if (unlikely(err)) {
4032+ err = -EFAULT;
4033+ AuTraceErr(err);
4034+ }
4035+ }
4036+out:
4037+ return err;
4038+}
4039+
4040+long au_ibusy_ioctl(struct file *file, unsigned long arg)
4041+{
2000de60 4042+ return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
027c5e7a
AM
4043+}
4044+
4045+#ifdef CONFIG_COMPAT
4046+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
4047+{
2000de60 4048+ return au_ibusy(file->f_path.dentry->d_sb, compat_ptr(arg));
027c5e7a
AM
4049+}
4050+#endif
4051+
4052+/* ---------------------------------------------------------------------- */
4053+
1facf9fc 4054+/*
4055+ * change a branch permission
4056+ */
4057+
dece6358
AM
4058+static void au_warn_ima(void)
4059+{
4060+#ifdef CONFIG_IMA
1308ab2a 4061+ /* since it doesn't support mark_files_ro() */
027c5e7a 4062+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
dece6358
AM
4063+#endif
4064+}
4065+
1facf9fc 4066+static int do_need_sigen_inc(int a, int b)
4067+{
4068+ return au_br_whable(a) && !au_br_whable(b);
4069+}
4070+
4071+static int need_sigen_inc(int old, int new)
4072+{
4073+ return do_need_sigen_inc(old, new)
4074+ || do_need_sigen_inc(new, old);
4075+}
4076+
4077+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
4078+{
7f207e10 4079+ int err, do_warn;
027c5e7a 4080+ unsigned int mnt_flags;
7f207e10 4081+ unsigned long long ull, max;
e49829fe 4082+ aufs_bindex_t br_id;
38d290e6 4083+ unsigned char verbose, writer;
7f207e10 4084+ struct file *file, *hf, **array;
e49829fe 4085+ struct au_hfile *hfile;
1facf9fc 4086+
027c5e7a
AM
4087+ mnt_flags = au_mntflags(sb);
4088+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4089+
7f207e10
AM
4090+ array = au_farray_alloc(sb, &max);
4091+ err = PTR_ERR(array);
4092+ if (IS_ERR(array))
1facf9fc 4093+ goto out;
4094+
7f207e10 4095+ do_warn = 0;
e49829fe 4096+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
4097+ for (ull = 0; ull < max; ull++) {
4098+ file = array[ull];
076b876e
AM
4099+ if (unlikely(!file))
4100+ break;
1facf9fc 4101+
523b37e3 4102+ /* AuDbg("%pD\n", file); */
1facf9fc 4103+ fi_read_lock(file);
4104+ if (unlikely(au_test_mmapped(file))) {
4105+ err = -EBUSY;
523b37e3 4106+ AuVerbose(verbose, "mmapped %pD\n", file);
7f207e10 4107+ AuDbgFile(file);
1facf9fc 4108+ FiMustNoWaiters(file);
4109+ fi_read_unlock(file);
7f207e10 4110+ goto out_array;
1facf9fc 4111+ }
4112+
e49829fe
JR
4113+ hfile = &au_fi(file)->fi_htop;
4114+ hf = hfile->hf_file;
7e9cd9fe 4115+ if (!d_is_reg(file->f_path.dentry)
1facf9fc 4116+ || !(file->f_mode & FMODE_WRITE)
e49829fe 4117+ || hfile->hf_br->br_id != br_id
7f207e10
AM
4118+ || !(hf->f_mode & FMODE_WRITE))
4119+ array[ull] = NULL;
4120+ else {
4121+ do_warn = 1;
4122+ get_file(file);
1facf9fc 4123+ }
4124+
1facf9fc 4125+ FiMustNoWaiters(file);
4126+ fi_read_unlock(file);
7f207e10
AM
4127+ fput(file);
4128+ }
1facf9fc 4129+
4130+ err = 0;
7f207e10 4131+ if (do_warn)
dece6358 4132+ au_warn_ima();
7f207e10
AM
4133+
4134+ for (ull = 0; ull < max; ull++) {
4135+ file = array[ull];
4136+ if (!file)
4137+ continue;
4138+
1facf9fc 4139+ /* todo: already flushed? */
523b37e3
AM
4140+ /*
4141+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its
4142+ * approach which resets f_mode and calls mnt_drop_write() and
4143+ * file_release_write() for each file, because the branch
4144+ * attribute in aufs world is totally different from the native
4145+ * fs rw/ro mode.
4146+ */
7f207e10
AM
4147+ /* fi_read_lock(file); */
4148+ hfile = &au_fi(file)->fi_htop;
4149+ hf = hfile->hf_file;
4150+ /* fi_read_unlock(file); */
027c5e7a 4151+ spin_lock(&hf->f_lock);
38d290e6
JR
4152+ writer = !!(hf->f_mode & FMODE_WRITER);
4153+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
027c5e7a 4154+ spin_unlock(&hf->f_lock);
38d290e6
JR
4155+ if (writer) {
4156+ put_write_access(file_inode(hf));
c06a8ce3 4157+ __mnt_drop_write(hf->f_path.mnt);
1facf9fc 4158+ }
4159+ }
4160+
7f207e10
AM
4161+out_array:
4162+ au_farray_free(array, max);
4f0767ce 4163+out:
7f207e10 4164+ AuTraceErr(err);
1facf9fc 4165+ return err;
4166+}
4167+
4168+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4169+ int *do_refresh)
1facf9fc 4170+{
4171+ int err, rerr;
4172+ aufs_bindex_t bindex;
4173+ struct dentry *root;
4174+ struct au_branch *br;
076b876e 4175+ struct au_br_fhsm *bf;
1facf9fc 4176+
4177+ root = sb->s_root;
1facf9fc 4178+ bindex = au_find_dbindex(root, mod->h_root);
4179+ if (bindex < 0) {
4180+ if (remount)
4181+ return 0; /* success */
4182+ err = -ENOENT;
4a4d8108 4183+ pr_err("%s no such branch\n", mod->path);
1facf9fc 4184+ goto out;
4185+ }
4186+ AuDbg("bindex b%d\n", bindex);
4187+
5527c038 4188+ err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
1facf9fc 4189+ if (unlikely(err))
4190+ goto out;
4191+
4192+ br = au_sbr(sb, bindex);
86dc4139 4193+ AuDebugOn(mod->h_root != au_br_dentry(br));
1facf9fc 4194+ if (br->br_perm == mod->perm)
4195+ return 0; /* success */
4196+
076b876e
AM
4197+ /* pre-allocate for non-fhsm --> fhsm */
4198+ bf = NULL;
4199+ if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
4200+ err = au_fhsm_br_alloc(br);
4201+ if (unlikely(err))
4202+ goto out;
4203+ bf = br->br_fhsm;
4204+ br->br_fhsm = NULL;
4205+ }
4206+
1facf9fc 4207+ if (au_br_writable(br->br_perm)) {
4208+ /* remove whiteout base */
86dc4139 4209+ err = au_br_init_wh(sb, br, mod->perm);
1facf9fc 4210+ if (unlikely(err))
076b876e 4211+ goto out_bf;
1facf9fc 4212+
4213+ if (!au_br_writable(mod->perm)) {
4214+ /* rw --> ro, file might be mmapped */
4215+ DiMustNoWaiters(root);
5527c038 4216+ IiMustNoWaiters(d_inode(root));
1facf9fc 4217+ di_write_unlock(root);
4218+ err = au_br_mod_files_ro(sb, bindex);
4219+ /* aufs_write_lock() calls ..._child() */
4220+ di_write_lock_child(root);
4221+
4222+ if (unlikely(err)) {
4223+ rerr = -ENOMEM;
be52b249 4224+ br->br_wbr = kzalloc(sizeof(*br->br_wbr),
1facf9fc 4225+ GFP_NOFS);
86dc4139
AM
4226+ if (br->br_wbr)
4227+ rerr = au_wbr_init(br, sb, br->br_perm);
1facf9fc 4228+ if (unlikely(rerr)) {
4229+ AuIOErr("nested error %d (%d)\n",
4230+ rerr, err);
4231+ br->br_perm = mod->perm;
4232+ }
4233+ }
4234+ }
4235+ } else if (au_br_writable(mod->perm)) {
4236+ /* ro --> rw */
4237+ err = -ENOMEM;
be52b249 4238+ br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
1facf9fc 4239+ if (br->br_wbr) {
86dc4139 4240+ err = au_wbr_init(br, sb, mod->perm);
1facf9fc 4241+ if (unlikely(err)) {
f0c0a007 4242+ au_delayed_kfree(br->br_wbr);
1facf9fc 4243+ br->br_wbr = NULL;
4244+ }
4245+ }
4246+ }
076b876e
AM
4247+ if (unlikely(err))
4248+ goto out_bf;
4249+
4250+ if (au_br_fhsm(br->br_perm)) {
4251+ if (!au_br_fhsm(mod->perm)) {
4252+ /* fhsm --> non-fhsm */
4253+ au_br_fhsm_fin(br->br_fhsm);
f0c0a007 4254+ au_delayed_kfree(br->br_fhsm);
076b876e
AM
4255+ br->br_fhsm = NULL;
4256+ }
4257+ } else if (au_br_fhsm(mod->perm))
4258+ /* non-fhsm --> fhsm */
4259+ br->br_fhsm = bf;
4260+
076b876e
AM
4261+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
4262+ br->br_perm = mod->perm;
4263+ goto out; /* success */
1facf9fc 4264+
076b876e 4265+out_bf:
f0c0a007
AM
4266+ if (bf)
4267+ au_delayed_kfree(bf);
076b876e
AM
4268+out:
4269+ AuTraceErr(err);
4270+ return err;
4271+}
4272+
4273+/* ---------------------------------------------------------------------- */
4274+
4275+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
4276+{
4277+ int err;
4278+ struct kstatfs kstfs;
4279+
4280+ err = vfs_statfs(&br->br_path, &kstfs);
1facf9fc 4281+ if (!err) {
076b876e
AM
4282+ stfs->f_blocks = kstfs.f_blocks;
4283+ stfs->f_bavail = kstfs.f_bavail;
4284+ stfs->f_files = kstfs.f_files;
4285+ stfs->f_ffree = kstfs.f_ffree;
1facf9fc 4286+ }
4287+
1facf9fc 4288+ return err;
4289+}
7f207e10
AM
4290diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
4291--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 4292+++ linux/fs/aufs/branch.h 2016-10-09 16:55:36.486034798 +0200
5afbbe0d 4293@@ -0,0 +1,309 @@
1facf9fc 4294+/*
8cdd5066 4295+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 4296+ *
4297+ * This program, aufs is free software; you can redistribute it and/or modify
4298+ * it under the terms of the GNU General Public License as published by
4299+ * the Free Software Foundation; either version 2 of the License, or
4300+ * (at your option) any later version.
dece6358
AM
4301+ *
4302+ * This program is distributed in the hope that it will be useful,
4303+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4304+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4305+ * GNU General Public License for more details.
4306+ *
4307+ * You should have received a copy of the GNU General Public License
523b37e3 4308+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4309+ */
4310+
4311+/*
4312+ * branch filesystems and xino for them
4313+ */
4314+
4315+#ifndef __AUFS_BRANCH_H__
4316+#define __AUFS_BRANCH_H__
4317+
4318+#ifdef __KERNEL__
4319+
1facf9fc 4320+#include <linux/mount.h>
4a4d8108 4321+#include "dynop.h"
1facf9fc 4322+#include "rwsem.h"
4323+#include "super.h"
4324+
4325+/* ---------------------------------------------------------------------- */
4326+
4327+/* a xino file */
4328+struct au_xino_file {
4329+ struct file *xi_file;
4330+ struct mutex xi_nondir_mtx;
4331+
4332+ /* todo: make xino files an array to support huge inode number */
4333+
4334+#ifdef CONFIG_DEBUG_FS
4335+ struct dentry *xi_dbgaufs;
4336+#endif
4337+};
4338+
076b876e
AM
4339+/* File-based Hierarchical Storage Management */
4340+struct au_br_fhsm {
4341+#ifdef CONFIG_AUFS_FHSM
4342+ struct mutex bf_lock;
4343+ unsigned long bf_jiffy;
4344+ struct aufs_stfs bf_stfs;
4345+ int bf_readable;
4346+#endif
4347+};
4348+
1facf9fc 4349+/* members for writable branch only */
4350+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
4351+struct au_wbr {
dece6358 4352+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 4353+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 4354+ atomic_t wbr_wh_running;
1facf9fc 4355+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
4356+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
4357+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
4358+
4359+ /* mfs mode */
4360+ unsigned long long wbr_bytes;
4361+};
4362+
4a4d8108
AM
4363+/* ext2 has 3 types of operations at least, ext3 has 4 */
4364+#define AuBrDynOp (AuDyLast * 4)
4365+
1716fcea
AM
4366+#ifdef CONFIG_AUFS_HFSNOTIFY
4367+/* support for asynchronous destruction */
4368+struct au_br_hfsnotify {
4369+ struct fsnotify_group *hfsn_group;
4370+};
4371+#endif
4372+
392086de
AM
4373+/* sysfs entries */
4374+struct au_brsysfs {
4375+ char name[16];
4376+ struct attribute attr;
4377+};
4378+
4379+enum {
4380+ AuBrSysfs_BR,
4381+ AuBrSysfs_BRID,
4382+ AuBrSysfs_Last
4383+};
4384+
1facf9fc 4385+/* protected by superblock rwsem */
4386+struct au_branch {
4387+ struct au_xino_file br_xino;
4388+
4389+ aufs_bindex_t br_id;
4390+
4391+ int br_perm;
86dc4139 4392+ struct path br_path;
4a4d8108
AM
4393+ spinlock_t br_dykey_lock;
4394+ struct au_dykey *br_dykey[AuBrDynOp];
5afbbe0d 4395+ struct percpu_counter br_count;
1facf9fc 4396+
4397+ struct au_wbr *br_wbr;
076b876e 4398+ struct au_br_fhsm *br_fhsm;
1facf9fc 4399+
4400+ /* xino truncation */
1facf9fc 4401+ atomic_t br_xino_running;
4402+
027c5e7a 4403+#ifdef CONFIG_AUFS_HFSNOTIFY
1716fcea 4404+ struct au_br_hfsnotify *br_hfsn;
027c5e7a
AM
4405+#endif
4406+
1facf9fc 4407+#ifdef CONFIG_SYSFS
392086de
AM
4408+ /* entries under sysfs per mount-point */
4409+ struct au_brsysfs br_sysfs[AuBrSysfs_Last];
1facf9fc 4410+#endif
4411+};
4412+
4413+/* ---------------------------------------------------------------------- */
4414+
86dc4139
AM
4415+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
4416+{
4417+ return br->br_path.mnt;
4418+}
4419+
4420+static inline struct dentry *au_br_dentry(struct au_branch *br)
4421+{
4422+ return br->br_path.dentry;
4423+}
4424+
4425+static inline struct super_block *au_br_sb(struct au_branch *br)
4426+{
4427+ return au_br_mnt(br)->mnt_sb;
4428+}
4429+
5afbbe0d
AM
4430+static inline void au_br_get(struct au_branch *br)
4431+{
4432+ percpu_counter_inc(&br->br_count);
4433+}
4434+
4435+static inline void au_br_put(struct au_branch *br)
4436+{
4437+ percpu_counter_dec(&br->br_count);
4438+}
4439+
4440+static inline s64 au_br_count(struct au_branch *br)
4441+{
4442+ return percpu_counter_sum(&br->br_count);
4443+}
4444+
4445+static inline void au_br_count_init(struct au_branch *br)
4446+{
4447+ percpu_counter_init(&br->br_count, 0, GFP_NOFS);
4448+}
4449+
4450+static inline void au_br_count_fin(struct au_branch *br)
4451+{
4452+ percpu_counter_destroy(&br->br_count);
4453+}
4454+
1facf9fc 4455+static inline int au_br_rdonly(struct au_branch *br)
4456+{
86dc4139 4457+ return ((au_br_sb(br)->s_flags & MS_RDONLY)
1facf9fc 4458+ || !au_br_writable(br->br_perm))
4459+ ? -EROFS : 0;
4460+}
4461+
4a4d8108 4462+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 4463+{
4a4d8108 4464+#ifdef CONFIG_AUFS_HNOTIFY
1e00d052 4465+ return !(brperm & AuBrPerm_RR);
1facf9fc 4466+#else
4467+ return 0;
4468+#endif
4469+}
4470+
b912730e
AM
4471+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
4472+{
4473+ int err, exec_flag;
4474+
4475+ err = 0;
4476+ exec_flag = oflag & __FMODE_EXEC;
79b8bda9 4477+ if (unlikely(exec_flag && path_noexec(&br->br_path)))
b912730e
AM
4478+ err = -EACCES;
4479+
4480+ return err;
4481+}
4482+
1facf9fc 4483+/* ---------------------------------------------------------------------- */
4484+
4485+/* branch.c */
4486+struct au_sbinfo;
4487+void au_br_free(struct au_sbinfo *sinfo);
4488+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
4489+struct au_opt_add;
4490+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
4491+struct au_opt_del;
4492+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
4493+long au_ibusy_ioctl(struct file *file, unsigned long arg);
4494+#ifdef CONFIG_COMPAT
4495+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
4496+#endif
1facf9fc 4497+struct au_opt_mod;
4498+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4499+ int *do_refresh);
076b876e
AM
4500+struct aufs_stfs;
4501+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
1facf9fc 4502+
4503+/* xino.c */
4504+static const loff_t au_loff_max = LLONG_MAX;
4505+
4506+int au_xib_trunc(struct super_block *sb);
5527c038 4507+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
1facf9fc 4508+ loff_t *pos);
5527c038
JR
4509+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
4510+ size_t size, loff_t *pos);
1facf9fc 4511+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
4512+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
4513+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 4514+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 4515+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4516+ ino_t ino);
4517+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4518+ ino_t *ino);
4519+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
4520+ struct file *base_file, int do_test);
4521+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
4522+
4523+struct au_opt_xino;
4524+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
4525+void au_xino_clr(struct super_block *sb);
4526+struct file *au_xino_def(struct super_block *sb);
4527+int au_xino_path(struct seq_file *seq, struct file *file);
4528+
4529+/* ---------------------------------------------------------------------- */
4530+
4531+/* Superblock to branch */
4532+static inline
4533+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
4534+{
4535+ return au_sbr(sb, bindex)->br_id;
4536+}
4537+
4538+static inline
4539+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
4540+{
86dc4139 4541+ return au_br_mnt(au_sbr(sb, bindex));
1facf9fc 4542+}
4543+
4544+static inline
4545+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
4546+{
86dc4139 4547+ return au_br_sb(au_sbr(sb, bindex));
1facf9fc 4548+}
4549+
5afbbe0d
AM
4550+static inline void au_sbr_get(struct super_block *sb, aufs_bindex_t bindex)
4551+{
4552+ au_br_get(au_sbr(sb, bindex));
4553+}
4554+
1facf9fc 4555+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
4556+{
5afbbe0d 4557+ au_br_put(au_sbr(sb, bindex));
1facf9fc 4558+}
4559+
4560+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
4561+{
4562+ return au_sbr(sb, bindex)->br_perm;
4563+}
4564+
4565+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
4566+{
4567+ return au_br_whable(au_sbr_perm(sb, bindex));
4568+}
4569+
4570+/* ---------------------------------------------------------------------- */
4571+
4572+/*
4573+ * wbr_wh_read_lock, wbr_wh_write_lock
4574+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
4575+ */
4576+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
4577+
dece6358
AM
4578+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
4579+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
4580+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
4581+
076b876e
AM
4582+/* ---------------------------------------------------------------------- */
4583+
4584+#ifdef CONFIG_AUFS_FHSM
4585+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
4586+{
4587+ mutex_init(&brfhsm->bf_lock);
4588+ brfhsm->bf_jiffy = 0;
4589+ brfhsm->bf_readable = 0;
4590+}
4591+
4592+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
4593+{
4594+ mutex_destroy(&brfhsm->bf_lock);
4595+}
4596+#else
4597+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
4598+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
4599+#endif
4600+
1facf9fc 4601+#endif /* __KERNEL__ */
4602+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
4603diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
4604--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
e2f27e51 4605+++ linux/fs/aufs/conf.mk 2016-10-09 16:55:36.486034798 +0200
c1595e42 4606@@ -0,0 +1,38 @@
4a4d8108
AM
4607+
4608+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
4609+
4610+define AuConf
4611+ifdef ${1}
4612+AuConfStr += ${1}=${${1}}
4613+endif
4614+endef
4615+
b752ccd1 4616+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 4617+ SBILIST \
7f207e10 4618+ HNOTIFY HFSNOTIFY \
4a4d8108 4619+ EXPORT INO_T_64 \
c1595e42 4620+ XATTR \
076b876e 4621+ FHSM \
4a4d8108 4622+ RDU \
4a4d8108
AM
4623+ SHWH \
4624+ BR_RAMFS \
4625+ BR_FUSE POLL \
4626+ BR_HFSPLUS \
4627+ BDEV_LOOP \
b752ccd1
AM
4628+ DEBUG MAGIC_SYSRQ
4629+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
4630+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
4631+
4632+AuConfName = ${obj}/conf.str
4633+${AuConfName}.tmp: FORCE
4634+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
4635+${AuConfName}: ${AuConfName}.tmp
4636+ @diff -q $< $@ > /dev/null 2>&1 || { \
4637+ echo ' GEN ' $@; \
4638+ cp -p $< $@; \
4639+ }
4640+FORCE:
4641+clean-files += ${AuConfName} ${AuConfName}.tmp
4642+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
4643+
4644+-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
4645diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
4646--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
4647+++ linux/fs/aufs/cpup.c 2016-12-17 12:28:17.595211562 +0100
4648@@ -0,0 +1,1394 @@
1facf9fc 4649+/*
8cdd5066 4650+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 4651+ *
4652+ * This program, aufs is free software; you can redistribute it and/or modify
4653+ * it under the terms of the GNU General Public License as published by
4654+ * the Free Software Foundation; either version 2 of the License, or
4655+ * (at your option) any later version.
dece6358
AM
4656+ *
4657+ * This program is distributed in the hope that it will be useful,
4658+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4659+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4660+ * GNU General Public License for more details.
4661+ *
4662+ * You should have received a copy of the GNU General Public License
523b37e3 4663+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4664+ */
4665+
4666+/*
4667+ * copy-up functions, see wbr_policy.c for copy-down
4668+ */
4669+
4670+#include <linux/fs_stack.h>
dece6358 4671+#include <linux/mm.h>
8cdd5066 4672+#include <linux/task_work.h>
1facf9fc 4673+#include "aufs.h"
4674+
86dc4139 4675+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
1facf9fc 4676+{
4677+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
367653fa 4678+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
1facf9fc 4679+
86dc4139
AM
4680+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
4681+
4682+ dst->i_flags |= iflags & ~mask;
1facf9fc 4683+ if (au_test_fs_notime(dst->i_sb))
4684+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
4685+}
4686+
4687+void au_cpup_attr_timesizes(struct inode *inode)
4688+{
4689+ struct inode *h_inode;
4690+
5afbbe0d 4691+ h_inode = au_h_iptr(inode, au_ibtop(inode));
1facf9fc 4692+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 4693+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 4694+}
4695+
4696+void au_cpup_attr_nlink(struct inode *inode, int force)
4697+{
4698+ struct inode *h_inode;
4699+ struct super_block *sb;
5afbbe0d 4700+ aufs_bindex_t bindex, bbot;
1facf9fc 4701+
4702+ sb = inode->i_sb;
5afbbe0d 4703+ bindex = au_ibtop(inode);
1facf9fc 4704+ h_inode = au_h_iptr(inode, bindex);
4705+ if (!force
4706+ && !S_ISDIR(h_inode->i_mode)
4707+ && au_opt_test(au_mntflags(sb), PLINK)
4708+ && au_plink_test(inode))
4709+ return;
4710+
7eafdf33
AM
4711+ /*
4712+ * 0 can happen in revalidating.
38d290e6
JR
4713+ * h_inode->i_mutex may not be held here, but it is harmless since once
4714+ * i_nlink reaches 0, it will never become positive except O_TMPFILE
4715+ * case.
4716+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
4717+ * the incorrect link count.
7eafdf33 4718+ */
92d182d2 4719+ set_nlink(inode, h_inode->i_nlink);
1facf9fc 4720+
4721+ /*
4722+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
4723+ * it may include the whplink directory.
4724+ */
4725+ if (S_ISDIR(h_inode->i_mode)) {
5afbbe0d
AM
4726+ bbot = au_ibbot(inode);
4727+ for (bindex++; bindex <= bbot; bindex++) {
1facf9fc 4728+ h_inode = au_h_iptr(inode, bindex);
4729+ if (h_inode)
4730+ au_add_nlink(inode, h_inode);
4731+ }
4732+ }
4733+}
4734+
4735+void au_cpup_attr_changeable(struct inode *inode)
4736+{
4737+ struct inode *h_inode;
4738+
5afbbe0d 4739+ h_inode = au_h_iptr(inode, au_ibtop(inode));
1facf9fc 4740+ inode->i_mode = h_inode->i_mode;
4741+ inode->i_uid = h_inode->i_uid;
4742+ inode->i_gid = h_inode->i_gid;
4743+ au_cpup_attr_timesizes(inode);
86dc4139 4744+ au_cpup_attr_flags(inode, h_inode->i_flags);
1facf9fc 4745+}
4746+
4747+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
4748+{
4749+ struct au_iinfo *iinfo = au_ii(inode);
4750+
1308ab2a 4751+ IiMustWriteLock(inode);
4752+
1facf9fc 4753+ iinfo->ii_higen = h_inode->i_generation;
4754+ iinfo->ii_hsb1 = h_inode->i_sb;
4755+}
4756+
4757+void au_cpup_attr_all(struct inode *inode, int force)
4758+{
4759+ struct inode *h_inode;
4760+
5afbbe0d 4761+ h_inode = au_h_iptr(inode, au_ibtop(inode));
1facf9fc 4762+ au_cpup_attr_changeable(inode);
4763+ if (inode->i_nlink > 0)
4764+ au_cpup_attr_nlink(inode, force);
4765+ inode->i_rdev = h_inode->i_rdev;
4766+ inode->i_blkbits = h_inode->i_blkbits;
4767+ au_cpup_igen(inode, h_inode);
4768+}
4769+
4770+/* ---------------------------------------------------------------------- */
4771+
4772+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
4773+
4774+/* keep the timestamps of the parent dir when cpup */
4775+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4776+ struct path *h_path)
4777+{
4778+ struct inode *h_inode;
4779+
4780+ dt->dt_dentry = dentry;
4781+ dt->dt_h_path = *h_path;
5527c038 4782+ h_inode = d_inode(h_path->dentry);
1facf9fc 4783+ dt->dt_atime = h_inode->i_atime;
4784+ dt->dt_mtime = h_inode->i_mtime;
4785+ /* smp_mb(); */
4786+}
4787+
4788+void au_dtime_revert(struct au_dtime *dt)
4789+{
4790+ struct iattr attr;
4791+ int err;
4792+
4793+ attr.ia_atime = dt->dt_atime;
4794+ attr.ia_mtime = dt->dt_mtime;
4795+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
4796+ | ATTR_ATIME | ATTR_ATIME_SET;
4797+
523b37e3
AM
4798+ /* no delegation since this is a directory */
4799+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
1facf9fc 4800+ if (unlikely(err))
0c3ec466 4801+ pr_warn("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 4802+}
4803+
4804+/* ---------------------------------------------------------------------- */
4805+
86dc4139
AM
4806+/* internal use only */
4807+struct au_cpup_reg_attr {
4808+ int valid;
4809+ struct kstat st;
4810+ unsigned int iflags; /* inode->i_flags */
4811+};
4812+
1facf9fc 4813+static noinline_for_stack
86dc4139
AM
4814+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
4815+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4816+{
c1595e42 4817+ int err, sbits, icex;
7e9cd9fe
AM
4818+ unsigned int mnt_flags;
4819+ unsigned char verbose;
1facf9fc 4820+ struct iattr ia;
4821+ struct path h_path;
1308ab2a 4822+ struct inode *h_isrc, *h_idst;
86dc4139 4823+ struct kstat *h_st;
c1595e42 4824+ struct au_branch *br;
1facf9fc 4825+
4826+ h_path.dentry = au_h_dptr(dst, bindex);
5527c038 4827+ h_idst = d_inode(h_path.dentry);
c1595e42
JR
4828+ br = au_sbr(dst->d_sb, bindex);
4829+ h_path.mnt = au_br_mnt(br);
5527c038 4830+ h_isrc = d_inode(h_src);
1308ab2a 4831+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 4832+ | ATTR_ATIME | ATTR_MTIME
4833+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
86dc4139
AM
4834+ if (h_src_attr && h_src_attr->valid) {
4835+ h_st = &h_src_attr->st;
4836+ ia.ia_uid = h_st->uid;
4837+ ia.ia_gid = h_st->gid;
4838+ ia.ia_atime = h_st->atime;
4839+ ia.ia_mtime = h_st->mtime;
4840+ if (h_idst->i_mode != h_st->mode
4841+ && !S_ISLNK(h_idst->i_mode)) {
4842+ ia.ia_valid |= ATTR_MODE;
4843+ ia.ia_mode = h_st->mode;
4844+ }
4845+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
4846+ au_cpup_attr_flags(h_idst, h_src_attr->iflags);
4847+ } else {
4848+ ia.ia_uid = h_isrc->i_uid;
4849+ ia.ia_gid = h_isrc->i_gid;
4850+ ia.ia_atime = h_isrc->i_atime;
4851+ ia.ia_mtime = h_isrc->i_mtime;
4852+ if (h_idst->i_mode != h_isrc->i_mode
4853+ && !S_ISLNK(h_idst->i_mode)) {
4854+ ia.ia_valid |= ATTR_MODE;
4855+ ia.ia_mode = h_isrc->i_mode;
4856+ }
4857+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
4858+ au_cpup_attr_flags(h_idst, h_isrc->i_flags);
1308ab2a 4859+ }
523b37e3
AM
4860+ /* no delegation since it is just created */
4861+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4862+
4863+ /* is this nfs only? */
4864+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
4865+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
4866+ ia.ia_mode = h_isrc->i_mode;
523b37e3 4867+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4868+ }
4869+
c1595e42 4870+ icex = br->br_perm & AuBrAttr_ICEX;
7e9cd9fe
AM
4871+ if (!err) {
4872+ mnt_flags = au_mntflags(dst->d_sb);
4873+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4874+ err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
4875+ }
c1595e42 4876+
1facf9fc 4877+ return err;
4878+}
4879+
4880+/* ---------------------------------------------------------------------- */
4881+
4882+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
4883+ char *buf, unsigned long blksize)
4884+{
4885+ int err;
4886+ size_t sz, rbytes, wbytes;
4887+ unsigned char all_zero;
4888+ char *p, *zp;
febd17d6 4889+ struct inode *h_inode;
1facf9fc 4890+ /* reduce stack usage */
4891+ struct iattr *ia;
4892+
4893+ zp = page_address(ZERO_PAGE(0));
4894+ if (unlikely(!zp))
4895+ return -ENOMEM; /* possible? */
4896+
4897+ err = 0;
4898+ all_zero = 0;
4899+ while (len) {
4900+ AuDbg("len %lld\n", len);
4901+ sz = blksize;
4902+ if (len < blksize)
4903+ sz = len;
4904+
4905+ rbytes = 0;
4906+ /* todo: signal_pending? */
4907+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
4908+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
4909+ err = rbytes;
4910+ }
4911+ if (unlikely(err < 0))
4912+ break;
4913+
4914+ all_zero = 0;
4915+ if (len >= rbytes && rbytes == blksize)
4916+ all_zero = !memcmp(buf, zp, rbytes);
4917+ if (!all_zero) {
4918+ wbytes = rbytes;
4919+ p = buf;
4920+ while (wbytes) {
4921+ size_t b;
4922+
4923+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
4924+ err = b;
4925+ /* todo: signal_pending? */
4926+ if (unlikely(err == -EAGAIN || err == -EINTR))
4927+ continue;
4928+ if (unlikely(err < 0))
4929+ break;
4930+ wbytes -= b;
4931+ p += b;
4932+ }
392086de
AM
4933+ if (unlikely(err < 0))
4934+ break;
1facf9fc 4935+ } else {
4936+ loff_t res;
4937+
4938+ AuLabel(hole);
4939+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
4940+ err = res;
4941+ if (unlikely(res < 0))
4942+ break;
4943+ }
4944+ len -= rbytes;
4945+ err = 0;
4946+ }
4947+
4948+ /* the last block may be a hole */
4949+ if (!err && all_zero) {
4950+ AuLabel(last hole);
4951+
4952+ err = 1;
2000de60 4953+ if (au_test_nfs(dst->f_path.dentry->d_sb)) {
1facf9fc 4954+ /* nfs requires this step to make last hole */
4955+ /* is this only nfs? */
4956+ do {
4957+ /* todo: signal_pending? */
4958+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
4959+ } while (err == -EAGAIN || err == -EINTR);
4960+ if (err == 1)
4961+ dst->f_pos--;
4962+ }
4963+
4964+ if (err == 1) {
4965+ ia = (void *)buf;
4966+ ia->ia_size = dst->f_pos;
4967+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
4968+ ia->ia_file = dst;
febd17d6
JR
4969+ h_inode = file_inode(dst);
4970+ inode_lock_nested(h_inode, AuLsc_I_CHILD2);
523b37e3
AM
4971+ /* no delegation since it is just created */
4972+ err = vfsub_notify_change(&dst->f_path, ia,
4973+ /*delegated*/NULL);
febd17d6 4974+ inode_unlock(h_inode);
1facf9fc 4975+ }
4976+ }
4977+
4978+ return err;
4979+}
4980+
4981+int au_copy_file(struct file *dst, struct file *src, loff_t len)
4982+{
4983+ int err;
4984+ unsigned long blksize;
4985+ unsigned char do_kfree;
4986+ char *buf;
4987+
4988+ err = -ENOMEM;
2000de60 4989+ blksize = dst->f_path.dentry->d_sb->s_blocksize;
1facf9fc 4990+ if (!blksize || PAGE_SIZE < blksize)
4991+ blksize = PAGE_SIZE;
4992+ AuDbg("blksize %lu\n", blksize);
4993+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
4994+ if (do_kfree)
4995+ buf = kmalloc(blksize, GFP_NOFS);
4996+ else
4997+ buf = (void *)__get_free_page(GFP_NOFS);
4998+ if (unlikely(!buf))
4999+ goto out;
5000+
5001+ if (len > (1 << 22))
5002+ AuDbg("copying a large file %lld\n", (long long)len);
5003+
5004+ src->f_pos = 0;
5005+ dst->f_pos = 0;
5006+ err = au_do_copy_file(dst, src, len, buf, blksize);
5007+ if (do_kfree)
f0c0a007 5008+ au_delayed_kfree(buf);
1facf9fc 5009+ else
f0c0a007 5010+ au_delayed_free_page((unsigned long)buf);
1facf9fc 5011+
4f0767ce 5012+out:
1facf9fc 5013+ return err;
5014+}
5015+
5016+/*
5017+ * to support a sparse file which is opened with O_APPEND,
5018+ * we need to close the file.
5019+ */
c2b27bf2 5020+static int au_cp_regular(struct au_cp_generic *cpg)
1facf9fc 5021+{
5022+ int err, i;
5023+ enum { SRC, DST };
5024+ struct {
5025+ aufs_bindex_t bindex;
5026+ unsigned int flags;
5027+ struct dentry *dentry;
392086de 5028+ int force_wr;
1facf9fc 5029+ struct file *file;
523b37e3 5030+ void *label;
1facf9fc 5031+ } *f, file[] = {
5032+ {
c2b27bf2 5033+ .bindex = cpg->bsrc,
1facf9fc 5034+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
523b37e3 5035+ .label = &&out
1facf9fc 5036+ },
5037+ {
c2b27bf2 5038+ .bindex = cpg->bdst,
1facf9fc 5039+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
392086de 5040+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
523b37e3 5041+ .label = &&out_src
1facf9fc 5042+ }
5043+ };
5044+ struct super_block *sb;
e2f27e51 5045+ struct inode *h_src_inode;
8cdd5066 5046+ struct task_struct *tsk = current;
1facf9fc 5047+
5048+ /* bsrc branch can be ro/rw. */
c2b27bf2 5049+ sb = cpg->dentry->d_sb;
1facf9fc 5050+ f = file;
5051+ for (i = 0; i < 2; i++, f++) {
c2b27bf2
AM
5052+ f->dentry = au_h_dptr(cpg->dentry, f->bindex);
5053+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
392086de 5054+ /*file*/NULL, f->force_wr);
1facf9fc 5055+ err = PTR_ERR(f->file);
5056+ if (IS_ERR(f->file))
5057+ goto *f->label;
1facf9fc 5058+ }
5059+
5060+ /* try stopping to update while we copyup */
e2f27e51
AM
5061+ h_src_inode = d_inode(file[SRC].dentry);
5062+ if (!au_test_nfs(h_src_inode->i_sb))
5063+ IMustLock(h_src_inode);
c2b27bf2 5064+ err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
1facf9fc 5065+
8cdd5066
JR
5066+ /* i wonder if we had O_NO_DELAY_FPUT flag */
5067+ if (tsk->flags & PF_KTHREAD)
5068+ __fput_sync(file[DST].file);
5069+ else {
5070+ WARN(1, "%pD\nPlease report this warning to aufs-users ML",
5071+ file[DST].file);
5072+ fput(file[DST].file);
5073+ /*
5074+ * too bad.
5075+ * we have to call both since we don't know which place the file
5076+ * was added to.
5077+ */
5078+ task_work_run();
5079+ flush_delayed_fput();
5080+ }
1facf9fc 5081+ au_sbr_put(sb, file[DST].bindex);
523b37e3 5082+
4f0767ce 5083+out_src:
1facf9fc 5084+ fput(file[SRC].file);
5085+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 5086+out:
1facf9fc 5087+ return err;
5088+}
5089+
c2b27bf2 5090+static int au_do_cpup_regular(struct au_cp_generic *cpg,
86dc4139 5091+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 5092+{
5093+ int err, rerr;
5094+ loff_t l;
86dc4139 5095+ struct path h_path;
38d290e6 5096+ struct inode *h_src_inode, *h_dst_inode;
1facf9fc 5097+
5098+ err = 0;
5527c038 5099+ h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
86dc4139 5100+ l = i_size_read(h_src_inode);
c2b27bf2
AM
5101+ if (cpg->len == -1 || l < cpg->len)
5102+ cpg->len = l;
5103+ if (cpg->len) {
86dc4139 5104+ /* try stopping to update while we are referencing */
febd17d6 5105+ inode_lock_nested(h_src_inode, AuLsc_I_CHILD);
c2b27bf2 5106+ au_pin_hdir_unlock(cpg->pin);
1facf9fc 5107+
c2b27bf2
AM
5108+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5109+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
86dc4139 5110+ h_src_attr->iflags = h_src_inode->i_flags;
5527c038
JR
5111+ if (!au_test_nfs(h_src_inode->i_sb))
5112+ err = vfs_getattr(&h_path, &h_src_attr->st);
5113+ else {
febd17d6 5114+ inode_unlock(h_src_inode);
5527c038 5115+ err = vfs_getattr(&h_path, &h_src_attr->st);
febd17d6 5116+ inode_lock_nested(h_src_inode, AuLsc_I_CHILD);
5527c038 5117+ }
86dc4139 5118+ if (unlikely(err)) {
febd17d6 5119+ inode_unlock(h_src_inode);
86dc4139
AM
5120+ goto out;
5121+ }
5122+ h_src_attr->valid = 1;
e2f27e51
AM
5123+ if (!au_test_nfs(h_src_inode->i_sb)) {
5124+ err = au_cp_regular(cpg);
5125+ inode_unlock(h_src_inode);
5126+ } else {
5127+ inode_unlock(h_src_inode);
5128+ err = au_cp_regular(cpg);
5129+ }
c2b27bf2 5130+ rerr = au_pin_hdir_relock(cpg->pin);
86dc4139
AM
5131+ if (!err && rerr)
5132+ err = rerr;
1facf9fc 5133+ }
38d290e6
JR
5134+ if (!err && (h_src_inode->i_state & I_LINKABLE)) {
5135+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
5527c038 5136+ h_dst_inode = d_inode(h_path.dentry);
38d290e6
JR
5137+ spin_lock(&h_dst_inode->i_lock);
5138+ h_dst_inode->i_state |= I_LINKABLE;
5139+ spin_unlock(&h_dst_inode->i_lock);
5140+ }
1facf9fc 5141+
4f0767ce 5142+out:
1facf9fc 5143+ return err;
5144+}
5145+
5146+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
5147+ struct inode *h_dir)
5148+{
5149+ int err, symlen;
5150+ mm_segment_t old_fs;
b752ccd1
AM
5151+ union {
5152+ char *k;
5153+ char __user *u;
5154+ } sym;
5527c038
JR
5155+ struct inode *h_inode = d_inode(h_src);
5156+ const struct inode_operations *h_iop = h_inode->i_op;
1facf9fc 5157+
5158+ err = -ENOSYS;
5527c038 5159+ if (unlikely(!h_iop->readlink))
1facf9fc 5160+ goto out;
5161+
5162+ err = -ENOMEM;
537831f9 5163+ sym.k = (void *)__get_free_page(GFP_NOFS);
b752ccd1 5164+ if (unlikely(!sym.k))
1facf9fc 5165+ goto out;
5166+
9dbd164d 5167+ /* unnecessary to support mmap_sem since symlink is not mmap-able */
1facf9fc 5168+ old_fs = get_fs();
5169+ set_fs(KERNEL_DS);
5527c038 5170+ symlen = h_iop->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 5171+ err = symlen;
5172+ set_fs(old_fs);
5173+
5174+ if (symlen > 0) {
b752ccd1
AM
5175+ sym.k[symlen] = 0;
5176+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 5177+ }
f0c0a007 5178+ au_delayed_free_page((unsigned long)sym.k);
1facf9fc 5179+
4f0767ce 5180+out:
1facf9fc 5181+ return err;
5182+}
5183+
8cdd5066
JR
5184+/*
5185+ * regardless of the 'acl' option, reset all ACLs.
5186+ * All ACLs will be copied up later from the original entry on the lower branch.
5187+ */
5188+static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode)
5189+{
5190+ int err;
5191+ struct dentry *h_dentry;
5192+ struct inode *h_inode;
5193+
5194+ h_dentry = h_path->dentry;
5195+ h_inode = d_inode(h_dentry);
5196+ /* forget_all_cached_acls(h_inode)); */
5197+ err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
5198+ AuTraceErr(err);
5199+ if (err == -EOPNOTSUPP)
5200+ err = 0;
5201+ if (!err)
5202+ err = vfsub_acl_chmod(h_inode, mode);
5203+
5204+ AuTraceErr(err);
5205+ return err;
5206+}
5207+
5208+static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
5209+ struct inode *h_dir, struct path *h_path)
5210+{
5211+ int err;
5212+ struct inode *dir, *inode;
5213+
5214+ err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT);
5215+ AuTraceErr(err);
5216+ if (err == -EOPNOTSUPP)
5217+ err = 0;
5218+ if (unlikely(err))
5219+ goto out;
5220+
5221+ /*
5222+ * strange behaviour from the user's view,
5223+ * particularly in the setattr case
5224+ */
5225+ dir = d_inode(dst_parent);
5afbbe0d 5226+ if (au_ibtop(dir) == cpg->bdst)
8cdd5066
JR
5227+ au_cpup_attr_nlink(dir, /*force*/1);
5228+ inode = d_inode(cpg->dentry);
5229+ au_cpup_attr_nlink(inode, /*force*/1);
5230+
5231+out:
5232+ return err;
5233+}
5234+
1facf9fc 5235+static noinline_for_stack
c2b27bf2 5236+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
86dc4139 5237+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 5238+{
5239+ int err;
5240+ umode_t mode;
5241+ unsigned int mnt_flags;
076b876e 5242+ unsigned char isdir, isreg, force;
c2b27bf2 5243+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5244+ struct au_dtime dt;
5245+ struct path h_path;
5246+ struct dentry *h_src, *h_dst, *h_parent;
8cdd5066 5247+ struct inode *h_inode, *h_dir;
1facf9fc 5248+ struct super_block *sb;
5249+
5250+ /* bsrc branch can be ro/rw. */
c2b27bf2 5251+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038
JR
5252+ h_inode = d_inode(h_src);
5253+ AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
1facf9fc 5254+
5255+ /* try stopping to be referenced while we are creating */
c2b27bf2
AM
5256+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5257+ if (au_ftest_cpup(cpg->flags, RENAME))
86dc4139
AM
5258+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
5259+ AUFS_WH_PFX_LEN));
1facf9fc 5260+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5261+ h_dir = d_inode(h_parent);
1facf9fc 5262+ IMustLock(h_dir);
5263+ AuDebugOn(h_parent != h_dst->d_parent);
5264+
c2b27bf2
AM
5265+ sb = cpg->dentry->d_sb;
5266+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
1facf9fc 5267+ if (do_dt) {
5268+ h_path.dentry = h_parent;
5269+ au_dtime_store(&dt, dst_parent, &h_path);
5270+ }
5271+ h_path.dentry = h_dst;
5272+
076b876e 5273+ isreg = 0;
1facf9fc 5274+ isdir = 0;
5275+ mode = h_inode->i_mode;
5276+ switch (mode & S_IFMT) {
5277+ case S_IFREG:
076b876e 5278+ isreg = 1;
8cdd5066 5279+ err = vfsub_create(h_dir, &h_path, S_IRUSR | S_IWUSR,
b4510431 5280+ /*want_excl*/true);
1facf9fc 5281+ if (!err)
c2b27bf2 5282+ err = au_do_cpup_regular(cpg, h_src_attr);
1facf9fc 5283+ break;
5284+ case S_IFDIR:
5285+ isdir = 1;
5286+ err = vfsub_mkdir(h_dir, &h_path, mode);
8cdd5066
JR
5287+ if (!err)
5288+ err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
1facf9fc 5289+ break;
5290+ case S_IFLNK:
5291+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
5292+ break;
5293+ case S_IFCHR:
5294+ case S_IFBLK:
5295+ AuDebugOn(!capable(CAP_MKNOD));
5296+ /*FALLTHROUGH*/
5297+ case S_IFIFO:
5298+ case S_IFSOCK:
5299+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
5300+ break;
5301+ default:
5302+ AuIOErr("Unknown inode type 0%o\n", mode);
5303+ err = -EIO;
5304+ }
8cdd5066
JR
5305+ if (!err)
5306+ err = au_reset_acl(h_dir, &h_path, mode);
1facf9fc 5307+
5308+ mnt_flags = au_mntflags(sb);
5309+ if (!au_opt_test(mnt_flags, UDBA_NONE)
5310+ && !isdir
5311+ && au_opt_test(mnt_flags, XINO)
38d290e6
JR
5312+ && (h_inode->i_nlink == 1
5313+ || (h_inode->i_state & I_LINKABLE))
1facf9fc 5314+ /* todo: unnecessary? */
5527c038 5315+ /* && d_inode(cpg->dentry)->i_nlink == 1 */
c2b27bf2
AM
5316+ && cpg->bdst < cpg->bsrc
5317+ && !au_ftest_cpup(cpg->flags, KEEPLINO))
5318+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 5319+ /* ignore this error */
5320+
076b876e
AM
5321+ if (!err) {
5322+ force = 0;
5323+ if (isreg) {
5324+ force = !!cpg->len;
5325+ if (cpg->len == -1)
5326+ force = !!i_size_read(h_inode);
5327+ }
5328+ au_fhsm_wrote(sb, cpg->bdst, force);
5329+ }
5330+
1facf9fc 5331+ if (do_dt)
5332+ au_dtime_revert(&dt);
5333+ return err;
5334+}
5335+
392086de 5336+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
86dc4139
AM
5337+{
5338+ int err;
392086de 5339+ struct dentry *dentry, *h_dentry, *h_parent, *parent;
86dc4139 5340+ struct inode *h_dir;
392086de 5341+ aufs_bindex_t bdst;
86dc4139 5342+
392086de
AM
5343+ dentry = cpg->dentry;
5344+ bdst = cpg->bdst;
5345+ h_dentry = au_h_dptr(dentry, bdst);
5346+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
5347+ dget(h_dentry);
5348+ au_set_h_dptr(dentry, bdst, NULL);
5349+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
5350+ if (!err)
5351+ h_path->dentry = dget(au_h_dptr(dentry, bdst));
86dc4139 5352+ au_set_h_dptr(dentry, bdst, h_dentry);
392086de
AM
5353+ } else {
5354+ err = 0;
5355+ parent = dget_parent(dentry);
5356+ h_parent = au_h_dptr(parent, bdst);
5357+ dput(parent);
5358+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
5359+ if (IS_ERR(h_path->dentry))
5360+ err = PTR_ERR(h_path->dentry);
86dc4139 5361+ }
392086de
AM
5362+ if (unlikely(err))
5363+ goto out;
86dc4139 5364+
86dc4139 5365+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 5366+ h_dir = d_inode(h_parent);
86dc4139 5367+ IMustLock(h_dir);
523b37e3
AM
5368+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
5369+ /* no delegation since it is just created */
f2c43d5f
AM
5370+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL,
5371+ /*flags*/0);
86dc4139
AM
5372+ dput(h_path->dentry);
5373+
5374+out:
5375+ return err;
5376+}
5377+
1facf9fc 5378+/*
5379+ * copyup the @dentry from @bsrc to @bdst.
5380+ * the caller must set both of the lower dentries.
5381+ * @len is for truncating; when it is -1, copy up the entire file.
5382+ * in link/rename cases, @dst_parent may be different from the real one.
c2b27bf2 5383+ * basic->bsrc can be larger than basic->bdst.
f2c43d5f
AM
5384+ * aufs doesn't touch the credential so
5385+ * security_inode_copy_up{,_xattr}() are unnecessary.
1facf9fc 5386+ */
c2b27bf2 5387+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5388+{
5389+ int err, rerr;
5afbbe0d 5390+ aufs_bindex_t old_ibtop;
1facf9fc 5391+ unsigned char isdir, plink;
1facf9fc 5392+ struct dentry *h_src, *h_dst, *h_parent;
5527c038 5393+ struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
1facf9fc 5394+ struct super_block *sb;
86dc4139 5395+ struct au_branch *br;
c2b27bf2
AM
5396+ /* to reduce stack size */
5397+ struct {
5398+ struct au_dtime dt;
5399+ struct path h_path;
5400+ struct au_cpup_reg_attr h_src_attr;
5401+ } *a;
1facf9fc 5402+
c2b27bf2
AM
5403+ err = -ENOMEM;
5404+ a = kmalloc(sizeof(*a), GFP_NOFS);
5405+ if (unlikely(!a))
5406+ goto out;
5407+ a->h_src_attr.valid = 0;
1facf9fc 5408+
c2b27bf2
AM
5409+ sb = cpg->dentry->d_sb;
5410+ br = au_sbr(sb, cpg->bdst);
5411+ a->h_path.mnt = au_br_mnt(br);
5412+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
1facf9fc 5413+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5414+ h_dir = d_inode(h_parent);
1facf9fc 5415+ IMustLock(h_dir);
5416+
c2b27bf2 5417+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5418+ inode = d_inode(cpg->dentry);
1facf9fc 5419+
5420+ if (!dst_parent)
c2b27bf2 5421+ dst_parent = dget_parent(cpg->dentry);
1facf9fc 5422+ else
5423+ dget(dst_parent);
5424+
5425+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
c2b27bf2 5426+ dst_inode = au_h_iptr(inode, cpg->bdst);
1facf9fc 5427+ if (dst_inode) {
5428+ if (unlikely(!plink)) {
5429+ err = -EIO;
027c5e7a
AM
5430+ AuIOErr("hi%lu(i%lu) exists on b%d "
5431+ "but plink is disabled\n",
c2b27bf2
AM
5432+ dst_inode->i_ino, inode->i_ino, cpg->bdst);
5433+ goto out_parent;
1facf9fc 5434+ }
5435+
5436+ if (dst_inode->i_nlink) {
c2b27bf2 5437+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5438+
c2b27bf2 5439+ h_src = au_plink_lkup(inode, cpg->bdst);
1facf9fc 5440+ err = PTR_ERR(h_src);
5441+ if (IS_ERR(h_src))
c2b27bf2 5442+ goto out_parent;
5527c038 5443+ if (unlikely(d_is_negative(h_src))) {
1facf9fc 5444+ err = -EIO;
79b8bda9 5445+ AuIOErr("i%lu exists on b%d "
027c5e7a 5446+ "but not pseudo-linked\n",
79b8bda9 5447+ inode->i_ino, cpg->bdst);
1facf9fc 5448+ dput(h_src);
c2b27bf2 5449+ goto out_parent;
1facf9fc 5450+ }
5451+
5452+ if (do_dt) {
c2b27bf2
AM
5453+ a->h_path.dentry = h_parent;
5454+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
1facf9fc 5455+ }
86dc4139 5456+
c2b27bf2 5457+ a->h_path.dentry = h_dst;
523b37e3
AM
5458+ delegated = NULL;
5459+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
c2b27bf2 5460+ if (!err && au_ftest_cpup(cpg->flags, RENAME))
392086de 5461+ err = au_do_ren_after_cpup(cpg, &a->h_path);
1facf9fc 5462+ if (do_dt)
c2b27bf2 5463+ au_dtime_revert(&a->dt);
523b37e3
AM
5464+ if (unlikely(err == -EWOULDBLOCK)) {
5465+ pr_warn("cannot retry for NFSv4 delegation"
5466+ " for an internal link\n");
5467+ iput(delegated);
5468+ }
1facf9fc 5469+ dput(h_src);
c2b27bf2 5470+ goto out_parent;
1facf9fc 5471+ } else
5472+ /* todo: cpup_wh_file? */
5473+ /* udba work */
4a4d8108 5474+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 5475+ }
5476+
86dc4139 5477+ isdir = S_ISDIR(inode->i_mode);
5afbbe0d 5478+ old_ibtop = au_ibtop(inode);
c2b27bf2 5479+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
1facf9fc 5480+ if (unlikely(err))
86dc4139 5481+ goto out_rev;
5527c038 5482+ dst_inode = d_inode(h_dst);
febd17d6 5483+ inode_lock_nested(dst_inode, AuLsc_I_CHILD2);
86dc4139 5484+ /* todo: necessary? */
c2b27bf2 5485+ /* au_pin_hdir_unlock(cpg->pin); */
1facf9fc 5486+
c2b27bf2 5487+ err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
86dc4139
AM
5488+ if (unlikely(err)) {
5489+ /* todo: necessary? */
c2b27bf2 5490+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
febd17d6 5491+ inode_unlock(dst_inode);
86dc4139
AM
5492+ goto out_rev;
5493+ }
5494+
5afbbe0d 5495+ if (cpg->bdst < old_ibtop) {
86dc4139 5496+ if (S_ISREG(inode->i_mode)) {
c2b27bf2 5497+ err = au_dy_iaop(inode, cpg->bdst, dst_inode);
86dc4139 5498+ if (unlikely(err)) {
c2b27bf2
AM
5499+ /* ignore an error */
5500+ /* au_pin_hdir_relock(cpg->pin); */
febd17d6 5501+ inode_unlock(dst_inode);
86dc4139 5502+ goto out_rev;
4a4d8108 5503+ }
4a4d8108 5504+ }
5afbbe0d 5505+ au_set_ibtop(inode, cpg->bdst);
c2b27bf2 5506+ } else
5afbbe0d 5507+ au_set_ibbot(inode, cpg->bdst);
c2b27bf2 5508+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
86dc4139
AM
5509+ au_hi_flags(inode, isdir));
5510+
5511+ /* todo: necessary? */
c2b27bf2 5512+ /* err = au_pin_hdir_relock(cpg->pin); */
febd17d6 5513+ inode_unlock(dst_inode);
86dc4139
AM
5514+ if (unlikely(err))
5515+ goto out_rev;
5516+
5527c038 5517+ src_inode = d_inode(h_src);
86dc4139 5518+ if (!isdir
5527c038
JR
5519+ && (src_inode->i_nlink > 1
5520+ || src_inode->i_state & I_LINKABLE)
86dc4139 5521+ && plink)
c2b27bf2 5522+ au_plink_append(inode, cpg->bdst, h_dst);
86dc4139 5523+
c2b27bf2
AM
5524+ if (au_ftest_cpup(cpg->flags, RENAME)) {
5525+ a->h_path.dentry = h_dst;
392086de 5526+ err = au_do_ren_after_cpup(cpg, &a->h_path);
86dc4139
AM
5527+ }
5528+ if (!err)
c2b27bf2 5529+ goto out_parent; /* success */
1facf9fc 5530+
5531+ /* revert */
4a4d8108 5532+out_rev:
c2b27bf2
AM
5533+ a->h_path.dentry = h_parent;
5534+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5535+ a->h_path.dentry = h_dst;
86dc4139 5536+ rerr = 0;
5527c038 5537+ if (d_is_positive(h_dst)) {
523b37e3
AM
5538+ if (!isdir) {
5539+ /* no delegation since it is just created */
5540+ rerr = vfsub_unlink(h_dir, &a->h_path,
5541+ /*delegated*/NULL, /*force*/0);
5542+ } else
c2b27bf2 5543+ rerr = vfsub_rmdir(h_dir, &a->h_path);
86dc4139 5544+ }
c2b27bf2 5545+ au_dtime_revert(&a->dt);
1facf9fc 5546+ if (rerr) {
5547+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
5548+ err = -EIO;
5549+ }
c2b27bf2 5550+out_parent:
1facf9fc 5551+ dput(dst_parent);
f0c0a007 5552+ au_delayed_kfree(a);
c2b27bf2 5553+out:
1facf9fc 5554+ return err;
5555+}
5556+
7e9cd9fe 5557+#if 0 /* reserved */
1facf9fc 5558+struct au_cpup_single_args {
5559+ int *errp;
c2b27bf2 5560+ struct au_cp_generic *cpg;
1facf9fc 5561+ struct dentry *dst_parent;
5562+};
5563+
5564+static void au_call_cpup_single(void *args)
5565+{
5566+ struct au_cpup_single_args *a = args;
86dc4139 5567+
c2b27bf2
AM
5568+ au_pin_hdir_acquire_nest(a->cpg->pin);
5569+ *a->errp = au_cpup_single(a->cpg, a->dst_parent);
5570+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5571+}
c2b27bf2 5572+#endif
1facf9fc 5573+
53392da6
AM
5574+/*
5575+ * prevent SIGXFSZ in copy-up.
5576+ * testing CAP_MKNOD is for generic fs,
5577+ * but CAP_FSETID is for xfs only, currently.
5578+ */
86dc4139 5579+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
53392da6
AM
5580+{
5581+ int do_sio;
86dc4139
AM
5582+ struct super_block *sb;
5583+ struct inode *h_dir;
53392da6
AM
5584+
5585+ do_sio = 0;
86dc4139 5586+ sb = au_pinned_parent(pin)->d_sb;
53392da6
AM
5587+ if (!au_wkq_test()
5588+ && (!au_sbi(sb)->si_plink_maint_pid
5589+ || au_plink_maint(sb, AuLock_NOPLM))) {
5590+ switch (mode & S_IFMT) {
5591+ case S_IFREG:
5592+ /* no condition about RLIMIT_FSIZE and the file size */
5593+ do_sio = 1;
5594+ break;
5595+ case S_IFCHR:
5596+ case S_IFBLK:
5597+ do_sio = !capable(CAP_MKNOD);
5598+ break;
5599+ }
5600+ if (!do_sio)
5601+ do_sio = ((mode & (S_ISUID | S_ISGID))
5602+ && !capable(CAP_FSETID));
86dc4139
AM
5603+ /* this workaround may be removed in the future */
5604+ if (!do_sio) {
5605+ h_dir = au_pinned_h_dir(pin);
5606+ do_sio = h_dir->i_mode & S_ISVTX;
5607+ }
53392da6
AM
5608+ }
5609+
5610+ return do_sio;
5611+}
5612+
7e9cd9fe 5613+#if 0 /* reserved */
c2b27bf2 5614+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5615+{
5616+ int err, wkq_err;
1facf9fc 5617+ struct dentry *h_dentry;
5618+
c2b27bf2 5619+ h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5620+ if (!au_cpup_sio_test(pin, d_inode(h_dentry)->i_mode))
c2b27bf2 5621+ err = au_cpup_single(cpg, dst_parent);
1facf9fc 5622+ else {
5623+ struct au_cpup_single_args args = {
5624+ .errp = &err,
c2b27bf2
AM
5625+ .cpg = cpg,
5626+ .dst_parent = dst_parent
1facf9fc 5627+ };
5628+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
5629+ if (unlikely(wkq_err))
5630+ err = wkq_err;
5631+ }
5632+
5633+ return err;
5634+}
c2b27bf2 5635+#endif
1facf9fc 5636+
5637+/*
5638+ * copyup the @dentry from the first active lower branch to @bdst,
5639+ * using au_cpup_single().
5640+ */
c2b27bf2 5641+static int au_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5642+{
5643+ int err;
c2b27bf2
AM
5644+ unsigned int flags_orig;
5645+ struct dentry *dentry;
5646+
5647+ AuDebugOn(cpg->bsrc < 0);
1facf9fc 5648+
c2b27bf2 5649+ dentry = cpg->dentry;
86dc4139 5650+ DiMustWriteLock(dentry);
1facf9fc 5651+
c2b27bf2 5652+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
1facf9fc 5653+ if (!err) {
c2b27bf2
AM
5654+ flags_orig = cpg->flags;
5655+ au_fset_cpup(cpg->flags, RENAME);
5656+ err = au_cpup_single(cpg, NULL);
5657+ cpg->flags = flags_orig;
1facf9fc 5658+ if (!err)
5659+ return 0; /* success */
5660+
5661+ /* revert */
c2b27bf2 5662+ au_set_h_dptr(dentry, cpg->bdst, NULL);
5afbbe0d 5663+ au_set_dbtop(dentry, cpg->bsrc);
1facf9fc 5664+ }
5665+
5666+ return err;
5667+}
5668+
5669+struct au_cpup_simple_args {
5670+ int *errp;
c2b27bf2 5671+ struct au_cp_generic *cpg;
1facf9fc 5672+};
5673+
5674+static void au_call_cpup_simple(void *args)
5675+{
5676+ struct au_cpup_simple_args *a = args;
86dc4139 5677+
c2b27bf2
AM
5678+ au_pin_hdir_acquire_nest(a->cpg->pin);
5679+ *a->errp = au_cpup_simple(a->cpg);
5680+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5681+}
5682+
c2b27bf2 5683+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5684+{
5685+ int err, wkq_err;
c2b27bf2
AM
5686+ struct dentry *dentry, *parent;
5687+ struct file *h_file;
1facf9fc 5688+ struct inode *h_dir;
5689+
c2b27bf2
AM
5690+ dentry = cpg->dentry;
5691+ h_file = NULL;
5692+ if (au_ftest_cpup(cpg->flags, HOPEN)) {
5693+ AuDebugOn(cpg->bsrc < 0);
392086de 5694+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
c2b27bf2
AM
5695+ err = PTR_ERR(h_file);
5696+ if (IS_ERR(h_file))
5697+ goto out;
5698+ }
5699+
1facf9fc 5700+ parent = dget_parent(dentry);
5527c038 5701+ h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
53392da6 5702+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
5527c038 5703+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5704+ err = au_cpup_simple(cpg);
1facf9fc 5705+ else {
5706+ struct au_cpup_simple_args args = {
5707+ .errp = &err,
c2b27bf2 5708+ .cpg = cpg
1facf9fc 5709+ };
5710+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
5711+ if (unlikely(wkq_err))
5712+ err = wkq_err;
5713+ }
5714+
5715+ dput(parent);
c2b27bf2
AM
5716+ if (h_file)
5717+ au_h_open_post(dentry, cpg->bsrc, h_file);
5718+
5719+out:
1facf9fc 5720+ return err;
5721+}
5722+
c2b27bf2 5723+int au_sio_cpup_simple(struct au_cp_generic *cpg)
367653fa 5724+{
5afbbe0d 5725+ aufs_bindex_t bsrc, bbot;
c2b27bf2 5726+ struct dentry *dentry, *h_dentry;
367653fa 5727+
c2b27bf2
AM
5728+ if (cpg->bsrc < 0) {
5729+ dentry = cpg->dentry;
5afbbe0d
AM
5730+ bbot = au_dbbot(dentry);
5731+ for (bsrc = cpg->bdst + 1; bsrc <= bbot; bsrc++) {
c2b27bf2
AM
5732+ h_dentry = au_h_dptr(dentry, bsrc);
5733+ if (h_dentry) {
5527c038 5734+ AuDebugOn(d_is_negative(h_dentry));
c2b27bf2
AM
5735+ break;
5736+ }
5737+ }
5afbbe0d 5738+ AuDebugOn(bsrc > bbot);
c2b27bf2 5739+ cpg->bsrc = bsrc;
367653fa 5740+ }
c2b27bf2
AM
5741+ AuDebugOn(cpg->bsrc <= cpg->bdst);
5742+ return au_do_sio_cpup_simple(cpg);
5743+}
367653fa 5744+
c2b27bf2
AM
5745+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
5746+{
5747+ AuDebugOn(cpg->bdst <= cpg->bsrc);
5748+ return au_do_sio_cpup_simple(cpg);
367653fa
AM
5749+}
5750+
1facf9fc 5751+/* ---------------------------------------------------------------------- */
5752+
5753+/*
5754+ * copyup the deleted file for writing.
5755+ */
c2b27bf2
AM
5756+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
5757+ struct file *file)
1facf9fc 5758+{
5759+ int err;
c2b27bf2
AM
5760+ unsigned int flags_orig;
5761+ aufs_bindex_t bsrc_orig;
c2b27bf2 5762+ struct au_dinfo *dinfo;
5afbbe0d
AM
5763+ struct {
5764+ struct au_hdentry *hd;
5765+ struct dentry *h_dentry;
5766+ } hdst, hsrc;
1facf9fc 5767+
c2b27bf2 5768+ dinfo = au_di(cpg->dentry);
1308ab2a 5769+ AuRwMustWriteLock(&dinfo->di_rwsem);
5770+
c2b27bf2 5771+ bsrc_orig = cpg->bsrc;
5afbbe0d
AM
5772+ cpg->bsrc = dinfo->di_btop;
5773+ hdst.hd = au_hdentry(dinfo, cpg->bdst);
5774+ hdst.h_dentry = hdst.hd->hd_dentry;
5775+ hdst.hd->hd_dentry = wh_dentry;
5776+ dinfo->di_btop = cpg->bdst;
5777+
5778+ hsrc.h_dentry = NULL;
027c5e7a 5779+ if (file) {
5afbbe0d
AM
5780+ hsrc.hd = au_hdentry(dinfo, cpg->bsrc);
5781+ hsrc.h_dentry = hsrc.hd->hd_dentry;
5782+ hsrc.hd->hd_dentry = au_hf_top(file)->f_path.dentry;
027c5e7a 5783+ }
c2b27bf2
AM
5784+ flags_orig = cpg->flags;
5785+ cpg->flags = !AuCpup_DTIME;
5786+ err = au_cpup_single(cpg, /*h_parent*/NULL);
5787+ cpg->flags = flags_orig;
027c5e7a
AM
5788+ if (file) {
5789+ if (!err)
5790+ err = au_reopen_nondir(file);
5afbbe0d 5791+ hsrc.hd->hd_dentry = hsrc.h_dentry;
1facf9fc 5792+ }
5afbbe0d
AM
5793+ hdst.hd->hd_dentry = hdst.h_dentry;
5794+ dinfo->di_btop = cpg->bsrc;
c2b27bf2 5795+ cpg->bsrc = bsrc_orig;
1facf9fc 5796+
5797+ return err;
5798+}
5799+
c2b27bf2 5800+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5801+{
5802+ int err;
c2b27bf2 5803+ aufs_bindex_t bdst;
1facf9fc 5804+ struct au_dtime dt;
c2b27bf2 5805+ struct dentry *dentry, *parent, *h_parent, *wh_dentry;
1facf9fc 5806+ struct au_branch *br;
5807+ struct path h_path;
5808+
c2b27bf2
AM
5809+ dentry = cpg->dentry;
5810+ bdst = cpg->bdst;
1facf9fc 5811+ br = au_sbr(dentry->d_sb, bdst);
5812+ parent = dget_parent(dentry);
5813+ h_parent = au_h_dptr(parent, bdst);
5814+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
5815+ err = PTR_ERR(wh_dentry);
5816+ if (IS_ERR(wh_dentry))
5817+ goto out;
5818+
5819+ h_path.dentry = h_parent;
86dc4139 5820+ h_path.mnt = au_br_mnt(br);
1facf9fc 5821+ au_dtime_store(&dt, parent, &h_path);
c2b27bf2 5822+ err = au_do_cpup_wh(cpg, wh_dentry, file);
1facf9fc 5823+ if (unlikely(err))
5824+ goto out_wh;
5825+
5826+ dget(wh_dentry);
5827+ h_path.dentry = wh_dentry;
2000de60 5828+ if (!d_is_dir(wh_dentry)) {
523b37e3 5829+ /* no delegation since it is just created */
5527c038 5830+ err = vfsub_unlink(d_inode(h_parent), &h_path,
523b37e3
AM
5831+ /*delegated*/NULL, /*force*/0);
5832+ } else
5527c038 5833+ err = vfsub_rmdir(d_inode(h_parent), &h_path);
1facf9fc 5834+ if (unlikely(err)) {
523b37e3
AM
5835+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
5836+ wh_dentry, err);
1facf9fc 5837+ err = -EIO;
5838+ }
5839+ au_dtime_revert(&dt);
5527c038 5840+ au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
1facf9fc 5841+
4f0767ce 5842+out_wh:
1facf9fc 5843+ dput(wh_dentry);
4f0767ce 5844+out:
1facf9fc 5845+ dput(parent);
5846+ return err;
5847+}
5848+
5849+struct au_cpup_wh_args {
5850+ int *errp;
c2b27bf2 5851+ struct au_cp_generic *cpg;
1facf9fc 5852+ struct file *file;
5853+};
5854+
5855+static void au_call_cpup_wh(void *args)
5856+{
5857+ struct au_cpup_wh_args *a = args;
86dc4139 5858+
c2b27bf2
AM
5859+ au_pin_hdir_acquire_nest(a->cpg->pin);
5860+ *a->errp = au_cpup_wh(a->cpg, a->file);
5861+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5862+}
5863+
c2b27bf2 5864+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5865+{
5866+ int err, wkq_err;
c2b27bf2 5867+ aufs_bindex_t bdst;
c1595e42 5868+ struct dentry *dentry, *parent, *h_orph, *h_parent;
86dc4139 5869+ struct inode *dir, *h_dir, *h_tmpdir;
1facf9fc 5870+ struct au_wbr *wbr;
c2b27bf2 5871+ struct au_pin wh_pin, *pin_orig;
1facf9fc 5872+
c2b27bf2
AM
5873+ dentry = cpg->dentry;
5874+ bdst = cpg->bdst;
1facf9fc 5875+ parent = dget_parent(dentry);
5527c038 5876+ dir = d_inode(parent);
1facf9fc 5877+ h_orph = NULL;
5878+ h_parent = NULL;
5879+ h_dir = au_igrab(au_h_iptr(dir, bdst));
5880+ h_tmpdir = h_dir;
c2b27bf2 5881+ pin_orig = NULL;
1facf9fc 5882+ if (!h_dir->i_nlink) {
5883+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
5884+ h_orph = wbr->wbr_orph;
5885+
5886+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 5887+ au_set_h_dptr(parent, bdst, dget(h_orph));
5527c038 5888+ h_tmpdir = d_inode(h_orph);
1facf9fc 5889+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
5890+
febd17d6 5891+ inode_lock_nested(h_tmpdir, AuLsc_I_PARENT3);
4a4d8108 5892+ /* todo: au_h_open_pre()? */
86dc4139 5893+
c2b27bf2 5894+ pin_orig = cpg->pin;
86dc4139 5895+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
c2b27bf2
AM
5896+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
5897+ cpg->pin = &wh_pin;
1facf9fc 5898+ }
5899+
53392da6 5900+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
5527c038 5901+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5902+ err = au_cpup_wh(cpg, file);
1facf9fc 5903+ else {
5904+ struct au_cpup_wh_args args = {
5905+ .errp = &err,
c2b27bf2
AM
5906+ .cpg = cpg,
5907+ .file = file
1facf9fc 5908+ };
5909+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
5910+ if (unlikely(wkq_err))
5911+ err = wkq_err;
5912+ }
5913+
5914+ if (h_orph) {
febd17d6 5915+ inode_unlock(h_tmpdir);
4a4d8108 5916+ /* todo: au_h_open_post()? */
1facf9fc 5917+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 5918+ au_set_h_dptr(parent, bdst, h_parent);
c2b27bf2
AM
5919+ AuDebugOn(!pin_orig);
5920+ cpg->pin = pin_orig;
1facf9fc 5921+ }
5922+ iput(h_dir);
5923+ dput(parent);
5924+
5925+ return err;
5926+}
5927+
5928+/* ---------------------------------------------------------------------- */
5929+
5930+/*
5931+ * generic routine for both copy-up and copy-down.
5932+ */
5933+/* cf. revalidate function in file.c */
5934+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5935+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5936+ struct au_pin *pin,
1facf9fc 5937+ struct dentry *h_parent, void *arg),
5938+ void *arg)
5939+{
5940+ int err;
5941+ struct au_pin pin;
5527c038 5942+ struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
1facf9fc 5943+
5944+ err = 0;
5945+ parent = dget_parent(dentry);
5946+ if (IS_ROOT(parent))
5947+ goto out;
5948+
5949+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
5950+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
5951+
5952+ /* do not use au_dpage */
5953+ real_parent = parent;
5954+ while (1) {
5955+ dput(parent);
5956+ parent = dget_parent(dentry);
5957+ h_parent = au_h_dptr(parent, bdst);
5958+ if (h_parent)
5959+ goto out; /* success */
5960+
5961+ /* find the top dir which needs to be copied up */
5962+ do {
5963+ d = parent;
5964+ dput(parent);
5965+ parent = dget_parent(d);
5966+ di_read_lock_parent3(parent, !AuLock_IR);
5967+ h_parent = au_h_dptr(parent, bdst);
5968+ di_read_unlock(parent, !AuLock_IR);
5969+ } while (!h_parent);
5970+
5971+ if (d != real_parent)
5972+ di_write_lock_child3(d);
5973+
5974+ /* somebody else might have created it while we were sleeping */
5527c038
JR
5975+ h_dentry = au_h_dptr(d, bdst);
5976+ if (!h_dentry || d_is_negative(h_dentry)) {
5977+ if (h_dentry)
5afbbe0d 5978+ au_update_dbtop(d);
1facf9fc 5979+
5980+ au_pin_set_dentry(&pin, d);
5981+ err = au_do_pin(&pin);
5982+ if (!err) {
86dc4139 5983+ err = cp(d, bdst, &pin, h_parent, arg);
1facf9fc 5984+ au_unpin(&pin);
5985+ }
5986+ }
5987+
5988+ if (d != real_parent)
5989+ di_write_unlock(d);
5990+ if (unlikely(err))
5991+ break;
5992+ }
5993+
4f0767ce 5994+out:
1facf9fc 5995+ dput(parent);
5996+ return err;
5997+}
5998+
5999+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 6000+ struct au_pin *pin,
2000de60 6001+ struct dentry *h_parent __maybe_unused,
1facf9fc 6002+ void *arg __maybe_unused)
6003+{
c2b27bf2
AM
6004+ struct au_cp_generic cpg = {
6005+ .dentry = dentry,
6006+ .bdst = bdst,
6007+ .bsrc = -1,
6008+ .len = 0,
6009+ .pin = pin,
6010+ .flags = AuCpup_DTIME
6011+ };
6012+ return au_sio_cpup_simple(&cpg);
1facf9fc 6013+}
6014+
6015+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
6016+{
6017+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
6018+}
6019+
6020+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
6021+{
6022+ int err;
6023+ struct dentry *parent;
6024+ struct inode *dir;
6025+
6026+ parent = dget_parent(dentry);
5527c038 6027+ dir = d_inode(parent);
1facf9fc 6028+ err = 0;
6029+ if (au_h_iptr(dir, bdst))
6030+ goto out;
6031+
6032+ di_read_unlock(parent, AuLock_IR);
6033+ di_write_lock_parent(parent);
6034+ /* someone else might have changed our inode while we were sleeping */
6035+ if (!au_h_iptr(dir, bdst))
6036+ err = au_cpup_dirs(dentry, bdst);
6037+ di_downgrade_lock(parent, AuLock_IR);
6038+
4f0767ce 6039+out:
1facf9fc 6040+ dput(parent);
6041+ return err;
6042+}
7f207e10
AM
6043diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
6044--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 6045+++ linux/fs/aufs/cpup.h 2016-10-09 16:55:36.486034798 +0200
523b37e3 6046@@ -0,0 +1,94 @@
1facf9fc 6047+/*
8cdd5066 6048+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6049+ *
6050+ * This program, aufs is free software; you can redistribute it and/or modify
6051+ * it under the terms of the GNU General Public License as published by
6052+ * the Free Software Foundation; either version 2 of the License, or
6053+ * (at your option) any later version.
dece6358
AM
6054+ *
6055+ * This program is distributed in the hope that it will be useful,
6056+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6057+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6058+ * GNU General Public License for more details.
6059+ *
6060+ * You should have received a copy of the GNU General Public License
523b37e3 6061+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6062+ */
6063+
6064+/*
6065+ * copy-up/down functions
6066+ */
6067+
6068+#ifndef __AUFS_CPUP_H__
6069+#define __AUFS_CPUP_H__
6070+
6071+#ifdef __KERNEL__
6072+
dece6358 6073+#include <linux/path.h>
1facf9fc 6074+
dece6358
AM
6075+struct inode;
6076+struct file;
86dc4139 6077+struct au_pin;
dece6358 6078+
86dc4139 6079+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
1facf9fc 6080+void au_cpup_attr_timesizes(struct inode *inode);
6081+void au_cpup_attr_nlink(struct inode *inode, int force);
6082+void au_cpup_attr_changeable(struct inode *inode);
6083+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
6084+void au_cpup_attr_all(struct inode *inode, int force);
6085+
6086+/* ---------------------------------------------------------------------- */
6087+
c2b27bf2
AM
6088+struct au_cp_generic {
6089+ struct dentry *dentry;
6090+ aufs_bindex_t bdst, bsrc;
6091+ loff_t len;
6092+ struct au_pin *pin;
6093+ unsigned int flags;
6094+};
6095+
1facf9fc 6096+/* cpup flags */
392086de
AM
6097+#define AuCpup_DTIME 1 /* do dtime_store/revert */
6098+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
6099+ for link(2) */
6100+#define AuCpup_RENAME (1 << 2) /* rename after cpup */
6101+#define AuCpup_HOPEN (1 << 3) /* call h_open_pre/post() in
6102+ cpup */
6103+#define AuCpup_OVERWRITE (1 << 4) /* allow overwriting the
6104+ existing entry */
6105+#define AuCpup_RWDST (1 << 5) /* force write target even if
6106+ the branch is marked as RO */
c2b27bf2 6107+
1facf9fc 6108+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
7f207e10
AM
6109+#define au_fset_cpup(flags, name) \
6110+ do { (flags) |= AuCpup_##name; } while (0)
6111+#define au_fclr_cpup(flags, name) \
6112+ do { (flags) &= ~AuCpup_##name; } while (0)
1facf9fc 6113+
6114+int au_copy_file(struct file *dst, struct file *src, loff_t len);
c2b27bf2
AM
6115+int au_sio_cpup_simple(struct au_cp_generic *cpg);
6116+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
6117+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
1facf9fc 6118+
6119+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
6120+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 6121+ struct au_pin *pin,
1facf9fc 6122+ struct dentry *h_parent, void *arg),
6123+ void *arg);
6124+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6125+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6126+
6127+/* ---------------------------------------------------------------------- */
6128+
6129+/* keep timestamps when copyup */
6130+struct au_dtime {
6131+ struct dentry *dt_dentry;
6132+ struct path dt_h_path;
6133+ struct timespec dt_atime, dt_mtime;
6134+};
6135+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
6136+ struct path *h_path);
6137+void au_dtime_revert(struct au_dtime *dt);
6138+
6139+#endif /* __KERNEL__ */
6140+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
6141diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
6142--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
6143+++ linux/fs/aufs/dbgaufs.c 2016-10-09 16:55:38.886097714 +0200
6144@@ -0,0 +1,438 @@
1facf9fc 6145+/*
8cdd5066 6146+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6147+ *
6148+ * This program, aufs is free software; you can redistribute it and/or modify
6149+ * it under the terms of the GNU General Public License as published by
6150+ * the Free Software Foundation; either version 2 of the License, or
6151+ * (at your option) any later version.
dece6358
AM
6152+ *
6153+ * This program is distributed in the hope that it will be useful,
6154+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6155+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6156+ * GNU General Public License for more details.
6157+ *
6158+ * You should have received a copy of the GNU General Public License
523b37e3 6159+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6160+ */
6161+
6162+/*
6163+ * debugfs interface
6164+ */
6165+
6166+#include <linux/debugfs.h>
6167+#include "aufs.h"
6168+
6169+#ifndef CONFIG_SYSFS
6170+#error DEBUG_FS depends upon SYSFS
6171+#endif
6172+
6173+static struct dentry *dbgaufs;
6174+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
6175+
6176+/* 20 is the maximum number of decimal digits in a 64-bit unsigned long */
6177+struct dbgaufs_arg {
6178+ int n;
6179+ char a[20 * 4];
6180+};
6181+
6182+/*
6183+ * common function for all XINO files
6184+ */
6185+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
6186+ struct file *file)
6187+{
f0c0a007 6188+ au_delayed_kfree(file->private_data);
1facf9fc 6189+ return 0;
6190+}
6191+
6192+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
6193+{
6194+ int err;
6195+ struct kstat st;
6196+ struct dbgaufs_arg *p;
6197+
6198+ err = -ENOMEM;
6199+ p = kmalloc(sizeof(*p), GFP_NOFS);
6200+ if (unlikely(!p))
6201+ goto out;
6202+
6203+ err = 0;
6204+ p->n = 0;
6205+ file->private_data = p;
6206+ if (!xf)
6207+ goto out;
6208+
c06a8ce3 6209+ err = vfs_getattr(&xf->f_path, &st);
1facf9fc 6210+ if (!err) {
6211+ if (do_fcnt)
6212+ p->n = snprintf
6213+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
6214+ (long)file_count(xf), st.blocks, st.blksize,
6215+ (long long)st.size);
6216+ else
6217+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
6218+ st.blocks, st.blksize,
6219+ (long long)st.size);
6220+ AuDebugOn(p->n >= sizeof(p->a));
6221+ } else {
6222+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
6223+ err = 0;
6224+ }
6225+
4f0767ce 6226+out:
1facf9fc 6227+ return err;
6228+
6229+}
6230+
6231+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
6232+ size_t count, loff_t *ppos)
6233+{
6234+ struct dbgaufs_arg *p;
6235+
6236+ p = file->private_data;
6237+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6238+}
6239+
6240+/* ---------------------------------------------------------------------- */
6241+
86dc4139
AM
6242+struct dbgaufs_plink_arg {
6243+ int n;
6244+ char a[];
6245+};
6246+
6247+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
6248+ struct file *file)
6249+{
f0c0a007 6250+ au_delayed_free_page((unsigned long)file->private_data);
86dc4139
AM
6251+ return 0;
6252+}
6253+
6254+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
6255+{
6256+ int err, i, limit;
6257+ unsigned long n, sum;
6258+ struct dbgaufs_plink_arg *p;
6259+ struct au_sbinfo *sbinfo;
6260+ struct super_block *sb;
6261+ struct au_sphlhead *sphl;
6262+
6263+ err = -ENOMEM;
6264+ p = (void *)get_zeroed_page(GFP_NOFS);
6265+ if (unlikely(!p))
6266+ goto out;
6267+
6268+ err = -EFBIG;
6269+ sbinfo = inode->i_private;
6270+ sb = sbinfo->si_sb;
6271+ si_noflush_read_lock(sb);
6272+ if (au_opt_test(au_mntflags(sb), PLINK)) {
6273+ limit = PAGE_SIZE - sizeof(p->n);
6274+
6275+ /* the number of buckets */
6276+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
6277+ p->n += n;
6278+ limit -= n;
6279+
6280+ sum = 0;
6281+ for (i = 0, sphl = sbinfo->si_plink;
6282+ i < AuPlink_NHASH;
6283+ i++, sphl++) {
6284+ n = au_sphl_count(sphl);
6285+ sum += n;
6286+
6287+ n = snprintf(p->a + p->n, limit, "%lu ", n);
6288+ p->n += n;
6289+ limit -= n;
6290+ if (unlikely(limit <= 0))
6291+ goto out_free;
6292+ }
6293+ p->a[p->n - 1] = '\n';
6294+
6295+ /* the sum of plinks */
6296+ n = snprintf(p->a + p->n, limit, "%lu\n", sum);
6297+ p->n += n;
6298+ limit -= n;
6299+ if (unlikely(limit <= 0))
6300+ goto out_free;
6301+ } else {
6302+#define str "1\n0\n0\n"
6303+ p->n = sizeof(str) - 1;
6304+ strcpy(p->a, str);
6305+#undef str
6306+ }
6307+ si_read_unlock(sb);
6308+
6309+ err = 0;
6310+ file->private_data = p;
6311+ goto out; /* success */
6312+
6313+out_free:
f0c0a007 6314+ au_delayed_free_page((unsigned long)p);
86dc4139
AM
6315+out:
6316+ return err;
6317+}
6318+
6319+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
6320+ size_t count, loff_t *ppos)
6321+{
6322+ struct dbgaufs_plink_arg *p;
6323+
6324+ p = file->private_data;
6325+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6326+}
6327+
6328+static const struct file_operations dbgaufs_plink_fop = {
6329+ .owner = THIS_MODULE,
6330+ .open = dbgaufs_plink_open,
6331+ .release = dbgaufs_plink_release,
6332+ .read = dbgaufs_plink_read
6333+};
6334+
6335+/* ---------------------------------------------------------------------- */
6336+
1facf9fc 6337+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
6338+{
6339+ int err;
6340+ struct au_sbinfo *sbinfo;
6341+ struct super_block *sb;
6342+
6343+ sbinfo = inode->i_private;
6344+ sb = sbinfo->si_sb;
6345+ si_noflush_read_lock(sb);
6346+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
6347+ si_read_unlock(sb);
6348+ return err;
6349+}
6350+
6351+static const struct file_operations dbgaufs_xib_fop = {
4a4d8108 6352+ .owner = THIS_MODULE,
1facf9fc 6353+ .open = dbgaufs_xib_open,
6354+ .release = dbgaufs_xi_release,
6355+ .read = dbgaufs_xi_read
6356+};
6357+
6358+/* ---------------------------------------------------------------------- */
6359+
6360+#define DbgaufsXi_PREFIX "xi"
6361+
6362+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
6363+{
6364+ int err;
6365+ long l;
6366+ struct au_sbinfo *sbinfo;
6367+ struct super_block *sb;
6368+ struct file *xf;
6369+ struct qstr *name;
6370+
6371+ err = -ENOENT;
6372+ xf = NULL;
2000de60 6373+ name = &file->f_path.dentry->d_name;
1facf9fc 6374+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
6375+ || memcmp(name->name, DbgaufsXi_PREFIX,
6376+ sizeof(DbgaufsXi_PREFIX) - 1)))
6377+ goto out;
9dbd164d 6378+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
1facf9fc 6379+ if (unlikely(err))
6380+ goto out;
6381+
6382+ sbinfo = inode->i_private;
6383+ sb = sbinfo->si_sb;
6384+ si_noflush_read_lock(sb);
5afbbe0d 6385+ if (l <= au_sbbot(sb)) {
1facf9fc 6386+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
6387+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
6388+ } else
6389+ err = -ENOENT;
6390+ si_read_unlock(sb);
6391+
4f0767ce 6392+out:
1facf9fc 6393+ return err;
6394+}
6395+
6396+static const struct file_operations dbgaufs_xino_fop = {
4a4d8108 6397+ .owner = THIS_MODULE,
1facf9fc 6398+ .open = dbgaufs_xino_open,
6399+ .release = dbgaufs_xi_release,
6400+ .read = dbgaufs_xi_read
6401+};
6402+
6403+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
6404+{
5afbbe0d 6405+ aufs_bindex_t bbot;
1facf9fc 6406+ struct au_branch *br;
6407+ struct au_xino_file *xi;
6408+
6409+ if (!au_sbi(sb)->si_dbgaufs)
6410+ return;
6411+
5afbbe0d
AM
6412+ bbot = au_sbbot(sb);
6413+ for (; bindex <= bbot; bindex++) {
1facf9fc 6414+ br = au_sbr(sb, bindex);
6415+ xi = &br->br_xino;
e2f27e51
AM
6416+ /* debugfs acquires the parent i_mutex */
6417+ lockdep_off();
c06a8ce3 6418+ debugfs_remove(xi->xi_dbgaufs);
e2f27e51 6419+ lockdep_on();
c06a8ce3 6420+ xi->xi_dbgaufs = NULL;
1facf9fc 6421+ }
6422+}
6423+
6424+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
6425+{
6426+ struct au_sbinfo *sbinfo;
6427+ struct dentry *parent;
6428+ struct au_branch *br;
6429+ struct au_xino_file *xi;
5afbbe0d 6430+ aufs_bindex_t bbot;
1facf9fc 6431+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
6432+
6433+ sbinfo = au_sbi(sb);
6434+ parent = sbinfo->si_dbgaufs;
6435+ if (!parent)
6436+ return;
6437+
5afbbe0d
AM
6438+ bbot = au_sbbot(sb);
6439+ for (; bindex <= bbot; bindex++) {
1facf9fc 6440+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
6441+ br = au_sbr(sb, bindex);
6442+ xi = &br->br_xino;
6443+ AuDebugOn(xi->xi_dbgaufs);
f0c0a007
AM
6444+ /* debugfs acquires the parent i_mutex */
6445+ lockdep_off();
1facf9fc 6446+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
6447+ sbinfo, &dbgaufs_xino_fop);
f0c0a007 6448+ lockdep_on();
1facf9fc 6449+ /* ignore an error */
6450+ if (unlikely(!xi->xi_dbgaufs))
6451+ AuWarn1("failed %s under debugfs\n", name);
6452+ }
6453+}
6454+
6455+/* ---------------------------------------------------------------------- */
6456+
6457+#ifdef CONFIG_AUFS_EXPORT
6458+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
6459+{
6460+ int err;
6461+ struct au_sbinfo *sbinfo;
6462+ struct super_block *sb;
6463+
6464+ sbinfo = inode->i_private;
6465+ sb = sbinfo->si_sb;
6466+ si_noflush_read_lock(sb);
6467+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
6468+ si_read_unlock(sb);
6469+ return err;
6470+}
6471+
6472+static const struct file_operations dbgaufs_xigen_fop = {
4a4d8108 6473+ .owner = THIS_MODULE,
1facf9fc 6474+ .open = dbgaufs_xigen_open,
6475+ .release = dbgaufs_xi_release,
6476+ .read = dbgaufs_xi_read
6477+};
6478+
6479+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6480+{
6481+ int err;
6482+
dece6358 6483+ /*
c1595e42 6484+ * This function is a dynamic '__init' function actually,
dece6358
AM
6485+ * so the tiny check for si_rwsem is unnecessary.
6486+ */
6487+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6488+
1facf9fc 6489+ err = -EIO;
6490+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
6491+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6492+ &dbgaufs_xigen_fop);
6493+ if (sbinfo->si_dbgaufs_xigen)
6494+ err = 0;
6495+
6496+ return err;
6497+}
6498+#else
6499+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6500+{
6501+ return 0;
6502+}
6503+#endif /* CONFIG_AUFS_EXPORT */
6504+
6505+/* ---------------------------------------------------------------------- */
6506+
6507+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
6508+{
dece6358 6509+ /*
7e9cd9fe 6510+ * This function is a dynamic '__fin' function actually,
dece6358
AM
6511+ * so the tiny check for si_rwsem is unnecessary.
6512+ */
6513+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6514+
1facf9fc 6515+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
6516+ sbinfo->si_dbgaufs = NULL;
6517+ kobject_put(&sbinfo->si_kobj);
6518+}
6519+
6520+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
6521+{
6522+ int err;
6523+ char name[SysaufsSiNameLen];
6524+
dece6358 6525+ /*
c1595e42 6526+ * This function is a dynamic '__init' function actually,
dece6358
AM
6527+ * so the tiny check for si_rwsem is unnecessary.
6528+ */
6529+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6530+
1facf9fc 6531+ err = -ENOENT;
6532+ if (!dbgaufs) {
6533+ AuErr1("/debug/aufs is uninitialized\n");
6534+ goto out;
6535+ }
6536+
6537+ err = -EIO;
6538+ sysaufs_name(sbinfo, name);
6539+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
6540+ if (unlikely(!sbinfo->si_dbgaufs))
6541+ goto out;
6542+ kobject_get(&sbinfo->si_kobj);
6543+
6544+ sbinfo->si_dbgaufs_xib = debugfs_create_file
6545+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6546+ &dbgaufs_xib_fop);
6547+ if (unlikely(!sbinfo->si_dbgaufs_xib))
6548+ goto out_dir;
6549+
86dc4139
AM
6550+ sbinfo->si_dbgaufs_plink = debugfs_create_file
6551+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6552+ &dbgaufs_plink_fop);
6553+ if (unlikely(!sbinfo->si_dbgaufs_plink))
6554+ goto out_dir;
6555+
1facf9fc 6556+ err = dbgaufs_xigen_init(sbinfo);
6557+ if (!err)
6558+ goto out; /* success */
6559+
4f0767ce 6560+out_dir:
1facf9fc 6561+ dbgaufs_si_fin(sbinfo);
4f0767ce 6562+out:
1facf9fc 6563+ return err;
6564+}
6565+
6566+/* ---------------------------------------------------------------------- */
6567+
6568+void dbgaufs_fin(void)
6569+{
6570+ debugfs_remove(dbgaufs);
6571+}
6572+
6573+int __init dbgaufs_init(void)
6574+{
6575+ int err;
6576+
6577+ err = -EIO;
6578+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
6579+ if (dbgaufs)
6580+ err = 0;
6581+ return err;
6582+}
7f207e10
AM
6583diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
6584--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 6585+++ linux/fs/aufs/dbgaufs.h 2016-10-09 16:55:36.486034798 +0200
523b37e3 6586@@ -0,0 +1,48 @@
1facf9fc 6587+/*
8cdd5066 6588+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6589+ *
6590+ * This program, aufs is free software; you can redistribute it and/or modify
6591+ * it under the terms of the GNU General Public License as published by
6592+ * the Free Software Foundation; either version 2 of the License, or
6593+ * (at your option) any later version.
dece6358
AM
6594+ *
6595+ * This program is distributed in the hope that it will be useful,
6596+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6597+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6598+ * GNU General Public License for more details.
6599+ *
6600+ * You should have received a copy of the GNU General Public License
523b37e3 6601+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6602+ */
6603+
6604+/*
6605+ * debugfs interface
6606+ */
6607+
6608+#ifndef __DBGAUFS_H__
6609+#define __DBGAUFS_H__
6610+
6611+#ifdef __KERNEL__
6612+
dece6358 6613+struct super_block;
1facf9fc 6614+struct au_sbinfo;
dece6358 6615+
1facf9fc 6616+#ifdef CONFIG_DEBUG_FS
6617+/* dbgaufs.c */
6618+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
6619+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
6620+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
6621+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
6622+void dbgaufs_fin(void);
6623+int __init dbgaufs_init(void);
1facf9fc 6624+#else
4a4d8108
AM
6625+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
6626+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
6627+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
6628+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
6629+AuStubVoid(dbgaufs_fin, void)
6630+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 6631+#endif /* CONFIG_DEBUG_FS */
6632+
6633+#endif /* __KERNEL__ */
6634+#endif /* __DBGAUFS_H__ */
7f207e10
AM
6635diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
6636--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
6637+++ linux/fs/aufs/dcsub.c 2016-10-09 16:55:38.886097714 +0200
6638@@ -0,0 +1,225 @@
1facf9fc 6639+/*
8cdd5066 6640+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6641+ *
6642+ * This program, aufs is free software; you can redistribute it and/or modify
6643+ * it under the terms of the GNU General Public License as published by
6644+ * the Free Software Foundation; either version 2 of the License, or
6645+ * (at your option) any later version.
dece6358
AM
6646+ *
6647+ * This program is distributed in the hope that it will be useful,
6648+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6649+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6650+ * GNU General Public License for more details.
6651+ *
6652+ * You should have received a copy of the GNU General Public License
523b37e3 6653+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6654+ */
6655+
6656+/*
6657+ * sub-routines for dentry cache
6658+ */
6659+
6660+#include "aufs.h"
6661+
6662+static void au_dpage_free(struct au_dpage *dpage)
6663+{
6664+ int i;
6665+ struct dentry **p;
6666+
6667+ p = dpage->dentries;
6668+ for (i = 0; i < dpage->ndentry; i++)
6669+ dput(*p++);
f0c0a007 6670+ au_delayed_free_page((unsigned long)dpage->dentries);
1facf9fc 6671+}
6672+
6673+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
6674+{
6675+ int err;
6676+ void *p;
6677+
6678+ err = -ENOMEM;
6679+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
6680+ if (unlikely(!dpages->dpages))
6681+ goto out;
6682+
6683+ p = (void *)__get_free_page(gfp);
6684+ if (unlikely(!p))
6685+ goto out_dpages;
6686+
6687+ dpages->dpages[0].ndentry = 0;
6688+ dpages->dpages[0].dentries = p;
6689+ dpages->ndpage = 1;
6690+ return 0; /* success */
6691+
4f0767ce 6692+out_dpages:
f0c0a007 6693+ au_delayed_kfree(dpages->dpages);
4f0767ce 6694+out:
1facf9fc 6695+ return err;
6696+}
6697+
6698+void au_dpages_free(struct au_dcsub_pages *dpages)
6699+{
6700+ int i;
6701+ struct au_dpage *p;
6702+
6703+ p = dpages->dpages;
6704+ for (i = 0; i < dpages->ndpage; i++)
6705+ au_dpage_free(p++);
f0c0a007 6706+ au_delayed_kfree(dpages->dpages);
1facf9fc 6707+}
6708+
6709+static int au_dpages_append(struct au_dcsub_pages *dpages,
6710+ struct dentry *dentry, gfp_t gfp)
6711+{
6712+ int err, sz;
6713+ struct au_dpage *dpage;
6714+ void *p;
6715+
6716+ dpage = dpages->dpages + dpages->ndpage - 1;
6717+ sz = PAGE_SIZE / sizeof(dentry);
6718+ if (unlikely(dpage->ndentry >= sz)) {
6719+ AuLabel(new dpage);
6720+ err = -ENOMEM;
6721+ sz = dpages->ndpage * sizeof(*dpages->dpages);
6722+ p = au_kzrealloc(dpages->dpages, sz,
e2f27e51
AM
6723+ sz + sizeof(*dpages->dpages), gfp,
6724+ /*may_shrink*/0);
1facf9fc 6725+ if (unlikely(!p))
6726+ goto out;
6727+
6728+ dpages->dpages = p;
6729+ dpage = dpages->dpages + dpages->ndpage;
6730+ p = (void *)__get_free_page(gfp);
6731+ if (unlikely(!p))
6732+ goto out;
6733+
6734+ dpage->ndentry = 0;
6735+ dpage->dentries = p;
6736+ dpages->ndpage++;
6737+ }
6738+
c1595e42 6739+ AuDebugOn(au_dcount(dentry) <= 0);
027c5e7a 6740+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
1facf9fc 6741+ return 0; /* success */
6742+
4f0767ce 6743+out:
1facf9fc 6744+ return err;
6745+}
6746+
c1595e42
JR
6747+/* todo: BAD approach */
6748+/* copied from linux/fs/dcache.c */
6749+enum d_walk_ret {
6750+ D_WALK_CONTINUE,
6751+ D_WALK_QUIT,
6752+ D_WALK_NORETRY,
6753+ D_WALK_SKIP,
6754+};
6755+
6756+extern void d_walk(struct dentry *parent, void *data,
6757+ enum d_walk_ret (*enter)(void *, struct dentry *),
6758+ void (*finish)(void *));
6759+
6760+struct ac_dpages_arg {
1facf9fc 6761+ int err;
c1595e42
JR
6762+ struct au_dcsub_pages *dpages;
6763+ struct super_block *sb;
6764+ au_dpages_test test;
6765+ void *arg;
6766+};
1facf9fc 6767+
c1595e42
JR
6768+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
6769+{
6770+ enum d_walk_ret ret;
6771+ struct ac_dpages_arg *arg = _arg;
1facf9fc 6772+
c1595e42
JR
6773+ ret = D_WALK_CONTINUE;
6774+ if (dentry->d_sb == arg->sb
6775+ && !IS_ROOT(dentry)
6776+ && au_dcount(dentry) > 0
6777+ && au_di(dentry)
6778+ && (!arg->test || arg->test(dentry, arg->arg))) {
6779+ arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
6780+ if (unlikely(arg->err))
6781+ ret = D_WALK_QUIT;
1facf9fc 6782+ }
6783+
c1595e42
JR
6784+ return ret;
6785+}
027c5e7a 6786+
c1595e42
JR
6787+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6788+ au_dpages_test test, void *arg)
6789+{
6790+ struct ac_dpages_arg args = {
6791+ .err = 0,
6792+ .dpages = dpages,
6793+ .sb = root->d_sb,
6794+ .test = test,
6795+ .arg = arg
6796+ };
027c5e7a 6797+
c1595e42
JR
6798+ d_walk(root, &args, au_call_dpages_append, NULL);
6799+
6800+ return args.err;
1facf9fc 6801+}
6802+
6803+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6804+ int do_include, au_dpages_test test, void *arg)
6805+{
6806+ int err;
6807+
6808+ err = 0;
027c5e7a
AM
6809+ write_seqlock(&rename_lock);
6810+ spin_lock(&dentry->d_lock);
6811+ if (do_include
c1595e42 6812+ && au_dcount(dentry) > 0
027c5e7a 6813+ && (!test || test(dentry, arg)))
1facf9fc 6814+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6815+ spin_unlock(&dentry->d_lock);
6816+ if (unlikely(err))
6817+ goto out;
6818+
6819+ /*
523b37e3 6820+ * RCU for vfsmount is unnecessary since this is a traverse in a single
027c5e7a
AM
6821+ * mount
6822+ */
1facf9fc 6823+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
6824+ dentry = dentry->d_parent; /* rename_lock is locked */
6825+ spin_lock(&dentry->d_lock);
c1595e42 6826+ if (au_dcount(dentry) > 0
027c5e7a 6827+ && (!test || test(dentry, arg)))
1facf9fc 6828+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6829+ spin_unlock(&dentry->d_lock);
6830+ if (unlikely(err))
6831+ break;
1facf9fc 6832+ }
6833+
4f0767ce 6834+out:
027c5e7a 6835+ write_sequnlock(&rename_lock);
1facf9fc 6836+ return err;
6837+}
6838+
027c5e7a
AM
6839+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
6840+{
6841+ return au_di(dentry) && dentry->d_sb == arg;
6842+}
6843+
6844+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6845+ struct dentry *dentry, int do_include)
6846+{
6847+ return au_dcsub_pages_rev(dpages, dentry, do_include,
6848+ au_dcsub_dpages_aufs, dentry->d_sb);
6849+}
6850+
4a4d8108 6851+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 6852+{
4a4d8108
AM
6853+ struct path path[2] = {
6854+ {
6855+ .dentry = d1
6856+ },
6857+ {
6858+ .dentry = d2
6859+ }
6860+ };
1facf9fc 6861+
4a4d8108 6862+ return path_is_under(path + 0, path + 1);
1facf9fc 6863+}
7f207e10
AM
6864diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
6865--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 6866+++ linux/fs/aufs/dcsub.h 2016-10-09 16:55:36.486034798 +0200
5527c038 6867@@ -0,0 +1,136 @@
1facf9fc 6868+/*
8cdd5066 6869+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6870+ *
6871+ * This program, aufs is free software; you can redistribute it and/or modify
6872+ * it under the terms of the GNU General Public License as published by
6873+ * the Free Software Foundation; either version 2 of the License, or
6874+ * (at your option) any later version.
dece6358
AM
6875+ *
6876+ * This program is distributed in the hope that it will be useful,
6877+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6878+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6879+ * GNU General Public License for more details.
6880+ *
6881+ * You should have received a copy of the GNU General Public License
523b37e3 6882+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6883+ */
6884+
6885+/*
6886+ * sub-routines for dentry cache
6887+ */
6888+
6889+#ifndef __AUFS_DCSUB_H__
6890+#define __AUFS_DCSUB_H__
6891+
6892+#ifdef __KERNEL__
6893+
7f207e10 6894+#include <linux/dcache.h>
027c5e7a 6895+#include <linux/fs.h>
dece6358 6896+
1facf9fc 6897+struct au_dpage {
6898+ int ndentry;
6899+ struct dentry **dentries;
6900+};
6901+
6902+struct au_dcsub_pages {
6903+ int ndpage;
6904+ struct au_dpage *dpages;
6905+};
6906+
6907+/* ---------------------------------------------------------------------- */
6908+
7f207e10 6909+/* dcsub.c */
1facf9fc 6910+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
6911+void au_dpages_free(struct au_dcsub_pages *dpages);
6912+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
6913+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6914+ au_dpages_test test, void *arg);
6915+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6916+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
6917+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6918+ struct dentry *dentry, int do_include);
4a4d8108 6919+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 6920+
7f207e10
AM
6921+/* ---------------------------------------------------------------------- */
6922+
523b37e3
AM
6923+/*
6924+ * todo: in linux-3.13, several similar (but faster) helpers are added to
6925+ * include/linux/dcache.h. Try them (in the future).
6926+ */
6927+
027c5e7a
AM
6928+static inline int au_d_hashed_positive(struct dentry *d)
6929+{
6930+ int err;
5527c038 6931+ struct inode *inode = d_inode(d);
076b876e 6932+
027c5e7a 6933+ err = 0;
5527c038
JR
6934+ if (unlikely(d_unhashed(d)
6935+ || d_is_negative(d)
6936+ || !inode->i_nlink))
027c5e7a
AM
6937+ err = -ENOENT;
6938+ return err;
6939+}
6940+
38d290e6
JR
6941+static inline int au_d_linkable(struct dentry *d)
6942+{
6943+ int err;
5527c038 6944+ struct inode *inode = d_inode(d);
076b876e 6945+
38d290e6
JR
6946+ err = au_d_hashed_positive(d);
6947+ if (err
5527c038 6948+ && d_is_positive(d)
38d290e6
JR
6949+ && (inode->i_state & I_LINKABLE))
6950+ err = 0;
6951+ return err;
6952+}
6953+
027c5e7a
AM
6954+static inline int au_d_alive(struct dentry *d)
6955+{
6956+ int err;
6957+ struct inode *inode;
076b876e 6958+
027c5e7a
AM
6959+ err = 0;
6960+ if (!IS_ROOT(d))
6961+ err = au_d_hashed_positive(d);
6962+ else {
5527c038
JR
6963+ inode = d_inode(d);
6964+ if (unlikely(d_unlinked(d)
6965+ || d_is_negative(d)
6966+ || !inode->i_nlink))
027c5e7a
AM
6967+ err = -ENOENT;
6968+ }
6969+ return err;
6970+}
6971+
6972+static inline int au_alive_dir(struct dentry *d)
7f207e10 6973+{
027c5e7a 6974+ int err;
076b876e 6975+
027c5e7a 6976+ err = au_d_alive(d);
5527c038 6977+ if (unlikely(err || IS_DEADDIR(d_inode(d))))
027c5e7a
AM
6978+ err = -ENOENT;
6979+ return err;
7f207e10
AM
6980+}
6981+
38d290e6
JR
6982+static inline int au_qstreq(struct qstr *a, struct qstr *b)
6983+{
6984+ return a->len == b->len
6985+ && !memcmp(a->name, b->name, a->len);
6986+}
6987+
7e9cd9fe
AM
6988+/*
6989+ * by the commit
6990+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
6991+ * taking d_lock
6992+ * the type of d_lockref.count became int, but the inlined function d_count()
6993+ * still returns unsigned int.
6994+ * I don't know why. Maybe it is for every d_count() users?
6995+ * Anyway au_dcount() lives on.
6996+ */
c1595e42
JR
6997+static inline int au_dcount(struct dentry *d)
6998+{
6999+ return (int)d_count(d);
7000+}
7001+
1facf9fc 7002+#endif /* __KERNEL__ */
7003+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
7004diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
7005--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 7006+++ linux/fs/aufs/debug.c 2016-10-09 16:55:36.486034798 +0200
f0c0a007 7007@@ -0,0 +1,440 @@
1facf9fc 7008+/*
8cdd5066 7009+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 7010+ *
7011+ * This program, aufs is free software; you can redistribute it and/or modify
7012+ * it under the terms of the GNU General Public License as published by
7013+ * the Free Software Foundation; either version 2 of the License, or
7014+ * (at your option) any later version.
dece6358
AM
7015+ *
7016+ * This program is distributed in the hope that it will be useful,
7017+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7018+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7019+ * GNU General Public License for more details.
7020+ *
7021+ * You should have received a copy of the GNU General Public License
523b37e3 7022+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7023+ */
7024+
7025+/*
7026+ * debug print functions
7027+ */
7028+
7029+#include "aufs.h"
7030+
392086de
AM
7031+/* Returns 0, or -errno. arg is in kp->arg. */
7032+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
7033+{
7034+ int err, n;
7035+
7036+ err = kstrtoint(val, 0, &n);
7037+ if (!err) {
7038+ if (n > 0)
7039+ au_debug_on();
7040+ else
7041+ au_debug_off();
7042+ }
7043+ return err;
7044+}
7045+
7046+/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
7047+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
7048+{
7049+ atomic_t *a;
7050+
7051+ a = kp->arg;
7052+ return sprintf(buffer, "%d", atomic_read(a));
7053+}
7054+
7055+static struct kernel_param_ops param_ops_atomic_t = {
7056+ .set = param_atomic_t_set,
7057+ .get = param_atomic_t_get
7058+ /* void (*free)(void *arg) */
7059+};
7060+
7061+atomic_t aufs_debug = ATOMIC_INIT(0);
1facf9fc 7062+MODULE_PARM_DESC(debug, "debug print");
392086de 7063+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
1facf9fc 7064+
c1595e42 7065+DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
1facf9fc 7066+char *au_plevel = KERN_DEBUG;
e49829fe
JR
7067+#define dpri(fmt, ...) do { \
7068+ if ((au_plevel \
7069+ && strcmp(au_plevel, KERN_DEBUG)) \
7070+ || au_debug_test()) \
7071+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 7072+} while (0)
7073+
7074+/* ---------------------------------------------------------------------- */
7075+
7076+void au_dpri_whlist(struct au_nhash *whlist)
7077+{
7078+ unsigned long ul, n;
7079+ struct hlist_head *head;
c06a8ce3 7080+ struct au_vdir_wh *pos;
1facf9fc 7081+
7082+ n = whlist->nh_num;
7083+ head = whlist->nh_head;
7084+ for (ul = 0; ul < n; ul++) {
c06a8ce3 7085+ hlist_for_each_entry(pos, head, wh_hash)
1facf9fc 7086+ dpri("b%d, %.*s, %d\n",
c06a8ce3
AM
7087+ pos->wh_bindex,
7088+ pos->wh_str.len, pos->wh_str.name,
7089+ pos->wh_str.len);
1facf9fc 7090+ head++;
7091+ }
7092+}
7093+
7094+void au_dpri_vdir(struct au_vdir *vdir)
7095+{
7096+ unsigned long ul;
7097+ union au_vdir_deblk_p p;
7098+ unsigned char *o;
7099+
7100+ if (!vdir || IS_ERR(vdir)) {
7101+ dpri("err %ld\n", PTR_ERR(vdir));
7102+ return;
7103+ }
7104+
7105+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
7106+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
7107+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
7108+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
7109+ p.deblk = vdir->vd_deblk[ul];
7110+ o = p.deblk;
7111+ dpri("[%lu]: %p\n", ul, o);
7112+ }
7113+}
7114+
53392da6 7115+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 7116+ struct dentry *wh)
7117+{
7118+ char *n = NULL;
7119+ int l = 0;
7120+
7121+ if (!inode || IS_ERR(inode)) {
7122+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
7123+ return -1;
7124+ }
7125+
c2b27bf2 7126+ /* the type of i_blocks depends upon CONFIG_LBDAF */
1facf9fc 7127+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
7128+ && sizeof(inode->i_blocks) != sizeof(u64));
7129+ if (wh) {
7130+ n = (void *)wh->d_name.name;
7131+ l = wh->d_name.len;
7132+ }
7133+
53392da6
AM
7134+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
7135+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
7136+ bindex, inode,
1facf9fc 7137+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
7138+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
7139+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 7140+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
1facf9fc 7141+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
7142+ inode->i_state, inode->i_flags, inode->i_version,
7143+ inode->i_generation,
1facf9fc 7144+ l ? ", wh " : "", l, n);
7145+ return 0;
7146+}
7147+
7148+void au_dpri_inode(struct inode *inode)
7149+{
7150+ struct au_iinfo *iinfo;
5afbbe0d 7151+ struct au_hinode *hi;
1facf9fc 7152+ aufs_bindex_t bindex;
53392da6 7153+ int err, hn;
1facf9fc 7154+
53392da6 7155+ err = do_pri_inode(-1, inode, -1, NULL);
5afbbe0d 7156+ if (err || !au_test_aufs(inode->i_sb) || au_is_bad_inode(inode))
1facf9fc 7157+ return;
7158+
7159+ iinfo = au_ii(inode);
5afbbe0d
AM
7160+ dpri("i-1: btop %d, bbot %d, gen %d\n",
7161+ iinfo->ii_btop, iinfo->ii_bbot, au_iigen(inode, NULL));
7162+ if (iinfo->ii_btop < 0)
1facf9fc 7163+ return;
53392da6 7164+ hn = 0;
5afbbe0d
AM
7165+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; bindex++) {
7166+ hi = au_hinode(iinfo, bindex);
7167+ hn = !!au_hn(hi);
7168+ do_pri_inode(bindex, hi->hi_inode, hn, hi->hi_whdentry);
53392da6 7169+ }
1facf9fc 7170+}
7171+
2cbb1c4b
JR
7172+void au_dpri_dalias(struct inode *inode)
7173+{
7174+ struct dentry *d;
7175+
7176+ spin_lock(&inode->i_lock);
c1595e42 7177+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
2cbb1c4b
JR
7178+ au_dpri_dentry(d);
7179+ spin_unlock(&inode->i_lock);
7180+}
7181+
1facf9fc 7182+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
7183+{
7184+ struct dentry *wh = NULL;
53392da6 7185+ int hn;
5afbbe0d 7186+ struct inode *inode;
076b876e 7187+ struct au_iinfo *iinfo;
5afbbe0d 7188+ struct au_hinode *hi;
1facf9fc 7189+
7190+ if (!dentry || IS_ERR(dentry)) {
7191+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
7192+ return -1;
7193+ }
7194+ /* do not call dget_parent() here */
027c5e7a 7195+ /* note: access d_xxx without d_lock */
523b37e3
AM
7196+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
7197+ bindex, dentry, dentry,
1facf9fc 7198+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
c1595e42 7199+ au_dcount(dentry), dentry->d_flags,
523b37e3 7200+ d_unhashed(dentry) ? "un" : "");
53392da6 7201+ hn = -1;
5afbbe0d
AM
7202+ inode = NULL;
7203+ if (d_is_positive(dentry))
7204+ inode = d_inode(dentry);
7205+ if (inode
7206+ && au_test_aufs(dentry->d_sb)
7207+ && bindex >= 0
7208+ && !au_is_bad_inode(inode)) {
7209+ iinfo = au_ii(inode);
7210+ hi = au_hinode(iinfo, bindex);
7211+ hn = !!au_hn(hi);
7212+ wh = hi->hi_whdentry;
7213+ }
7214+ do_pri_inode(bindex, inode, hn, wh);
1facf9fc 7215+ return 0;
7216+}
7217+
7218+void au_dpri_dentry(struct dentry *dentry)
7219+{
7220+ struct au_dinfo *dinfo;
7221+ aufs_bindex_t bindex;
7222+ int err;
7223+
7224+ err = do_pri_dentry(-1, dentry);
7225+ if (err || !au_test_aufs(dentry->d_sb))
7226+ return;
7227+
7228+ dinfo = au_di(dentry);
7229+ if (!dinfo)
7230+ return;
5afbbe0d
AM
7231+ dpri("d-1: btop %d, bbot %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
7232+ dinfo->di_btop, dinfo->di_bbot,
38d290e6
JR
7233+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
7234+ dinfo->di_tmpfile);
5afbbe0d 7235+ if (dinfo->di_btop < 0)
1facf9fc 7236+ return;
5afbbe0d
AM
7237+ for (bindex = dinfo->di_btop; bindex <= dinfo->di_bbot; bindex++)
7238+ do_pri_dentry(bindex, au_hdentry(dinfo, bindex)->hd_dentry);
1facf9fc 7239+}
7240+
7241+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
7242+{
7243+ char a[32];
7244+
7245+ if (!file || IS_ERR(file)) {
7246+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
7247+ return -1;
7248+ }
7249+ a[0] = 0;
7250+ if (bindex < 0
b912730e 7251+ && !IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7252+ && au_test_aufs(file->f_path.dentry->d_sb)
1facf9fc 7253+ && au_fi(file))
e49829fe 7254+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 7255+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 7256+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 7257+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 7258+ file->f_version, file->f_pos, a);
b912730e 7259+ if (!IS_ERR_OR_NULL(file->f_path.dentry))
2000de60 7260+ do_pri_dentry(bindex, file->f_path.dentry);
1facf9fc 7261+ return 0;
7262+}
7263+
7264+void au_dpri_file(struct file *file)
7265+{
7266+ struct au_finfo *finfo;
4a4d8108
AM
7267+ struct au_fidir *fidir;
7268+ struct au_hfile *hfile;
1facf9fc 7269+ aufs_bindex_t bindex;
7270+ int err;
7271+
7272+ err = do_pri_file(-1, file);
2000de60 7273+ if (err
b912730e 7274+ || IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7275+ || !au_test_aufs(file->f_path.dentry->d_sb))
1facf9fc 7276+ return;
7277+
7278+ finfo = au_fi(file);
7279+ if (!finfo)
7280+ return;
4a4d8108 7281+ if (finfo->fi_btop < 0)
1facf9fc 7282+ return;
4a4d8108
AM
7283+ fidir = finfo->fi_hdir;
7284+ if (!fidir)
7285+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
7286+ else
e49829fe
JR
7287+ for (bindex = finfo->fi_btop;
7288+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
7289+ bindex++) {
7290+ hfile = fidir->fd_hfile + bindex;
7291+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
7292+ }
1facf9fc 7293+}
7294+
7295+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
7296+{
7297+ struct vfsmount *mnt;
7298+ struct super_block *sb;
7299+
7300+ if (!br || IS_ERR(br))
7301+ goto out;
86dc4139 7302+ mnt = au_br_mnt(br);
1facf9fc 7303+ if (!mnt || IS_ERR(mnt))
7304+ goto out;
7305+ sb = mnt->mnt_sb;
7306+ if (!sb || IS_ERR(sb))
7307+ goto out;
7308+
5afbbe0d 7309+ dpri("s%d: {perm 0x%x, id %d, cnt %lld, wbr %p}, "
b752ccd1 7310+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 7311+ "xino %d\n",
5afbbe0d 7312+ bindex, br->br_perm, br->br_id, au_br_count(br),
1e00d052 7313+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 7314+ sb->s_flags, sb->s_count,
1facf9fc 7315+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
7316+ return 0;
7317+
4f0767ce 7318+out:
1facf9fc 7319+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
7320+ return -1;
7321+}
7322+
7323+void au_dpri_sb(struct super_block *sb)
7324+{
7325+ struct au_sbinfo *sbinfo;
7326+ aufs_bindex_t bindex;
7327+ int err;
7328+ /* to reuduce stack size */
7329+ struct {
7330+ struct vfsmount mnt;
7331+ struct au_branch fake;
7332+ } *a;
7333+
7334+ /* this function can be called from magic sysrq */
7335+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
7336+ if (unlikely(!a)) {
7337+ dpri("no memory\n");
7338+ return;
7339+ }
7340+
7341+ a->mnt.mnt_sb = sb;
86dc4139 7342+ a->fake.br_path.mnt = &a->mnt;
5afbbe0d 7343+ au_br_count_init(&a->fake);
1facf9fc 7344+ err = do_pri_br(-1, &a->fake);
5afbbe0d 7345+ au_br_count_fin(&a->fake);
f0c0a007 7346+ au_delayed_kfree(a);
1facf9fc 7347+ dpri("dev 0x%x\n", sb->s_dev);
7348+ if (err || !au_test_aufs(sb))
7349+ return;
7350+
7351+ sbinfo = au_sbi(sb);
7352+ if (!sbinfo)
7353+ return;
f0c0a007
AM
7354+ dpri("nw %d, gen %u, kobj %d\n",
7355+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
1facf9fc 7356+ atomic_read(&sbinfo->si_kobj.kref.refcount));
5afbbe0d 7357+ for (bindex = 0; bindex <= sbinfo->si_bbot; bindex++)
1facf9fc 7358+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
7359+}
7360+
7361+/* ---------------------------------------------------------------------- */
7362+
027c5e7a
AM
7363+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
7364+{
5527c038 7365+ struct inode *h_inode, *inode = d_inode(dentry);
027c5e7a 7366+ struct dentry *h_dentry;
5afbbe0d 7367+ aufs_bindex_t bindex, bbot, bi;
027c5e7a
AM
7368+
7369+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
7370+ return;
7371+
5afbbe0d
AM
7372+ bbot = au_dbbot(dentry);
7373+ bi = au_ibbot(inode);
7374+ if (bi < bbot)
7375+ bbot = bi;
7376+ bindex = au_dbtop(dentry);
7377+ bi = au_ibtop(inode);
027c5e7a
AM
7378+ if (bi > bindex)
7379+ bindex = bi;
7380+
5afbbe0d 7381+ for (; bindex <= bbot; bindex++) {
027c5e7a
AM
7382+ h_dentry = au_h_dptr(dentry, bindex);
7383+ if (!h_dentry)
7384+ continue;
7385+ h_inode = au_h_iptr(inode, bindex);
5527c038 7386+ if (unlikely(h_inode != d_inode(h_dentry))) {
392086de 7387+ au_debug_on();
027c5e7a
AM
7388+ AuDbg("b%d, %s:%d\n", bindex, func, line);
7389+ AuDbgDentry(dentry);
7390+ AuDbgInode(inode);
392086de 7391+ au_debug_off();
027c5e7a
AM
7392+ BUG();
7393+ }
7394+ }
7395+}
7396+
1facf9fc 7397+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
7398+{
7399+ int err, i, j;
7400+ struct au_dcsub_pages dpages;
7401+ struct au_dpage *dpage;
7402+ struct dentry **dentries;
7403+
7404+ err = au_dpages_init(&dpages, GFP_NOFS);
7405+ AuDebugOn(err);
027c5e7a 7406+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 7407+ AuDebugOn(err);
7408+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
7409+ dpage = dpages.dpages + i;
7410+ dentries = dpage->dentries;
7411+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 7412+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 7413+ }
7414+ au_dpages_free(&dpages);
7415+}
7416+
1facf9fc 7417+void au_dbg_verify_kthread(void)
7418+{
53392da6 7419+ if (au_wkq_test()) {
1facf9fc 7420+ au_dbg_blocked();
1e00d052
AM
7421+ /*
7422+ * It may be recursive, but udba=notify between two aufs mounts,
7423+ * where a single ro branch is shared, is not a problem.
7424+ */
7425+ /* WARN_ON(1); */
1facf9fc 7426+ }
7427+}
7428+
7429+/* ---------------------------------------------------------------------- */
7430+
1facf9fc 7431+int __init au_debug_init(void)
7432+{
7433+ aufs_bindex_t bindex;
7434+ struct au_vdir_destr destr;
7435+
7436+ bindex = -1;
7437+ AuDebugOn(bindex >= 0);
7438+
7439+ destr.len = -1;
7440+ AuDebugOn(destr.len < NAME_MAX);
7441+
7442+#ifdef CONFIG_4KSTACKS
0c3ec466 7443+ pr_warn("CONFIG_4KSTACKS is defined.\n");
1facf9fc 7444+#endif
7445+
1facf9fc 7446+ return 0;
7447+}
7f207e10
AM
7448diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
7449--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 7450+++ linux/fs/aufs/debug.h 2016-10-09 16:55:36.486034798 +0200
5527c038 7451@@ -0,0 +1,225 @@
1facf9fc 7452+/*
8cdd5066 7453+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 7454+ *
7455+ * This program, aufs is free software; you can redistribute it and/or modify
7456+ * it under the terms of the GNU General Public License as published by
7457+ * the Free Software Foundation; either version 2 of the License, or
7458+ * (at your option) any later version.
dece6358
AM
7459+ *
7460+ * This program is distributed in the hope that it will be useful,
7461+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7462+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7463+ * GNU General Public License for more details.
7464+ *
7465+ * You should have received a copy of the GNU General Public License
523b37e3 7466+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7467+ */
7468+
7469+/*
7470+ * debug print functions
7471+ */
7472+
7473+#ifndef __AUFS_DEBUG_H__
7474+#define __AUFS_DEBUG_H__
7475+
7476+#ifdef __KERNEL__
7477+
392086de 7478+#include <linux/atomic.h>
4a4d8108
AM
7479+#include <linux/module.h>
7480+#include <linux/kallsyms.h>
1facf9fc 7481+#include <linux/sysrq.h>
4a4d8108 7482+
1facf9fc 7483+#ifdef CONFIG_AUFS_DEBUG
7484+#define AuDebugOn(a) BUG_ON(a)
7485+
7486+/* module parameter */
392086de
AM
7487+extern atomic_t aufs_debug;
7488+static inline void au_debug_on(void)
1facf9fc 7489+{
392086de
AM
7490+ atomic_inc(&aufs_debug);
7491+}
7492+static inline void au_debug_off(void)
7493+{
7494+ atomic_dec_if_positive(&aufs_debug);
1facf9fc 7495+}
7496+
7497+static inline int au_debug_test(void)
7498+{
392086de 7499+ return atomic_read(&aufs_debug) > 0;
1facf9fc 7500+}
7501+#else
7502+#define AuDebugOn(a) do {} while (0)
392086de
AM
7503+AuStubVoid(au_debug_on, void)
7504+AuStubVoid(au_debug_off, void)
4a4d8108 7505+AuStubInt0(au_debug_test, void)
1facf9fc 7506+#endif /* CONFIG_AUFS_DEBUG */
7507+
392086de
AM
7508+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
7509+
1facf9fc 7510+/* ---------------------------------------------------------------------- */
7511+
7512+/* debug print */
7513+
4a4d8108 7514+#define AuDbg(fmt, ...) do { \
1facf9fc 7515+ if (au_debug_test()) \
4a4d8108 7516+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
1facf9fc 7517+} while (0)
4a4d8108
AM
7518+#define AuLabel(l) AuDbg(#l "\n")
7519+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
7520+#define AuWarn1(fmt, ...) do { \
1facf9fc 7521+ static unsigned char _c; \
7522+ if (!_c++) \
0c3ec466 7523+ pr_warn(fmt, ##__VA_ARGS__); \
1facf9fc 7524+} while (0)
7525+
4a4d8108 7526+#define AuErr1(fmt, ...) do { \
1facf9fc 7527+ static unsigned char _c; \
7528+ if (!_c++) \
4a4d8108 7529+ pr_err(fmt, ##__VA_ARGS__); \
1facf9fc 7530+} while (0)
7531+
4a4d8108 7532+#define AuIOErr1(fmt, ...) do { \
1facf9fc 7533+ static unsigned char _c; \
7534+ if (!_c++) \
4a4d8108 7535+ AuIOErr(fmt, ##__VA_ARGS__); \
1facf9fc 7536+} while (0)
7537+
7538+#define AuUnsupportMsg "This operation is not supported." \
7539+ " Please report this application to aufs-users ML."
4a4d8108
AM
7540+#define AuUnsupport(fmt, ...) do { \
7541+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
1facf9fc 7542+ dump_stack(); \
7543+} while (0)
7544+
7545+#define AuTraceErr(e) do { \
7546+ if (unlikely((e) < 0)) \
7547+ AuDbg("err %d\n", (int)(e)); \
7548+} while (0)
7549+
7550+#define AuTraceErrPtr(p) do { \
7551+ if (IS_ERR(p)) \
7552+ AuDbg("err %ld\n", PTR_ERR(p)); \
7553+} while (0)
7554+
7555+/* dirty macros for debug print, use with "%.*s" and caution */
7556+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
1facf9fc 7557+
7558+/* ---------------------------------------------------------------------- */
7559+
dece6358 7560+struct dentry;
1facf9fc 7561+#ifdef CONFIG_AUFS_DEBUG
c1595e42 7562+extern struct mutex au_dbg_mtx;
1facf9fc 7563+extern char *au_plevel;
7564+struct au_nhash;
7565+void au_dpri_whlist(struct au_nhash *whlist);
7566+struct au_vdir;
7567+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 7568+struct inode;
1facf9fc 7569+void au_dpri_inode(struct inode *inode);
2cbb1c4b 7570+void au_dpri_dalias(struct inode *inode);
1facf9fc 7571+void au_dpri_dentry(struct dentry *dentry);
dece6358 7572+struct file;
1facf9fc 7573+void au_dpri_file(struct file *filp);
dece6358 7574+struct super_block;
1facf9fc 7575+void au_dpri_sb(struct super_block *sb);
7576+
027c5e7a
AM
7577+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
7578+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 7579+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 7580+void au_dbg_verify_kthread(void);
7581+
7582+int __init au_debug_init(void);
7e9cd9fe 7583+
1facf9fc 7584+#define AuDbgWhlist(w) do { \
c1595e42 7585+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7586+ AuDbg(#w "\n"); \
7587+ au_dpri_whlist(w); \
c1595e42 7588+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7589+} while (0)
7590+
7591+#define AuDbgVdir(v) do { \
c1595e42 7592+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7593+ AuDbg(#v "\n"); \
7594+ au_dpri_vdir(v); \
c1595e42 7595+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7596+} while (0)
7597+
7598+#define AuDbgInode(i) do { \
c1595e42 7599+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7600+ AuDbg(#i "\n"); \
7601+ au_dpri_inode(i); \
c1595e42 7602+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7603+} while (0)
7604+
2cbb1c4b 7605+#define AuDbgDAlias(i) do { \
c1595e42 7606+ mutex_lock(&au_dbg_mtx); \
2cbb1c4b
JR
7607+ AuDbg(#i "\n"); \
7608+ au_dpri_dalias(i); \
c1595e42 7609+ mutex_unlock(&au_dbg_mtx); \
2cbb1c4b
JR
7610+} while (0)
7611+
1facf9fc 7612+#define AuDbgDentry(d) do { \
c1595e42 7613+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7614+ AuDbg(#d "\n"); \
7615+ au_dpri_dentry(d); \
c1595e42 7616+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7617+} while (0)
7618+
7619+#define AuDbgFile(f) do { \
c1595e42 7620+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7621+ AuDbg(#f "\n"); \
7622+ au_dpri_file(f); \
c1595e42 7623+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7624+} while (0)
7625+
7626+#define AuDbgSb(sb) do { \
c1595e42 7627+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7628+ AuDbg(#sb "\n"); \
7629+ au_dpri_sb(sb); \
c1595e42 7630+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7631+} while (0)
7632+
4a4d8108
AM
7633+#define AuDbgSym(addr) do { \
7634+ char sym[KSYM_SYMBOL_LEN]; \
7635+ sprint_symbol(sym, (unsigned long)addr); \
7636+ AuDbg("%s\n", sym); \
7637+} while (0)
1facf9fc 7638+#else
027c5e7a 7639+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
7640+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
7641+AuStubVoid(au_dbg_verify_kthread, void)
7642+AuStubInt0(__init au_debug_init, void)
1facf9fc 7643+
1facf9fc 7644+#define AuDbgWhlist(w) do {} while (0)
7645+#define AuDbgVdir(v) do {} while (0)
7646+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 7647+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 7648+#define AuDbgDentry(d) do {} while (0)
7649+#define AuDbgFile(f) do {} while (0)
7650+#define AuDbgSb(sb) do {} while (0)
4a4d8108 7651+#define AuDbgSym(addr) do {} while (0)
1facf9fc 7652+#endif /* CONFIG_AUFS_DEBUG */
7653+
7654+/* ---------------------------------------------------------------------- */
7655+
7656+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
7657+int __init au_sysrq_init(void);
7658+void au_sysrq_fin(void);
7659+
7660+#ifdef CONFIG_HW_CONSOLE
7661+#define au_dbg_blocked() do { \
7662+ WARN_ON(1); \
0c5527e5 7663+ handle_sysrq('w'); \
1facf9fc 7664+} while (0)
7665+#else
4a4d8108 7666+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7667+#endif
7668+
7669+#else
4a4d8108
AM
7670+AuStubInt0(__init au_sysrq_init, void)
7671+AuStubVoid(au_sysrq_fin, void)
7672+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7673+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
7674+
7675+#endif /* __KERNEL__ */
7676+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
7677diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
7678--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
7679+++ linux/fs/aufs/dentry.c 2016-10-09 16:55:38.889431135 +0200
7680@@ -0,0 +1,1130 @@
1facf9fc 7681+/*
8cdd5066 7682+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 7683+ *
7684+ * This program, aufs is free software; you can redistribute it and/or modify
7685+ * it under the terms of the GNU General Public License as published by
7686+ * the Free Software Foundation; either version 2 of the License, or
7687+ * (at your option) any later version.
dece6358
AM
7688+ *
7689+ * This program is distributed in the hope that it will be useful,
7690+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7691+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7692+ * GNU General Public License for more details.
7693+ *
7694+ * You should have received a copy of the GNU General Public License
523b37e3 7695+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7696+ */
7697+
7698+/*
7699+ * lookup and dentry operations
7700+ */
7701+
dece6358 7702+#include <linux/namei.h>
1facf9fc 7703+#include "aufs.h"
7704+
1facf9fc 7705+struct au_do_lookup_args {
7706+ unsigned int flags;
7707+ mode_t type;
1facf9fc 7708+};
7709+
7710+/*
7711+ * returns positive/negative dentry, NULL or an error.
7712+ * NULL means whiteout-ed or not-found.
7713+ */
7714+static struct dentry*
7715+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
7716+ aufs_bindex_t bindex, struct qstr *wh_name,
7717+ struct au_do_lookup_args *args)
7718+{
7719+ struct dentry *h_dentry;
2000de60 7720+ struct inode *h_inode;
1facf9fc 7721+ struct au_branch *br;
7722+ int wh_found, opq;
7723+ unsigned char wh_able;
7724+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
076b876e
AM
7725+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
7726+ IGNORE_PERM);
1facf9fc 7727+
1facf9fc 7728+ wh_found = 0;
7729+ br = au_sbr(dentry->d_sb, bindex);
7730+ wh_able = !!au_br_whable(br->br_perm);
7731+ if (wh_able)
e2f27e51 7732+ wh_found = au_wh_test(h_parent, wh_name, ignore_perm);
1facf9fc 7733+ h_dentry = ERR_PTR(wh_found);
7734+ if (!wh_found)
7735+ goto real_lookup;
7736+ if (unlikely(wh_found < 0))
7737+ goto out;
7738+
7739+ /* We found a whiteout */
5afbbe0d 7740+ /* au_set_dbbot(dentry, bindex); */
1facf9fc 7741+ au_set_dbwh(dentry, bindex);
7742+ if (!allow_neg)
7743+ return NULL; /* success */
7744+
4f0767ce 7745+real_lookup:
076b876e
AM
7746+ if (!ignore_perm)
7747+ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
7748+ else
7749+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
2000de60
JR
7750+ if (IS_ERR(h_dentry)) {
7751+ if (PTR_ERR(h_dentry) == -ENAMETOOLONG
7752+ && !allow_neg)
7753+ h_dentry = NULL;
1facf9fc 7754+ goto out;
2000de60 7755+ }
1facf9fc 7756+
5527c038
JR
7757+ h_inode = d_inode(h_dentry);
7758+ if (d_is_negative(h_dentry)) {
1facf9fc 7759+ if (!allow_neg)
7760+ goto out_neg;
7761+ } else if (wh_found
7762+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
7763+ goto out_neg;
7764+
5afbbe0d
AM
7765+ if (au_dbbot(dentry) <= bindex)
7766+ au_set_dbbot(dentry, bindex);
7767+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
7768+ au_set_dbtop(dentry, bindex);
1facf9fc 7769+ au_set_h_dptr(dentry, bindex, h_dentry);
7770+
2000de60
JR
7771+ if (!d_is_dir(h_dentry)
7772+ || !wh_able
5527c038 7773+ || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
1facf9fc 7774+ goto out; /* success */
7775+
febd17d6 7776+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
076b876e 7777+ opq = au_diropq_test(h_dentry);
febd17d6 7778+ inode_unlock(h_inode);
1facf9fc 7779+ if (opq > 0)
7780+ au_set_dbdiropq(dentry, bindex);
7781+ else if (unlikely(opq < 0)) {
7782+ au_set_h_dptr(dentry, bindex, NULL);
7783+ h_dentry = ERR_PTR(opq);
7784+ }
7785+ goto out;
7786+
4f0767ce 7787+out_neg:
1facf9fc 7788+ dput(h_dentry);
7789+ h_dentry = NULL;
4f0767ce 7790+out:
1facf9fc 7791+ return h_dentry;
7792+}
7793+
dece6358
AM
7794+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
7795+{
7796+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
7797+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
7798+ return -EPERM;
7799+ return 0;
7800+}
7801+
1facf9fc 7802+/*
7803+ * returns the number of lower positive dentries,
7804+ * otherwise an error.
7805+ * can be called at unlinking with @type is zero.
7806+ */
5afbbe0d
AM
7807+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
7808+ unsigned int flags)
1facf9fc 7809+{
7810+ int npositive, err;
7811+ aufs_bindex_t bindex, btail, bdiropq;
076b876e 7812+ unsigned char isdir, dirperm1;
1facf9fc 7813+ struct qstr whname;
7814+ struct au_do_lookup_args args = {
5afbbe0d 7815+ .flags = flags
1facf9fc 7816+ };
7817+ const struct qstr *name = &dentry->d_name;
7818+ struct dentry *parent;
076b876e 7819+ struct super_block *sb;
1facf9fc 7820+
076b876e
AM
7821+ sb = dentry->d_sb;
7822+ err = au_test_shwh(sb, name);
dece6358 7823+ if (unlikely(err))
1facf9fc 7824+ goto out;
7825+
7826+ err = au_wh_name_alloc(&whname, name);
7827+ if (unlikely(err))
7828+ goto out;
7829+
2000de60 7830+ isdir = !!d_is_dir(dentry);
076b876e 7831+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
1facf9fc 7832+
7833+ npositive = 0;
4a4d8108 7834+ parent = dget_parent(dentry);
1facf9fc 7835+ btail = au_dbtaildir(parent);
5afbbe0d 7836+ for (bindex = btop; bindex <= btail; bindex++) {
1facf9fc 7837+ struct dentry *h_parent, *h_dentry;
7838+ struct inode *h_inode, *h_dir;
7839+
7840+ h_dentry = au_h_dptr(dentry, bindex);
7841+ if (h_dentry) {
5527c038 7842+ if (d_is_positive(h_dentry))
1facf9fc 7843+ npositive++;
5afbbe0d 7844+ break;
1facf9fc 7845+ }
7846+ h_parent = au_h_dptr(parent, bindex);
2000de60 7847+ if (!h_parent || !d_is_dir(h_parent))
1facf9fc 7848+ continue;
7849+
5527c038 7850+ h_dir = d_inode(h_parent);
febd17d6 7851+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
1facf9fc 7852+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
7853+ &args);
febd17d6 7854+ inode_unlock(h_dir);
1facf9fc 7855+ err = PTR_ERR(h_dentry);
7856+ if (IS_ERR(h_dentry))
4a4d8108 7857+ goto out_parent;
2000de60
JR
7858+ if (h_dentry)
7859+ au_fclr_lkup(args.flags, ALLOW_NEG);
076b876e
AM
7860+ if (dirperm1)
7861+ au_fset_lkup(args.flags, IGNORE_PERM);
1facf9fc 7862+
79b8bda9 7863+ if (au_dbwh(dentry) == bindex)
1facf9fc 7864+ break;
7865+ if (!h_dentry)
7866+ continue;
5527c038 7867+ if (d_is_negative(h_dentry))
1facf9fc 7868+ continue;
5527c038 7869+ h_inode = d_inode(h_dentry);
1facf9fc 7870+ npositive++;
7871+ if (!args.type)
7872+ args.type = h_inode->i_mode & S_IFMT;
7873+ if (args.type != S_IFDIR)
7874+ break;
7875+ else if (isdir) {
7876+ /* the type of lower may be different */
7877+ bdiropq = au_dbdiropq(dentry);
7878+ if (bdiropq >= 0 && bdiropq <= bindex)
7879+ break;
7880+ }
7881+ }
7882+
7883+ if (npositive) {
7884+ AuLabel(positive);
5afbbe0d 7885+ au_update_dbtop(dentry);
1facf9fc 7886+ }
7887+ err = npositive;
076b876e 7888+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
5afbbe0d 7889+ && au_dbtop(dentry) < 0)) {
1facf9fc 7890+ err = -EIO;
523b37e3
AM
7891+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
7892+ dentry, err);
027c5e7a 7893+ }
1facf9fc 7894+
4f0767ce 7895+out_parent:
4a4d8108 7896+ dput(parent);
f0c0a007 7897+ au_delayed_kfree(whname.name);
4f0767ce 7898+out:
1facf9fc 7899+ return err;
7900+}
7901+
076b876e 7902+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
1facf9fc 7903+{
7904+ struct dentry *dentry;
7905+ int wkq_err;
7906+
5527c038 7907+ if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
b4510431 7908+ dentry = vfsub_lkup_one(name, parent);
1facf9fc 7909+ else {
b4510431
AM
7910+ struct vfsub_lkup_one_args args = {
7911+ .errp = &dentry,
7912+ .name = name,
7913+ .parent = parent
1facf9fc 7914+ };
7915+
b4510431 7916+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
1facf9fc 7917+ if (unlikely(wkq_err))
7918+ dentry = ERR_PTR(wkq_err);
7919+ }
7920+
7921+ return dentry;
7922+}
7923+
7924+/*
7925+ * lookup @dentry on @bindex which should be negative.
7926+ */
86dc4139 7927+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
1facf9fc 7928+{
7929+ int err;
7930+ struct dentry *parent, *h_parent, *h_dentry;
86dc4139 7931+ struct au_branch *br;
1facf9fc 7932+
1facf9fc 7933+ parent = dget_parent(dentry);
7934+ h_parent = au_h_dptr(parent, bindex);
86dc4139
AM
7935+ br = au_sbr(dentry->d_sb, bindex);
7936+ if (wh)
7937+ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
7938+ else
076b876e 7939+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
1facf9fc 7940+ err = PTR_ERR(h_dentry);
7941+ if (IS_ERR(h_dentry))
7942+ goto out;
5527c038 7943+ if (unlikely(d_is_positive(h_dentry))) {
1facf9fc 7944+ err = -EIO;
523b37e3 7945+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
1facf9fc 7946+ dput(h_dentry);
7947+ goto out;
7948+ }
7949+
4a4d8108 7950+ err = 0;
5afbbe0d
AM
7951+ if (bindex < au_dbtop(dentry))
7952+ au_set_dbtop(dentry, bindex);
7953+ if (au_dbbot(dentry) < bindex)
7954+ au_set_dbbot(dentry, bindex);
1facf9fc 7955+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 7956+
4f0767ce 7957+out:
1facf9fc 7958+ dput(parent);
7959+ return err;
7960+}
7961+
7962+/* ---------------------------------------------------------------------- */
7963+
7964+/* subset of struct inode */
7965+struct au_iattr {
7966+ unsigned long i_ino;
7967+ /* unsigned int i_nlink; */
0c3ec466
AM
7968+ kuid_t i_uid;
7969+ kgid_t i_gid;
1facf9fc 7970+ u64 i_version;
7971+/*
7972+ loff_t i_size;
7973+ blkcnt_t i_blocks;
7974+*/
7975+ umode_t i_mode;
7976+};
7977+
7978+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
7979+{
7980+ ia->i_ino = h_inode->i_ino;
7981+ /* ia->i_nlink = h_inode->i_nlink; */
7982+ ia->i_uid = h_inode->i_uid;
7983+ ia->i_gid = h_inode->i_gid;
7984+ ia->i_version = h_inode->i_version;
7985+/*
7986+ ia->i_size = h_inode->i_size;
7987+ ia->i_blocks = h_inode->i_blocks;
7988+*/
7989+ ia->i_mode = (h_inode->i_mode & S_IFMT);
7990+}
7991+
7992+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
7993+{
7994+ return ia->i_ino != h_inode->i_ino
7995+ /* || ia->i_nlink != h_inode->i_nlink */
0c3ec466 7996+ || !uid_eq(ia->i_uid, h_inode->i_uid)
2dfbb274 7997+ || !gid_eq(ia->i_gid, h_inode->i_gid)
1facf9fc 7998+ || ia->i_version != h_inode->i_version
7999+/*
8000+ || ia->i_size != h_inode->i_size
8001+ || ia->i_blocks != h_inode->i_blocks
8002+*/
8003+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
8004+}
8005+
8006+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
8007+ struct au_branch *br)
8008+{
8009+ int err;
8010+ struct au_iattr ia;
8011+ struct inode *h_inode;
8012+ struct dentry *h_d;
8013+ struct super_block *h_sb;
8014+
8015+ err = 0;
8016+ memset(&ia, -1, sizeof(ia));
8017+ h_sb = h_dentry->d_sb;
5527c038
JR
8018+ h_inode = NULL;
8019+ if (d_is_positive(h_dentry)) {
8020+ h_inode = d_inode(h_dentry);
1facf9fc 8021+ au_iattr_save(&ia, h_inode);
5527c038 8022+ } else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
1facf9fc 8023+ /* nfs d_revalidate may return 0 for negative dentry */
8024+ /* fuse d_revalidate always return 0 for negative dentry */
8025+ goto out;
8026+
8027+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
b4510431 8028+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
1facf9fc 8029+ err = PTR_ERR(h_d);
8030+ if (IS_ERR(h_d))
8031+ goto out;
8032+
8033+ err = 0;
8034+ if (unlikely(h_d != h_dentry
5527c038 8035+ || d_inode(h_d) != h_inode
1facf9fc 8036+ || (h_inode && au_iattr_test(&ia, h_inode))))
8037+ err = au_busy_or_stale();
8038+ dput(h_d);
8039+
4f0767ce 8040+out:
1facf9fc 8041+ AuTraceErr(err);
8042+ return err;
8043+}
8044+
8045+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8046+ struct dentry *h_parent, struct au_branch *br)
8047+{
8048+ int err;
8049+
8050+ err = 0;
027c5e7a
AM
8051+ if (udba == AuOpt_UDBA_REVAL
8052+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 8053+ IMustLock(h_dir);
5527c038 8054+ err = (d_inode(h_dentry->d_parent) != h_dir);
027c5e7a 8055+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 8056+ err = au_h_verify_dentry(h_dentry, h_parent, br);
8057+
8058+ return err;
8059+}
8060+
8061+/* ---------------------------------------------------------------------- */
8062+
027c5e7a 8063+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 8064+{
027c5e7a 8065+ int err;
5afbbe0d 8066+ aufs_bindex_t new_bindex, bindex, bbot, bwh, bdiropq;
027c5e7a
AM
8067+ struct au_hdentry tmp, *p, *q;
8068+ struct au_dinfo *dinfo;
8069+ struct super_block *sb;
1facf9fc 8070+
027c5e7a 8071+ DiMustWriteLock(dentry);
1308ab2a 8072+
027c5e7a
AM
8073+ sb = dentry->d_sb;
8074+ dinfo = au_di(dentry);
5afbbe0d 8075+ bbot = dinfo->di_bbot;
1facf9fc 8076+ bwh = dinfo->di_bwh;
8077+ bdiropq = dinfo->di_bdiropq;
5afbbe0d
AM
8078+ bindex = dinfo->di_btop;
8079+ p = au_hdentry(dinfo, bindex);
8080+ for (; bindex <= bbot; bindex++, p++) {
027c5e7a 8081+ if (!p->hd_dentry)
1facf9fc 8082+ continue;
8083+
027c5e7a
AM
8084+ new_bindex = au_br_index(sb, p->hd_id);
8085+ if (new_bindex == bindex)
1facf9fc 8086+ continue;
1facf9fc 8087+
1facf9fc 8088+ if (dinfo->di_bwh == bindex)
8089+ bwh = new_bindex;
8090+ if (dinfo->di_bdiropq == bindex)
8091+ bdiropq = new_bindex;
8092+ if (new_bindex < 0) {
8093+ au_hdput(p);
8094+ p->hd_dentry = NULL;
8095+ continue;
8096+ }
8097+
8098+ /* swap two lower dentries, and loop again */
5afbbe0d 8099+ q = au_hdentry(dinfo, new_bindex);
1facf9fc 8100+ tmp = *q;
8101+ *q = *p;
8102+ *p = tmp;
8103+ if (tmp.hd_dentry) {
8104+ bindex--;
8105+ p--;
8106+ }
8107+ }
8108+
1facf9fc 8109+ dinfo->di_bwh = -1;
5afbbe0d 8110+ if (bwh >= 0 && bwh <= au_sbbot(sb) && au_sbr_whable(sb, bwh))
1facf9fc 8111+ dinfo->di_bwh = bwh;
8112+
8113+ dinfo->di_bdiropq = -1;
8114+ if (bdiropq >= 0
5afbbe0d 8115+ && bdiropq <= au_sbbot(sb)
1facf9fc 8116+ && au_sbr_whable(sb, bdiropq))
8117+ dinfo->di_bdiropq = bdiropq;
8118+
027c5e7a 8119+ err = -EIO;
5afbbe0d
AM
8120+ dinfo->di_btop = -1;
8121+ dinfo->di_bbot = -1;
8122+ bbot = au_dbbot(parent);
8123+ bindex = 0;
8124+ p = au_hdentry(dinfo, bindex);
8125+ for (; bindex <= bbot; bindex++, p++)
1facf9fc 8126+ if (p->hd_dentry) {
5afbbe0d 8127+ dinfo->di_btop = bindex;
1facf9fc 8128+ break;
8129+ }
8130+
5afbbe0d
AM
8131+ if (dinfo->di_btop >= 0) {
8132+ bindex = bbot;
8133+ p = au_hdentry(dinfo, bindex);
8134+ for (; bindex >= 0; bindex--, p--)
027c5e7a 8135+ if (p->hd_dentry) {
5afbbe0d 8136+ dinfo->di_bbot = bindex;
027c5e7a
AM
8137+ err = 0;
8138+ break;
8139+ }
8140+ }
8141+
8142+ return err;
1facf9fc 8143+}
8144+
027c5e7a 8145+static void au_do_hide(struct dentry *dentry)
1facf9fc 8146+{
027c5e7a 8147+ struct inode *inode;
1facf9fc 8148+
5527c038
JR
8149+ if (d_really_is_positive(dentry)) {
8150+ inode = d_inode(dentry);
8151+ if (!d_is_dir(dentry)) {
027c5e7a
AM
8152+ if (inode->i_nlink && !d_unhashed(dentry))
8153+ drop_nlink(inode);
8154+ } else {
8155+ clear_nlink(inode);
8156+ /* stop next lookup */
8157+ inode->i_flags |= S_DEAD;
8158+ }
8159+ smp_mb(); /* necessary? */
8160+ }
8161+ d_drop(dentry);
8162+}
1308ab2a 8163+
027c5e7a
AM
8164+static int au_hide_children(struct dentry *parent)
8165+{
8166+ int err, i, j, ndentry;
8167+ struct au_dcsub_pages dpages;
8168+ struct au_dpage *dpage;
8169+ struct dentry *dentry;
1facf9fc 8170+
027c5e7a 8171+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 8172+ if (unlikely(err))
8173+ goto out;
027c5e7a
AM
8174+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
8175+ if (unlikely(err))
8176+ goto out_dpages;
1facf9fc 8177+
027c5e7a
AM
8178+ /* in reverse order */
8179+ for (i = dpages.ndpage - 1; i >= 0; i--) {
8180+ dpage = dpages.dpages + i;
8181+ ndentry = dpage->ndentry;
8182+ for (j = ndentry - 1; j >= 0; j--) {
8183+ dentry = dpage->dentries[j];
8184+ if (dentry != parent)
8185+ au_do_hide(dentry);
8186+ }
8187+ }
1facf9fc 8188+
027c5e7a
AM
8189+out_dpages:
8190+ au_dpages_free(&dpages);
4f0767ce 8191+out:
027c5e7a 8192+ return err;
1facf9fc 8193+}
8194+
027c5e7a 8195+static void au_hide(struct dentry *dentry)
1facf9fc 8196+{
027c5e7a 8197+ int err;
1facf9fc 8198+
027c5e7a 8199+ AuDbgDentry(dentry);
2000de60 8200+ if (d_is_dir(dentry)) {
027c5e7a
AM
8201+ /* shrink_dcache_parent(dentry); */
8202+ err = au_hide_children(dentry);
8203+ if (unlikely(err))
523b37e3
AM
8204+ AuIOErr("%pd, failed hiding children, ignored %d\n",
8205+ dentry, err);
027c5e7a
AM
8206+ }
8207+ au_do_hide(dentry);
8208+}
1facf9fc 8209+
027c5e7a
AM
8210+/*
8211+ * By adding a dirty branch, a cached dentry may be affected in various ways.
8212+ *
8213+ * a dirty branch is added
8214+ * - on the top of layers
8215+ * - in the middle of layers
8216+ * - to the bottom of layers
8217+ *
8218+ * on the added branch there exists
8219+ * - a whiteout
8220+ * - a diropq
8221+ * - a same named entry
8222+ * + exist
8223+ * * negative --> positive
8224+ * * positive --> positive
8225+ * - type is unchanged
8226+ * - type is changed
8227+ * + doesn't exist
8228+ * * negative --> negative
8229+ * * positive --> negative (rejected by au_br_del() for non-dir case)
8230+ * - none
8231+ */
8232+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
8233+ struct au_dinfo *tmp)
8234+{
8235+ int err;
5afbbe0d 8236+ aufs_bindex_t bindex, bbot;
027c5e7a
AM
8237+ struct {
8238+ struct dentry *dentry;
8239+ struct inode *inode;
8240+ mode_t mode;
be52b249
AM
8241+ } orig_h, tmp_h = {
8242+ .dentry = NULL
8243+ };
027c5e7a
AM
8244+ struct au_hdentry *hd;
8245+ struct inode *inode, *h_inode;
8246+ struct dentry *h_dentry;
8247+
8248+ err = 0;
5afbbe0d 8249+ AuDebugOn(dinfo->di_btop < 0);
027c5e7a 8250+ orig_h.mode = 0;
5afbbe0d 8251+ orig_h.dentry = au_hdentry(dinfo, dinfo->di_btop)->hd_dentry;
5527c038
JR
8252+ orig_h.inode = NULL;
8253+ if (d_is_positive(orig_h.dentry)) {
8254+ orig_h.inode = d_inode(orig_h.dentry);
027c5e7a 8255+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
5527c038 8256+ }
5afbbe0d
AM
8257+ if (tmp->di_btop >= 0) {
8258+ tmp_h.dentry = au_hdentry(tmp, tmp->di_btop)->hd_dentry;
5527c038
JR
8259+ if (d_is_positive(tmp_h.dentry)) {
8260+ tmp_h.inode = d_inode(tmp_h.dentry);
027c5e7a 8261+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
5527c038 8262+ }
027c5e7a
AM
8263+ }
8264+
5527c038
JR
8265+ inode = NULL;
8266+ if (d_really_is_positive(dentry))
8267+ inode = d_inode(dentry);
027c5e7a
AM
8268+ if (!orig_h.inode) {
8269+ AuDbg("nagative originally\n");
8270+ if (inode) {
8271+ au_hide(dentry);
8272+ goto out;
8273+ }
8274+ AuDebugOn(inode);
5afbbe0d 8275+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
027c5e7a
AM
8276+ AuDebugOn(dinfo->di_bdiropq != -1);
8277+
8278+ if (!tmp_h.inode) {
8279+ AuDbg("negative --> negative\n");
8280+ /* should have only one negative lower */
5afbbe0d
AM
8281+ if (tmp->di_btop >= 0
8282+ && tmp->di_btop < dinfo->di_btop) {
8283+ AuDebugOn(tmp->di_btop != tmp->di_bbot);
8284+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
8285+ au_set_h_dptr(dentry, dinfo->di_btop, NULL);
027c5e7a 8286+ au_di_cp(dinfo, tmp);
5afbbe0d
AM
8287+ hd = au_hdentry(tmp, tmp->di_btop);
8288+ au_set_h_dptr(dentry, tmp->di_btop,
027c5e7a
AM
8289+ dget(hd->hd_dentry));
8290+ }
8291+ au_dbg_verify_dinode(dentry);
8292+ } else {
8293+ AuDbg("negative --> positive\n");
8294+ /*
8295+ * similar to the behaviour of creating with bypassing
8296+ * aufs.
8297+ * unhash it in order to force an error in the
8298+ * succeeding create operation.
8299+ * we should not set S_DEAD here.
8300+ */
8301+ d_drop(dentry);
8302+ /* au_di_swap(tmp, dinfo); */
8303+ au_dbg_verify_dinode(dentry);
8304+ }
8305+ } else {
8306+ AuDbg("positive originally\n");
8307+ /* inode may be NULL */
8308+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
8309+ if (!tmp_h.inode) {
8310+ AuDbg("positive --> negative\n");
8311+ /* or bypassing aufs */
8312+ au_hide(dentry);
5afbbe0d 8313+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_btop)
027c5e7a
AM
8314+ dinfo->di_bwh = tmp->di_bwh;
8315+ if (inode)
8316+ err = au_refresh_hinode_self(inode);
8317+ au_dbg_verify_dinode(dentry);
8318+ } else if (orig_h.mode == tmp_h.mode) {
8319+ AuDbg("positive --> positive, same type\n");
8320+ if (!S_ISDIR(orig_h.mode)
5afbbe0d 8321+ && dinfo->di_btop > tmp->di_btop) {
027c5e7a
AM
8322+ /*
8323+ * similar to the behaviour of removing and
8324+ * creating.
8325+ */
8326+ au_hide(dentry);
8327+ if (inode)
8328+ err = au_refresh_hinode_self(inode);
8329+ au_dbg_verify_dinode(dentry);
8330+ } else {
8331+ /* fill empty slots */
5afbbe0d
AM
8332+ if (dinfo->di_btop > tmp->di_btop)
8333+ dinfo->di_btop = tmp->di_btop;
8334+ if (dinfo->di_bbot < tmp->di_bbot)
8335+ dinfo->di_bbot = tmp->di_bbot;
027c5e7a
AM
8336+ dinfo->di_bwh = tmp->di_bwh;
8337+ dinfo->di_bdiropq = tmp->di_bdiropq;
5afbbe0d
AM
8338+ bbot = dinfo->di_bbot;
8339+ bindex = tmp->di_btop;
8340+ hd = au_hdentry(tmp, bindex);
8341+ for (; bindex <= bbot; bindex++, hd++) {
027c5e7a
AM
8342+ if (au_h_dptr(dentry, bindex))
8343+ continue;
5afbbe0d 8344+ h_dentry = hd->hd_dentry;
027c5e7a
AM
8345+ if (!h_dentry)
8346+ continue;
5527c038
JR
8347+ AuDebugOn(d_is_negative(h_dentry));
8348+ h_inode = d_inode(h_dentry);
027c5e7a
AM
8349+ AuDebugOn(orig_h.mode
8350+ != (h_inode->i_mode
8351+ & S_IFMT));
8352+ au_set_h_dptr(dentry, bindex,
8353+ dget(h_dentry));
8354+ }
5afbbe0d
AM
8355+ if (inode)
8356+ err = au_refresh_hinode(inode, dentry);
027c5e7a
AM
8357+ au_dbg_verify_dinode(dentry);
8358+ }
8359+ } else {
8360+ AuDbg("positive --> positive, different type\n");
8361+ /* similar to the behaviour of removing and creating */
8362+ au_hide(dentry);
8363+ if (inode)
8364+ err = au_refresh_hinode_self(inode);
8365+ au_dbg_verify_dinode(dentry);
8366+ }
8367+ }
8368+
8369+out:
8370+ return err;
8371+}
8372+
79b8bda9
AM
8373+void au_refresh_dop(struct dentry *dentry, int force_reval)
8374+{
8375+ const struct dentry_operations *dop
8376+ = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
8377+ static const unsigned int mask
8378+ = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
8379+
8380+ BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
8381+
8382+ if (dentry->d_op == dop)
8383+ return;
8384+
8385+ AuDbg("%pd\n", dentry);
8386+ spin_lock(&dentry->d_lock);
8387+ if (dop == &aufs_dop)
8388+ dentry->d_flags |= mask;
8389+ else
8390+ dentry->d_flags &= ~mask;
8391+ dentry->d_op = dop;
8392+ spin_unlock(&dentry->d_lock);
8393+}
8394+
027c5e7a
AM
8395+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
8396+{
e2f27e51 8397+ int err, ebrange, nbr;
027c5e7a
AM
8398+ unsigned int sigen;
8399+ struct au_dinfo *dinfo, *tmp;
8400+ struct super_block *sb;
8401+ struct inode *inode;
8402+
8403+ DiMustWriteLock(dentry);
8404+ AuDebugOn(IS_ROOT(dentry));
5527c038 8405+ AuDebugOn(d_really_is_negative(parent));
027c5e7a
AM
8406+
8407+ sb = dentry->d_sb;
027c5e7a
AM
8408+ sigen = au_sigen(sb);
8409+ err = au_digen_test(parent, sigen);
8410+ if (unlikely(err))
8411+ goto out;
8412+
e2f27e51 8413+ nbr = au_sbbot(sb) + 1;
027c5e7a 8414+ dinfo = au_di(dentry);
e2f27e51 8415+ err = au_di_realloc(dinfo, nbr, /*may_shrink*/0);
027c5e7a
AM
8416+ if (unlikely(err))
8417+ goto out;
8418+ ebrange = au_dbrange_test(dentry);
8419+ if (!ebrange)
8420+ ebrange = au_do_refresh_hdentry(dentry, parent);
8421+
38d290e6 8422+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
5afbbe0d 8423+ AuDebugOn(au_dbtop(dentry) < 0 && au_dbbot(dentry) >= 0);
5527c038
JR
8424+ if (d_really_is_positive(dentry)) {
8425+ inode = d_inode(dentry);
027c5e7a 8426+ err = au_refresh_hinode_self(inode);
5527c038 8427+ }
027c5e7a
AM
8428+ au_dbg_verify_dinode(dentry);
8429+ if (!err)
8430+ goto out_dgen; /* success */
8431+ goto out;
8432+ }
8433+
8434+ /* temporary dinfo */
8435+ AuDbgDentry(dentry);
8436+ err = -ENOMEM;
8437+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
8438+ if (unlikely(!tmp))
8439+ goto out;
8440+ au_di_swap(tmp, dinfo);
8441+ /* returns the number of positive dentries */
8442+ /*
8443+ * if current working dir is removed, it returns an error.
8444+ * but the dentry is legal.
8445+ */
5afbbe0d 8446+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
027c5e7a
AM
8447+ AuDbgDentry(dentry);
8448+ au_di_swap(tmp, dinfo);
8449+ if (err == -ENOENT)
8450+ err = 0;
8451+ if (err >= 0) {
8452+ /* compare/refresh by dinfo */
8453+ AuDbgDentry(dentry);
8454+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
8455+ au_dbg_verify_dinode(dentry);
8456+ AuTraceErr(err);
8457+ }
e2f27e51 8458+ au_di_realloc(dinfo, nbr, /*may_shrink*/1); /* harmless if err */
027c5e7a
AM
8459+ au_rw_write_unlock(&tmp->di_rwsem);
8460+ au_di_free(tmp);
8461+ if (unlikely(err))
8462+ goto out;
8463+
8464+out_dgen:
8465+ au_update_digen(dentry);
8466+out:
8467+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
523b37e3 8468+ AuIOErr("failed refreshing %pd, %d\n", dentry, err);
027c5e7a
AM
8469+ AuDbgDentry(dentry);
8470+ }
8471+ AuTraceErr(err);
8472+ return err;
8473+}
8474+
b4510431
AM
8475+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
8476+ struct dentry *dentry, aufs_bindex_t bindex)
027c5e7a
AM
8477+{
8478+ int err, valid;
027c5e7a
AM
8479+
8480+ err = 0;
8481+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
8482+ goto out;
027c5e7a
AM
8483+
8484+ AuDbg("b%d\n", bindex);
b4510431
AM
8485+ /*
8486+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
8487+ * due to whiteout and branch permission.
8488+ */
8489+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
8490+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
8491+ /* it may return tri-state */
8492+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
1facf9fc 8493+
8494+ if (unlikely(valid < 0))
8495+ err = valid;
8496+ else if (!valid)
8497+ err = -EINVAL;
8498+
4f0767ce 8499+out:
1facf9fc 8500+ AuTraceErr(err);
8501+ return err;
8502+}
8503+
8504+/* todo: remove this */
8505+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
b4510431 8506+ unsigned int flags, int do_udba)
1facf9fc 8507+{
8508+ int err;
8509+ umode_t mode, h_mode;
5afbbe0d 8510+ aufs_bindex_t bindex, btail, btop, ibs, ibe;
38d290e6 8511+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
4a4d8108 8512+ struct inode *h_inode, *h_cached_inode;
1facf9fc 8513+ struct dentry *h_dentry;
8514+ struct qstr *name, *h_name;
8515+
8516+ err = 0;
8517+ plus = 0;
8518+ mode = 0;
1facf9fc 8519+ ibs = -1;
8520+ ibe = -1;
8521+ unhashed = !!d_unhashed(dentry);
8522+ is_root = !!IS_ROOT(dentry);
8523+ name = &dentry->d_name;
38d290e6 8524+ tmpfile = au_di(dentry)->di_tmpfile;
1facf9fc 8525+
8526+ /*
7f207e10
AM
8527+ * Theoretically, REVAL test should be unnecessary in case of
8528+ * {FS,I}NOTIFY.
8529+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 8530+ * IN_ATTRIB for atime/nlink/pageio
1facf9fc 8531+ * Let's do REVAL test too.
8532+ */
8533+ if (do_udba && inode) {
8534+ mode = (inode->i_mode & S_IFMT);
8535+ plus = (inode->i_nlink > 0);
5afbbe0d
AM
8536+ ibs = au_ibtop(inode);
8537+ ibe = au_ibbot(inode);
1facf9fc 8538+ }
8539+
5afbbe0d
AM
8540+ btop = au_dbtop(dentry);
8541+ btail = btop;
1facf9fc 8542+ if (inode && S_ISDIR(inode->i_mode))
8543+ btail = au_dbtaildir(dentry);
5afbbe0d 8544+ for (bindex = btop; bindex <= btail; bindex++) {
1facf9fc 8545+ h_dentry = au_h_dptr(dentry, bindex);
8546+ if (!h_dentry)
8547+ continue;
8548+
523b37e3
AM
8549+ AuDbg("b%d, %pd\n", bindex, h_dentry);
8550+ h_nfs = !!au_test_nfs(h_dentry->d_sb);
027c5e7a 8551+ spin_lock(&h_dentry->d_lock);
1facf9fc 8552+ h_name = &h_dentry->d_name;
8553+ if (unlikely(do_udba
8554+ && !is_root
523b37e3
AM
8555+ && ((!h_nfs
8556+ && (unhashed != !!d_unhashed(h_dentry)
38d290e6
JR
8557+ || (!tmpfile
8558+ && !au_qstreq(name, h_name))
8559+ ))
523b37e3
AM
8560+ || (h_nfs
8561+ && !(flags & LOOKUP_OPEN)
8562+ && (h_dentry->d_flags
8563+ & DCACHE_NFSFS_RENAMED)))
1facf9fc 8564+ )) {
38d290e6
JR
8565+ int h_unhashed;
8566+
8567+ h_unhashed = d_unhashed(h_dentry);
027c5e7a 8568+ spin_unlock(&h_dentry->d_lock);
38d290e6
JR
8569+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
8570+ unhashed, h_unhashed, dentry, h_dentry);
1facf9fc 8571+ goto err;
8572+ }
027c5e7a 8573+ spin_unlock(&h_dentry->d_lock);
1facf9fc 8574+
b4510431 8575+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
1facf9fc 8576+ if (unlikely(err))
8577+ /* do not goto err, to keep the errno */
8578+ break;
8579+
8580+ /* todo: plink too? */
8581+ if (!do_udba)
8582+ continue;
8583+
8584+ /* UDBA tests */
5527c038 8585+ if (unlikely(!!inode != d_is_positive(h_dentry)))
1facf9fc 8586+ goto err;
8587+
5527c038
JR
8588+ h_inode = NULL;
8589+ if (d_is_positive(h_dentry))
8590+ h_inode = d_inode(h_dentry);
1facf9fc 8591+ h_plus = plus;
8592+ h_mode = mode;
8593+ h_cached_inode = h_inode;
8594+ if (h_inode) {
8595+ h_mode = (h_inode->i_mode & S_IFMT);
8596+ h_plus = (h_inode->i_nlink > 0);
8597+ }
8598+ if (inode && ibs <= bindex && bindex <= ibe)
8599+ h_cached_inode = au_h_iptr(inode, bindex);
8600+
523b37e3 8601+ if (!h_nfs) {
38d290e6 8602+ if (unlikely(plus != h_plus && !tmpfile))
523b37e3
AM
8603+ goto err;
8604+ } else {
8605+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
8606+ && !is_root
8607+ && !IS_ROOT(h_dentry)
8608+ && unhashed != d_unhashed(h_dentry)))
8609+ goto err;
8610+ }
8611+ if (unlikely(mode != h_mode
1facf9fc 8612+ || h_cached_inode != h_inode))
8613+ goto err;
8614+ continue;
8615+
f6b6e03d 8616+err:
1facf9fc 8617+ err = -EINVAL;
8618+ break;
8619+ }
8620+
523b37e3 8621+ AuTraceErr(err);
1facf9fc 8622+ return err;
8623+}
8624+
027c5e7a 8625+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 8626+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
8627+{
8628+ int err;
8629+ struct dentry *parent;
1facf9fc 8630+
027c5e7a 8631+ if (!au_digen_test(dentry, sigen))
1facf9fc 8632+ return 0;
8633+
8634+ parent = dget_parent(dentry);
8635+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8636+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 8637+ au_dbg_verify_gen(parent, sigen);
027c5e7a 8638+ err = au_refresh_dentry(dentry, parent);
1facf9fc 8639+ di_read_unlock(parent, AuLock_IR);
8640+ dput(parent);
027c5e7a 8641+ AuTraceErr(err);
1facf9fc 8642+ return err;
8643+}
8644+
8645+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
8646+{
8647+ int err;
8648+ struct dentry *d, *parent;
1facf9fc 8649+
027c5e7a 8650+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 8651+ return simple_reval_dpath(dentry, sigen);
8652+
8653+ /* slow loop, keep it simple and stupid */
8654+ /* cf: au_cpup_dirs() */
8655+ err = 0;
8656+ parent = NULL;
027c5e7a 8657+ while (au_digen_test(dentry, sigen)) {
1facf9fc 8658+ d = dentry;
8659+ while (1) {
8660+ dput(parent);
8661+ parent = dget_parent(d);
027c5e7a 8662+ if (!au_digen_test(parent, sigen))
1facf9fc 8663+ break;
8664+ d = parent;
8665+ }
8666+
1facf9fc 8667+ if (d != dentry)
027c5e7a 8668+ di_write_lock_child2(d);
1facf9fc 8669+
8670+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
8671+ if (au_digen_test(d, sigen)) {
8672+ /*
8673+ * todo: consolidate with simple_reval_dpath(),
8674+ * do_refresh() and au_reval_for_attr().
8675+ */
1facf9fc 8676+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8677+ err = au_refresh_dentry(d, parent);
1facf9fc 8678+ di_read_unlock(parent, AuLock_IR);
8679+ }
8680+
8681+ if (d != dentry)
8682+ di_write_unlock(d);
8683+ dput(parent);
8684+ if (unlikely(err))
8685+ break;
8686+ }
8687+
8688+ return err;
8689+}
8690+
8691+/*
8692+ * if valid returns 1, otherwise 0.
8693+ */
b4510431 8694+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
1facf9fc 8695+{
8696+ int valid, err;
8697+ unsigned int sigen;
8698+ unsigned char do_udba;
8699+ struct super_block *sb;
8700+ struct inode *inode;
8701+
027c5e7a 8702+ /* todo: support rcu-walk? */
b4510431 8703+ if (flags & LOOKUP_RCU)
027c5e7a
AM
8704+ return -ECHILD;
8705+
8706+ valid = 0;
8707+ if (unlikely(!au_di(dentry)))
8708+ goto out;
8709+
e49829fe 8710+ valid = 1;
1facf9fc 8711+ sb = dentry->d_sb;
e49829fe
JR
8712+ /*
8713+ * todo: very ugly
8714+ * i_mutex of parent dir may be held,
8715+ * but we should not return 'invalid' due to busy.
8716+ */
8717+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
8718+ if (unlikely(err)) {
8719+ valid = err;
027c5e7a 8720+ AuTraceErr(err);
e49829fe
JR
8721+ goto out;
8722+ }
5527c038
JR
8723+ inode = NULL;
8724+ if (d_really_is_positive(dentry))
8725+ inode = d_inode(dentry);
5afbbe0d 8726+ if (unlikely(inode && au_is_bad_inode(inode))) {
c1595e42
JR
8727+ err = -EINVAL;
8728+ AuTraceErr(err);
8729+ goto out_dgrade;
8730+ }
027c5e7a
AM
8731+ if (unlikely(au_dbrange_test(dentry))) {
8732+ err = -EINVAL;
8733+ AuTraceErr(err);
8734+ goto out_dgrade;
1facf9fc 8735+ }
027c5e7a
AM
8736+
8737+ sigen = au_sigen(sb);
8738+ if (au_digen_test(dentry, sigen)) {
1facf9fc 8739+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
8740+ err = au_reval_dpath(dentry, sigen);
8741+ if (unlikely(err)) {
8742+ AuTraceErr(err);
1facf9fc 8743+ goto out_dgrade;
027c5e7a 8744+ }
1facf9fc 8745+ }
8746+ di_downgrade_lock(dentry, AuLock_IR);
8747+
1facf9fc 8748+ err = -EINVAL;
c1595e42 8749+ if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
523b37e3 8750+ && inode
38d290e6 8751+ && !(inode->i_state && I_LINKABLE)
79b8bda9
AM
8752+ && (IS_DEADDIR(inode) || !inode->i_nlink)) {
8753+ AuTraceErr(err);
027c5e7a 8754+ goto out_inval;
79b8bda9 8755+ }
027c5e7a 8756+
1facf9fc 8757+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
8758+ if (do_udba && inode) {
5afbbe0d 8759+ aufs_bindex_t btop = au_ibtop(inode);
027c5e7a 8760+ struct inode *h_inode;
1facf9fc 8761+
5afbbe0d
AM
8762+ if (btop >= 0) {
8763+ h_inode = au_h_iptr(inode, btop);
79b8bda9
AM
8764+ if (h_inode && au_test_higen(inode, h_inode)) {
8765+ AuTraceErr(err);
027c5e7a 8766+ goto out_inval;
79b8bda9 8767+ }
027c5e7a 8768+ }
1facf9fc 8769+ }
8770+
b4510431 8771+ err = h_d_revalidate(dentry, inode, flags, do_udba);
5afbbe0d 8772+ if (unlikely(!err && do_udba && au_dbtop(dentry) < 0)) {
1facf9fc 8773+ err = -EIO;
523b37e3
AM
8774+ AuDbg("both of real entry and whiteout found, %p, err %d\n",
8775+ dentry, err);
027c5e7a 8776+ }
e49829fe 8777+ goto out_inval;
1facf9fc 8778+
4f0767ce 8779+out_dgrade:
1facf9fc 8780+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 8781+out_inval:
1facf9fc 8782+ aufs_read_unlock(dentry, AuLock_IR);
8783+ AuTraceErr(err);
8784+ valid = !err;
e49829fe 8785+out:
027c5e7a 8786+ if (!valid) {
523b37e3 8787+ AuDbg("%pd invalid, %d\n", dentry, valid);
027c5e7a
AM
8788+ d_drop(dentry);
8789+ }
1facf9fc 8790+ return valid;
8791+}
8792+
8793+static void aufs_d_release(struct dentry *dentry)
8794+{
027c5e7a 8795+ if (au_di(dentry)) {
4a4d8108
AM
8796+ au_di_fin(dentry);
8797+ au_hn_di_reinit(dentry);
1facf9fc 8798+ }
1facf9fc 8799+}
8800+
4a4d8108 8801+const struct dentry_operations aufs_dop = {
c06a8ce3
AM
8802+ .d_revalidate = aufs_d_revalidate,
8803+ .d_weak_revalidate = aufs_d_revalidate,
8804+ .d_release = aufs_d_release
1facf9fc 8805+};
79b8bda9
AM
8806+
8807+/* aufs_dop without d_revalidate */
8808+const struct dentry_operations aufs_dop_noreval = {
8809+ .d_release = aufs_d_release
8810+};
7f207e10
AM
8811diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
8812--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 8813+++ linux/fs/aufs/dentry.h 2016-10-09 16:55:38.889431135 +0200
f0c0a007 8814@@ -0,0 +1,255 @@
1facf9fc 8815+/*
8cdd5066 8816+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 8817+ *
8818+ * This program, aufs is free software; you can redistribute it and/or modify
8819+ * it under the terms of the GNU General Public License as published by
8820+ * the Free Software Foundation; either version 2 of the License, or
8821+ * (at your option) any later version.
dece6358
AM
8822+ *
8823+ * This program is distributed in the hope that it will be useful,
8824+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8825+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8826+ * GNU General Public License for more details.
8827+ *
8828+ * You should have received a copy of the GNU General Public License
523b37e3 8829+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8830+ */
8831+
8832+/*
8833+ * lookup and dentry operations
8834+ */
8835+
8836+#ifndef __AUFS_DENTRY_H__
8837+#define __AUFS_DENTRY_H__
8838+
8839+#ifdef __KERNEL__
8840+
dece6358 8841+#include <linux/dcache.h>
1facf9fc 8842+#include "rwsem.h"
8843+
1facf9fc 8844+struct au_hdentry {
8845+ struct dentry *hd_dentry;
027c5e7a 8846+ aufs_bindex_t hd_id;
1facf9fc 8847+};
8848+
8849+struct au_dinfo {
8850+ atomic_t di_generation;
8851+
dece6358 8852+ struct au_rwsem di_rwsem;
5afbbe0d 8853+ aufs_bindex_t di_btop, di_bbot, di_bwh, di_bdiropq;
38d290e6 8854+ unsigned char di_tmpfile; /* to allow the different name */
f0c0a007
AM
8855+ union {
8856+ struct au_hdentry *di_hdentry;
8857+ struct llist_node di_lnode; /* delayed free */
8858+ };
4a4d8108 8859+} ____cacheline_aligned_in_smp;
1facf9fc 8860+
8861+/* ---------------------------------------------------------------------- */
8862+
5afbbe0d
AM
8863+/* flags for au_lkup_dentry() */
8864+#define AuLkup_ALLOW_NEG 1
8865+#define AuLkup_IGNORE_PERM (1 << 1)
8866+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
8867+#define au_fset_lkup(flags, name) \
8868+ do { (flags) |= AuLkup_##name; } while (0)
8869+#define au_fclr_lkup(flags, name) \
8870+ do { (flags) &= ~AuLkup_##name; } while (0)
8871+
8872+/* ---------------------------------------------------------------------- */
8873+
1facf9fc 8874+/* dentry.c */
79b8bda9 8875+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
1facf9fc 8876+struct au_branch;
076b876e 8877+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
1facf9fc 8878+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8879+ struct dentry *h_parent, struct au_branch *br);
8880+
5afbbe0d
AM
8881+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
8882+ unsigned int flags);
86dc4139 8883+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
027c5e7a 8884+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 8885+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
79b8bda9 8886+void au_refresh_dop(struct dentry *dentry, int force_reval);
1facf9fc 8887+
8888+/* dinfo.c */
4a4d8108 8889+void au_di_init_once(void *_di);
027c5e7a
AM
8890+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
8891+void au_di_free(struct au_dinfo *dinfo);
8892+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
8893+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
8894+int au_di_init(struct dentry *dentry);
8895+void au_di_fin(struct dentry *dentry);
e2f27e51 8896+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink);
1facf9fc 8897+
8898+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
8899+void di_read_unlock(struct dentry *d, int flags);
8900+void di_downgrade_lock(struct dentry *d, int flags);
8901+void di_write_lock(struct dentry *d, unsigned int lsc);
8902+void di_write_unlock(struct dentry *d);
8903+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
8904+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
8905+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
8906+
8907+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 8908+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 8909+aufs_bindex_t au_dbtail(struct dentry *dentry);
8910+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
8911+
8912+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
8913+ struct dentry *h_dentry);
027c5e7a
AM
8914+int au_digen_test(struct dentry *dentry, unsigned int sigen);
8915+int au_dbrange_test(struct dentry *dentry);
1facf9fc 8916+void au_update_digen(struct dentry *dentry);
8917+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
5afbbe0d
AM
8918+void au_update_dbtop(struct dentry *dentry);
8919+void au_update_dbbot(struct dentry *dentry);
1facf9fc 8920+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
8921+
8922+/* ---------------------------------------------------------------------- */
8923+
8924+static inline struct au_dinfo *au_di(struct dentry *dentry)
8925+{
8926+ return dentry->d_fsdata;
8927+}
8928+
8929+/* ---------------------------------------------------------------------- */
8930+
8931+/* lock subclass for dinfo */
8932+enum {
8933+ AuLsc_DI_CHILD, /* child first */
4a4d8108 8934+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
1facf9fc 8935+ AuLsc_DI_CHILD3, /* copyup dirs */
8936+ AuLsc_DI_PARENT,
8937+ AuLsc_DI_PARENT2,
027c5e7a
AM
8938+ AuLsc_DI_PARENT3,
8939+ AuLsc_DI_TMP /* temp for replacing dinfo */
1facf9fc 8940+};
8941+
8942+/*
8943+ * di_read_lock_child, di_write_lock_child,
8944+ * di_read_lock_child2, di_write_lock_child2,
8945+ * di_read_lock_child3, di_write_lock_child3,
8946+ * di_read_lock_parent, di_write_lock_parent,
8947+ * di_read_lock_parent2, di_write_lock_parent2,
8948+ * di_read_lock_parent3, di_write_lock_parent3,
8949+ */
8950+#define AuReadLockFunc(name, lsc) \
8951+static inline void di_read_lock_##name(struct dentry *d, int flags) \
8952+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
8953+
8954+#define AuWriteLockFunc(name, lsc) \
8955+static inline void di_write_lock_##name(struct dentry *d) \
8956+{ di_write_lock(d, AuLsc_DI_##lsc); }
8957+
8958+#define AuRWLockFuncs(name, lsc) \
8959+ AuReadLockFunc(name, lsc) \
8960+ AuWriteLockFunc(name, lsc)
8961+
8962+AuRWLockFuncs(child, CHILD);
8963+AuRWLockFuncs(child2, CHILD2);
8964+AuRWLockFuncs(child3, CHILD3);
8965+AuRWLockFuncs(parent, PARENT);
8966+AuRWLockFuncs(parent2, PARENT2);
8967+AuRWLockFuncs(parent3, PARENT3);
8968+
8969+#undef AuReadLockFunc
8970+#undef AuWriteLockFunc
8971+#undef AuRWLockFuncs
8972+
8973+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
dece6358
AM
8974+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
8975+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 8976+
8977+/* ---------------------------------------------------------------------- */
8978+
8979+/* todo: memory barrier? */
8980+static inline unsigned int au_digen(struct dentry *d)
8981+{
8982+ return atomic_read(&au_di(d)->di_generation);
8983+}
8984+
8985+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
8986+{
8987+ hdentry->hd_dentry = NULL;
8988+}
8989+
5afbbe0d
AM
8990+static inline struct au_hdentry *au_hdentry(struct au_dinfo *di,
8991+ aufs_bindex_t bindex)
8992+{
8993+ return di->di_hdentry + bindex;
8994+}
8995+
1facf9fc 8996+static inline void au_hdput(struct au_hdentry *hd)
8997+{
4a4d8108
AM
8998+ if (hd)
8999+ dput(hd->hd_dentry);
1facf9fc 9000+}
9001+
5afbbe0d 9002+static inline aufs_bindex_t au_dbtop(struct dentry *dentry)
1facf9fc 9003+{
1308ab2a 9004+ DiMustAnyLock(dentry);
5afbbe0d 9005+ return au_di(dentry)->di_btop;
1facf9fc 9006+}
9007+
5afbbe0d 9008+static inline aufs_bindex_t au_dbbot(struct dentry *dentry)
1facf9fc 9009+{
1308ab2a 9010+ DiMustAnyLock(dentry);
5afbbe0d 9011+ return au_di(dentry)->di_bbot;
1facf9fc 9012+}
9013+
9014+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
9015+{
1308ab2a 9016+ DiMustAnyLock(dentry);
1facf9fc 9017+ return au_di(dentry)->di_bwh;
9018+}
9019+
9020+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
9021+{
1308ab2a 9022+ DiMustAnyLock(dentry);
1facf9fc 9023+ return au_di(dentry)->di_bdiropq;
9024+}
9025+
9026+/* todo: hard/soft set? */
5afbbe0d 9027+static inline void au_set_dbtop(struct dentry *dentry, aufs_bindex_t bindex)
1facf9fc 9028+{
1308ab2a 9029+ DiMustWriteLock(dentry);
5afbbe0d 9030+ au_di(dentry)->di_btop = bindex;
1facf9fc 9031+}
9032+
5afbbe0d 9033+static inline void au_set_dbbot(struct dentry *dentry, aufs_bindex_t bindex)
1facf9fc 9034+{
1308ab2a 9035+ DiMustWriteLock(dentry);
5afbbe0d 9036+ au_di(dentry)->di_bbot = bindex;
1facf9fc 9037+}
9038+
9039+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
9040+{
1308ab2a 9041+ DiMustWriteLock(dentry);
5afbbe0d 9042+ /* dbwh can be outside of btop - bbot range */
1facf9fc 9043+ au_di(dentry)->di_bwh = bindex;
9044+}
9045+
9046+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
9047+{
1308ab2a 9048+ DiMustWriteLock(dentry);
1facf9fc 9049+ au_di(dentry)->di_bdiropq = bindex;
9050+}
9051+
9052+/* ---------------------------------------------------------------------- */
9053+
4a4d8108 9054+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 9055+static inline void au_digen_dec(struct dentry *d)
9056+{
e49829fe 9057+ atomic_dec(&au_di(d)->di_generation);
1facf9fc 9058+}
9059+
4a4d8108 9060+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 9061+{
9062+ dentry->d_fsdata = NULL;
9063+}
9064+#else
4a4d8108
AM
9065+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
9066+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 9067+
9068+#endif /* __KERNEL__ */
9069+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
9070diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
9071--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
9072+++ linux/fs/aufs/dinfo.c 2016-10-09 16:55:38.889431135 +0200
9073@@ -0,0 +1,553 @@
1facf9fc 9074+/*
8cdd5066 9075+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 9076+ *
9077+ * This program, aufs is free software; you can redistribute it and/or modify
9078+ * it under the terms of the GNU General Public License as published by
9079+ * the Free Software Foundation; either version 2 of the License, or
9080+ * (at your option) any later version.
dece6358
AM
9081+ *
9082+ * This program is distributed in the hope that it will be useful,
9083+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9084+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9085+ * GNU General Public License for more details.
9086+ *
9087+ * You should have received a copy of the GNU General Public License
523b37e3 9088+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 9089+ */
9090+
9091+/*
9092+ * dentry private data
9093+ */
9094+
9095+#include "aufs.h"
9096+
e49829fe 9097+void au_di_init_once(void *_dinfo)
4a4d8108 9098+{
e49829fe 9099+ struct au_dinfo *dinfo = _dinfo;
4a4d8108 9100+
e49829fe 9101+ au_rw_init(&dinfo->di_rwsem);
4a4d8108
AM
9102+}
9103+
027c5e7a 9104+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 9105+{
9106+ struct au_dinfo *dinfo;
027c5e7a 9107+ int nbr, i;
1facf9fc 9108+
9109+ dinfo = au_cache_alloc_dinfo();
9110+ if (unlikely(!dinfo))
9111+ goto out;
9112+
5afbbe0d 9113+ nbr = au_sbbot(sb) + 1;
1facf9fc 9114+ if (nbr <= 0)
9115+ nbr = 1;
9116+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
9117+ if (dinfo->di_hdentry) {
9118+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
5afbbe0d
AM
9119+ dinfo->di_btop = -1;
9120+ dinfo->di_bbot = -1;
027c5e7a
AM
9121+ dinfo->di_bwh = -1;
9122+ dinfo->di_bdiropq = -1;
38d290e6 9123+ dinfo->di_tmpfile = 0;
027c5e7a
AM
9124+ for (i = 0; i < nbr; i++)
9125+ dinfo->di_hdentry[i].hd_id = -1;
9126+ goto out;
9127+ }
1facf9fc 9128+
f0c0a007 9129+ au_cache_dfree_dinfo(dinfo);
027c5e7a
AM
9130+ dinfo = NULL;
9131+
4f0767ce 9132+out:
027c5e7a 9133+ return dinfo;
1facf9fc 9134+}
9135+
027c5e7a 9136+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 9137+{
4a4d8108 9138+ struct au_hdentry *p;
5afbbe0d 9139+ aufs_bindex_t bbot, bindex;
4a4d8108
AM
9140+
9141+ /* dentry may not be revalidated */
5afbbe0d 9142+ bindex = dinfo->di_btop;
4a4d8108 9143+ if (bindex >= 0) {
5afbbe0d
AM
9144+ bbot = dinfo->di_bbot;
9145+ p = au_hdentry(dinfo, bindex);
9146+ while (bindex++ <= bbot)
4a4d8108
AM
9147+ au_hdput(p++);
9148+ }
f0c0a007
AM
9149+ au_delayed_kfree(dinfo->di_hdentry);
9150+ au_cache_dfree_dinfo(dinfo);
027c5e7a
AM
9151+}
9152+
9153+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
9154+{
9155+ struct au_hdentry *p;
9156+ aufs_bindex_t bi;
9157+
9158+ AuRwMustWriteLock(&a->di_rwsem);
9159+ AuRwMustWriteLock(&b->di_rwsem);
9160+
9161+#define DiSwap(v, name) \
9162+ do { \
9163+ v = a->di_##name; \
9164+ a->di_##name = b->di_##name; \
9165+ b->di_##name = v; \
9166+ } while (0)
9167+
9168+ DiSwap(p, hdentry);
5afbbe0d
AM
9169+ DiSwap(bi, btop);
9170+ DiSwap(bi, bbot);
027c5e7a
AM
9171+ DiSwap(bi, bwh);
9172+ DiSwap(bi, bdiropq);
9173+ /* smp_mb(); */
9174+
9175+#undef DiSwap
9176+}
9177+
9178+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
9179+{
9180+ AuRwMustWriteLock(&dst->di_rwsem);
9181+ AuRwMustWriteLock(&src->di_rwsem);
9182+
5afbbe0d
AM
9183+ dst->di_btop = src->di_btop;
9184+ dst->di_bbot = src->di_bbot;
027c5e7a
AM
9185+ dst->di_bwh = src->di_bwh;
9186+ dst->di_bdiropq = src->di_bdiropq;
9187+ /* smp_mb(); */
9188+}
9189+
9190+int au_di_init(struct dentry *dentry)
9191+{
9192+ int err;
9193+ struct super_block *sb;
9194+ struct au_dinfo *dinfo;
9195+
9196+ err = 0;
9197+ sb = dentry->d_sb;
9198+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
9199+ if (dinfo) {
9200+ atomic_set(&dinfo->di_generation, au_sigen(sb));
9201+ /* smp_mb(); */ /* atomic_set */
9202+ dentry->d_fsdata = dinfo;
9203+ } else
9204+ err = -ENOMEM;
9205+
9206+ return err;
9207+}
9208+
9209+void au_di_fin(struct dentry *dentry)
9210+{
9211+ struct au_dinfo *dinfo;
9212+
9213+ dinfo = au_di(dentry);
9214+ AuRwDestroy(&dinfo->di_rwsem);
9215+ au_di_free(dinfo);
4a4d8108
AM
9216+}
9217+
e2f27e51 9218+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink)
1facf9fc 9219+{
9220+ int err, sz;
9221+ struct au_hdentry *hdp;
9222+
1308ab2a 9223+ AuRwMustWriteLock(&dinfo->di_rwsem);
9224+
1facf9fc 9225+ err = -ENOMEM;
5afbbe0d 9226+ sz = sizeof(*hdp) * (dinfo->di_bbot + 1);
1facf9fc 9227+ if (!sz)
9228+ sz = sizeof(*hdp);
e2f27e51
AM
9229+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS,
9230+ may_shrink);
1facf9fc 9231+ if (hdp) {
9232+ dinfo->di_hdentry = hdp;
9233+ err = 0;
9234+ }
9235+
9236+ return err;
9237+}
9238+
9239+/* ---------------------------------------------------------------------- */
9240+
9241+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
9242+{
9243+ switch (lsc) {
9244+ case AuLsc_DI_CHILD:
9245+ ii_write_lock_child(inode);
9246+ break;
9247+ case AuLsc_DI_CHILD2:
9248+ ii_write_lock_child2(inode);
9249+ break;
9250+ case AuLsc_DI_CHILD3:
9251+ ii_write_lock_child3(inode);
9252+ break;
9253+ case AuLsc_DI_PARENT:
9254+ ii_write_lock_parent(inode);
9255+ break;
9256+ case AuLsc_DI_PARENT2:
9257+ ii_write_lock_parent2(inode);
9258+ break;
9259+ case AuLsc_DI_PARENT3:
9260+ ii_write_lock_parent3(inode);
9261+ break;
9262+ default:
9263+ BUG();
9264+ }
9265+}
9266+
9267+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
9268+{
9269+ switch (lsc) {
9270+ case AuLsc_DI_CHILD:
9271+ ii_read_lock_child(inode);
9272+ break;
9273+ case AuLsc_DI_CHILD2:
9274+ ii_read_lock_child2(inode);
9275+ break;
9276+ case AuLsc_DI_CHILD3:
9277+ ii_read_lock_child3(inode);
9278+ break;
9279+ case AuLsc_DI_PARENT:
9280+ ii_read_lock_parent(inode);
9281+ break;
9282+ case AuLsc_DI_PARENT2:
9283+ ii_read_lock_parent2(inode);
9284+ break;
9285+ case AuLsc_DI_PARENT3:
9286+ ii_read_lock_parent3(inode);
9287+ break;
9288+ default:
9289+ BUG();
9290+ }
9291+}
9292+
9293+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
9294+{
5527c038
JR
9295+ struct inode *inode;
9296+
dece6358 9297+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9298+ if (d_really_is_positive(d)) {
9299+ inode = d_inode(d);
1facf9fc 9300+ if (au_ftest_lock(flags, IW))
5527c038 9301+ do_ii_write_lock(inode, lsc);
1facf9fc 9302+ else if (au_ftest_lock(flags, IR))
5527c038 9303+ do_ii_read_lock(inode, lsc);
1facf9fc 9304+ }
9305+}
9306+
9307+void di_read_unlock(struct dentry *d, int flags)
9308+{
5527c038
JR
9309+ struct inode *inode;
9310+
9311+ if (d_really_is_positive(d)) {
9312+ inode = d_inode(d);
027c5e7a
AM
9313+ if (au_ftest_lock(flags, IW)) {
9314+ au_dbg_verify_dinode(d);
5527c038 9315+ ii_write_unlock(inode);
027c5e7a
AM
9316+ } else if (au_ftest_lock(flags, IR)) {
9317+ au_dbg_verify_dinode(d);
5527c038 9318+ ii_read_unlock(inode);
027c5e7a 9319+ }
1facf9fc 9320+ }
dece6358 9321+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 9322+}
9323+
9324+void di_downgrade_lock(struct dentry *d, int flags)
9325+{
5527c038
JR
9326+ if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
9327+ ii_downgrade_lock(d_inode(d));
dece6358 9328+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 9329+}
9330+
9331+void di_write_lock(struct dentry *d, unsigned int lsc)
9332+{
dece6358 9333+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9334+ if (d_really_is_positive(d))
9335+ do_ii_write_lock(d_inode(d), lsc);
1facf9fc 9336+}
9337+
9338+void di_write_unlock(struct dentry *d)
9339+{
027c5e7a 9340+ au_dbg_verify_dinode(d);
5527c038
JR
9341+ if (d_really_is_positive(d))
9342+ ii_write_unlock(d_inode(d));
dece6358 9343+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 9344+}
9345+
9346+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
9347+{
9348+ AuDebugOn(d1 == d2
5527c038 9349+ || d_inode(d1) == d_inode(d2)
1facf9fc 9350+ || d1->d_sb != d2->d_sb);
9351+
9352+ if (isdir && au_test_subdir(d1, d2)) {
9353+ di_write_lock_child(d1);
9354+ di_write_lock_child2(d2);
9355+ } else {
9356+ /* there should be no races */
9357+ di_write_lock_child(d2);
9358+ di_write_lock_child2(d1);
9359+ }
9360+}
9361+
9362+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
9363+{
9364+ AuDebugOn(d1 == d2
5527c038 9365+ || d_inode(d1) == d_inode(d2)
1facf9fc 9366+ || d1->d_sb != d2->d_sb);
9367+
9368+ if (isdir && au_test_subdir(d1, d2)) {
9369+ di_write_lock_parent(d1);
9370+ di_write_lock_parent2(d2);
9371+ } else {
9372+ /* there should be no races */
9373+ di_write_lock_parent(d2);
9374+ di_write_lock_parent2(d1);
9375+ }
9376+}
9377+
9378+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
9379+{
9380+ di_write_unlock(d1);
5527c038 9381+ if (d_inode(d1) == d_inode(d2))
dece6358 9382+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 9383+ else
9384+ di_write_unlock(d2);
9385+}
9386+
9387+/* ---------------------------------------------------------------------- */
9388+
9389+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
9390+{
9391+ struct dentry *d;
9392+
1308ab2a 9393+ DiMustAnyLock(dentry);
9394+
5afbbe0d 9395+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
1facf9fc 9396+ return NULL;
9397+ AuDebugOn(bindex < 0);
5afbbe0d 9398+ d = au_hdentry(au_di(dentry), bindex)->hd_dentry;
c1595e42 9399+ AuDebugOn(d && au_dcount(d) <= 0);
1facf9fc 9400+ return d;
9401+}
9402+
2cbb1c4b
JR
9403+/*
9404+ * extended version of au_h_dptr().
38d290e6
JR
9405+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
9406+ * error.
2cbb1c4b
JR
9407+ */
9408+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
9409+{
9410+ struct dentry *h_dentry;
9411+ struct inode *inode, *h_inode;
9412+
5527c038 9413+ AuDebugOn(d_really_is_negative(dentry));
2cbb1c4b
JR
9414+
9415+ h_dentry = NULL;
5afbbe0d
AM
9416+ if (au_dbtop(dentry) <= bindex
9417+ && bindex <= au_dbbot(dentry))
2cbb1c4b 9418+ h_dentry = au_h_dptr(dentry, bindex);
38d290e6 9419+ if (h_dentry && !au_d_linkable(h_dentry)) {
2cbb1c4b
JR
9420+ dget(h_dentry);
9421+ goto out; /* success */
9422+ }
9423+
5527c038 9424+ inode = d_inode(dentry);
5afbbe0d
AM
9425+ AuDebugOn(bindex < au_ibtop(inode));
9426+ AuDebugOn(au_ibbot(inode) < bindex);
2cbb1c4b
JR
9427+ h_inode = au_h_iptr(inode, bindex);
9428+ h_dentry = d_find_alias(h_inode);
9429+ if (h_dentry) {
9430+ if (!IS_ERR(h_dentry)) {
38d290e6 9431+ if (!au_d_linkable(h_dentry))
2cbb1c4b
JR
9432+ goto out; /* success */
9433+ dput(h_dentry);
9434+ } else
9435+ goto out;
9436+ }
9437+
9438+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
9439+ h_dentry = au_plink_lkup(inode, bindex);
9440+ AuDebugOn(!h_dentry);
9441+ if (!IS_ERR(h_dentry)) {
9442+ if (!au_d_hashed_positive(h_dentry))
9443+ goto out; /* success */
9444+ dput(h_dentry);
9445+ h_dentry = NULL;
9446+ }
9447+ }
9448+
9449+out:
9450+ AuDbgDentry(h_dentry);
9451+ return h_dentry;
9452+}
9453+
1facf9fc 9454+aufs_bindex_t au_dbtail(struct dentry *dentry)
9455+{
5afbbe0d 9456+ aufs_bindex_t bbot, bwh;
1facf9fc 9457+
5afbbe0d
AM
9458+ bbot = au_dbbot(dentry);
9459+ if (0 <= bbot) {
1facf9fc 9460+ bwh = au_dbwh(dentry);
9461+ if (!bwh)
9462+ return bwh;
5afbbe0d 9463+ if (0 < bwh && bwh < bbot)
1facf9fc 9464+ return bwh - 1;
9465+ }
5afbbe0d 9466+ return bbot;
1facf9fc 9467+}
9468+
9469+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
9470+{
5afbbe0d 9471+ aufs_bindex_t bbot, bopq;
1facf9fc 9472+
5afbbe0d
AM
9473+ bbot = au_dbtail(dentry);
9474+ if (0 <= bbot) {
1facf9fc 9475+ bopq = au_dbdiropq(dentry);
5afbbe0d
AM
9476+ if (0 <= bopq && bopq < bbot)
9477+ bbot = bopq;
1facf9fc 9478+ }
5afbbe0d 9479+ return bbot;
1facf9fc 9480+}
9481+
9482+/* ---------------------------------------------------------------------- */
9483+
9484+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9485+ struct dentry *h_dentry)
9486+{
5afbbe0d
AM
9487+ struct au_dinfo *dinfo;
9488+ struct au_hdentry *hd;
027c5e7a 9489+ struct au_branch *br;
1facf9fc 9490+
1308ab2a 9491+ DiMustWriteLock(dentry);
9492+
5afbbe0d
AM
9493+ dinfo = au_di(dentry);
9494+ hd = au_hdentry(dinfo, bindex);
4a4d8108 9495+ au_hdput(hd);
1facf9fc 9496+ hd->hd_dentry = h_dentry;
027c5e7a
AM
9497+ if (h_dentry) {
9498+ br = au_sbr(dentry->d_sb, bindex);
9499+ hd->hd_id = br->br_id;
9500+ }
9501+}
9502+
9503+int au_dbrange_test(struct dentry *dentry)
9504+{
9505+ int err;
5afbbe0d 9506+ aufs_bindex_t btop, bbot;
027c5e7a
AM
9507+
9508+ err = 0;
5afbbe0d
AM
9509+ btop = au_dbtop(dentry);
9510+ bbot = au_dbbot(dentry);
9511+ if (btop >= 0)
9512+ AuDebugOn(bbot < 0 && btop > bbot);
027c5e7a
AM
9513+ else {
9514+ err = -EIO;
5afbbe0d 9515+ AuDebugOn(bbot >= 0);
027c5e7a
AM
9516+ }
9517+
9518+ return err;
9519+}
9520+
9521+int au_digen_test(struct dentry *dentry, unsigned int sigen)
9522+{
9523+ int err;
9524+
9525+ err = 0;
9526+ if (unlikely(au_digen(dentry) != sigen
5527c038 9527+ || au_iigen_test(d_inode(dentry), sigen)))
027c5e7a
AM
9528+ err = -EIO;
9529+
9530+ return err;
1facf9fc 9531+}
9532+
9533+void au_update_digen(struct dentry *dentry)
9534+{
9535+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
9536+ /* smp_mb(); */ /* atomic_set */
9537+}
9538+
9539+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
9540+{
9541+ struct au_dinfo *dinfo;
9542+ struct dentry *h_d;
4a4d8108 9543+ struct au_hdentry *hdp;
5afbbe0d 9544+ aufs_bindex_t bindex, bbot;
1facf9fc 9545+
1308ab2a 9546+ DiMustWriteLock(dentry);
9547+
1facf9fc 9548+ dinfo = au_di(dentry);
5afbbe0d 9549+ if (!dinfo || dinfo->di_btop < 0)
1facf9fc 9550+ return;
9551+
9552+ if (do_put_zero) {
5afbbe0d
AM
9553+ bbot = dinfo->di_bbot;
9554+ bindex = dinfo->di_btop;
9555+ hdp = au_hdentry(dinfo, bindex);
9556+ for (; bindex <= bbot; bindex++, hdp++) {
9557+ h_d = hdp->hd_dentry;
5527c038 9558+ if (h_d && d_is_negative(h_d))
1facf9fc 9559+ au_set_h_dptr(dentry, bindex, NULL);
9560+ }
9561+ }
9562+
5afbbe0d
AM
9563+ dinfo->di_btop = 0;
9564+ hdp = au_hdentry(dinfo, dinfo->di_btop);
9565+ for (; dinfo->di_btop <= dinfo->di_bbot; dinfo->di_btop++, hdp++)
9566+ if (hdp->hd_dentry)
1facf9fc 9567+ break;
5afbbe0d
AM
9568+ if (dinfo->di_btop > dinfo->di_bbot) {
9569+ dinfo->di_btop = -1;
9570+ dinfo->di_bbot = -1;
1facf9fc 9571+ return;
9572+ }
9573+
5afbbe0d
AM
9574+ hdp = au_hdentry(dinfo, dinfo->di_bbot);
9575+ for (; dinfo->di_bbot >= 0; dinfo->di_bbot--, hdp--)
9576+ if (hdp->hd_dentry)
1facf9fc 9577+ break;
5afbbe0d 9578+ AuDebugOn(dinfo->di_btop > dinfo->di_bbot || dinfo->di_bbot < 0);
1facf9fc 9579+}
9580+
5afbbe0d 9581+void au_update_dbtop(struct dentry *dentry)
1facf9fc 9582+{
5afbbe0d 9583+ aufs_bindex_t bindex, bbot;
1facf9fc 9584+ struct dentry *h_dentry;
9585+
5afbbe0d
AM
9586+ bbot = au_dbbot(dentry);
9587+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
1facf9fc 9588+ h_dentry = au_h_dptr(dentry, bindex);
9589+ if (!h_dentry)
9590+ continue;
5527c038 9591+ if (d_is_positive(h_dentry)) {
5afbbe0d 9592+ au_set_dbtop(dentry, bindex);
1facf9fc 9593+ return;
9594+ }
9595+ au_set_h_dptr(dentry, bindex, NULL);
9596+ }
9597+}
9598+
5afbbe0d 9599+void au_update_dbbot(struct dentry *dentry)
1facf9fc 9600+{
5afbbe0d 9601+ aufs_bindex_t bindex, btop;
1facf9fc 9602+ struct dentry *h_dentry;
9603+
5afbbe0d
AM
9604+ btop = au_dbtop(dentry);
9605+ for (bindex = au_dbbot(dentry); bindex >= btop; bindex--) {
1facf9fc 9606+ h_dentry = au_h_dptr(dentry, bindex);
9607+ if (!h_dentry)
9608+ continue;
5527c038 9609+ if (d_is_positive(h_dentry)) {
5afbbe0d 9610+ au_set_dbbot(dentry, bindex);
1facf9fc 9611+ return;
9612+ }
9613+ au_set_h_dptr(dentry, bindex, NULL);
9614+ }
9615+}
9616+
9617+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
9618+{
5afbbe0d 9619+ aufs_bindex_t bindex, bbot;
1facf9fc 9620+
5afbbe0d
AM
9621+ bbot = au_dbbot(dentry);
9622+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++)
1facf9fc 9623+ if (au_h_dptr(dentry, bindex) == h_dentry)
9624+ return bindex;
9625+ return -1;
9626+}
7f207e10
AM
9627diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
9628--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 9629+++ linux/fs/aufs/dir.c 2016-10-09 16:55:36.489368218 +0200
f0c0a007 9630@@ -0,0 +1,762 @@
1facf9fc 9631+/*
8cdd5066 9632+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 9633+ *
9634+ * This program, aufs is free software; you can redistribute it and/or modify
9635+ * it under the terms of the GNU General Public License as published by
9636+ * the Free Software Foundation; either version 2 of the License, or
9637+ * (at your option) any later version.
dece6358
AM
9638+ *
9639+ * This program is distributed in the hope that it will be useful,
9640+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9641+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9642+ * GNU General Public License for more details.
9643+ *
9644+ * You should have received a copy of the GNU General Public License
523b37e3 9645+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 9646+ */
9647+
9648+/*
9649+ * directory operations
9650+ */
9651+
9652+#include <linux/fs_stack.h>
9653+#include "aufs.h"
9654+
9655+void au_add_nlink(struct inode *dir, struct inode *h_dir)
9656+{
9dbd164d
AM
9657+ unsigned int nlink;
9658+
1facf9fc 9659+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9660+
9dbd164d
AM
9661+ nlink = dir->i_nlink;
9662+ nlink += h_dir->i_nlink - 2;
1facf9fc 9663+ if (h_dir->i_nlink < 2)
9dbd164d 9664+ nlink += 2;
f6b6e03d 9665+ smp_mb(); /* for i_nlink */
7eafdf33 9666+ /* 0 can happen in revaliding */
92d182d2 9667+ set_nlink(dir, nlink);
1facf9fc 9668+}
9669+
9670+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
9671+{
9dbd164d
AM
9672+ unsigned int nlink;
9673+
1facf9fc 9674+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9675+
9dbd164d
AM
9676+ nlink = dir->i_nlink;
9677+ nlink -= h_dir->i_nlink - 2;
1facf9fc 9678+ if (h_dir->i_nlink < 2)
9dbd164d 9679+ nlink -= 2;
f6b6e03d 9680+ smp_mb(); /* for i_nlink */
92d182d2 9681+ /* nlink == 0 means the branch-fs is broken */
9dbd164d 9682+ set_nlink(dir, nlink);
1facf9fc 9683+}
9684+
1308ab2a 9685+loff_t au_dir_size(struct file *file, struct dentry *dentry)
9686+{
9687+ loff_t sz;
5afbbe0d 9688+ aufs_bindex_t bindex, bbot;
1308ab2a 9689+ struct file *h_file;
9690+ struct dentry *h_dentry;
9691+
9692+ sz = 0;
9693+ if (file) {
2000de60 9694+ AuDebugOn(!d_is_dir(file->f_path.dentry));
1308ab2a 9695+
5afbbe0d
AM
9696+ bbot = au_fbbot_dir(file);
9697+ for (bindex = au_fbtop(file);
9698+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
1308ab2a 9699+ bindex++) {
4a4d8108 9700+ h_file = au_hf_dir(file, bindex);
c06a8ce3
AM
9701+ if (h_file && file_inode(h_file))
9702+ sz += vfsub_f_size_read(h_file);
1308ab2a 9703+ }
9704+ } else {
9705+ AuDebugOn(!dentry);
2000de60 9706+ AuDebugOn(!d_is_dir(dentry));
1308ab2a 9707+
5afbbe0d
AM
9708+ bbot = au_dbtaildir(dentry);
9709+ for (bindex = au_dbtop(dentry);
9710+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
1308ab2a 9711+ bindex++) {
9712+ h_dentry = au_h_dptr(dentry, bindex);
5527c038
JR
9713+ if (h_dentry && d_is_positive(h_dentry))
9714+ sz += i_size_read(d_inode(h_dentry));
1308ab2a 9715+ }
9716+ }
9717+ if (sz < KMALLOC_MAX_SIZE)
9718+ sz = roundup_pow_of_two(sz);
9719+ if (sz > KMALLOC_MAX_SIZE)
9720+ sz = KMALLOC_MAX_SIZE;
9721+ else if (sz < NAME_MAX) {
9722+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
9723+ sz = AUFS_RDBLK_DEF;
9724+ }
9725+ return sz;
9726+}
9727+
b912730e
AM
9728+struct au_dir_ts_arg {
9729+ struct dentry *dentry;
9730+ aufs_bindex_t brid;
9731+};
9732+
9733+static void au_do_dir_ts(void *arg)
9734+{
9735+ struct au_dir_ts_arg *a = arg;
9736+ struct au_dtime dt;
9737+ struct path h_path;
9738+ struct inode *dir, *h_dir;
9739+ struct super_block *sb;
9740+ struct au_branch *br;
9741+ struct au_hinode *hdir;
9742+ int err;
5afbbe0d 9743+ aufs_bindex_t btop, bindex;
b912730e
AM
9744+
9745+ sb = a->dentry->d_sb;
5527c038 9746+ if (d_really_is_negative(a->dentry))
b912730e 9747+ goto out;
5527c038 9748+ /* no dir->i_mutex lock */
b95c5147
AM
9749+ aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
9750+
5527c038 9751+ dir = d_inode(a->dentry);
5afbbe0d 9752+ btop = au_ibtop(dir);
b912730e 9753+ bindex = au_br_index(sb, a->brid);
5afbbe0d 9754+ if (bindex < btop)
b912730e
AM
9755+ goto out_unlock;
9756+
9757+ br = au_sbr(sb, bindex);
9758+ h_path.dentry = au_h_dptr(a->dentry, bindex);
9759+ if (!h_path.dentry)
9760+ goto out_unlock;
9761+ h_path.mnt = au_br_mnt(br);
9762+ au_dtime_store(&dt, a->dentry, &h_path);
9763+
5afbbe0d 9764+ br = au_sbr(sb, btop);
b912730e
AM
9765+ if (!au_br_writable(br->br_perm))
9766+ goto out_unlock;
5afbbe0d 9767+ h_path.dentry = au_h_dptr(a->dentry, btop);
b912730e
AM
9768+ h_path.mnt = au_br_mnt(br);
9769+ err = vfsub_mnt_want_write(h_path.mnt);
9770+ if (err)
9771+ goto out_unlock;
5afbbe0d
AM
9772+ hdir = au_hi(dir, btop);
9773+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
9774+ h_dir = au_h_iptr(dir, btop);
b912730e
AM
9775+ if (h_dir->i_nlink
9776+ && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
9777+ dt.dt_h_path = h_path;
9778+ au_dtime_revert(&dt);
9779+ }
5afbbe0d 9780+ au_hn_inode_unlock(hdir);
b912730e
AM
9781+ vfsub_mnt_drop_write(h_path.mnt);
9782+ au_cpup_attr_timesizes(dir);
9783+
9784+out_unlock:
9785+ aufs_read_unlock(a->dentry, AuLock_DW);
9786+out:
9787+ dput(a->dentry);
9788+ au_nwt_done(&au_sbi(sb)->si_nowait);
f0c0a007 9789+ au_delayed_kfree(arg);
b912730e
AM
9790+}
9791+
9792+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
9793+{
9794+ int perm, wkq_err;
5afbbe0d 9795+ aufs_bindex_t btop;
b912730e
AM
9796+ struct au_dir_ts_arg *arg;
9797+ struct dentry *dentry;
9798+ struct super_block *sb;
9799+
9800+ IMustLock(dir);
9801+
9802+ dentry = d_find_any_alias(dir);
9803+ AuDebugOn(!dentry);
9804+ sb = dentry->d_sb;
5afbbe0d
AM
9805+ btop = au_ibtop(dir);
9806+ if (btop == bindex) {
b912730e
AM
9807+ au_cpup_attr_timesizes(dir);
9808+ goto out;
9809+ }
9810+
5afbbe0d 9811+ perm = au_sbr_perm(sb, btop);
b912730e
AM
9812+ if (!au_br_writable(perm))
9813+ goto out;
9814+
9815+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
9816+ if (!arg)
9817+ goto out;
9818+
9819+ arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
9820+ arg->brid = au_sbr_id(sb, bindex);
9821+ wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
9822+ if (unlikely(wkq_err)) {
9823+ pr_err("wkq %d\n", wkq_err);
9824+ dput(dentry);
f0c0a007 9825+ au_delayed_kfree(arg);
b912730e
AM
9826+ }
9827+
9828+out:
9829+ dput(dentry);
9830+}
9831+
1facf9fc 9832+/* ---------------------------------------------------------------------- */
9833+
9834+static int reopen_dir(struct file *file)
9835+{
9836+ int err;
9837+ unsigned int flags;
5afbbe0d 9838+ aufs_bindex_t bindex, btail, btop;
1facf9fc 9839+ struct dentry *dentry, *h_dentry;
9840+ struct file *h_file;
9841+
9842+ /* open all lower dirs */
2000de60 9843+ dentry = file->f_path.dentry;
5afbbe0d
AM
9844+ btop = au_dbtop(dentry);
9845+ for (bindex = au_fbtop(file); bindex < btop; bindex++)
1facf9fc 9846+ au_set_h_fptr(file, bindex, NULL);
5afbbe0d 9847+ au_set_fbtop(file, btop);
1facf9fc 9848+
9849+ btail = au_dbtaildir(dentry);
5afbbe0d 9850+ for (bindex = au_fbbot_dir(file); btail < bindex; bindex--)
1facf9fc 9851+ au_set_h_fptr(file, bindex, NULL);
5afbbe0d 9852+ au_set_fbbot_dir(file, btail);
1facf9fc 9853+
4a4d8108 9854+ flags = vfsub_file_flags(file);
5afbbe0d 9855+ for (bindex = btop; bindex <= btail; bindex++) {
1facf9fc 9856+ h_dentry = au_h_dptr(dentry, bindex);
9857+ if (!h_dentry)
9858+ continue;
4a4d8108 9859+ h_file = au_hf_dir(file, bindex);
1facf9fc 9860+ if (h_file)
9861+ continue;
9862+
392086de 9863+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9864+ err = PTR_ERR(h_file);
9865+ if (IS_ERR(h_file))
9866+ goto out; /* close all? */
9867+ au_set_h_fptr(file, bindex, h_file);
9868+ }
9869+ au_update_figen(file);
9870+ /* todo: necessary? */
9871+ /* file->f_ra = h_file->f_ra; */
9872+ err = 0;
9873+
4f0767ce 9874+out:
1facf9fc 9875+ return err;
9876+}
9877+
b912730e 9878+static int do_open_dir(struct file *file, int flags, struct file *h_file)
1facf9fc 9879+{
9880+ int err;
9881+ aufs_bindex_t bindex, btail;
9882+ struct dentry *dentry, *h_dentry;
8cdd5066 9883+ struct vfsmount *mnt;
1facf9fc 9884+
1308ab2a 9885+ FiMustWriteLock(file);
b912730e 9886+ AuDebugOn(h_file);
1308ab2a 9887+
523b37e3 9888+ err = 0;
8cdd5066 9889+ mnt = file->f_path.mnt;
2000de60 9890+ dentry = file->f_path.dentry;
5527c038 9891+ file->f_version = d_inode(dentry)->i_version;
5afbbe0d
AM
9892+ bindex = au_dbtop(dentry);
9893+ au_set_fbtop(file, bindex);
1facf9fc 9894+ btail = au_dbtaildir(dentry);
5afbbe0d 9895+ au_set_fbbot_dir(file, btail);
1facf9fc 9896+ for (; !err && bindex <= btail; bindex++) {
9897+ h_dentry = au_h_dptr(dentry, bindex);
9898+ if (!h_dentry)
9899+ continue;
9900+
8cdd5066
JR
9901+ err = vfsub_test_mntns(mnt, h_dentry->d_sb);
9902+ if (unlikely(err))
9903+ break;
392086de 9904+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9905+ if (IS_ERR(h_file)) {
9906+ err = PTR_ERR(h_file);
9907+ break;
9908+ }
9909+ au_set_h_fptr(file, bindex, h_file);
9910+ }
9911+ au_update_figen(file);
9912+ /* todo: necessary? */
9913+ /* file->f_ra = h_file->f_ra; */
9914+ if (!err)
9915+ return 0; /* success */
9916+
9917+ /* close all */
5afbbe0d 9918+ for (bindex = au_fbtop(file); bindex <= btail; bindex++)
1facf9fc 9919+ au_set_h_fptr(file, bindex, NULL);
5afbbe0d
AM
9920+ au_set_fbtop(file, -1);
9921+ au_set_fbbot_dir(file, -1);
4a4d8108 9922+
1facf9fc 9923+ return err;
9924+}
9925+
9926+static int aufs_open_dir(struct inode *inode __maybe_unused,
9927+ struct file *file)
9928+{
4a4d8108
AM
9929+ int err;
9930+ struct super_block *sb;
9931+ struct au_fidir *fidir;
9932+
9933+ err = -ENOMEM;
2000de60 9934+ sb = file->f_path.dentry->d_sb;
4a4d8108 9935+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 9936+ fidir = au_fidir_alloc(sb);
4a4d8108 9937+ if (fidir) {
b912730e
AM
9938+ struct au_do_open_args args = {
9939+ .open = do_open_dir,
9940+ .fidir = fidir
9941+ };
9942+ err = au_do_open(file, &args);
4a4d8108 9943+ if (unlikely(err))
f0c0a007 9944+ au_delayed_kfree(fidir);
4a4d8108
AM
9945+ }
9946+ si_read_unlock(sb);
9947+ return err;
1facf9fc 9948+}
9949+
9950+static int aufs_release_dir(struct inode *inode __maybe_unused,
9951+ struct file *file)
9952+{
9953+ struct au_vdir *vdir_cache;
4a4d8108
AM
9954+ struct au_finfo *finfo;
9955+ struct au_fidir *fidir;
f0c0a007 9956+ struct au_hfile *hf;
5afbbe0d 9957+ aufs_bindex_t bindex, bbot;
f0c0a007 9958+ int execed, delayed;
1facf9fc 9959+
f0c0a007 9960+ delayed = (current->flags & PF_KTHREAD) || in_interrupt();
4a4d8108
AM
9961+ finfo = au_fi(file);
9962+ fidir = finfo->fi_hdir;
9963+ if (fidir) {
076b876e 9964+ au_sphl_del(&finfo->fi_hlist,
2000de60 9965+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108
AM
9966+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
9967+ if (vdir_cache)
f0c0a007 9968+ au_vdir_free(vdir_cache, delayed);
4a4d8108
AM
9969+
9970+ bindex = finfo->fi_btop;
9971+ if (bindex >= 0) {
f0c0a007
AM
9972+ execed = vfsub_file_execed(file);
9973+ hf = fidir->fd_hfile + bindex;
4a4d8108
AM
9974+ /*
9975+ * calls fput() instead of filp_close(),
9976+ * since no dnotify or lock for the lower file.
9977+ */
5afbbe0d 9978+ bbot = fidir->fd_bbot;
f0c0a007
AM
9979+ for (; bindex <= bbot; bindex++, hf++)
9980+ if (hf->hf_file)
9981+ au_hfput(hf, execed);
4a4d8108 9982+ }
f0c0a007 9983+ au_delayed_kfree(fidir);
4a4d8108 9984+ finfo->fi_hdir = NULL;
1facf9fc 9985+ }
f0c0a007 9986+ au_finfo_fin(file, delayed);
1facf9fc 9987+ return 0;
9988+}
9989+
9990+/* ---------------------------------------------------------------------- */
9991+
4a4d8108
AM
9992+static int au_do_flush_dir(struct file *file, fl_owner_t id)
9993+{
9994+ int err;
5afbbe0d 9995+ aufs_bindex_t bindex, bbot;
4a4d8108
AM
9996+ struct file *h_file;
9997+
9998+ err = 0;
5afbbe0d
AM
9999+ bbot = au_fbbot_dir(file);
10000+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
4a4d8108
AM
10001+ h_file = au_hf_dir(file, bindex);
10002+ if (h_file)
10003+ err = vfsub_flush(h_file, id);
10004+ }
10005+ return err;
10006+}
10007+
10008+static int aufs_flush_dir(struct file *file, fl_owner_t id)
10009+{
10010+ return au_do_flush(file, id, au_do_flush_dir);
10011+}
10012+
10013+/* ---------------------------------------------------------------------- */
10014+
1facf9fc 10015+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
10016+{
10017+ int err;
5afbbe0d 10018+ aufs_bindex_t bbot, bindex;
1facf9fc 10019+ struct inode *inode;
10020+ struct super_block *sb;
10021+
10022+ err = 0;
10023+ sb = dentry->d_sb;
5527c038 10024+ inode = d_inode(dentry);
1facf9fc 10025+ IMustLock(inode);
5afbbe0d
AM
10026+ bbot = au_dbbot(dentry);
10027+ for (bindex = au_dbtop(dentry); !err && bindex <= bbot; bindex++) {
1facf9fc 10028+ struct path h_path;
1facf9fc 10029+
10030+ if (au_test_ro(sb, bindex, inode))
10031+ continue;
10032+ h_path.dentry = au_h_dptr(dentry, bindex);
10033+ if (!h_path.dentry)
10034+ continue;
1facf9fc 10035+
1facf9fc 10036+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 10037+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 10038+ }
10039+
10040+ return err;
10041+}
10042+
10043+static int au_do_fsync_dir(struct file *file, int datasync)
10044+{
10045+ int err;
5afbbe0d 10046+ aufs_bindex_t bbot, bindex;
1facf9fc 10047+ struct file *h_file;
10048+ struct super_block *sb;
10049+ struct inode *inode;
1facf9fc 10050+
10051+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
10052+ if (unlikely(err))
10053+ goto out;
10054+
c06a8ce3 10055+ inode = file_inode(file);
b912730e 10056+ sb = inode->i_sb;
5afbbe0d
AM
10057+ bbot = au_fbbot_dir(file);
10058+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
4a4d8108 10059+ h_file = au_hf_dir(file, bindex);
1facf9fc 10060+ if (!h_file || au_test_ro(sb, bindex, inode))
10061+ continue;
10062+
53392da6 10063+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 10064+ }
10065+
4f0767ce 10066+out:
1facf9fc 10067+ return err;
10068+}
10069+
10070+/*
10071+ * @file may be NULL
10072+ */
1e00d052
AM
10073+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
10074+ int datasync)
1facf9fc 10075+{
10076+ int err;
b752ccd1 10077+ struct dentry *dentry;
5527c038 10078+ struct inode *inode;
1facf9fc 10079+ struct super_block *sb;
1facf9fc 10080+
10081+ err = 0;
2000de60 10082+ dentry = file->f_path.dentry;
5527c038 10083+ inode = d_inode(dentry);
febd17d6 10084+ inode_lock(inode);
1facf9fc 10085+ sb = dentry->d_sb;
10086+ si_noflush_read_lock(sb);
10087+ if (file)
10088+ err = au_do_fsync_dir(file, datasync);
10089+ else {
10090+ di_write_lock_child(dentry);
10091+ err = au_do_fsync_dir_no_file(dentry, datasync);
10092+ }
5527c038 10093+ au_cpup_attr_timesizes(inode);
1facf9fc 10094+ di_write_unlock(dentry);
10095+ if (file)
10096+ fi_write_unlock(file);
10097+
10098+ si_read_unlock(sb);
febd17d6 10099+ inode_unlock(inode);
1facf9fc 10100+ return err;
10101+}
10102+
10103+/* ---------------------------------------------------------------------- */
10104+
5afbbe0d 10105+static int aufs_iterate_shared(struct file *file, struct dir_context *ctx)
1facf9fc 10106+{
10107+ int err;
10108+ struct dentry *dentry;
9dbd164d 10109+ struct inode *inode, *h_inode;
1facf9fc 10110+ struct super_block *sb;
10111+
523b37e3 10112+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 10113+
2000de60 10114+ dentry = file->f_path.dentry;
5527c038 10115+ inode = d_inode(dentry);
1facf9fc 10116+ IMustLock(inode);
10117+
10118+ sb = dentry->d_sb;
10119+ si_read_lock(sb, AuLock_FLUSH);
10120+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
10121+ if (unlikely(err))
10122+ goto out;
027c5e7a
AM
10123+ err = au_alive_dir(dentry);
10124+ if (!err)
10125+ err = au_vdir_init(file);
1facf9fc 10126+ di_downgrade_lock(dentry, AuLock_IR);
10127+ if (unlikely(err))
10128+ goto out_unlock;
10129+
5afbbe0d 10130+ h_inode = au_h_iptr(inode, au_ibtop(inode));
b752ccd1 10131+ if (!au_test_nfsd()) {
392086de 10132+ err = au_vdir_fill_de(file, ctx);
9dbd164d 10133+ fsstack_copy_attr_atime(inode, h_inode);
1facf9fc 10134+ } else {
10135+ /*
10136+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
10137+ * encode_fh() and others.
10138+ */
9dbd164d 10139+ atomic_inc(&h_inode->i_count);
1facf9fc 10140+ di_read_unlock(dentry, AuLock_IR);
10141+ si_read_unlock(sb);
392086de 10142+ err = au_vdir_fill_de(file, ctx);
1facf9fc 10143+ fsstack_copy_attr_atime(inode, h_inode);
10144+ fi_write_unlock(file);
9dbd164d 10145+ iput(h_inode);
1facf9fc 10146+
10147+ AuTraceErr(err);
10148+ return err;
10149+ }
10150+
4f0767ce 10151+out_unlock:
1facf9fc 10152+ di_read_unlock(dentry, AuLock_IR);
10153+ fi_write_unlock(file);
4f0767ce 10154+out:
1facf9fc 10155+ si_read_unlock(sb);
10156+ return err;
10157+}
10158+
10159+/* ---------------------------------------------------------------------- */
10160+
10161+#define AuTestEmpty_WHONLY 1
dece6358
AM
10162+#define AuTestEmpty_CALLED (1 << 1)
10163+#define AuTestEmpty_SHWH (1 << 2)
1facf9fc 10164+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
7f207e10
AM
10165+#define au_fset_testempty(flags, name) \
10166+ do { (flags) |= AuTestEmpty_##name; } while (0)
10167+#define au_fclr_testempty(flags, name) \
10168+ do { (flags) &= ~AuTestEmpty_##name; } while (0)
1facf9fc 10169+
dece6358
AM
10170+#ifndef CONFIG_AUFS_SHWH
10171+#undef AuTestEmpty_SHWH
10172+#define AuTestEmpty_SHWH 0
10173+#endif
10174+
1facf9fc 10175+struct test_empty_arg {
392086de 10176+ struct dir_context ctx;
1308ab2a 10177+ struct au_nhash *whlist;
1facf9fc 10178+ unsigned int flags;
10179+ int err;
10180+ aufs_bindex_t bindex;
10181+};
10182+
392086de
AM
10183+static int test_empty_cb(struct dir_context *ctx, const char *__name,
10184+ int namelen, loff_t offset __maybe_unused, u64 ino,
dece6358 10185+ unsigned int d_type)
1facf9fc 10186+{
392086de
AM
10187+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
10188+ ctx);
1facf9fc 10189+ char *name = (void *)__name;
10190+
10191+ arg->err = 0;
10192+ au_fset_testempty(arg->flags, CALLED);
10193+ /* smp_mb(); */
10194+ if (name[0] == '.'
10195+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
10196+ goto out; /* success */
10197+
10198+ if (namelen <= AUFS_WH_PFX_LEN
10199+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
10200+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 10201+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 10202+ arg->err = -ENOTEMPTY;
10203+ goto out;
10204+ }
10205+
10206+ name += AUFS_WH_PFX_LEN;
10207+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 10208+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 10209+ arg->err = au_nhash_append_wh
1308ab2a 10210+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 10211+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 10212+
4f0767ce 10213+out:
1facf9fc 10214+ /* smp_mb(); */
10215+ AuTraceErr(arg->err);
10216+ return arg->err;
10217+}
10218+
10219+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10220+{
10221+ int err;
10222+ struct file *h_file;
10223+
10224+ h_file = au_h_open(dentry, arg->bindex,
10225+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
392086de 10226+ /*file*/NULL, /*force_wr*/0);
1facf9fc 10227+ err = PTR_ERR(h_file);
10228+ if (IS_ERR(h_file))
10229+ goto out;
10230+
10231+ err = 0;
10232+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
c06a8ce3 10233+ && !file_inode(h_file)->i_nlink)
1facf9fc 10234+ goto out_put;
10235+
10236+ do {
10237+ arg->err = 0;
10238+ au_fclr_testempty(arg->flags, CALLED);
10239+ /* smp_mb(); */
392086de 10240+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1facf9fc 10241+ if (err >= 0)
10242+ err = arg->err;
10243+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
10244+
4f0767ce 10245+out_put:
1facf9fc 10246+ fput(h_file);
10247+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 10248+out:
1facf9fc 10249+ return err;
10250+}
10251+
10252+struct do_test_empty_args {
10253+ int *errp;
10254+ struct dentry *dentry;
10255+ struct test_empty_arg *arg;
10256+};
10257+
10258+static void call_do_test_empty(void *args)
10259+{
10260+ struct do_test_empty_args *a = args;
10261+ *a->errp = do_test_empty(a->dentry, a->arg);
10262+}
10263+
10264+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10265+{
10266+ int err, wkq_err;
10267+ struct dentry *h_dentry;
10268+ struct inode *h_inode;
10269+
10270+ h_dentry = au_h_dptr(dentry, arg->bindex);
5527c038 10271+ h_inode = d_inode(h_dentry);
53392da6 10272+ /* todo: i_mode changes anytime? */
febd17d6 10273+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
1facf9fc 10274+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
febd17d6 10275+ inode_unlock(h_inode);
1facf9fc 10276+ if (!err)
10277+ err = do_test_empty(dentry, arg);
10278+ else {
10279+ struct do_test_empty_args args = {
10280+ .errp = &err,
10281+ .dentry = dentry,
10282+ .arg = arg
10283+ };
10284+ unsigned int flags = arg->flags;
10285+
10286+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
10287+ if (unlikely(wkq_err))
10288+ err = wkq_err;
10289+ arg->flags = flags;
10290+ }
10291+
10292+ return err;
10293+}
10294+
10295+int au_test_empty_lower(struct dentry *dentry)
10296+{
10297+ int err;
1308ab2a 10298+ unsigned int rdhash;
5afbbe0d 10299+ aufs_bindex_t bindex, btop, btail;
1308ab2a 10300+ struct au_nhash whlist;
392086de
AM
10301+ struct test_empty_arg arg = {
10302+ .ctx = {
2000de60 10303+ .actor = test_empty_cb
392086de
AM
10304+ }
10305+ };
076b876e 10306+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
1facf9fc 10307+
dece6358
AM
10308+ SiMustAnyLock(dentry->d_sb);
10309+
1308ab2a 10310+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
10311+ if (!rdhash)
10312+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
10313+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 10314+ if (unlikely(err))
1facf9fc 10315+ goto out;
10316+
1facf9fc 10317+ arg.flags = 0;
1308ab2a 10318+ arg.whlist = &whlist;
5afbbe0d 10319+ btop = au_dbtop(dentry);
dece6358
AM
10320+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10321+ au_fset_testempty(arg.flags, SHWH);
076b876e
AM
10322+ test_empty = do_test_empty;
10323+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
10324+ test_empty = sio_test_empty;
5afbbe0d 10325+ arg.bindex = btop;
076b876e 10326+ err = test_empty(dentry, &arg);
1facf9fc 10327+ if (unlikely(err))
10328+ goto out_whlist;
10329+
10330+ au_fset_testempty(arg.flags, WHONLY);
10331+ btail = au_dbtaildir(dentry);
5afbbe0d 10332+ for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
1facf9fc 10333+ struct dentry *h_dentry;
10334+
10335+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10336+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10337+ arg.bindex = bindex;
076b876e 10338+ err = test_empty(dentry, &arg);
1facf9fc 10339+ }
10340+ }
10341+
4f0767ce 10342+out_whlist:
1308ab2a 10343+ au_nhash_wh_free(&whlist);
4f0767ce 10344+out:
1facf9fc 10345+ return err;
10346+}
10347+
10348+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
10349+{
10350+ int err;
392086de
AM
10351+ struct test_empty_arg arg = {
10352+ .ctx = {
2000de60 10353+ .actor = test_empty_cb
392086de
AM
10354+ }
10355+ };
1facf9fc 10356+ aufs_bindex_t bindex, btail;
10357+
10358+ err = 0;
1308ab2a 10359+ arg.whlist = whlist;
1facf9fc 10360+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
10361+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10362+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 10363+ btail = au_dbtaildir(dentry);
5afbbe0d 10364+ for (bindex = au_dbtop(dentry); !err && bindex <= btail; bindex++) {
1facf9fc 10365+ struct dentry *h_dentry;
10366+
10367+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10368+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10369+ arg.bindex = bindex;
10370+ err = sio_test_empty(dentry, &arg);
10371+ }
10372+ }
10373+
10374+ return err;
10375+}
10376+
10377+/* ---------------------------------------------------------------------- */
10378+
10379+const struct file_operations aufs_dir_fop = {
4a4d8108 10380+ .owner = THIS_MODULE,
027c5e7a 10381+ .llseek = default_llseek,
1facf9fc 10382+ .read = generic_read_dir,
5afbbe0d 10383+ .iterate_shared = aufs_iterate_shared,
1facf9fc 10384+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
10385+#ifdef CONFIG_COMPAT
10386+ .compat_ioctl = aufs_compat_ioctl_dir,
10387+#endif
1facf9fc 10388+ .open = aufs_open_dir,
10389+ .release = aufs_release_dir,
4a4d8108 10390+ .flush = aufs_flush_dir,
1facf9fc 10391+ .fsync = aufs_fsync_dir
10392+};
7f207e10
AM
10393diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
10394--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 10395+++ linux/fs/aufs/dir.h 2016-10-09 16:55:36.489368218 +0200
f0c0a007 10396@@ -0,0 +1,137 @@
1facf9fc 10397+/*
8cdd5066 10398+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 10399+ *
10400+ * This program, aufs is free software; you can redistribute it and/or modify
10401+ * it under the terms of the GNU General Public License as published by
10402+ * the Free Software Foundation; either version 2 of the License, or
10403+ * (at your option) any later version.
dece6358
AM
10404+ *
10405+ * This program is distributed in the hope that it will be useful,
10406+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10407+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10408+ * GNU General Public License for more details.
10409+ *
10410+ * You should have received a copy of the GNU General Public License
523b37e3 10411+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10412+ */
10413+
10414+/*
10415+ * directory operations
10416+ */
10417+
10418+#ifndef __AUFS_DIR_H__
10419+#define __AUFS_DIR_H__
10420+
10421+#ifdef __KERNEL__
10422+
10423+#include <linux/fs.h>
1facf9fc 10424+
10425+/* ---------------------------------------------------------------------- */
10426+
10427+/* need to be faster and smaller */
10428+
10429+struct au_nhash {
dece6358
AM
10430+ unsigned int nh_num;
10431+ struct hlist_head *nh_head;
1facf9fc 10432+};
10433+
10434+struct au_vdir_destr {
10435+ unsigned char len;
10436+ unsigned char name[0];
10437+} __packed;
10438+
10439+struct au_vdir_dehstr {
10440+ struct hlist_node hash;
f0c0a007
AM
10441+ union {
10442+ struct au_vdir_destr *str;
10443+ struct llist_node lnode; /* delayed free */
10444+ };
4a4d8108 10445+} ____cacheline_aligned_in_smp;
1facf9fc 10446+
10447+struct au_vdir_de {
10448+ ino_t de_ino;
10449+ unsigned char de_type;
10450+ /* caution: packed */
10451+ struct au_vdir_destr de_str;
10452+} __packed;
10453+
10454+struct au_vdir_wh {
10455+ struct hlist_node wh_hash;
dece6358
AM
10456+#ifdef CONFIG_AUFS_SHWH
10457+ ino_t wh_ino;
1facf9fc 10458+ aufs_bindex_t wh_bindex;
dece6358
AM
10459+ unsigned char wh_type;
10460+#else
10461+ aufs_bindex_t wh_bindex;
10462+#endif
10463+ /* caution: packed */
1facf9fc 10464+ struct au_vdir_destr wh_str;
10465+} __packed;
10466+
10467+union au_vdir_deblk_p {
10468+ unsigned char *deblk;
10469+ struct au_vdir_de *de;
10470+};
10471+
10472+struct au_vdir {
10473+ unsigned char **vd_deblk;
10474+ unsigned long vd_nblk;
1facf9fc 10475+ struct {
10476+ unsigned long ul;
10477+ union au_vdir_deblk_p p;
10478+ } vd_last;
10479+
10480+ unsigned long vd_version;
dece6358 10481+ unsigned int vd_deblk_sz;
f0c0a007
AM
10482+ union {
10483+ unsigned long vd_jiffy;
10484+ struct llist_node vd_lnode; /* delayed free */
10485+ };
4a4d8108 10486+} ____cacheline_aligned_in_smp;
1facf9fc 10487+
10488+/* ---------------------------------------------------------------------- */
10489+
10490+/* dir.c */
10491+extern const struct file_operations aufs_dir_fop;
10492+void au_add_nlink(struct inode *dir, struct inode *h_dir);
10493+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 10494+loff_t au_dir_size(struct file *file, struct dentry *dentry);
b912730e 10495+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
1facf9fc 10496+int au_test_empty_lower(struct dentry *dentry);
10497+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
10498+
10499+/* vdir.c */
1308ab2a 10500+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
10501+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
10502+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 10503+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
10504+ int limit);
dece6358
AM
10505+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
10506+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
10507+ unsigned int d_type, aufs_bindex_t bindex,
10508+ unsigned char shwh);
f0c0a007 10509+void au_vdir_free(struct au_vdir *vdir, int atonce);
1facf9fc 10510+int au_vdir_init(struct file *file);
392086de 10511+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
1facf9fc 10512+
10513+/* ioctl.c */
10514+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
10515+
1308ab2a 10516+#ifdef CONFIG_AUFS_RDU
10517+/* rdu.c */
10518+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10519+#ifdef CONFIG_COMPAT
10520+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
10521+ unsigned long arg);
10522+#endif
1308ab2a 10523+#else
c1595e42
JR
10524+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
10525+ unsigned int cmd, unsigned long arg)
b752ccd1 10526+#ifdef CONFIG_COMPAT
c1595e42
JR
10527+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
10528+ unsigned int cmd, unsigned long arg)
b752ccd1 10529+#endif
1308ab2a 10530+#endif
10531+
1facf9fc 10532+#endif /* __KERNEL__ */
10533+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
10534diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
10535--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
10536+++ linux/fs/aufs/dynop.c 2016-10-09 16:55:36.489368218 +0200
10537@@ -0,0 +1,371 @@
1facf9fc 10538+/*
8cdd5066 10539+ * Copyright (C) 2010-2016 Junjiro R. Okajima
1facf9fc 10540+ *
10541+ * This program, aufs is free software; you can redistribute it and/or modify
10542+ * it under the terms of the GNU General Public License as published by
10543+ * the Free Software Foundation; either version 2 of the License, or
10544+ * (at your option) any later version.
dece6358
AM
10545+ *
10546+ * This program is distributed in the hope that it will be useful,
10547+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10548+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10549+ * GNU General Public License for more details.
10550+ *
10551+ * You should have received a copy of the GNU General Public License
523b37e3 10552+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10553+ */
10554+
10555+/*
4a4d8108 10556+ * dynamically customizable operations for regular files
1facf9fc 10557+ */
10558+
1facf9fc 10559+#include "aufs.h"
10560+
4a4d8108 10561+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 10562+
4a4d8108
AM
10563+/*
10564+ * How large will these lists be?
10565+ * Usually just a few elements, 20-30 at most for each, I guess.
10566+ */
f0c0a007 10567+static struct au_sphlhead dynop[AuDyLast];
4a4d8108 10568+
f0c0a007 10569+static struct au_dykey *dy_gfind_get(struct au_sphlhead *sphl, const void *h_op)
1facf9fc 10570+{
4a4d8108 10571+ struct au_dykey *key, *tmp;
f0c0a007 10572+ struct hlist_head *head;
1facf9fc 10573+
4a4d8108 10574+ key = NULL;
f0c0a007 10575+ head = &sphl->head;
4a4d8108 10576+ rcu_read_lock();
f0c0a007 10577+ hlist_for_each_entry_rcu(tmp, head, dk_hnode)
4a4d8108
AM
10578+ if (tmp->dk_op.dy_hop == h_op) {
10579+ key = tmp;
10580+ kref_get(&key->dk_kref);
10581+ break;
10582+ }
10583+ rcu_read_unlock();
10584+
10585+ return key;
1facf9fc 10586+}
10587+
4a4d8108 10588+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 10589+{
4a4d8108
AM
10590+ struct au_dykey **k, *found;
10591+ const void *h_op = key->dk_op.dy_hop;
10592+ int i;
1facf9fc 10593+
4a4d8108
AM
10594+ found = NULL;
10595+ k = br->br_dykey;
10596+ for (i = 0; i < AuBrDynOp; i++)
10597+ if (k[i]) {
10598+ if (k[i]->dk_op.dy_hop == h_op) {
10599+ found = k[i];
10600+ break;
10601+ }
10602+ } else
10603+ break;
10604+ if (!found) {
10605+ spin_lock(&br->br_dykey_lock);
10606+ for (; i < AuBrDynOp; i++)
10607+ if (k[i]) {
10608+ if (k[i]->dk_op.dy_hop == h_op) {
10609+ found = k[i];
10610+ break;
10611+ }
10612+ } else {
10613+ k[i] = key;
10614+ break;
10615+ }
10616+ spin_unlock(&br->br_dykey_lock);
10617+ BUG_ON(i == AuBrDynOp); /* expand the array */
10618+ }
10619+
10620+ return found;
1facf9fc 10621+}
10622+
4a4d8108 10623+/* kref_get() if @key is already added */
f0c0a007 10624+static struct au_dykey *dy_gadd(struct au_sphlhead *sphl, struct au_dykey *key)
4a4d8108
AM
10625+{
10626+ struct au_dykey *tmp, *found;
f0c0a007 10627+ struct hlist_head *head;
4a4d8108 10628+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 10629+
4a4d8108 10630+ found = NULL;
f0c0a007
AM
10631+ head = &sphl->head;
10632+ spin_lock(&sphl->spin);
10633+ hlist_for_each_entry(tmp, head, dk_hnode)
4a4d8108
AM
10634+ if (tmp->dk_op.dy_hop == h_op) {
10635+ kref_get(&tmp->dk_kref);
10636+ found = tmp;
10637+ break;
10638+ }
10639+ if (!found)
f0c0a007
AM
10640+ hlist_add_head_rcu(&key->dk_hnode, head);
10641+ spin_unlock(&sphl->spin);
1facf9fc 10642+
4a4d8108
AM
10643+ if (!found)
10644+ DyPrSym(key);
10645+ return found;
10646+}
10647+
10648+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 10649+{
4a4d8108
AM
10650+ struct au_dykey *key;
10651+
10652+ key = container_of(rcu, struct au_dykey, dk_rcu);
10653+ DyPrSym(key);
f0c0a007 10654+ kfree(key); /* not delayed */
1facf9fc 10655+}
10656+
4a4d8108
AM
10657+static void dy_free(struct kref *kref)
10658+{
10659+ struct au_dykey *key;
f0c0a007 10660+ struct au_sphlhead *sphl;
1facf9fc 10661+
4a4d8108 10662+ key = container_of(kref, struct au_dykey, dk_kref);
f0c0a007
AM
10663+ sphl = dynop + key->dk_op.dy_type;
10664+ au_sphl_del_rcu(&key->dk_hnode, sphl);
4a4d8108
AM
10665+ call_rcu(&key->dk_rcu, dy_free_rcu);
10666+}
10667+
10668+void au_dy_put(struct au_dykey *key)
1facf9fc 10669+{
4a4d8108
AM
10670+ kref_put(&key->dk_kref, dy_free);
10671+}
1facf9fc 10672+
4a4d8108
AM
10673+/* ---------------------------------------------------------------------- */
10674+
10675+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
10676+
10677+#ifdef CONFIG_AUFS_DEBUG
10678+#define DyDbgDeclare(cnt) unsigned int cnt = 0
4f0767ce 10679+#define DyDbgInc(cnt) do { cnt++; } while (0)
4a4d8108
AM
10680+#else
10681+#define DyDbgDeclare(cnt) do {} while (0)
10682+#define DyDbgInc(cnt) do {} while (0)
10683+#endif
10684+
10685+#define DySet(func, dst, src, h_op, h_sb) do { \
10686+ DyDbgInc(cnt); \
10687+ if (h_op->func) { \
10688+ if (src.func) \
10689+ dst.func = src.func; \
10690+ else \
10691+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
10692+ } \
10693+} while (0)
10694+
10695+#define DySetForce(func, dst, src) do { \
10696+ AuDebugOn(!src.func); \
10697+ DyDbgInc(cnt); \
10698+ dst.func = src.func; \
10699+} while (0)
10700+
10701+#define DySetAop(func) \
10702+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
10703+#define DySetAopForce(func) \
10704+ DySetForce(func, dyaop->da_op, aufs_aop)
10705+
10706+static void dy_aop(struct au_dykey *key, const void *h_op,
10707+ struct super_block *h_sb __maybe_unused)
10708+{
10709+ struct au_dyaop *dyaop = (void *)key;
10710+ const struct address_space_operations *h_aop = h_op;
10711+ DyDbgDeclare(cnt);
10712+
10713+ AuDbg("%s\n", au_sbtype(h_sb));
10714+
10715+ DySetAop(writepage);
10716+ DySetAopForce(readpage); /* force */
4a4d8108
AM
10717+ DySetAop(writepages);
10718+ DySetAop(set_page_dirty);
10719+ DySetAop(readpages);
10720+ DySetAop(write_begin);
10721+ DySetAop(write_end);
10722+ DySetAop(bmap);
10723+ DySetAop(invalidatepage);
10724+ DySetAop(releasepage);
027c5e7a 10725+ DySetAop(freepage);
7e9cd9fe 10726+ /* this one will be changed according to an aufs mount option */
4a4d8108 10727+ DySetAop(direct_IO);
4a4d8108 10728+ DySetAop(migratepage);
e2f27e51
AM
10729+ DySetAop(isolate_page);
10730+ DySetAop(putback_page);
4a4d8108
AM
10731+ DySetAop(launder_page);
10732+ DySetAop(is_partially_uptodate);
392086de 10733+ DySetAop(is_dirty_writeback);
4a4d8108 10734+ DySetAop(error_remove_page);
b4510431
AM
10735+ DySetAop(swap_activate);
10736+ DySetAop(swap_deactivate);
4a4d8108
AM
10737+
10738+ DyDbgSize(cnt, *h_aop);
4a4d8108
AM
10739+}
10740+
4a4d8108
AM
10741+/* ---------------------------------------------------------------------- */
10742+
10743+static void dy_bug(struct kref *kref)
10744+{
10745+ BUG();
10746+}
10747+
10748+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
10749+{
10750+ struct au_dykey *key, *old;
f0c0a007 10751+ struct au_sphlhead *sphl;
b752ccd1 10752+ struct op {
4a4d8108 10753+ unsigned int sz;
b752ccd1
AM
10754+ void (*set)(struct au_dykey *key, const void *h_op,
10755+ struct super_block *h_sb __maybe_unused);
10756+ };
10757+ static const struct op a[] = {
4a4d8108
AM
10758+ [AuDy_AOP] = {
10759+ .sz = sizeof(struct au_dyaop),
b752ccd1 10760+ .set = dy_aop
4a4d8108 10761+ }
b752ccd1
AM
10762+ };
10763+ const struct op *p;
4a4d8108 10764+
f0c0a007
AM
10765+ sphl = dynop + op->dy_type;
10766+ key = dy_gfind_get(sphl, op->dy_hop);
4a4d8108
AM
10767+ if (key)
10768+ goto out_add; /* success */
10769+
10770+ p = a + op->dy_type;
10771+ key = kzalloc(p->sz, GFP_NOFS);
10772+ if (unlikely(!key)) {
10773+ key = ERR_PTR(-ENOMEM);
10774+ goto out;
10775+ }
10776+
10777+ key->dk_op.dy_hop = op->dy_hop;
10778+ kref_init(&key->dk_kref);
86dc4139 10779+ p->set(key, op->dy_hop, au_br_sb(br));
f0c0a007 10780+ old = dy_gadd(sphl, key);
4a4d8108 10781+ if (old) {
f0c0a007 10782+ au_delayed_kfree(key);
4a4d8108
AM
10783+ key = old;
10784+ }
10785+
10786+out_add:
10787+ old = dy_bradd(br, key);
10788+ if (old)
10789+ /* its ref-count should never be zero here */
10790+ kref_put(&key->dk_kref, dy_bug);
10791+out:
10792+ return key;
10793+}
10794+
10795+/* ---------------------------------------------------------------------- */
10796+/*
10797+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
c1595e42 10798+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
4a4d8108
AM
10799+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
10800+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
10801+ * See the aufs manual for details.
4a4d8108
AM
10802+ */
10803+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
10804+{
7e9cd9fe 10805+ if (!do_dx)
4a4d8108 10806+ dyaop->da_op.direct_IO = NULL;
7e9cd9fe 10807+ else
4a4d8108 10808+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
4a4d8108
AM
10809+}
10810+
10811+static struct au_dyaop *dy_aget(struct au_branch *br,
10812+ const struct address_space_operations *h_aop,
10813+ int do_dx)
10814+{
10815+ struct au_dyaop *dyaop;
10816+ struct au_dynop op;
10817+
10818+ op.dy_type = AuDy_AOP;
10819+ op.dy_haop = h_aop;
10820+ dyaop = (void *)dy_get(&op, br);
10821+ if (IS_ERR(dyaop))
10822+ goto out;
10823+ dy_adx(dyaop, do_dx);
10824+
10825+out:
10826+ return dyaop;
10827+}
10828+
10829+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10830+ struct inode *h_inode)
10831+{
10832+ int err, do_dx;
10833+ struct super_block *sb;
10834+ struct au_branch *br;
10835+ struct au_dyaop *dyaop;
10836+
10837+ AuDebugOn(!S_ISREG(h_inode->i_mode));
10838+ IiMustWriteLock(inode);
10839+
10840+ sb = inode->i_sb;
10841+ br = au_sbr(sb, bindex);
10842+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
10843+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
10844+ err = PTR_ERR(dyaop);
10845+ if (IS_ERR(dyaop))
10846+ /* unnecessary to call dy_fput() */
10847+ goto out;
10848+
10849+ err = 0;
10850+ inode->i_mapping->a_ops = &dyaop->da_op;
10851+
10852+out:
10853+ return err;
10854+}
10855+
b752ccd1
AM
10856+/*
10857+ * Is it safe to replace a_ops while the inode/file is in operation?
10858+ * Yes, I hope so.
10859+ */
10860+int au_dy_irefresh(struct inode *inode)
10861+{
10862+ int err;
5afbbe0d 10863+ aufs_bindex_t btop;
b752ccd1
AM
10864+ struct inode *h_inode;
10865+
10866+ err = 0;
10867+ if (S_ISREG(inode->i_mode)) {
5afbbe0d
AM
10868+ btop = au_ibtop(inode);
10869+ h_inode = au_h_iptr(inode, btop);
10870+ err = au_dy_iaop(inode, btop, h_inode);
b752ccd1
AM
10871+ }
10872+ return err;
10873+}
10874+
4a4d8108
AM
10875+void au_dy_arefresh(int do_dx)
10876+{
f0c0a007
AM
10877+ struct au_sphlhead *sphl;
10878+ struct hlist_head *head;
4a4d8108
AM
10879+ struct au_dykey *key;
10880+
f0c0a007
AM
10881+ sphl = dynop + AuDy_AOP;
10882+ head = &sphl->head;
10883+ spin_lock(&sphl->spin);
10884+ hlist_for_each_entry(key, head, dk_hnode)
4a4d8108 10885+ dy_adx((void *)key, do_dx);
f0c0a007 10886+ spin_unlock(&sphl->spin);
4a4d8108
AM
10887+}
10888+
4a4d8108
AM
10889+/* ---------------------------------------------------------------------- */
10890+
10891+void __init au_dy_init(void)
10892+{
10893+ int i;
10894+
10895+ /* make sure that 'struct au_dykey *' can be any type */
10896+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
10897+
10898+ for (i = 0; i < AuDyLast; i++)
f0c0a007 10899+ au_sphl_init(dynop + i);
4a4d8108
AM
10900+}
10901+
10902+void au_dy_fin(void)
10903+{
10904+ int i;
10905+
10906+ for (i = 0; i < AuDyLast; i++)
f0c0a007 10907+ WARN_ON(!hlist_empty(&dynop[i].head));
4a4d8108 10908+}
7f207e10
AM
10909diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
10910--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 10911+++ linux/fs/aufs/dynop.h 2016-10-09 16:55:36.489368218 +0200
7e9cd9fe 10912@@ -0,0 +1,74 @@
4a4d8108 10913+/*
8cdd5066 10914+ * Copyright (C) 2010-2016 Junjiro R. Okajima
4a4d8108
AM
10915+ *
10916+ * This program, aufs is free software; you can redistribute it and/or modify
10917+ * it under the terms of the GNU General Public License as published by
10918+ * the Free Software Foundation; either version 2 of the License, or
10919+ * (at your option) any later version.
10920+ *
10921+ * This program is distributed in the hope that it will be useful,
10922+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10923+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10924+ * GNU General Public License for more details.
10925+ *
10926+ * You should have received a copy of the GNU General Public License
523b37e3 10927+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10928+ */
10929+
10930+/*
10931+ * dynamically customizable operations (for regular files only)
10932+ */
10933+
10934+#ifndef __AUFS_DYNOP_H__
10935+#define __AUFS_DYNOP_H__
10936+
10937+#ifdef __KERNEL__
10938+
7e9cd9fe
AM
10939+#include <linux/fs.h>
10940+#include <linux/kref.h>
4a4d8108 10941+
2cbb1c4b 10942+enum {AuDy_AOP, AuDyLast};
4a4d8108
AM
10943+
10944+struct au_dynop {
10945+ int dy_type;
10946+ union {
10947+ const void *dy_hop;
10948+ const struct address_space_operations *dy_haop;
4a4d8108
AM
10949+ };
10950+};
10951+
10952+struct au_dykey {
10953+ union {
f0c0a007 10954+ struct hlist_node dk_hnode;
4a4d8108
AM
10955+ struct rcu_head dk_rcu;
10956+ };
10957+ struct au_dynop dk_op;
10958+
10959+ /*
10960+ * while this key is in the branch-local array, a kref is held; when the
10961+ * branch is removed, the kref is put.
10962+ */
10963+ struct kref dk_kref;
10964+};
10965+
10966+/* stop unioning since their sizes are very different from each other */
10967+struct au_dyaop {
10968+ struct au_dykey da_key;
10969+ struct address_space_operations da_op; /* not const */
4a4d8108
AM
10970+};
10971+
4a4d8108
AM
10972+/* ---------------------------------------------------------------------- */
10973+
10974+/* dynop.c */
10975+struct au_branch;
10976+void au_dy_put(struct au_dykey *key);
10977+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10978+ struct inode *h_inode);
b752ccd1 10979+int au_dy_irefresh(struct inode *inode);
4a4d8108 10980+void au_dy_arefresh(int do_dio);
4a4d8108
AM
10981+
10982+void __init au_dy_init(void);
10983+void au_dy_fin(void);
10984+
4a4d8108
AM
10985+#endif /* __KERNEL__ */
10986+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
10987diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
10988--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
10989+++ linux/fs/aufs/export.c 2016-12-17 12:28:17.595211562 +0100
10990@@ -0,0 +1,836 @@
4a4d8108 10991+/*
8cdd5066 10992+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
10993+ *
10994+ * This program, aufs is free software; you can redistribute it and/or modify
10995+ * it under the terms of the GNU General Public License as published by
10996+ * the Free Software Foundation; either version 2 of the License, or
10997+ * (at your option) any later version.
10998+ *
10999+ * This program is distributed in the hope that it will be useful,
11000+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11001+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11002+ * GNU General Public License for more details.
11003+ *
11004+ * You should have received a copy of the GNU General Public License
523b37e3 11005+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
11006+ */
11007+
11008+/*
11009+ * export via nfs
11010+ */
11011+
11012+#include <linux/exportfs.h>
7eafdf33 11013+#include <linux/fs_struct.h>
4a4d8108
AM
11014+#include <linux/namei.h>
11015+#include <linux/nsproxy.h>
11016+#include <linux/random.h>
11017+#include <linux/writeback.h>
11018+#include "aufs.h"
11019+
11020+union conv {
11021+#ifdef CONFIG_AUFS_INO_T_64
11022+ __u32 a[2];
11023+#else
11024+ __u32 a[1];
11025+#endif
11026+ ino_t ino;
11027+};
11028+
11029+static ino_t decode_ino(__u32 *a)
11030+{
11031+ union conv u;
11032+
11033+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
11034+ u.a[0] = a[0];
11035+#ifdef CONFIG_AUFS_INO_T_64
11036+ u.a[1] = a[1];
11037+#endif
11038+ return u.ino;
11039+}
11040+
11041+static void encode_ino(__u32 *a, ino_t ino)
11042+{
11043+ union conv u;
11044+
11045+ u.ino = ino;
11046+ a[0] = u.a[0];
11047+#ifdef CONFIG_AUFS_INO_T_64
11048+ a[1] = u.a[1];
11049+#endif
11050+}
11051+
11052+/* NFS file handle */
11053+enum {
11054+ Fh_br_id,
11055+ Fh_sigen,
11056+#ifdef CONFIG_AUFS_INO_T_64
11057+ /* support 64bit inode number */
11058+ Fh_ino1,
11059+ Fh_ino2,
11060+ Fh_dir_ino1,
11061+ Fh_dir_ino2,
11062+#else
11063+ Fh_ino1,
11064+ Fh_dir_ino1,
11065+#endif
11066+ Fh_igen,
11067+ Fh_h_type,
11068+ Fh_tail,
11069+
11070+ Fh_ino = Fh_ino1,
11071+ Fh_dir_ino = Fh_dir_ino1
11072+};
11073+
11074+static int au_test_anon(struct dentry *dentry)
11075+{
027c5e7a 11076+ /* note: read d_flags without d_lock */
4a4d8108
AM
11077+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
11078+}
11079+
a2a7ad62
AM
11080+int au_test_nfsd(void)
11081+{
11082+ int ret;
11083+ struct task_struct *tsk = current;
11084+ char comm[sizeof(tsk->comm)];
11085+
11086+ ret = 0;
11087+ if (tsk->flags & PF_KTHREAD) {
11088+ get_task_comm(comm, tsk);
11089+ ret = !strcmp(comm, "nfsd");
11090+ }
11091+
11092+ return ret;
11093+}
11094+
4a4d8108
AM
11095+/* ---------------------------------------------------------------------- */
11096+/* inode generation external table */
11097+
b752ccd1 11098+void au_xigen_inc(struct inode *inode)
4a4d8108 11099+{
4a4d8108
AM
11100+ loff_t pos;
11101+ ssize_t sz;
11102+ __u32 igen;
11103+ struct super_block *sb;
11104+ struct au_sbinfo *sbinfo;
11105+
4a4d8108 11106+ sb = inode->i_sb;
b752ccd1 11107+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 11108+
b752ccd1 11109+ sbinfo = au_sbi(sb);
1facf9fc 11110+ pos = inode->i_ino;
11111+ pos *= sizeof(igen);
11112+ igen = inode->i_generation + 1;
1facf9fc 11113+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
11114+ sizeof(igen), &pos);
11115+ if (sz == sizeof(igen))
b752ccd1 11116+ return; /* success */
1facf9fc 11117+
b752ccd1 11118+ if (unlikely(sz >= 0))
1facf9fc 11119+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 11120+}
11121+
11122+int au_xigen_new(struct inode *inode)
11123+{
11124+ int err;
11125+ loff_t pos;
11126+ ssize_t sz;
11127+ struct super_block *sb;
11128+ struct au_sbinfo *sbinfo;
11129+ struct file *file;
11130+
11131+ err = 0;
11132+ /* todo: dirty, at mount time */
11133+ if (inode->i_ino == AUFS_ROOT_INO)
11134+ goto out;
11135+ sb = inode->i_sb;
dece6358 11136+ SiMustAnyLock(sb);
1facf9fc 11137+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11138+ goto out;
11139+
11140+ err = -EFBIG;
11141+ pos = inode->i_ino;
11142+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
11143+ AuIOErr1("too large i%lld\n", pos);
11144+ goto out;
11145+ }
11146+ pos *= sizeof(inode->i_generation);
11147+
11148+ err = 0;
11149+ sbinfo = au_sbi(sb);
11150+ file = sbinfo->si_xigen;
11151+ BUG_ON(!file);
11152+
c06a8ce3 11153+ if (vfsub_f_size_read(file)
1facf9fc 11154+ < pos + sizeof(inode->i_generation)) {
11155+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
11156+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
11157+ sizeof(inode->i_generation), &pos);
11158+ } else
11159+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
11160+ sizeof(inode->i_generation), &pos);
11161+ if (sz == sizeof(inode->i_generation))
11162+ goto out; /* success */
11163+
11164+ err = sz;
11165+ if (unlikely(sz >= 0)) {
11166+ err = -EIO;
11167+ AuIOErr("xigen error (%zd)\n", sz);
11168+ }
11169+
4f0767ce 11170+out:
1facf9fc 11171+ return err;
11172+}
11173+
11174+int au_xigen_set(struct super_block *sb, struct file *base)
11175+{
11176+ int err;
11177+ struct au_sbinfo *sbinfo;
11178+ struct file *file;
11179+
dece6358
AM
11180+ SiMustWriteLock(sb);
11181+
1facf9fc 11182+ sbinfo = au_sbi(sb);
11183+ file = au_xino_create2(base, sbinfo->si_xigen);
11184+ err = PTR_ERR(file);
11185+ if (IS_ERR(file))
11186+ goto out;
11187+ err = 0;
11188+ if (sbinfo->si_xigen)
11189+ fput(sbinfo->si_xigen);
11190+ sbinfo->si_xigen = file;
11191+
4f0767ce 11192+out:
1facf9fc 11193+ return err;
11194+}
11195+
11196+void au_xigen_clr(struct super_block *sb)
11197+{
11198+ struct au_sbinfo *sbinfo;
11199+
dece6358
AM
11200+ SiMustWriteLock(sb);
11201+
1facf9fc 11202+ sbinfo = au_sbi(sb);
11203+ if (sbinfo->si_xigen) {
11204+ fput(sbinfo->si_xigen);
11205+ sbinfo->si_xigen = NULL;
11206+ }
11207+}
11208+
11209+/* ---------------------------------------------------------------------- */
11210+
11211+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
11212+ ino_t dir_ino)
11213+{
11214+ struct dentry *dentry, *d;
11215+ struct inode *inode;
11216+ unsigned int sigen;
11217+
11218+ dentry = NULL;
11219+ inode = ilookup(sb, ino);
11220+ if (!inode)
11221+ goto out;
11222+
11223+ dentry = ERR_PTR(-ESTALE);
11224+ sigen = au_sigen(sb);
5afbbe0d 11225+ if (unlikely(au_is_bad_inode(inode)
1facf9fc 11226+ || IS_DEADDIR(inode)
537831f9 11227+ || sigen != au_iigen(inode, NULL)))
1facf9fc 11228+ goto out_iput;
11229+
11230+ dentry = NULL;
11231+ if (!dir_ino || S_ISDIR(inode->i_mode))
11232+ dentry = d_find_alias(inode);
11233+ else {
027c5e7a 11234+ spin_lock(&inode->i_lock);
c1595e42 11235+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 11236+ spin_lock(&d->d_lock);
1facf9fc 11237+ if (!au_test_anon(d)
5527c038 11238+ && d_inode(d->d_parent)->i_ino == dir_ino) {
027c5e7a
AM
11239+ dentry = dget_dlock(d);
11240+ spin_unlock(&d->d_lock);
1facf9fc 11241+ break;
11242+ }
027c5e7a
AM
11243+ spin_unlock(&d->d_lock);
11244+ }
11245+ spin_unlock(&inode->i_lock);
1facf9fc 11246+ }
027c5e7a 11247+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 11248+ /* need to refresh */
1facf9fc 11249+ dput(dentry);
2cbb1c4b 11250+ dentry = NULL;
1facf9fc 11251+ }
11252+
4f0767ce 11253+out_iput:
1facf9fc 11254+ iput(inode);
4f0767ce 11255+out:
2cbb1c4b 11256+ AuTraceErrPtr(dentry);
1facf9fc 11257+ return dentry;
11258+}
11259+
11260+/* ---------------------------------------------------------------------- */
11261+
11262+/* todo: dirty? */
11263+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
11264+
11265+struct au_compare_mnt_args {
11266+ /* input */
11267+ struct super_block *sb;
11268+
11269+ /* output */
11270+ struct vfsmount *mnt;
11271+};
11272+
11273+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
11274+{
11275+ struct au_compare_mnt_args *a = arg;
11276+
11277+ if (mnt->mnt_sb != a->sb)
11278+ return 0;
11279+ a->mnt = mntget(mnt);
11280+ return 1;
11281+}
11282+
1facf9fc 11283+static struct vfsmount *au_mnt_get(struct super_block *sb)
11284+{
4a4d8108 11285+ int err;
7eafdf33 11286+ struct path root;
4a4d8108
AM
11287+ struct au_compare_mnt_args args = {
11288+ .sb = sb
11289+ };
1facf9fc 11290+
7eafdf33 11291+ get_fs_root(current->fs, &root);
523b37e3 11292+ rcu_read_lock();
7eafdf33 11293+ err = iterate_mounts(au_compare_mnt, &args, root.mnt);
523b37e3 11294+ rcu_read_unlock();
7eafdf33 11295+ path_put(&root);
4a4d8108
AM
11296+ AuDebugOn(!err);
11297+ AuDebugOn(!args.mnt);
11298+ return args.mnt;
1facf9fc 11299+}
11300+
11301+struct au_nfsd_si_lock {
4a4d8108 11302+ unsigned int sigen;
027c5e7a 11303+ aufs_bindex_t bindex, br_id;
1facf9fc 11304+ unsigned char force_lock;
11305+};
11306+
027c5e7a
AM
11307+static int si_nfsd_read_lock(struct super_block *sb,
11308+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11309+{
027c5e7a 11310+ int err;
1facf9fc 11311+ aufs_bindex_t bindex;
11312+
11313+ si_read_lock(sb, AuLock_FLUSH);
11314+
11315+ /* branch id may be wrapped around */
027c5e7a 11316+ err = 0;
1facf9fc 11317+ bindex = au_br_index(sb, nsi_lock->br_id);
11318+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
11319+ goto out; /* success */
11320+
027c5e7a
AM
11321+ err = -ESTALE;
11322+ bindex = -1;
1facf9fc 11323+ if (!nsi_lock->force_lock)
11324+ si_read_unlock(sb);
1facf9fc 11325+
4f0767ce 11326+out:
027c5e7a
AM
11327+ nsi_lock->bindex = bindex;
11328+ return err;
1facf9fc 11329+}
11330+
11331+struct find_name_by_ino {
392086de 11332+ struct dir_context ctx;
1facf9fc 11333+ int called, found;
11334+ ino_t ino;
11335+ char *name;
11336+ int namelen;
11337+};
11338+
11339+static int
392086de
AM
11340+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
11341+ loff_t offset, u64 ino, unsigned int d_type)
1facf9fc 11342+{
392086de
AM
11343+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
11344+ ctx);
1facf9fc 11345+
11346+ a->called++;
11347+ if (a->ino != ino)
11348+ return 0;
11349+
11350+ memcpy(a->name, name, namelen);
11351+ a->namelen = namelen;
11352+ a->found = 1;
11353+ return 1;
11354+}
11355+
11356+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
11357+ struct au_nfsd_si_lock *nsi_lock)
11358+{
11359+ struct dentry *dentry, *parent;
11360+ struct file *file;
11361+ struct inode *dir;
392086de
AM
11362+ struct find_name_by_ino arg = {
11363+ .ctx = {
2000de60 11364+ .actor = find_name_by_ino
392086de
AM
11365+ }
11366+ };
1facf9fc 11367+ int err;
11368+
11369+ parent = path->dentry;
11370+ if (nsi_lock)
11371+ si_read_unlock(parent->d_sb);
4a4d8108 11372+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 11373+ dentry = (void *)file;
11374+ if (IS_ERR(file))
11375+ goto out;
11376+
11377+ dentry = ERR_PTR(-ENOMEM);
537831f9 11378+ arg.name = (void *)__get_free_page(GFP_NOFS);
1facf9fc 11379+ if (unlikely(!arg.name))
11380+ goto out_file;
11381+ arg.ino = ino;
11382+ arg.found = 0;
11383+ do {
11384+ arg.called = 0;
11385+ /* smp_mb(); */
392086de 11386+ err = vfsub_iterate_dir(file, &arg.ctx);
1facf9fc 11387+ } while (!err && !arg.found && arg.called);
11388+ dentry = ERR_PTR(err);
11389+ if (unlikely(err))
11390+ goto out_name;
1716fcea
AM
11391+ /* instead of ENOENT */
11392+ dentry = ERR_PTR(-ESTALE);
1facf9fc 11393+ if (!arg.found)
11394+ goto out_name;
11395+
b4510431 11396+ /* do not call vfsub_lkup_one() */
5527c038 11397+ dir = d_inode(parent);
febd17d6 11398+ dentry = vfsub_lookup_one_len_unlocked(arg.name, parent, arg.namelen);
1facf9fc 11399+ AuTraceErrPtr(dentry);
11400+ if (IS_ERR(dentry))
11401+ goto out_name;
11402+ AuDebugOn(au_test_anon(dentry));
5527c038 11403+ if (unlikely(d_really_is_negative(dentry))) {
1facf9fc 11404+ dput(dentry);
11405+ dentry = ERR_PTR(-ENOENT);
11406+ }
11407+
4f0767ce 11408+out_name:
f0c0a007 11409+ au_delayed_free_page((unsigned long)arg.name);
4f0767ce 11410+out_file:
1facf9fc 11411+ fput(file);
4f0767ce 11412+out:
1facf9fc 11413+ if (unlikely(nsi_lock
11414+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
11415+ if (!IS_ERR(dentry)) {
11416+ dput(dentry);
11417+ dentry = ERR_PTR(-ESTALE);
11418+ }
11419+ AuTraceErrPtr(dentry);
11420+ return dentry;
11421+}
11422+
11423+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
11424+ ino_t dir_ino,
11425+ struct au_nfsd_si_lock *nsi_lock)
11426+{
11427+ struct dentry *dentry;
11428+ struct path path;
11429+
11430+ if (dir_ino != AUFS_ROOT_INO) {
11431+ path.dentry = decode_by_ino(sb, dir_ino, 0);
11432+ dentry = path.dentry;
11433+ if (!path.dentry || IS_ERR(path.dentry))
11434+ goto out;
11435+ AuDebugOn(au_test_anon(path.dentry));
11436+ } else
11437+ path.dentry = dget(sb->s_root);
11438+
11439+ path.mnt = au_mnt_get(sb);
11440+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
11441+ path_put(&path);
11442+
4f0767ce 11443+out:
1facf9fc 11444+ AuTraceErrPtr(dentry);
11445+ return dentry;
11446+}
11447+
11448+/* ---------------------------------------------------------------------- */
11449+
11450+static int h_acceptable(void *expv, struct dentry *dentry)
11451+{
11452+ return 1;
11453+}
11454+
11455+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
11456+ char *buf, int len, struct super_block *sb)
11457+{
11458+ char *p;
11459+ int n;
11460+ struct path path;
11461+
11462+ p = d_path(h_rootpath, buf, len);
11463+ if (IS_ERR(p))
11464+ goto out;
11465+ n = strlen(p);
11466+
11467+ path.mnt = h_rootpath->mnt;
11468+ path.dentry = h_parent;
11469+ p = d_path(&path, buf, len);
11470+ if (IS_ERR(p))
11471+ goto out;
11472+ if (n != 1)
11473+ p += n;
11474+
11475+ path.mnt = au_mnt_get(sb);
11476+ path.dentry = sb->s_root;
11477+ p = d_path(&path, buf, len - strlen(p));
11478+ mntput(path.mnt);
11479+ if (IS_ERR(p))
11480+ goto out;
11481+ if (n != 1)
11482+ p[strlen(p)] = '/';
11483+
4f0767ce 11484+out:
1facf9fc 11485+ AuTraceErrPtr(p);
11486+ return p;
11487+}
11488+
11489+static
027c5e7a
AM
11490+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
11491+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11492+{
11493+ struct dentry *dentry, *h_parent, *root;
11494+ struct super_block *h_sb;
11495+ char *pathname, *p;
11496+ struct vfsmount *h_mnt;
11497+ struct au_branch *br;
11498+ int err;
11499+ struct path path;
11500+
027c5e7a 11501+ br = au_sbr(sb, nsi_lock->bindex);
86dc4139 11502+ h_mnt = au_br_mnt(br);
1facf9fc 11503+ h_sb = h_mnt->mnt_sb;
11504+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
5afbbe0d 11505+ lockdep_off();
1facf9fc 11506+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
11507+ fh_len - Fh_tail, fh[Fh_h_type],
11508+ h_acceptable, /*context*/NULL);
5afbbe0d 11509+ lockdep_on();
1facf9fc 11510+ dentry = h_parent;
11511+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
11512+ AuWarn1("%s decode_fh failed, %ld\n",
11513+ au_sbtype(h_sb), PTR_ERR(h_parent));
11514+ goto out;
11515+ }
11516+ dentry = NULL;
11517+ if (unlikely(au_test_anon(h_parent))) {
11518+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
11519+ au_sbtype(h_sb));
11520+ goto out_h_parent;
11521+ }
11522+
11523+ dentry = ERR_PTR(-ENOMEM);
11524+ pathname = (void *)__get_free_page(GFP_NOFS);
11525+ if (unlikely(!pathname))
11526+ goto out_h_parent;
11527+
11528+ root = sb->s_root;
11529+ path.mnt = h_mnt;
11530+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 11531+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 11532+ di_read_unlock(root, !AuLock_IR);
11533+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
11534+ dentry = (void *)p;
11535+ if (IS_ERR(p))
11536+ goto out_pathname;
11537+
11538+ si_read_unlock(sb);
11539+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
11540+ dentry = ERR_PTR(err);
11541+ if (unlikely(err))
11542+ goto out_relock;
11543+
11544+ dentry = ERR_PTR(-ENOENT);
11545+ AuDebugOn(au_test_anon(path.dentry));
5527c038 11546+ if (unlikely(d_really_is_negative(path.dentry)))
1facf9fc 11547+ goto out_path;
11548+
5527c038 11549+ if (ino != d_inode(path.dentry)->i_ino)
1facf9fc 11550+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
11551+ else
11552+ dentry = dget(path.dentry);
11553+
4f0767ce 11554+out_path:
1facf9fc 11555+ path_put(&path);
4f0767ce 11556+out_relock:
1facf9fc 11557+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
11558+ if (!IS_ERR(dentry)) {
11559+ dput(dentry);
11560+ dentry = ERR_PTR(-ESTALE);
11561+ }
4f0767ce 11562+out_pathname:
f0c0a007 11563+ au_delayed_free_page((unsigned long)pathname);
4f0767ce 11564+out_h_parent:
1facf9fc 11565+ dput(h_parent);
4f0767ce 11566+out:
1facf9fc 11567+ AuTraceErrPtr(dentry);
11568+ return dentry;
11569+}
11570+
11571+/* ---------------------------------------------------------------------- */
11572+
11573+static struct dentry *
11574+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
11575+ int fh_type)
11576+{
11577+ struct dentry *dentry;
11578+ __u32 *fh = fid->raw;
027c5e7a 11579+ struct au_branch *br;
1facf9fc 11580+ ino_t ino, dir_ino;
1facf9fc 11581+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 11582+ .force_lock = 0
11583+ };
11584+
1facf9fc 11585+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
11586+ /* it should never happen, but the file handle is unreliable */
11587+ if (unlikely(fh_len < Fh_tail))
11588+ goto out;
11589+ nsi_lock.sigen = fh[Fh_sigen];
11590+ nsi_lock.br_id = fh[Fh_br_id];
11591+
1facf9fc 11592+ /* branch id may be wrapped around */
027c5e7a
AM
11593+ br = NULL;
11594+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 11595+ goto out;
11596+ nsi_lock.force_lock = 1;
11597+
11598+ /* is this inode still cached? */
11599+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
11600+ /* it should never happen */
11601+ if (unlikely(ino == AUFS_ROOT_INO))
8cdd5066 11602+ goto out_unlock;
4a4d8108 11603+
1facf9fc 11604+ dir_ino = decode_ino(fh + Fh_dir_ino);
11605+ dentry = decode_by_ino(sb, ino, dir_ino);
11606+ if (IS_ERR(dentry))
11607+ goto out_unlock;
11608+ if (dentry)
11609+ goto accept;
11610+
11611+ /* is the parent dir cached? */
027c5e7a 11612+ br = au_sbr(sb, nsi_lock.bindex);
5afbbe0d 11613+ au_br_get(br);
1facf9fc 11614+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
11615+ if (IS_ERR(dentry))
11616+ goto out_unlock;
11617+ if (dentry)
11618+ goto accept;
11619+
11620+ /* lookup path */
027c5e7a 11621+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 11622+ if (IS_ERR(dentry))
11623+ goto out_unlock;
11624+ if (unlikely(!dentry))
11625+ /* todo?: make it ESTALE */
11626+ goto out_unlock;
11627+
4f0767ce 11628+accept:
027c5e7a 11629+ if (!au_digen_test(dentry, au_sigen(sb))
5527c038 11630+ && d_inode(dentry)->i_generation == fh[Fh_igen])
1facf9fc 11631+ goto out_unlock; /* success */
11632+
11633+ dput(dentry);
11634+ dentry = ERR_PTR(-ESTALE);
4f0767ce 11635+out_unlock:
027c5e7a 11636+ if (br)
5afbbe0d 11637+ au_br_put(br);
1facf9fc 11638+ si_read_unlock(sb);
4f0767ce 11639+out:
1facf9fc 11640+ AuTraceErrPtr(dentry);
11641+ return dentry;
11642+}
11643+
11644+#if 0 /* reserved for future use */
11645+/* support subtreecheck option */
11646+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
11647+ int fh_len, int fh_type)
11648+{
11649+ struct dentry *parent;
11650+ __u32 *fh = fid->raw;
11651+ ino_t dir_ino;
11652+
11653+ dir_ino = decode_ino(fh + Fh_dir_ino);
11654+ parent = decode_by_ino(sb, dir_ino, 0);
11655+ if (IS_ERR(parent))
11656+ goto out;
11657+ if (!parent)
11658+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
11659+ dir_ino, fh, fh_len);
11660+
4f0767ce 11661+out:
1facf9fc 11662+ AuTraceErrPtr(parent);
11663+ return parent;
11664+}
11665+#endif
11666+
11667+/* ---------------------------------------------------------------------- */
11668+
0c3ec466
AM
11669+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
11670+ struct inode *dir)
1facf9fc 11671+{
11672+ int err;
0c3ec466 11673+ aufs_bindex_t bindex;
1facf9fc 11674+ struct super_block *sb, *h_sb;
0c3ec466
AM
11675+ struct dentry *dentry, *parent, *h_parent;
11676+ struct inode *h_dir;
1facf9fc 11677+ struct au_branch *br;
11678+
1facf9fc 11679+ err = -ENOSPC;
11680+ if (unlikely(*max_len <= Fh_tail)) {
11681+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
11682+ goto out;
11683+ }
11684+
11685+ err = FILEID_ROOT;
0c3ec466
AM
11686+ if (inode->i_ino == AUFS_ROOT_INO) {
11687+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
1facf9fc 11688+ goto out;
11689+ }
11690+
1facf9fc 11691+ h_parent = NULL;
0c3ec466
AM
11692+ sb = inode->i_sb;
11693+ err = si_read_lock(sb, AuLock_FLUSH);
027c5e7a
AM
11694+ if (unlikely(err))
11695+ goto out;
11696+
1facf9fc 11697+#ifdef CONFIG_AUFS_DEBUG
11698+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11699+ AuWarn1("NFS-exporting requires xino\n");
11700+#endif
027c5e7a 11701+ err = -EIO;
0c3ec466
AM
11702+ parent = NULL;
11703+ ii_read_lock_child(inode);
5afbbe0d 11704+ bindex = au_ibtop(inode);
0c3ec466 11705+ if (!dir) {
c1595e42 11706+ dentry = d_find_any_alias(inode);
0c3ec466
AM
11707+ if (unlikely(!dentry))
11708+ goto out_unlock;
11709+ AuDebugOn(au_test_anon(dentry));
11710+ parent = dget_parent(dentry);
11711+ dput(dentry);
11712+ if (unlikely(!parent))
11713+ goto out_unlock;
5527c038
JR
11714+ if (d_really_is_positive(parent))
11715+ dir = d_inode(parent);
1facf9fc 11716+ }
0c3ec466
AM
11717+
11718+ ii_read_lock_parent(dir);
11719+ h_dir = au_h_iptr(dir, bindex);
11720+ ii_read_unlock(dir);
11721+ if (unlikely(!h_dir))
11722+ goto out_parent;
c1595e42 11723+ h_parent = d_find_any_alias(h_dir);
1facf9fc 11724+ if (unlikely(!h_parent))
0c3ec466 11725+ goto out_hparent;
1facf9fc 11726+
11727+ err = -EPERM;
11728+ br = au_sbr(sb, bindex);
86dc4139 11729+ h_sb = au_br_sb(br);
1facf9fc 11730+ if (unlikely(!h_sb->s_export_op)) {
11731+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
0c3ec466 11732+ goto out_hparent;
1facf9fc 11733+ }
11734+
11735+ fh[Fh_br_id] = br->br_id;
11736+ fh[Fh_sigen] = au_sigen(sb);
11737+ encode_ino(fh + Fh_ino, inode->i_ino);
0c3ec466 11738+ encode_ino(fh + Fh_dir_ino, dir->i_ino);
1facf9fc 11739+ fh[Fh_igen] = inode->i_generation;
11740+
11741+ *max_len -= Fh_tail;
11742+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
11743+ max_len,
11744+ /*connectable or subtreecheck*/0);
11745+ err = fh[Fh_h_type];
11746+ *max_len += Fh_tail;
11747+ /* todo: macros? */
1716fcea 11748+ if (err != FILEID_INVALID)
1facf9fc 11749+ err = 99;
11750+ else
11751+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
11752+
0c3ec466 11753+out_hparent:
1facf9fc 11754+ dput(h_parent);
0c3ec466 11755+out_parent:
1facf9fc 11756+ dput(parent);
0c3ec466
AM
11757+out_unlock:
11758+ ii_read_unlock(inode);
11759+ si_read_unlock(sb);
4f0767ce 11760+out:
1facf9fc 11761+ if (unlikely(err < 0))
1716fcea 11762+ err = FILEID_INVALID;
1facf9fc 11763+ return err;
11764+}
11765+
11766+/* ---------------------------------------------------------------------- */
11767+
4a4d8108
AM
11768+static int aufs_commit_metadata(struct inode *inode)
11769+{
11770+ int err;
11771+ aufs_bindex_t bindex;
11772+ struct super_block *sb;
11773+ struct inode *h_inode;
11774+ int (*f)(struct inode *inode);
11775+
11776+ sb = inode->i_sb;
e49829fe 11777+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 11778+ ii_write_lock_child(inode);
5afbbe0d 11779+ bindex = au_ibtop(inode);
4a4d8108
AM
11780+ AuDebugOn(bindex < 0);
11781+ h_inode = au_h_iptr(inode, bindex);
11782+
11783+ f = h_inode->i_sb->s_export_op->commit_metadata;
11784+ if (f)
11785+ err = f(h_inode);
11786+ else {
11787+ struct writeback_control wbc = {
11788+ .sync_mode = WB_SYNC_ALL,
11789+ .nr_to_write = 0 /* metadata only */
11790+ };
11791+
11792+ err = sync_inode(h_inode, &wbc);
11793+ }
11794+
11795+ au_cpup_attr_timesizes(inode);
11796+ ii_write_unlock(inode);
11797+ si_read_unlock(sb);
11798+ return err;
11799+}
11800+
11801+/* ---------------------------------------------------------------------- */
11802+
1facf9fc 11803+static struct export_operations aufs_export_op = {
4a4d8108 11804+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 11805+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
11806+ .encode_fh = aufs_encode_fh,
11807+ .commit_metadata = aufs_commit_metadata
1facf9fc 11808+};
11809+
11810+void au_export_init(struct super_block *sb)
11811+{
11812+ struct au_sbinfo *sbinfo;
11813+ __u32 u;
11814+
5afbbe0d
AM
11815+ BUILD_BUG_ON_MSG(IS_BUILTIN(CONFIG_AUFS_FS)
11816+ && IS_MODULE(CONFIG_EXPORTFS),
11817+ AUFS_NAME ": unsupported configuration "
11818+ "CONFIG_EXPORTFS=m and CONFIG_AUFS_FS=y");
11819+
1facf9fc 11820+ sb->s_export_op = &aufs_export_op;
11821+ sbinfo = au_sbi(sb);
11822+ sbinfo->si_xigen = NULL;
11823+ get_random_bytes(&u, sizeof(u));
11824+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
11825+ atomic_set(&sbinfo->si_xigen_next, u);
11826+}
076b876e
AM
11827diff -urN /usr/share/empty/fs/aufs/fhsm.c linux/fs/aufs/fhsm.c
11828--- /usr/share/empty/fs/aufs/fhsm.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 11829+++ linux/fs/aufs/fhsm.c 2016-10-09 16:55:36.489368218 +0200
c1595e42 11830@@ -0,0 +1,426 @@
076b876e 11831+/*
8cdd5066 11832+ * Copyright (C) 2011-2016 Junjiro R. Okajima
076b876e
AM
11833+ *
11834+ * This program, aufs is free software; you can redistribute it and/or modify
11835+ * it under the terms of the GNU General Public License as published by
11836+ * the Free Software Foundation; either version 2 of the License, or
11837+ * (at your option) any later version.
11838+ *
11839+ * This program is distributed in the hope that it will be useful,
11840+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11841+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11842+ * GNU General Public License for more details.
11843+ *
11844+ * You should have received a copy of the GNU General Public License
11845+ * along with this program; if not, write to the Free Software
11846+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11847+ */
11848+
11849+/*
11850+ * File-based Hierarchy Storage Management
11851+ */
11852+
11853+#include <linux/anon_inodes.h>
11854+#include <linux/poll.h>
11855+#include <linux/seq_file.h>
11856+#include <linux/statfs.h>
11857+#include "aufs.h"
11858+
c1595e42
JR
11859+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
11860+{
11861+ struct au_sbinfo *sbinfo;
11862+ struct au_fhsm *fhsm;
11863+
11864+ SiMustAnyLock(sb);
11865+
11866+ sbinfo = au_sbi(sb);
11867+ fhsm = &sbinfo->si_fhsm;
11868+ AuDebugOn(!fhsm);
11869+ return fhsm->fhsm_bottom;
11870+}
11871+
11872+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
11873+{
11874+ struct au_sbinfo *sbinfo;
11875+ struct au_fhsm *fhsm;
11876+
11877+ SiMustWriteLock(sb);
11878+
11879+ sbinfo = au_sbi(sb);
11880+ fhsm = &sbinfo->si_fhsm;
11881+ AuDebugOn(!fhsm);
11882+ fhsm->fhsm_bottom = bindex;
11883+}
11884+
11885+/* ---------------------------------------------------------------------- */
11886+
076b876e
AM
11887+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
11888+{
11889+ struct au_br_fhsm *bf;
11890+
11891+ bf = br->br_fhsm;
11892+ MtxMustLock(&bf->bf_lock);
11893+
11894+ return !bf->bf_readable
11895+ || time_after(jiffies,
11896+ bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
11897+}
11898+
11899+/* ---------------------------------------------------------------------- */
11900+
11901+static void au_fhsm_notify(struct super_block *sb, int val)
11902+{
11903+ struct au_sbinfo *sbinfo;
11904+ struct au_fhsm *fhsm;
11905+
11906+ SiMustAnyLock(sb);
11907+
11908+ sbinfo = au_sbi(sb);
11909+ fhsm = &sbinfo->si_fhsm;
11910+ if (au_fhsm_pid(fhsm)
11911+ && atomic_read(&fhsm->fhsm_readable) != -1) {
11912+ atomic_set(&fhsm->fhsm_readable, val);
11913+ if (val)
11914+ wake_up(&fhsm->fhsm_wqh);
11915+ }
11916+}
11917+
11918+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
11919+ struct aufs_stfs *rstfs, int do_lock, int do_notify)
11920+{
11921+ int err;
11922+ struct au_branch *br;
11923+ struct au_br_fhsm *bf;
11924+
11925+ br = au_sbr(sb, bindex);
11926+ AuDebugOn(au_br_rdonly(br));
11927+ bf = br->br_fhsm;
11928+ AuDebugOn(!bf);
11929+
11930+ if (do_lock)
11931+ mutex_lock(&bf->bf_lock);
11932+ else
11933+ MtxMustLock(&bf->bf_lock);
11934+
11935+ /* refresh bf->bf_stfs from the branch; sb->s_root for NFS is unreliable */
11936+ err = au_br_stfs(br, &bf->bf_stfs);
11937+ if (unlikely(err)) { /* report the error code first, then the branch index */
11938+ AuErr1("FHSM failed (%d), b%d, ignored.\n", err, bindex);
11939+ goto out;
11940+ }
11941+
11942+ bf->bf_jiffy = jiffies;
11943+ bf->bf_readable = 1;
11944+ if (do_notify)
11945+ au_fhsm_notify(sb, /*val*/1);
11946+ if (rstfs)
11947+ *rstfs = bf->bf_stfs;
11948+
11949+out:
11950+ if (do_lock)
11951+ mutex_unlock(&bf->bf_lock);
11952+ au_fhsm_notify(sb, /*val*/1);
11953+
11954+ return err;
11955+}
11956+
11957+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
11958+{
11959+ int err;
076b876e
AM
11960+ struct au_sbinfo *sbinfo;
11961+ struct au_fhsm *fhsm;
11962+ struct au_branch *br;
11963+ struct au_br_fhsm *bf;
11964+
11965+ AuDbg("b%d, force %d\n", bindex, force);
11966+ SiMustAnyLock(sb);
11967+
11968+ sbinfo = au_sbi(sb);
11969+ fhsm = &sbinfo->si_fhsm;
c1595e42
JR
11970+ if (!au_ftest_si(sbinfo, FHSM)
11971+ || fhsm->fhsm_bottom == bindex)
076b876e
AM
11972+ return;
11973+
11974+ br = au_sbr(sb, bindex);
11975+ bf = br->br_fhsm;
11976+ AuDebugOn(!bf);
11977+ mutex_lock(&bf->bf_lock);
11978+ if (force
11979+ || au_fhsm_pid(fhsm)
11980+ || au_fhsm_test_jiffy(sbinfo, br))
11981+ err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
11982+ /*do_notify*/1);
11983+ mutex_unlock(&bf->bf_lock);
11984+}
11985+
11986+void au_fhsm_wrote_all(struct super_block *sb, int force)
11987+{
5afbbe0d 11988+ aufs_bindex_t bindex, bbot;
076b876e
AM
11989+ struct au_branch *br;
11990+
11991+ /* exclude the bottom */
5afbbe0d
AM
11992+ bbot = au_fhsm_bottom(sb);
11993+ for (bindex = 0; bindex < bbot; bindex++) {
076b876e
AM
11994+ br = au_sbr(sb, bindex);
11995+ if (au_br_fhsm(br->br_perm))
11996+ au_fhsm_wrote(sb, bindex, force);
11997+ }
11998+}
11999+
12000+/* ---------------------------------------------------------------------- */
12001+
12002+static unsigned int au_fhsm_poll(struct file *file,
12003+ struct poll_table_struct *wait)
12004+{
12005+ unsigned int mask;
12006+ struct au_sbinfo *sbinfo;
12007+ struct au_fhsm *fhsm;
12008+
12009+ mask = 0;
12010+ sbinfo = file->private_data;
12011+ fhsm = &sbinfo->si_fhsm;
12012+ poll_wait(file, &fhsm->fhsm_wqh, wait);
12013+ if (atomic_read(&fhsm->fhsm_readable))
12014+ mask = POLLIN /* | POLLRDNORM */;
12015+
12016+ AuTraceErr((int)mask);
12017+ return mask;
12018+}
12019+
12020+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
12021+ struct aufs_stfs *stfs, __s16 brid)
12022+{
12023+ int err;
12024+
12025+ err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
12026+ if (!err)
12027+ err = __put_user(brid, &stbr->brid);
12028+ if (unlikely(err))
12029+ err = -EFAULT;
12030+
12031+ return err;
12032+}
12033+
12034+static ssize_t au_fhsm_do_read(struct super_block *sb,
12035+ struct aufs_stbr __user *stbr, size_t count)
12036+{
12037+ ssize_t err;
12038+ int nstbr;
5afbbe0d 12039+ aufs_bindex_t bindex, bbot;
076b876e
AM
12040+ struct au_branch *br;
12041+ struct au_br_fhsm *bf;
12042+
12043+ /* except the bottom branch */
12044+ err = 0;
12045+ nstbr = 0;
5afbbe0d
AM
12046+ bbot = au_fhsm_bottom(sb);
12047+ for (bindex = 0; !err && bindex < bbot; bindex++) {
076b876e
AM
12048+ br = au_sbr(sb, bindex);
12049+ if (!au_br_fhsm(br->br_perm))
12050+ continue;
12051+
12052+ bf = br->br_fhsm;
12053+ mutex_lock(&bf->bf_lock);
12054+ if (bf->bf_readable) {
12055+ err = -EFAULT;
12056+ if (count >= sizeof(*stbr))
12057+ err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
12058+ br->br_id);
12059+ if (!err) {
12060+ bf->bf_readable = 0;
12061+ count -= sizeof(*stbr);
12062+ nstbr++;
12063+ }
12064+ }
12065+ mutex_unlock(&bf->bf_lock);
12066+ }
12067+ if (!err)
12068+ err = sizeof(*stbr) * nstbr;
12069+
12070+ return err;
12071+}
12072+
12073+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
12074+ loff_t *pos)
12075+{
12076+ ssize_t err;
12077+ int readable;
5afbbe0d 12078+ aufs_bindex_t nfhsm, bindex, bbot;
076b876e
AM
12079+ struct au_sbinfo *sbinfo;
12080+ struct au_fhsm *fhsm;
12081+ struct au_branch *br;
12082+ struct super_block *sb;
12083+
12084+ err = 0;
12085+ sbinfo = file->private_data;
12086+ fhsm = &sbinfo->si_fhsm;
12087+need_data:
12088+ spin_lock_irq(&fhsm->fhsm_wqh.lock);
12089+ if (!atomic_read(&fhsm->fhsm_readable)) {
12090+ if (vfsub_file_flags(file) & O_NONBLOCK)
12091+ err = -EAGAIN;
12092+ else
12093+ err = wait_event_interruptible_locked_irq
12094+ (fhsm->fhsm_wqh,
12095+ atomic_read(&fhsm->fhsm_readable));
12096+ }
12097+ spin_unlock_irq(&fhsm->fhsm_wqh.lock);
12098+ if (unlikely(err))
12099+ goto out;
12100+
12101+ /* sb may already be dead */
12102+ au_rw_read_lock(&sbinfo->si_rwsem);
12103+ readable = atomic_read(&fhsm->fhsm_readable);
12104+ if (readable > 0) {
12105+ sb = sbinfo->si_sb;
12106+ AuDebugOn(!sb);
12107+ /* exclude the bottom branch */
12108+ nfhsm = 0;
5afbbe0d
AM
12109+ bbot = au_fhsm_bottom(sb);
12110+ for (bindex = 0; bindex < bbot; bindex++) {
076b876e
AM
12111+ br = au_sbr(sb, bindex);
12112+ if (au_br_fhsm(br->br_perm))
12113+ nfhsm++;
12114+ }
12115+ err = -EMSGSIZE;
12116+ if (nfhsm * sizeof(struct aufs_stbr) <= count) {
12117+ atomic_set(&fhsm->fhsm_readable, 0);
12118+ err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
12119+ count);
12120+ }
12121+ }
12122+ au_rw_read_unlock(&sbinfo->si_rwsem);
12123+ if (!readable)
12124+ goto need_data;
12125+
12126+out:
12127+ return err;
12128+}
12129+
12130+static int au_fhsm_release(struct inode *inode, struct file *file)
12131+{
12132+ struct au_sbinfo *sbinfo;
12133+ struct au_fhsm *fhsm;
12134+
12135+ /* sb may already be dead */
12136+ sbinfo = file->private_data;
12137+ fhsm = &sbinfo->si_fhsm;
12138+ spin_lock(&fhsm->fhsm_spin);
12139+ fhsm->fhsm_pid = 0;
12140+ spin_unlock(&fhsm->fhsm_spin);
12141+ kobject_put(&sbinfo->si_kobj);
12142+
12143+ return 0;
12144+}
12145+
12146+static const struct file_operations au_fhsm_fops = {
12147+ .owner = THIS_MODULE,
12148+ .llseek = noop_llseek,
12149+ .read = au_fhsm_read,
12150+ .poll = au_fhsm_poll,
12151+ .release = au_fhsm_release
12152+};
12153+
12154+int au_fhsm_fd(struct super_block *sb, int oflags)
12155+{
12156+ int err, fd;
12157+ struct au_sbinfo *sbinfo;
12158+ struct au_fhsm *fhsm;
12159+
12160+ err = -EPERM;
12161+ if (unlikely(!capable(CAP_SYS_ADMIN)))
12162+ goto out;
12163+
12164+ err = -EINVAL;
12165+ if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
12166+ goto out;
12167+
12168+ err = 0;
12169+ sbinfo = au_sbi(sb);
12170+ fhsm = &sbinfo->si_fhsm;
12171+ spin_lock(&fhsm->fhsm_spin);
12172+ if (!fhsm->fhsm_pid)
12173+ fhsm->fhsm_pid = current->pid;
12174+ else
12175+ err = -EBUSY;
12176+ spin_unlock(&fhsm->fhsm_spin);
12177+ if (unlikely(err))
12178+ goto out;
12179+
12180+ oflags |= O_RDONLY;
12181+ /* oflags |= FMODE_NONOTIFY; */
12182+ fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
12183+ err = fd;
12184+ if (unlikely(fd < 0))
12185+ goto out_pid;
12186+
12187+ /* succeed regardless of the 'fhsm' status */
12188+ kobject_get(&sbinfo->si_kobj);
12189+ si_noflush_read_lock(sb);
12190+ if (au_ftest_si(sbinfo, FHSM))
12191+ au_fhsm_wrote_all(sb, /*force*/0);
12192+ si_read_unlock(sb);
12193+ goto out; /* success */
12194+
12195+out_pid:
12196+ spin_lock(&fhsm->fhsm_spin);
12197+ fhsm->fhsm_pid = 0;
12198+ spin_unlock(&fhsm->fhsm_spin);
12199+out:
12200+ AuTraceErr(err);
12201+ return err;
12202+}
12203+
12204+/* ---------------------------------------------------------------------- */
12205+
12206+int au_fhsm_br_alloc(struct au_branch *br)
12207+{
12208+ int err;
12209+
12210+ err = 0;
12211+ br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
12212+ if (br->br_fhsm)
12213+ au_br_fhsm_init(br->br_fhsm);
12214+ else
12215+ err = -ENOMEM;
12216+
12217+ return err;
12218+}
12219+
12220+/* ---------------------------------------------------------------------- */
12221+
12222+void au_fhsm_fin(struct super_block *sb)
12223+{
12224+ au_fhsm_notify(sb, /*val*/-1);
12225+}
12226+
12227+void au_fhsm_init(struct au_sbinfo *sbinfo)
12228+{
12229+ struct au_fhsm *fhsm;
12230+
12231+ fhsm = &sbinfo->si_fhsm;
12232+ spin_lock_init(&fhsm->fhsm_spin);
12233+ init_waitqueue_head(&fhsm->fhsm_wqh);
12234+ atomic_set(&fhsm->fhsm_readable, 0);
12235+ fhsm->fhsm_expire
12236+ = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
c1595e42 12237+ fhsm->fhsm_bottom = -1;
076b876e
AM
12238+}
12239+
12240+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
12241+{
12242+ sbinfo->si_fhsm.fhsm_expire
12243+ = msecs_to_jiffies(sec * MSEC_PER_SEC);
12244+}
12245+
12246+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
12247+{
12248+ unsigned int u;
12249+
12250+ if (!au_ftest_si(sbinfo, FHSM))
12251+ return;
12252+
12253+ u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
12254+ if (u != AUFS_FHSM_CACHE_DEF_SEC)
12255+ seq_printf(seq, ",fhsm_sec=%u", u);
12256+}
7f207e10
AM
12257diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
12258--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
12259+++ linux/fs/aufs/file.c 2016-10-09 16:55:38.889431135 +0200
12260@@ -0,0 +1,857 @@
1facf9fc 12261+/*
8cdd5066 12262+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 12263+ *
12264+ * This program, aufs is free software; you can redistribute it and/or modify
12265+ * it under the terms of the GNU General Public License as published by
12266+ * the Free Software Foundation; either version 2 of the License, or
12267+ * (at your option) any later version.
dece6358
AM
12268+ *
12269+ * This program is distributed in the hope that it will be useful,
12270+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12271+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12272+ * GNU General Public License for more details.
12273+ *
12274+ * You should have received a copy of the GNU General Public License
523b37e3 12275+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 12276+ */
12277+
12278+/*
4a4d8108 12279+ * handling file/dir, and address_space operation
1facf9fc 12280+ */
12281+
7eafdf33
AM
12282+#ifdef CONFIG_AUFS_DEBUG
12283+#include <linux/migrate.h>
12284+#endif
4a4d8108 12285+#include <linux/pagemap.h>
1facf9fc 12286+#include "aufs.h"
12287+
4a4d8108
AM
12288+/* drop flags for writing */
12289+unsigned int au_file_roflags(unsigned int flags)
12290+{
12291+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
12292+ flags |= O_RDONLY | O_NOATIME;
12293+ return flags;
12294+}
12295+
12296+/* common functions to regular file and dir */
12297+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 12298+ struct file *file, int force_wr)
1facf9fc 12299+{
1308ab2a 12300+ struct file *h_file;
4a4d8108
AM
12301+ struct dentry *h_dentry;
12302+ struct inode *h_inode;
12303+ struct super_block *sb;
12304+ struct au_branch *br;
12305+ struct path h_path;
b912730e 12306+ int err;
1facf9fc 12307+
4a4d8108
AM
12308+ /* a race condition can happen between open and unlink/rmdir */
12309+ h_file = ERR_PTR(-ENOENT);
12310+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 12311+ if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
4a4d8108 12312+ goto out;
5527c038 12313+ h_inode = d_inode(h_dentry);
027c5e7a
AM
12314+ spin_lock(&h_dentry->d_lock);
12315+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
5527c038 12316+ /* || !d_inode(dentry)->i_nlink */
027c5e7a
AM
12317+ ;
12318+ spin_unlock(&h_dentry->d_lock);
12319+ if (unlikely(err))
4a4d8108 12320+ goto out;
1facf9fc 12321+
4a4d8108
AM
12322+ sb = dentry->d_sb;
12323+ br = au_sbr(sb, bindex);
b912730e
AM
12324+ err = au_br_test_oflag(flags, br);
12325+ h_file = ERR_PTR(err);
12326+ if (unlikely(err))
027c5e7a 12327+ goto out;
1facf9fc 12328+
4a4d8108 12329+ /* drop flags for writing */
5527c038 12330+ if (au_test_ro(sb, bindex, d_inode(dentry))) {
392086de
AM
12331+ if (force_wr && !(flags & O_WRONLY))
12332+ force_wr = 0;
4a4d8108 12333+ flags = au_file_roflags(flags);
392086de
AM
12334+ if (force_wr) {
12335+ h_file = ERR_PTR(-EROFS);
12336+ flags = au_file_roflags(flags);
12337+ if (unlikely(vfsub_native_ro(h_inode)
12338+ || IS_APPEND(h_inode)))
12339+ goto out;
12340+ flags &= ~O_ACCMODE;
12341+ flags |= O_WRONLY;
12342+ }
12343+ }
4a4d8108 12344+ flags &= ~O_CREAT;
5afbbe0d 12345+ au_br_get(br);
4a4d8108 12346+ h_path.dentry = h_dentry;
86dc4139 12347+ h_path.mnt = au_br_mnt(br);
38d290e6 12348+ h_file = vfsub_dentry_open(&h_path, flags);
4a4d8108
AM
12349+ if (IS_ERR(h_file))
12350+ goto out_br;
dece6358 12351+
b912730e 12352+ if (flags & __FMODE_EXEC) {
4a4d8108
AM
12353+ err = deny_write_access(h_file);
12354+ if (unlikely(err)) {
12355+ fput(h_file);
12356+ h_file = ERR_PTR(err);
12357+ goto out_br;
12358+ }
12359+ }
953406b4 12360+ fsnotify_open(h_file);
4a4d8108 12361+ goto out; /* success */
1facf9fc 12362+
4f0767ce 12363+out_br:
5afbbe0d 12364+ au_br_put(br);
4f0767ce 12365+out:
4a4d8108
AM
12366+ return h_file;
12367+}
1308ab2a 12368+
076b876e
AM
12369+static int au_cmoo(struct dentry *dentry)
12370+{
12371+ int err, cmoo;
12372+ unsigned int udba;
12373+ struct path h_path;
12374+ struct au_pin pin;
12375+ struct au_cp_generic cpg = {
12376+ .dentry = dentry,
12377+ .bdst = -1,
12378+ .bsrc = -1,
12379+ .len = -1,
12380+ .pin = &pin,
12381+ .flags = AuCpup_DTIME | AuCpup_HOPEN
12382+ };
7e9cd9fe 12383+ struct inode *delegated;
076b876e
AM
12384+ struct super_block *sb;
12385+ struct au_sbinfo *sbinfo;
12386+ struct au_fhsm *fhsm;
12387+ pid_t pid;
12388+ struct au_branch *br;
12389+ struct dentry *parent;
12390+ struct au_hinode *hdir;
12391+
12392+ DiMustWriteLock(dentry);
5527c038 12393+ IiMustWriteLock(d_inode(dentry));
076b876e
AM
12394+
12395+ err = 0;
12396+ if (IS_ROOT(dentry))
12397+ goto out;
5afbbe0d 12398+ cpg.bsrc = au_dbtop(dentry);
076b876e
AM
12399+ if (!cpg.bsrc)
12400+ goto out;
12401+
12402+ sb = dentry->d_sb;
12403+ sbinfo = au_sbi(sb);
12404+ fhsm = &sbinfo->si_fhsm;
12405+ pid = au_fhsm_pid(fhsm);
12406+ if (pid
12407+ && (current->pid == pid
12408+ || current->real_parent->pid == pid))
12409+ goto out;
12410+
12411+ br = au_sbr(sb, cpg.bsrc);
12412+ cmoo = au_br_cmoo(br->br_perm);
12413+ if (!cmoo)
12414+ goto out;
7e9cd9fe 12415+ if (!d_is_reg(dentry))
076b876e
AM
12416+ cmoo &= AuBrAttr_COO_ALL;
12417+ if (!cmoo)
12418+ goto out;
12419+
12420+ parent = dget_parent(dentry);
12421+ di_write_lock_parent(parent);
12422+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
12423+ cpg.bdst = err;
12424+ if (unlikely(err < 0)) {
12425+ err = 0; /* there is no upper writable branch */
12426+ goto out_dgrade;
12427+ }
12428+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
12429+
12430+ /* do not respect the coo attrib for the target branch */
12431+ err = au_cpup_dirs(dentry, cpg.bdst);
12432+ if (unlikely(err))
12433+ goto out_dgrade;
12434+
12435+ di_downgrade_lock(parent, AuLock_IR);
12436+ udba = au_opt_udba(sb);
12437+ err = au_pin(&pin, dentry, cpg.bdst, udba,
12438+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12439+ if (unlikely(err))
12440+ goto out_parent;
12441+
12442+ err = au_sio_cpup_simple(&cpg);
12443+ au_unpin(&pin);
12444+ if (unlikely(err))
12445+ goto out_parent;
12446+ if (!(cmoo & AuBrWAttr_MOO))
12447+ goto out_parent; /* success */
12448+
12449+ err = au_pin(&pin, dentry, cpg.bsrc, udba,
12450+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12451+ if (unlikely(err))
12452+ goto out_parent;
12453+
12454+ h_path.mnt = au_br_mnt(br);
12455+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
5527c038 12456+ hdir = au_hi(d_inode(parent), cpg.bsrc);
076b876e
AM
12457+ delegated = NULL;
12458+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
12459+ au_unpin(&pin);
12460+ /* todo: keep h_dentry or not? */
12461+ if (unlikely(err == -EWOULDBLOCK)) {
12462+ pr_warn("cannot retry for NFSv4 delegation"
12463+ " for an internal unlink\n");
12464+ iput(delegated);
12465+ }
12466+ if (unlikely(err)) {
12467+ pr_err("unlink %pd after coo failed (%d), ignored\n",
12468+ dentry, err);
12469+ err = 0;
12470+ }
12471+ goto out_parent; /* success */
12472+
12473+out_dgrade:
12474+ di_downgrade_lock(parent, AuLock_IR);
12475+out_parent:
12476+ di_read_unlock(parent, AuLock_IR);
12477+ dput(parent);
12478+out:
12479+ AuTraceErr(err);
12480+ return err;
12481+}
12482+
b912730e 12483+int au_do_open(struct file *file, struct au_do_open_args *args)
1facf9fc 12484+{
b912730e 12485+ int err, no_lock = args->no_lock;
1facf9fc 12486+ struct dentry *dentry;
076b876e 12487+ struct au_finfo *finfo;
1308ab2a 12488+
b912730e
AM
12489+ if (!no_lock)
12490+ err = au_finfo_init(file, args->fidir);
12491+ else {
12492+ lockdep_off();
12493+ err = au_finfo_init(file, args->fidir);
12494+ lockdep_on();
12495+ }
4a4d8108
AM
12496+ if (unlikely(err))
12497+ goto out;
1facf9fc 12498+
2000de60 12499+ dentry = file->f_path.dentry;
b912730e
AM
12500+ AuDebugOn(IS_ERR_OR_NULL(dentry));
12501+ if (!no_lock) {
12502+ di_write_lock_child(dentry);
12503+ err = au_cmoo(dentry);
12504+ di_downgrade_lock(dentry, AuLock_IR);
12505+ if (!err)
12506+ err = args->open(file, vfsub_file_flags(file), NULL);
12507+ di_read_unlock(dentry, AuLock_IR);
12508+ } else {
12509+ err = au_cmoo(dentry);
12510+ if (!err)
12511+ err = args->open(file, vfsub_file_flags(file),
12512+ args->h_file);
5afbbe0d 12513+ if (!err && au_fbtop(file) != au_dbtop(dentry))
b912730e
AM
12514+ /*
12515+ * cmoo happens after h_file was opened.
12516+ * need to refresh file later.
12517+ */
12518+ atomic_dec(&au_fi(file)->fi_generation);
12519+ }
1facf9fc 12520+
076b876e
AM
12521+ finfo = au_fi(file);
12522+ if (!err) {
12523+ finfo->fi_file = file;
12524+ au_sphl_add(&finfo->fi_hlist,
2000de60 12525+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
076b876e 12526+ }
b912730e
AM
12527+ if (!no_lock)
12528+ fi_write_unlock(file);
12529+ else {
12530+ lockdep_off();
12531+ fi_write_unlock(file);
12532+ lockdep_on();
12533+ }
4a4d8108 12534+ if (unlikely(err)) {
076b876e 12535+ finfo->fi_hdir = NULL;
f0c0a007 12536+ au_finfo_fin(file, /*atonce*/0);
1308ab2a 12537+ }
4a4d8108 12538+
4f0767ce 12539+out:
1308ab2a 12540+ return err;
12541+}
dece6358 12542+
4a4d8108 12543+int au_reopen_nondir(struct file *file)
1308ab2a 12544+{
4a4d8108 12545+ int err;
5afbbe0d 12546+ aufs_bindex_t btop;
4a4d8108
AM
12547+ struct dentry *dentry;
12548+ struct file *h_file, *h_file_tmp;
1308ab2a 12549+
2000de60 12550+ dentry = file->f_path.dentry;
5afbbe0d 12551+ btop = au_dbtop(dentry);
4a4d8108 12552+ h_file_tmp = NULL;
5afbbe0d 12553+ if (au_fbtop(file) == btop) {
4a4d8108
AM
12554+ h_file = au_hf_top(file);
12555+ if (file->f_mode == h_file->f_mode)
12556+ return 0; /* success */
12557+ h_file_tmp = h_file;
12558+ get_file(h_file_tmp);
5afbbe0d 12559+ au_set_h_fptr(file, btop, NULL);
4a4d8108
AM
12560+ }
12561+ AuDebugOn(au_fi(file)->fi_hdir);
86dc4139
AM
12562+ /*
12563+ * it can happen
12564+ * file exists on both of rw and ro
5afbbe0d 12565+ * open --> dbtop and fbtop are both 0
86dc4139
AM
12566+ * prepend a branch as rw, "rw" become ro
12567+ * remove rw/file
12568+ * delete the top branch, "rw" becomes rw again
5afbbe0d
AM
12569+ * --> dbtop is 1, fbtop is still 0
12570+ * write --> fbtop is 0 but dbtop is 1
86dc4139 12571+ */
5afbbe0d 12572+ /* AuDebugOn(au_fbtop(file) < btop); */
1308ab2a 12573+
5afbbe0d 12574+ h_file = au_h_open(dentry, btop, vfsub_file_flags(file) & ~O_TRUNC,
392086de 12575+ file, /*force_wr*/0);
4a4d8108 12576+ err = PTR_ERR(h_file);
86dc4139
AM
12577+ if (IS_ERR(h_file)) {
12578+ if (h_file_tmp) {
5afbbe0d
AM
12579+ au_sbr_get(dentry->d_sb, btop);
12580+ au_set_h_fptr(file, btop, h_file_tmp);
86dc4139
AM
12581+ h_file_tmp = NULL;
12582+ }
4a4d8108 12583+ goto out; /* todo: close all? */
86dc4139 12584+ }
4a4d8108
AM
12585+
12586+ err = 0;
5afbbe0d
AM
12587+ au_set_fbtop(file, btop);
12588+ au_set_h_fptr(file, btop, h_file);
4a4d8108
AM
12589+ au_update_figen(file);
12590+ /* todo: necessary? */
12591+ /* file->f_ra = h_file->f_ra; */
12592+
4f0767ce 12593+out:
4a4d8108
AM
12594+ if (h_file_tmp)
12595+ fput(h_file_tmp);
12596+ return err;
1facf9fc 12597+}
12598+
1308ab2a 12599+/* ---------------------------------------------------------------------- */
12600+
4a4d8108
AM
12601+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
12602+ struct dentry *hi_wh)
1facf9fc 12603+{
4a4d8108 12604+ int err;
5afbbe0d 12605+ aufs_bindex_t btop;
4a4d8108
AM
12606+ struct au_dinfo *dinfo;
12607+ struct dentry *h_dentry;
12608+ struct au_hdentry *hdp;
1facf9fc 12609+
2000de60 12610+ dinfo = au_di(file->f_path.dentry);
4a4d8108 12611+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 12612+
5afbbe0d
AM
12613+ btop = dinfo->di_btop;
12614+ dinfo->di_btop = btgt;
12615+ hdp = au_hdentry(dinfo, btgt);
12616+ h_dentry = hdp->hd_dentry;
12617+ hdp->hd_dentry = hi_wh;
4a4d8108 12618+ err = au_reopen_nondir(file);
5afbbe0d
AM
12619+ hdp->hd_dentry = h_dentry;
12620+ dinfo->di_btop = btop;
1facf9fc 12621+
1facf9fc 12622+ return err;
12623+}
12624+
4a4d8108 12625+static int au_ready_to_write_wh(struct file *file, loff_t len,
86dc4139 12626+ aufs_bindex_t bcpup, struct au_pin *pin)
1facf9fc 12627+{
4a4d8108 12628+ int err;
027c5e7a 12629+ struct inode *inode, *h_inode;
c2b27bf2
AM
12630+ struct dentry *h_dentry, *hi_wh;
12631+ struct au_cp_generic cpg = {
2000de60 12632+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12633+ .bdst = bcpup,
12634+ .bsrc = -1,
12635+ .len = len,
12636+ .pin = pin
12637+ };
1facf9fc 12638+
5afbbe0d 12639+ au_update_dbtop(cpg.dentry);
5527c038 12640+ inode = d_inode(cpg.dentry);
027c5e7a 12641+ h_inode = NULL;
5afbbe0d
AM
12642+ if (au_dbtop(cpg.dentry) <= bcpup
12643+ && au_dbbot(cpg.dentry) >= bcpup) {
c2b27bf2 12644+ h_dentry = au_h_dptr(cpg.dentry, bcpup);
5527c038
JR
12645+ if (h_dentry && d_is_positive(h_dentry))
12646+ h_inode = d_inode(h_dentry);
027c5e7a 12647+ }
4a4d8108 12648+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 12649+ if (!hi_wh && !h_inode)
c2b27bf2 12650+ err = au_sio_cpup_wh(&cpg, file);
4a4d8108
AM
12651+ else
12652+ /* already copied-up after unlink */
12653+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 12654+
4a4d8108 12655+ if (!err
38d290e6
JR
12656+ && (inode->i_nlink > 1
12657+ || (inode->i_state & I_LINKABLE))
c2b27bf2
AM
12658+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
12659+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
1308ab2a 12660+
dece6358 12661+ return err;
1facf9fc 12662+}
12663+
4a4d8108
AM
12664+/*
12665+ * prepare the @file for writing.
12666+ */
12667+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 12668+{
4a4d8108 12669+ int err;
5afbbe0d 12670+ aufs_bindex_t dbtop;
c1595e42 12671+ struct dentry *parent;
86dc4139 12672+ struct inode *inode;
1facf9fc 12673+ struct super_block *sb;
4a4d8108 12674+ struct file *h_file;
c2b27bf2 12675+ struct au_cp_generic cpg = {
2000de60 12676+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12677+ .bdst = -1,
12678+ .bsrc = -1,
12679+ .len = len,
12680+ .pin = pin,
12681+ .flags = AuCpup_DTIME
12682+ };
1facf9fc 12683+
c2b27bf2 12684+ sb = cpg.dentry->d_sb;
5527c038 12685+ inode = d_inode(cpg.dentry);
5afbbe0d 12686+ cpg.bsrc = au_fbtop(file);
c2b27bf2 12687+ err = au_test_ro(sb, cpg.bsrc, inode);
4a4d8108 12688+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
c2b27bf2
AM
12689+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
12690+ /*flags*/0);
1facf9fc 12691+ goto out;
4a4d8108 12692+ }
1facf9fc 12693+
027c5e7a 12694+ /* need to cpup or reopen */
c2b27bf2 12695+ parent = dget_parent(cpg.dentry);
4a4d8108 12696+ di_write_lock_parent(parent);
c2b27bf2
AM
12697+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12698+ cpg.bdst = err;
4a4d8108
AM
12699+ if (unlikely(err < 0))
12700+ goto out_dgrade;
12701+ err = 0;
12702+
c2b27bf2
AM
12703+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
12704+ err = au_cpup_dirs(cpg.dentry, cpg.bdst);
1facf9fc 12705+ if (unlikely(err))
4a4d8108
AM
12706+ goto out_dgrade;
12707+ }
12708+
c2b27bf2 12709+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108
AM
12710+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12711+ if (unlikely(err))
12712+ goto out_dgrade;
12713+
5afbbe0d
AM
12714+ dbtop = au_dbtop(cpg.dentry);
12715+ if (dbtop <= cpg.bdst)
c2b27bf2 12716+ cpg.bsrc = cpg.bdst;
027c5e7a 12717+
5afbbe0d 12718+ if (dbtop <= cpg.bdst /* just reopen */
c2b27bf2 12719+ || !d_unhashed(cpg.dentry) /* copyup and reopen */
027c5e7a 12720+ ) {
392086de 12721+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
86dc4139 12722+ if (IS_ERR(h_file))
027c5e7a 12723+ err = PTR_ERR(h_file);
86dc4139 12724+ else {
027c5e7a 12725+ di_downgrade_lock(parent, AuLock_IR);
5afbbe0d 12726+ if (dbtop > cpg.bdst)
c2b27bf2 12727+ err = au_sio_cpup_simple(&cpg);
027c5e7a
AM
12728+ if (!err)
12729+ err = au_reopen_nondir(file);
c2b27bf2 12730+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
027c5e7a 12731+ }
027c5e7a
AM
12732+ } else { /* copyup as wh and reopen */
12733+ /*
12734+ * since writable hfsplus branch is not supported,
12735+ * h_open_pre/post() are unnecessary.
12736+ */
c2b27bf2 12737+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
4a4d8108 12738+ di_downgrade_lock(parent, AuLock_IR);
4a4d8108 12739+ }
4a4d8108
AM
12740+
12741+ if (!err) {
12742+ au_pin_set_parent_lflag(pin, /*lflag*/0);
12743+ goto out_dput; /* success */
12744+ }
12745+ au_unpin(pin);
12746+ goto out_unlock;
1facf9fc 12747+
4f0767ce 12748+out_dgrade:
4a4d8108 12749+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 12750+out_unlock:
4a4d8108 12751+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12752+out_dput:
4a4d8108 12753+ dput(parent);
4f0767ce 12754+out:
1facf9fc 12755+ return err;
12756+}
12757+
4a4d8108
AM
12758+/* ---------------------------------------------------------------------- */
12759+
12760+int au_do_flush(struct file *file, fl_owner_t id,
12761+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 12762+{
4a4d8108 12763+ int err;
1facf9fc 12764+ struct super_block *sb;
4a4d8108 12765+ struct inode *inode;
1facf9fc 12766+
c06a8ce3
AM
12767+ inode = file_inode(file);
12768+ sb = inode->i_sb;
4a4d8108
AM
12769+ si_noflush_read_lock(sb);
12770+ fi_read_lock(file);
b752ccd1 12771+ ii_read_lock_child(inode);
1facf9fc 12772+
4a4d8108
AM
12773+ err = flush(file, id);
12774+ au_cpup_attr_timesizes(inode);
1facf9fc 12775+
b752ccd1 12776+ ii_read_unlock(inode);
4a4d8108 12777+ fi_read_unlock(file);
1308ab2a 12778+ si_read_unlock(sb);
dece6358 12779+ return err;
1facf9fc 12780+}
12781+
4a4d8108
AM
12782+/* ---------------------------------------------------------------------- */
12783+
12784+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 12785+{
4a4d8108 12786+ int err;
4a4d8108
AM
12787+ struct au_pin pin;
12788+ struct au_finfo *finfo;
c2b27bf2 12789+ struct dentry *parent, *hi_wh;
4a4d8108 12790+ struct inode *inode;
1facf9fc 12791+ struct super_block *sb;
c2b27bf2 12792+ struct au_cp_generic cpg = {
2000de60 12793+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12794+ .bdst = -1,
12795+ .bsrc = -1,
12796+ .len = -1,
12797+ .pin = &pin,
12798+ .flags = AuCpup_DTIME
12799+ };
1facf9fc 12800+
4a4d8108
AM
12801+ FiMustWriteLock(file);
12802+
12803+ err = 0;
12804+ finfo = au_fi(file);
c2b27bf2 12805+ sb = cpg.dentry->d_sb;
5527c038 12806+ inode = d_inode(cpg.dentry);
5afbbe0d 12807+ cpg.bdst = au_ibtop(inode);
c2b27bf2 12808+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
1308ab2a 12809+ goto out;
dece6358 12810+
c2b27bf2
AM
12811+ parent = dget_parent(cpg.dentry);
12812+ if (au_test_ro(sb, cpg.bdst, inode)) {
4a4d8108 12813+ di_read_lock_parent(parent, !AuLock_IR);
c2b27bf2
AM
12814+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12815+ cpg.bdst = err;
4a4d8108
AM
12816+ di_read_unlock(parent, !AuLock_IR);
12817+ if (unlikely(err < 0))
12818+ goto out_parent;
12819+ err = 0;
1facf9fc 12820+ }
1facf9fc 12821+
4a4d8108 12822+ di_read_lock_parent(parent, AuLock_IR);
c2b27bf2 12823+ hi_wh = au_hi_wh(inode, cpg.bdst);
7f207e10
AM
12824+ if (!S_ISDIR(inode->i_mode)
12825+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108 12826+ && au_plink_test(inode)
c2b27bf2 12827+ && !d_unhashed(cpg.dentry)
5afbbe0d 12828+ && cpg.bdst < au_dbtop(cpg.dentry)) {
c2b27bf2 12829+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
4a4d8108
AM
12830+ if (unlikely(err))
12831+ goto out_unlock;
12832+
12833+ /* always superio. */
c2b27bf2 12834+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108 12835+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 12836+ if (!err) {
c2b27bf2 12837+ err = au_sio_cpup_simple(&cpg);
367653fa
AM
12838+ au_unpin(&pin);
12839+ }
4a4d8108
AM
12840+ } else if (hi_wh) {
12841+ /* already copied-up after unlink */
c2b27bf2 12842+ err = au_reopen_wh(file, cpg.bdst, hi_wh);
4a4d8108
AM
12843+ *need_reopen = 0;
12844+ }
1facf9fc 12845+
4f0767ce 12846+out_unlock:
4a4d8108 12847+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12848+out_parent:
4a4d8108 12849+ dput(parent);
4f0767ce 12850+out:
1308ab2a 12851+ return err;
dece6358 12852+}
1facf9fc 12853+
4a4d8108 12854+static void au_do_refresh_dir(struct file *file)
dece6358 12855+{
f0c0a007 12856+ int execed;
5afbbe0d 12857+ aufs_bindex_t bindex, bbot, new_bindex, brid;
4a4d8108
AM
12858+ struct au_hfile *p, tmp, *q;
12859+ struct au_finfo *finfo;
1308ab2a 12860+ struct super_block *sb;
4a4d8108 12861+ struct au_fidir *fidir;
1facf9fc 12862+
4a4d8108 12863+ FiMustWriteLock(file);
1facf9fc 12864+
2000de60 12865+ sb = file->f_path.dentry->d_sb;
4a4d8108
AM
12866+ finfo = au_fi(file);
12867+ fidir = finfo->fi_hdir;
12868+ AuDebugOn(!fidir);
12869+ p = fidir->fd_hfile + finfo->fi_btop;
12870+ brid = p->hf_br->br_id;
5afbbe0d
AM
12871+ bbot = fidir->fd_bbot;
12872+ for (bindex = finfo->fi_btop; bindex <= bbot; bindex++, p++) {
4a4d8108
AM
12873+ if (!p->hf_file)
12874+ continue;
1308ab2a 12875+
4a4d8108
AM
12876+ new_bindex = au_br_index(sb, p->hf_br->br_id);
12877+ if (new_bindex == bindex)
12878+ continue;
12879+ if (new_bindex < 0) {
12880+ au_set_h_fptr(file, bindex, NULL);
12881+ continue;
12882+ }
1308ab2a 12883+
4a4d8108
AM
12884+ /* swap two lower inode, and loop again */
12885+ q = fidir->fd_hfile + new_bindex;
12886+ tmp = *q;
12887+ *q = *p;
12888+ *p = tmp;
12889+ if (tmp.hf_file) {
12890+ bindex--;
12891+ p--;
12892+ }
12893+ }
1308ab2a 12894+
f0c0a007 12895+ execed = vfsub_file_execed(file);
4a4d8108 12896+ p = fidir->fd_hfile;
2000de60 12897+ if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
5afbbe0d
AM
12898+ bbot = au_sbbot(sb);
12899+ for (finfo->fi_btop = 0; finfo->fi_btop <= bbot;
4a4d8108
AM
12900+ finfo->fi_btop++, p++)
12901+ if (p->hf_file) {
c06a8ce3 12902+ if (file_inode(p->hf_file))
4a4d8108 12903+ break;
f0c0a007 12904+ au_hfput(p, execed);
4a4d8108
AM
12905+ }
12906+ } else {
5afbbe0d
AM
12907+ bbot = au_br_index(sb, brid);
12908+ for (finfo->fi_btop = 0; finfo->fi_btop < bbot;
4a4d8108
AM
12909+ finfo->fi_btop++, p++)
12910+ if (p->hf_file)
f0c0a007 12911+ au_hfput(p, execed);
5afbbe0d 12912+ bbot = au_sbbot(sb);
4a4d8108 12913+ }
1308ab2a 12914+
5afbbe0d
AM
12915+ p = fidir->fd_hfile + bbot;
12916+ for (fidir->fd_bbot = bbot; fidir->fd_bbot >= finfo->fi_btop;
4a4d8108
AM
12917+ fidir->fd_bbot--, p--)
12918+ if (p->hf_file) {
c06a8ce3 12919+ if (file_inode(p->hf_file))
4a4d8108 12920+ break;
f0c0a007 12921+ au_hfput(p, execed);
4a4d8108
AM
12922+ }
12923+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 12924+}
12925+
4a4d8108
AM
12926+/*
12927+ * after branch manipulating, refresh the file.
12928+ */
12929+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 12930+{
e2f27e51 12931+ int err, need_reopen, nbr;
5afbbe0d 12932+ aufs_bindex_t bbot, bindex;
4a4d8108 12933+ struct dentry *dentry;
e2f27e51 12934+ struct super_block *sb;
1308ab2a 12935+ struct au_finfo *finfo;
4a4d8108 12936+ struct au_hfile *hfile;
1facf9fc 12937+
2000de60 12938+ dentry = file->f_path.dentry;
e2f27e51
AM
12939+ sb = dentry->d_sb;
12940+ nbr = au_sbbot(sb) + 1;
1308ab2a 12941+ finfo = au_fi(file);
4a4d8108
AM
12942+ if (!finfo->fi_hdir) {
12943+ hfile = &finfo->fi_htop;
12944+ AuDebugOn(!hfile->hf_file);
e2f27e51 12945+ bindex = au_br_index(sb, hfile->hf_br->br_id);
4a4d8108
AM
12946+ AuDebugOn(bindex < 0);
12947+ if (bindex != finfo->fi_btop)
5afbbe0d 12948+ au_set_fbtop(file, bindex);
4a4d8108 12949+ } else {
e2f27e51 12950+ err = au_fidir_realloc(finfo, nbr, /*may_shrink*/0);
4a4d8108
AM
12951+ if (unlikely(err))
12952+ goto out;
12953+ au_do_refresh_dir(file);
12954+ }
1facf9fc 12955+
4a4d8108
AM
12956+ err = 0;
12957+ need_reopen = 1;
12958+ if (!au_test_mmapped(file))
12959+ err = au_file_refresh_by_inode(file, &need_reopen);
e2f27e51
AM
12960+ if (finfo->fi_hdir)
12961+ /* harmless if err */
12962+ au_fidir_realloc(finfo, nbr, /*may_shrink*/1);
027c5e7a 12963+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
12964+ err = reopen(file);
12965+ if (!err) {
12966+ au_update_figen(file);
12967+ goto out; /* success */
12968+ }
12969+
12970+ /* error, close all lower files */
12971+ if (finfo->fi_hdir) {
5afbbe0d
AM
12972+ bbot = au_fbbot_dir(file);
12973+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++)
4a4d8108
AM
12974+ au_set_h_fptr(file, bindex, NULL);
12975+ }
1facf9fc 12976+
4f0767ce 12977+out:
1facf9fc 12978+ return err;
12979+}
12980+
4a4d8108
AM
12981+/* common function to regular file and dir */
12982+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12983+ int wlock)
dece6358 12984+{
1308ab2a 12985+ int err;
4a4d8108 12986+ unsigned int sigen, figen;
5afbbe0d 12987+ aufs_bindex_t btop;
4a4d8108
AM
12988+ unsigned char pseudo_link;
12989+ struct dentry *dentry;
12990+ struct inode *inode;
1facf9fc 12991+
4a4d8108 12992+ err = 0;
2000de60 12993+ dentry = file->f_path.dentry;
5527c038 12994+ inode = d_inode(dentry);
4a4d8108
AM
12995+ sigen = au_sigen(dentry->d_sb);
12996+ fi_write_lock(file);
12997+ figen = au_figen(file);
12998+ di_write_lock_child(dentry);
5afbbe0d
AM
12999+ btop = au_dbtop(dentry);
13000+ pseudo_link = (btop != au_ibtop(inode));
13001+ if (sigen == figen && !pseudo_link && au_fbtop(file) == btop) {
4a4d8108
AM
13002+ if (!wlock) {
13003+ di_downgrade_lock(dentry, AuLock_IR);
13004+ fi_downgrade_lock(file);
13005+ }
13006+ goto out; /* success */
13007+ }
dece6358 13008+
4a4d8108 13009+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 13010+ if (au_digen_test(dentry, sigen)) {
4a4d8108 13011+ err = au_reval_dpath(dentry, sigen);
027c5e7a 13012+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 13013+ }
dece6358 13014+
027c5e7a
AM
13015+ if (!err)
13016+ err = refresh_file(file, reopen);
4a4d8108
AM
13017+ if (!err) {
13018+ if (!wlock) {
13019+ di_downgrade_lock(dentry, AuLock_IR);
13020+ fi_downgrade_lock(file);
13021+ }
13022+ } else {
13023+ di_write_unlock(dentry);
13024+ fi_write_unlock(file);
13025+ }
1facf9fc 13026+
4f0767ce 13027+out:
1308ab2a 13028+ return err;
13029+}
1facf9fc 13030+
4a4d8108
AM
13031+/* ---------------------------------------------------------------------- */
13032+
13033+/* cf. aufs_nopage() */
13034+/* for madvise(2) */
13035+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 13036+{
4a4d8108
AM
13037+ unlock_page(page);
13038+ return 0;
13039+}
1facf9fc 13040+
4a4d8108 13041+/* it will never be called, but necessary to support O_DIRECT */
5afbbe0d 13042+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
4a4d8108 13043+{ BUG(); return 0; }
1facf9fc 13044+
4a4d8108
AM
13045+/* they will never be called. */
13046+#ifdef CONFIG_AUFS_DEBUG
13047+static int aufs_write_begin(struct file *file, struct address_space *mapping,
13048+ loff_t pos, unsigned len, unsigned flags,
13049+ struct page **pagep, void **fsdata)
13050+{ AuUnsupport(); return 0; }
13051+static int aufs_write_end(struct file *file, struct address_space *mapping,
13052+ loff_t pos, unsigned len, unsigned copied,
13053+ struct page *page, void *fsdata)
13054+{ AuUnsupport(); return 0; }
13055+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
13056+{ AuUnsupport(); return 0; }
1308ab2a 13057+
4a4d8108
AM
13058+static int aufs_set_page_dirty(struct page *page)
13059+{ AuUnsupport(); return 0; }
392086de
AM
13060+static void aufs_invalidatepage(struct page *page, unsigned int offset,
13061+ unsigned int length)
4a4d8108
AM
13062+{ AuUnsupport(); }
13063+static int aufs_releasepage(struct page *page, gfp_t gfp)
13064+{ AuUnsupport(); return 0; }
79b8bda9 13065+#if 0 /* called by memory compaction regardless file */
4a4d8108 13066+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
7eafdf33 13067+ struct page *page, enum migrate_mode mode)
4a4d8108 13068+{ AuUnsupport(); return 0; }
79b8bda9 13069+#endif
e2f27e51
AM
13070+static bool aufs_isolate_page(struct page *page, isolate_mode_t mode)
13071+{ AuUnsupport(); return true; }
13072+static void aufs_putback_page(struct page *page)
13073+{ AuUnsupport(); }
4a4d8108
AM
13074+static int aufs_launder_page(struct page *page)
13075+{ AuUnsupport(); return 0; }
13076+static int aufs_is_partially_uptodate(struct page *page,
38d290e6
JR
13077+ unsigned long from,
13078+ unsigned long count)
4a4d8108 13079+{ AuUnsupport(); return 0; }
392086de
AM
13080+static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
13081+ bool *writeback)
13082+{ AuUnsupport(); }
4a4d8108
AM
13083+static int aufs_error_remove_page(struct address_space *mapping,
13084+ struct page *page)
13085+{ AuUnsupport(); return 0; }
b4510431
AM
13086+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
13087+ sector_t *span)
13088+{ AuUnsupport(); return 0; }
13089+static void aufs_swap_deactivate(struct file *file)
13090+{ AuUnsupport(); }
4a4d8108
AM
13091+#endif /* CONFIG_AUFS_DEBUG */
13092+
13093+const struct address_space_operations aufs_aop = {
13094+ .readpage = aufs_readpage,
13095+ .direct_IO = aufs_direct_IO,
4a4d8108
AM
13096+#ifdef CONFIG_AUFS_DEBUG
13097+ .writepage = aufs_writepage,
4a4d8108
AM
13098+ /* no writepages, because of writepage */
13099+ .set_page_dirty = aufs_set_page_dirty,
13100+ /* no readpages, because of readpage */
13101+ .write_begin = aufs_write_begin,
13102+ .write_end = aufs_write_end,
13103+ /* no bmap, no block device */
13104+ .invalidatepage = aufs_invalidatepage,
13105+ .releasepage = aufs_releasepage,
79b8bda9
AM
13106+ /* is fallback_migrate_page ok? */
13107+ /* .migratepage = aufs_migratepage, */
e2f27e51
AM
13108+ .isolate_page = aufs_isolate_page,
13109+ .putback_page = aufs_putback_page,
4a4d8108
AM
13110+ .launder_page = aufs_launder_page,
13111+ .is_partially_uptodate = aufs_is_partially_uptodate,
392086de 13112+ .is_dirty_writeback = aufs_is_dirty_writeback,
b4510431
AM
13113+ .error_remove_page = aufs_error_remove_page,
13114+ .swap_activate = aufs_swap_activate,
13115+ .swap_deactivate = aufs_swap_deactivate
4a4d8108 13116+#endif /* CONFIG_AUFS_DEBUG */
dece6358 13117+};
7f207e10
AM
13118diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
13119--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 13120+++ linux/fs/aufs/file.h 2016-10-09 16:55:38.889431135 +0200
f0c0a007 13121@@ -0,0 +1,294 @@
4a4d8108 13122+/*
8cdd5066 13123+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
13124+ *
13125+ * This program, aufs is free software; you can redistribute it and/or modify
13126+ * it under the terms of the GNU General Public License as published by
13127+ * the Free Software Foundation; either version 2 of the License, or
13128+ * (at your option) any later version.
13129+ *
13130+ * This program is distributed in the hope that it will be useful,
13131+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13132+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13133+ * GNU General Public License for more details.
13134+ *
13135+ * You should have received a copy of the GNU General Public License
523b37e3 13136+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 13137+ */
1facf9fc 13138+
4a4d8108
AM
13139+/*
13140+ * file operations
13141+ */
1facf9fc 13142+
4a4d8108
AM
13143+#ifndef __AUFS_FILE_H__
13144+#define __AUFS_FILE_H__
1facf9fc 13145+
4a4d8108 13146+#ifdef __KERNEL__
1facf9fc 13147+
2cbb1c4b 13148+#include <linux/file.h>
4a4d8108
AM
13149+#include <linux/fs.h>
13150+#include <linux/poll.h>
4a4d8108 13151+#include "rwsem.h"
1facf9fc 13152+
4a4d8108
AM
13153+struct au_branch;
13154+struct au_hfile {
13155+ struct file *hf_file;
13156+ struct au_branch *hf_br;
13157+};
1facf9fc 13158+
4a4d8108
AM
13159+struct au_vdir;
13160+struct au_fidir {
13161+ aufs_bindex_t fd_bbot;
13162+ aufs_bindex_t fd_nent;
13163+ struct au_vdir *fd_vdir_cache;
13164+ struct au_hfile fd_hfile[];
13165+};
1facf9fc 13166+
4a4d8108 13167+static inline int au_fidir_sz(int nent)
dece6358 13168+{
4f0767ce
JR
13169+ AuDebugOn(nent < 0);
13170+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 13171+}
1facf9fc 13172+
4a4d8108
AM
13173+struct au_finfo {
13174+ atomic_t fi_generation;
dece6358 13175+
4a4d8108
AM
13176+ struct au_rwsem fi_rwsem;
13177+ aufs_bindex_t fi_btop;
13178+
13179+ /* do not union them */
13180+ struct { /* for non-dir */
13181+ struct au_hfile fi_htop;
2cbb1c4b 13182+ atomic_t fi_mmapped;
4a4d8108
AM
13183+ };
13184+ struct au_fidir *fi_hdir; /* for dir only */
523b37e3
AM
13185+
13186+ struct hlist_node fi_hlist;
f0c0a007
AM
13187+ union {
13188+ struct file *fi_file; /* very ugly */
13189+ struct llist_node fi_lnode; /* delayed free */
13190+ };
4a4d8108 13191+} ____cacheline_aligned_in_smp;
1facf9fc 13192+
4a4d8108 13193+/* ---------------------------------------------------------------------- */
1facf9fc 13194+
4a4d8108
AM
13195+/* file.c */
13196+extern const struct address_space_operations aufs_aop;
13197+unsigned int au_file_roflags(unsigned int flags);
13198+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 13199+ struct file *file, int force_wr);
b912730e
AM
13200+struct au_do_open_args {
13201+ int no_lock;
13202+ int (*open)(struct file *file, int flags,
13203+ struct file *h_file);
13204+ struct au_fidir *fidir;
13205+ struct file *h_file;
13206+};
13207+int au_do_open(struct file *file, struct au_do_open_args *args);
4a4d8108
AM
13208+int au_reopen_nondir(struct file *file);
13209+struct au_pin;
13210+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
13211+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
13212+ int wlock);
13213+int au_do_flush(struct file *file, fl_owner_t id,
13214+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 13215+
4a4d8108
AM
13216+/* poll.c */
13217+#ifdef CONFIG_AUFS_POLL
13218+unsigned int aufs_poll(struct file *file, poll_table *wait);
13219+#endif
1facf9fc 13220+
4a4d8108
AM
13221+#ifdef CONFIG_AUFS_BR_HFSPLUS
13222+/* hfsplus.c */
392086de
AM
13223+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
13224+ int force_wr);
4a4d8108
AM
13225+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
13226+ struct file *h_file);
13227+#else
c1595e42
JR
13228+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
13229+ aufs_bindex_t bindex, int force_wr)
4a4d8108
AM
13230+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
13231+ struct file *h_file);
13232+#endif
1facf9fc 13233+
4a4d8108
AM
13234+/* f_op.c */
13235+extern const struct file_operations aufs_file_fop;
b912730e 13236+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
4a4d8108 13237+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
b912730e 13238+struct file *au_read_pre(struct file *file, int keep_fi);
4a4d8108 13239+
4a4d8108 13240+/* finfo.c */
f0c0a007 13241+void au_hfput(struct au_hfile *hf, int execed);
4a4d8108
AM
13242+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
13243+ struct file *h_file);
1facf9fc 13244+
4a4d8108 13245+void au_update_figen(struct file *file);
4a4d8108 13246+struct au_fidir *au_fidir_alloc(struct super_block *sb);
e2f27e51 13247+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink);
1facf9fc 13248+
4a4d8108 13249+void au_fi_init_once(void *_fi);
f0c0a007 13250+void au_finfo_fin(struct file *file, int atonce);
4a4d8108 13251+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 13252+
4a4d8108
AM
13253+/* ioctl.c */
13254+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
13255+#ifdef CONFIG_COMPAT
13256+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
13257+ unsigned long arg);
c2b27bf2
AM
13258+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
13259+ unsigned long arg);
b752ccd1 13260+#endif
1facf9fc 13261+
4a4d8108 13262+/* ---------------------------------------------------------------------- */
1facf9fc 13263+
4a4d8108
AM
13264+static inline struct au_finfo *au_fi(struct file *file)
13265+{
38d290e6 13266+ return file->private_data;
4a4d8108 13267+}
1facf9fc 13268+
4a4d8108 13269+/* ---------------------------------------------------------------------- */
1facf9fc 13270+
4a4d8108
AM
13271+/*
13272+ * fi_read_lock, fi_write_lock,
13273+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
13274+ */
13275+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 13276+
4a4d8108
AM
13277+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
13278+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
13279+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 13280+
1308ab2a 13281+/* ---------------------------------------------------------------------- */
13282+
4a4d8108 13283+/* todo: hard/soft set? */
5afbbe0d 13284+static inline aufs_bindex_t au_fbtop(struct file *file)
dece6358 13285+{
4a4d8108
AM
13286+ FiMustAnyLock(file);
13287+ return au_fi(file)->fi_btop;
13288+}
dece6358 13289+
5afbbe0d 13290+static inline aufs_bindex_t au_fbbot_dir(struct file *file)
4a4d8108
AM
13291+{
13292+ FiMustAnyLock(file);
13293+ AuDebugOn(!au_fi(file)->fi_hdir);
13294+ return au_fi(file)->fi_hdir->fd_bbot;
13295+}
1facf9fc 13296+
4a4d8108
AM
13297+static inline struct au_vdir *au_fvdir_cache(struct file *file)
13298+{
13299+ FiMustAnyLock(file);
13300+ AuDebugOn(!au_fi(file)->fi_hdir);
13301+ return au_fi(file)->fi_hdir->fd_vdir_cache;
13302+}
1facf9fc 13303+
5afbbe0d 13304+static inline void au_set_fbtop(struct file *file, aufs_bindex_t bindex)
4a4d8108
AM
13305+{
13306+ FiMustWriteLock(file);
13307+ au_fi(file)->fi_btop = bindex;
13308+}
1facf9fc 13309+
5afbbe0d 13310+static inline void au_set_fbbot_dir(struct file *file, aufs_bindex_t bindex)
4a4d8108
AM
13311+{
13312+ FiMustWriteLock(file);
13313+ AuDebugOn(!au_fi(file)->fi_hdir);
13314+ au_fi(file)->fi_hdir->fd_bbot = bindex;
13315+}
1308ab2a 13316+
4a4d8108
AM
13317+static inline void au_set_fvdir_cache(struct file *file,
13318+ struct au_vdir *vdir_cache)
13319+{
13320+ FiMustWriteLock(file);
13321+ AuDebugOn(!au_fi(file)->fi_hdir);
13322+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
13323+}
dece6358 13324+
4a4d8108
AM
13325+static inline struct file *au_hf_top(struct file *file)
13326+{
13327+ FiMustAnyLock(file);
13328+ AuDebugOn(au_fi(file)->fi_hdir);
13329+ return au_fi(file)->fi_htop.hf_file;
13330+}
1facf9fc 13331+
4a4d8108
AM
13332+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
13333+{
13334+ FiMustAnyLock(file);
13335+ AuDebugOn(!au_fi(file)->fi_hdir);
13336+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
13337+}
13338+
4a4d8108
AM
13339+/* todo: memory barrier? */
13340+static inline unsigned int au_figen(struct file *f)
dece6358 13341+{
4a4d8108
AM
13342+ return atomic_read(&au_fi(f)->fi_generation);
13343+}
dece6358 13344+
2cbb1c4b
JR
13345+static inline void au_set_mmapped(struct file *f)
13346+{
13347+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
13348+ return;
0c3ec466 13349+ pr_warn("fi_mmapped wrapped around\n");
2cbb1c4b
JR
13350+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
13351+ ;
13352+}
13353+
13354+static inline void au_unset_mmapped(struct file *f)
13355+{
13356+ atomic_dec(&au_fi(f)->fi_mmapped);
13357+}
13358+
4a4d8108
AM
13359+static inline int au_test_mmapped(struct file *f)
13360+{
2cbb1c4b
JR
13361+ return atomic_read(&au_fi(f)->fi_mmapped);
13362+}
13363+
13364+/* customize vma->vm_file */
13365+
13366+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
13367+ struct file *file)
13368+{
53392da6
AM
13369+ struct file *f;
13370+
13371+ f = vma->vm_file;
2cbb1c4b
JR
13372+ get_file(file);
13373+ vma->vm_file = file;
53392da6 13374+ fput(f);
2cbb1c4b
JR
13375+}
13376+
13377+#ifdef CONFIG_MMU
13378+#define AuDbgVmRegion(file, vma) do {} while (0)
13379+
13380+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13381+ struct file *file)
13382+{
13383+ au_do_vm_file_reset(vma, file);
13384+}
13385+#else
13386+#define AuDbgVmRegion(file, vma) \
13387+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
13388+
13389+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13390+ struct file *file)
13391+{
53392da6
AM
13392+ struct file *f;
13393+
2cbb1c4b 13394+ au_do_vm_file_reset(vma, file);
53392da6 13395+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
13396+ get_file(file);
13397+ vma->vm_region->vm_file = file;
53392da6 13398+ fput(f);
2cbb1c4b
JR
13399+}
13400+#endif /* CONFIG_MMU */
13401+
13402+/* handle vma->vm_prfile */
fb47a38f 13403+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
2cbb1c4b
JR
13404+ struct file *file)
13405+{
2cbb1c4b
JR
13406+ get_file(file);
13407+ vma->vm_prfile = file;
13408+#ifndef CONFIG_MMU
13409+ get_file(file);
13410+ vma->vm_region->vm_prfile = file;
13411+#endif
fb47a38f 13412+}
1308ab2a 13413+
4a4d8108
AM
13414+#endif /* __KERNEL__ */
13415+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
13416diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
13417--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 13418+++ linux/fs/aufs/finfo.c 2016-10-09 16:55:38.889431135 +0200
f0c0a007 13419@@ -0,0 +1,151 @@
4a4d8108 13420+/*
8cdd5066 13421+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
13422+ *
13423+ * This program, aufs is free software; you can redistribute it and/or modify
13424+ * it under the terms of the GNU General Public License as published by
13425+ * the Free Software Foundation; either version 2 of the License, or
13426+ * (at your option) any later version.
13427+ *
13428+ * This program is distributed in the hope that it will be useful,
13429+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13430+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13431+ * GNU General Public License for more details.
13432+ *
13433+ * You should have received a copy of the GNU General Public License
523b37e3 13434+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 13435+ */
1308ab2a 13436+
4a4d8108
AM
13437+/*
13438+ * file private data
13439+ */
1facf9fc 13440+
4a4d8108 13441+#include "aufs.h"
1facf9fc 13442+
f0c0a007 13443+void au_hfput(struct au_hfile *hf, int execed)
4a4d8108 13444+{
f0c0a007 13445+ if (execed)
4a4d8108
AM
13446+ allow_write_access(hf->hf_file);
13447+ fput(hf->hf_file);
13448+ hf->hf_file = NULL;
5afbbe0d 13449+ au_br_put(hf->hf_br);
4a4d8108
AM
13450+ hf->hf_br = NULL;
13451+}
1facf9fc 13452+
4a4d8108
AM
13453+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
13454+{
13455+ struct au_finfo *finfo = au_fi(file);
13456+ struct au_hfile *hf;
13457+ struct au_fidir *fidir;
13458+
13459+ fidir = finfo->fi_hdir;
13460+ if (!fidir) {
13461+ AuDebugOn(finfo->fi_btop != bindex);
13462+ hf = &finfo->fi_htop;
13463+ } else
13464+ hf = fidir->fd_hfile + bindex;
13465+
13466+ if (hf && hf->hf_file)
f0c0a007 13467+ au_hfput(hf, vfsub_file_execed(file));
4a4d8108
AM
13468+ if (val) {
13469+ FiMustWriteLock(file);
b912730e 13470+ AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
4a4d8108 13471+ hf->hf_file = val;
2000de60 13472+ hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
1308ab2a 13473+ }
4a4d8108 13474+}
1facf9fc 13475+
4a4d8108
AM
13476+void au_update_figen(struct file *file)
13477+{
2000de60 13478+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
4a4d8108 13479+ /* smp_mb(); */ /* atomic_set */
1facf9fc 13480+}
13481+
4a4d8108
AM
13482+/* ---------------------------------------------------------------------- */
13483+
4a4d8108
AM
13484+struct au_fidir *au_fidir_alloc(struct super_block *sb)
13485+{
13486+ struct au_fidir *fidir;
13487+ int nbr;
13488+
5afbbe0d 13489+ nbr = au_sbbot(sb) + 1;
4a4d8108
AM
13490+ if (nbr < 2)
13491+ nbr = 2; /* initial allocate for 2 branches */
13492+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
13493+ if (fidir) {
13494+ fidir->fd_bbot = -1;
13495+ fidir->fd_nent = nbr;
4a4d8108
AM
13496+ }
13497+
13498+ return fidir;
13499+}
13500+
e2f27e51 13501+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink)
4a4d8108
AM
13502+{
13503+ int err;
13504+ struct au_fidir *fidir, *p;
13505+
13506+ AuRwMustWriteLock(&finfo->fi_rwsem);
13507+ fidir = finfo->fi_hdir;
13508+ AuDebugOn(!fidir);
13509+
13510+ err = -ENOMEM;
13511+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
e2f27e51 13512+ GFP_NOFS, may_shrink);
4a4d8108
AM
13513+ if (p) {
13514+ p->fd_nent = nbr;
13515+ finfo->fi_hdir = p;
13516+ err = 0;
13517+ }
1facf9fc 13518+
dece6358 13519+ return err;
1facf9fc 13520+}
1308ab2a 13521+
13522+/* ---------------------------------------------------------------------- */
13523+
f0c0a007 13524+void au_finfo_fin(struct file *file, int atonce)
1308ab2a 13525+{
4a4d8108
AM
13526+ struct au_finfo *finfo;
13527+
2000de60 13528+ au_nfiles_dec(file->f_path.dentry->d_sb);
7f207e10 13529+
4a4d8108
AM
13530+ finfo = au_fi(file);
13531+ AuDebugOn(finfo->fi_hdir);
13532+ AuRwDestroy(&finfo->fi_rwsem);
f0c0a007
AM
13533+ if (!atonce)
13534+ au_cache_dfree_finfo(finfo);
13535+ else
13536+ au_cache_free_finfo(finfo);
1308ab2a 13537+}
1308ab2a 13538+
e49829fe 13539+void au_fi_init_once(void *_finfo)
4a4d8108 13540+{
e49829fe 13541+ struct au_finfo *finfo = _finfo;
1308ab2a 13542+
e49829fe 13543+ au_rw_init(&finfo->fi_rwsem);
4a4d8108 13544+}
1308ab2a 13545+
4a4d8108
AM
13546+int au_finfo_init(struct file *file, struct au_fidir *fidir)
13547+{
1716fcea 13548+ int err;
4a4d8108
AM
13549+ struct au_finfo *finfo;
13550+ struct dentry *dentry;
13551+
13552+ err = -ENOMEM;
2000de60 13553+ dentry = file->f_path.dentry;
4a4d8108
AM
13554+ finfo = au_cache_alloc_finfo();
13555+ if (unlikely(!finfo))
13556+ goto out;
13557+
13558+ err = 0;
7f207e10 13559+ au_nfiles_inc(dentry->d_sb);
4a4d8108
AM
13560+ au_rw_write_lock(&finfo->fi_rwsem);
13561+ finfo->fi_btop = -1;
13562+ finfo->fi_hdir = fidir;
13563+ atomic_set(&finfo->fi_generation, au_digen(dentry));
13564+ /* smp_mb(); */ /* atomic_set */
13565+
13566+ file->private_data = finfo;
13567+
13568+out:
13569+ return err;
13570+}
7f207e10
AM
13571diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
13572--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
13573+++ linux/fs/aufs/f_op.c 2016-12-17 12:28:17.595211562 +0100
13574@@ -0,0 +1,723 @@
dece6358 13575+/*
8cdd5066 13576+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
13577+ *
13578+ * This program, aufs is free software; you can redistribute it and/or modify
13579+ * it under the terms of the GNU General Public License as published by
13580+ * the Free Software Foundation; either version 2 of the License, or
13581+ * (at your option) any later version.
13582+ *
13583+ * This program is distributed in the hope that it will be useful,
13584+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13585+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13586+ * GNU General Public License for more details.
13587+ *
13588+ * You should have received a copy of the GNU General Public License
523b37e3 13589+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 13590+ */
1facf9fc 13591+
13592+/*
4a4d8108 13593+ * file and vm operations
1facf9fc 13594+ */
dece6358 13595+
86dc4139 13596+#include <linux/aio.h>
4a4d8108
AM
13597+#include <linux/fs_stack.h>
13598+#include <linux/mman.h>
4a4d8108 13599+#include <linux/security.h>
dece6358
AM
13600+#include "aufs.h"
13601+
b912730e 13602+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
1facf9fc 13603+{
4a4d8108
AM
13604+ int err;
13605+ aufs_bindex_t bindex;
8cdd5066 13606+ struct dentry *dentry, *h_dentry;
4a4d8108 13607+ struct au_finfo *finfo;
38d290e6 13608+ struct inode *h_inode;
4a4d8108
AM
13609+
13610+ FiMustWriteLock(file);
13611+
523b37e3 13612+ err = 0;
2000de60 13613+ dentry = file->f_path.dentry;
b912730e 13614+ AuDebugOn(IS_ERR_OR_NULL(dentry));
4a4d8108
AM
13615+ finfo = au_fi(file);
13616+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 13617+ atomic_set(&finfo->fi_mmapped, 0);
5afbbe0d 13618+ bindex = au_dbtop(dentry);
8cdd5066
JR
13619+ if (!h_file) {
13620+ h_dentry = au_h_dptr(dentry, bindex);
13621+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13622+ if (unlikely(err))
13623+ goto out;
b912730e 13624+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
8cdd5066
JR
13625+ } else {
13626+ h_dentry = h_file->f_path.dentry;
13627+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13628+ if (unlikely(err))
13629+ goto out;
b912730e 13630+ get_file(h_file);
8cdd5066 13631+ }
4a4d8108
AM
13632+ if (IS_ERR(h_file))
13633+ err = PTR_ERR(h_file);
13634+ else {
38d290e6
JR
13635+ if ((flags & __O_TMPFILE)
13636+ && !(flags & O_EXCL)) {
13637+ h_inode = file_inode(h_file);
13638+ spin_lock(&h_inode->i_lock);
13639+ h_inode->i_state |= I_LINKABLE;
13640+ spin_unlock(&h_inode->i_lock);
13641+ }
5afbbe0d 13642+ au_set_fbtop(file, bindex);
4a4d8108
AM
13643+ au_set_h_fptr(file, bindex, h_file);
13644+ au_update_figen(file);
13645+ /* todo: necessary? */
13646+ /* file->f_ra = h_file->f_ra; */
13647+ }
027c5e7a 13648+
8cdd5066 13649+out:
4a4d8108 13650+ return err;
1facf9fc 13651+}
13652+
4a4d8108
AM
13653+static int aufs_open_nondir(struct inode *inode __maybe_unused,
13654+ struct file *file)
1facf9fc 13655+{
4a4d8108 13656+ int err;
1308ab2a 13657+ struct super_block *sb;
b912730e
AM
13658+ struct au_do_open_args args = {
13659+ .open = au_do_open_nondir
13660+ };
1facf9fc 13661+
523b37e3
AM
13662+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
13663+ file, vfsub_file_flags(file), file->f_mode);
1facf9fc 13664+
2000de60 13665+ sb = file->f_path.dentry->d_sb;
4a4d8108 13666+ si_read_lock(sb, AuLock_FLUSH);
b912730e 13667+ err = au_do_open(file, &args);
4a4d8108
AM
13668+ si_read_unlock(sb);
13669+ return err;
13670+}
1facf9fc 13671+
4a4d8108
AM
13672+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
13673+{
13674+ struct au_finfo *finfo;
13675+ aufs_bindex_t bindex;
f0c0a007 13676+ int delayed;
1facf9fc 13677+
4a4d8108 13678+ finfo = au_fi(file);
2000de60
JR
13679+ au_sphl_del(&finfo->fi_hlist,
13680+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108 13681+ bindex = finfo->fi_btop;
b4510431 13682+ if (bindex >= 0)
4a4d8108 13683+ au_set_h_fptr(file, bindex, NULL);
7f207e10 13684+
f0c0a007
AM
13685+ delayed = (current->flags & PF_KTHREAD) || in_interrupt();
13686+ au_finfo_fin(file, delayed);
4a4d8108 13687+ return 0;
1facf9fc 13688+}
13689+
4a4d8108
AM
13690+/* ---------------------------------------------------------------------- */
13691+
13692+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 13693+{
1308ab2a 13694+ int err;
4a4d8108
AM
13695+ struct file *h_file;
13696+
13697+ err = 0;
13698+ h_file = au_hf_top(file);
13699+ if (h_file)
13700+ err = vfsub_flush(h_file, id);
13701+ return err;
13702+}
13703+
13704+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
13705+{
13706+ return au_do_flush(file, id, au_do_flush_nondir);
13707+}
13708+
13709+/* ---------------------------------------------------------------------- */
9dbd164d
AM
13710+/*
13711+ * read and write functions acquire [fdi]_rwsem once, but release before
13712+ * mmap_sem. This is to prevent a race condition with mmap(2).
13713+ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping
13714+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
13715+ * read functions after [fdi]_rwsem are released, but it should be harmless.
13716+ */
4a4d8108 13717+
b912730e
AM
13718+/* Callers should call au_read_post() or fput() in the end */
13719+struct file *au_read_pre(struct file *file, int keep_fi)
4a4d8108 13720+{
4a4d8108 13721+ struct file *h_file;
b912730e 13722+ int err;
1facf9fc 13723+
4a4d8108 13724+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
b912730e
AM
13725+ if (!err) {
13726+ di_read_unlock(file->f_path.dentry, AuLock_IR);
13727+ h_file = au_hf_top(file);
13728+ get_file(h_file);
13729+ if (!keep_fi)
13730+ fi_read_unlock(file);
13731+ } else
13732+ h_file = ERR_PTR(err);
13733+
13734+ return h_file;
13735+}
13736+
13737+static void au_read_post(struct inode *inode, struct file *h_file)
13738+{
13739+ /* update without lock, I don't think it a problem */
13740+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13741+ fput(h_file);
13742+}
13743+
13744+struct au_write_pre {
13745+ blkcnt_t blks;
5afbbe0d 13746+ aufs_bindex_t btop;
b912730e
AM
13747+};
13748+
13749+/*
13750+ * return with iinfo is write-locked
13751+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
13752+ * end
13753+ */
13754+static struct file *au_write_pre(struct file *file, int do_ready,
13755+ struct au_write_pre *wpre)
13756+{
13757+ struct file *h_file;
13758+ struct dentry *dentry;
13759+ int err;
13760+ struct au_pin pin;
13761+
13762+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
13763+ h_file = ERR_PTR(err);
dece6358
AM
13764+ if (unlikely(err))
13765+ goto out;
1facf9fc 13766+
b912730e
AM
13767+ dentry = file->f_path.dentry;
13768+ if (do_ready) {
13769+ err = au_ready_to_write(file, -1, &pin);
13770+ if (unlikely(err)) {
13771+ h_file = ERR_PTR(err);
13772+ di_write_unlock(dentry);
13773+ goto out_fi;
13774+ }
13775+ }
13776+
13777+ di_downgrade_lock(dentry, /*flags*/0);
13778+ if (wpre)
5afbbe0d 13779+ wpre->btop = au_fbtop(file);
4a4d8108 13780+ h_file = au_hf_top(file);
9dbd164d 13781+ get_file(h_file);
b912730e
AM
13782+ if (wpre)
13783+ wpre->blks = file_inode(h_file)->i_blocks;
13784+ if (do_ready)
13785+ au_unpin(&pin);
13786+ di_read_unlock(dentry, /*flags*/0);
13787+
13788+out_fi:
13789+ fi_write_unlock(file);
13790+out:
13791+ return h_file;
13792+}
13793+
13794+static void au_write_post(struct inode *inode, struct file *h_file,
13795+ struct au_write_pre *wpre, ssize_t written)
13796+{
13797+ struct inode *h_inode;
13798+
13799+ au_cpup_attr_timesizes(inode);
5afbbe0d 13800+ AuDebugOn(au_ibtop(inode) != wpre->btop);
b912730e
AM
13801+ h_inode = file_inode(h_file);
13802+ inode->i_mode = h_inode->i_mode;
13803+ ii_write_unlock(inode);
13804+ fput(h_file);
13805+
13806+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
13807+ if (written > 0)
5afbbe0d 13808+ au_fhsm_wrote(inode->i_sb, wpre->btop,
b912730e
AM
13809+ /*force*/h_inode->i_blocks > wpre->blks);
13810+}
13811+
13812+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
13813+ loff_t *ppos)
13814+{
13815+ ssize_t err;
13816+ struct inode *inode;
13817+ struct file *h_file;
13818+ struct super_block *sb;
13819+
13820+ inode = file_inode(file);
13821+ sb = inode->i_sb;
13822+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13823+
13824+ h_file = au_read_pre(file, /*keep_fi*/0);
13825+ err = PTR_ERR(h_file);
13826+ if (IS_ERR(h_file))
13827+ goto out;
9dbd164d
AM
13828+
13829+ /* filedata may be obsoleted by concurrent copyup, but no problem */
4a4d8108
AM
13830+ err = vfsub_read_u(h_file, buf, count, ppos);
13831+ /* todo: necessary? */
13832+ /* file->f_ra = h_file->f_ra; */
b912730e 13833+ au_read_post(inode, h_file);
1308ab2a 13834+
4f0767ce 13835+out:
dece6358
AM
13836+ si_read_unlock(sb);
13837+ return err;
13838+}
1facf9fc 13839+
e49829fe
JR
13840+/*
13841+ * todo: very ugly
13842+ * it acquires both i_mutex and si_rwsem (for read) safely.
13843+ * if the plink maintenance mode continues forever (that is the problem),
13844+ * may loop forever.
13845+ */
13846+static void au_mtx_and_read_lock(struct inode *inode)
13847+{
13848+ int err;
13849+ struct super_block *sb = inode->i_sb;
13850+
13851+ while (1) {
febd17d6 13852+ inode_lock(inode);
e49829fe
JR
13853+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
13854+ if (!err)
13855+ break;
febd17d6 13856+ inode_unlock(inode);
e49829fe
JR
13857+ si_read_lock(sb, AuLock_NOPLMW);
13858+ si_read_unlock(sb);
13859+ }
13860+}
13861+
4a4d8108
AM
13862+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
13863+ size_t count, loff_t *ppos)
dece6358 13864+{
4a4d8108 13865+ ssize_t err;
b912730e
AM
13866+ struct au_write_pre wpre;
13867+ struct inode *inode;
4a4d8108
AM
13868+ struct file *h_file;
13869+ char __user *buf = (char __user *)ubuf;
1facf9fc 13870+
b912730e 13871+ inode = file_inode(file);
e49829fe 13872+ au_mtx_and_read_lock(inode);
1facf9fc 13873+
b912730e
AM
13874+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13875+ err = PTR_ERR(h_file);
13876+ if (IS_ERR(h_file))
9dbd164d 13877+ goto out;
9dbd164d 13878+
4a4d8108 13879+ err = vfsub_write_u(h_file, buf, count, ppos);
b912730e 13880+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13881+
4f0767ce 13882+out:
b912730e 13883+ si_read_unlock(inode->i_sb);
febd17d6 13884+ inode_unlock(inode);
dece6358
AM
13885+ return err;
13886+}
1facf9fc 13887+
076b876e
AM
13888+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
13889+ struct iov_iter *iov_iter)
dece6358 13890+{
4a4d8108
AM
13891+ ssize_t err;
13892+ struct file *file;
076b876e 13893+ ssize_t (*iter)(struct kiocb *, struct iov_iter *);
1facf9fc 13894+
4a4d8108
AM
13895+ err = security_file_permission(h_file, rw);
13896+ if (unlikely(err))
13897+ goto out;
1facf9fc 13898+
4a4d8108 13899+ err = -ENOSYS;
076b876e 13900+ iter = NULL;
5527c038 13901+ if (rw == MAY_READ)
076b876e 13902+ iter = h_file->f_op->read_iter;
5527c038 13903+ else if (rw == MAY_WRITE)
076b876e 13904+ iter = h_file->f_op->write_iter;
076b876e
AM
13905+
13906+ file = kio->ki_filp;
13907+ kio->ki_filp = h_file;
13908+ if (iter) {
2cbb1c4b 13909+ lockdep_off();
076b876e
AM
13910+ err = iter(kio, iov_iter);
13911+ lockdep_on();
4a4d8108
AM
13912+ } else
13913+ /* currently there is no such fs */
13914+ WARN_ON_ONCE(1);
076b876e 13915+ kio->ki_filp = file;
1facf9fc 13916+
4f0767ce 13917+out:
dece6358
AM
13918+ return err;
13919+}
1facf9fc 13920+
076b876e 13921+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1facf9fc 13922+{
4a4d8108
AM
13923+ ssize_t err;
13924+ struct file *file, *h_file;
b912730e 13925+ struct inode *inode;
dece6358 13926+ struct super_block *sb;
1facf9fc 13927+
4a4d8108 13928+ file = kio->ki_filp;
b912730e
AM
13929+ inode = file_inode(file);
13930+ sb = inode->i_sb;
e49829fe 13931+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 13932+
5afbbe0d 13933+ h_file = au_read_pre(file, /*keep_fi*/1);
b912730e
AM
13934+ err = PTR_ERR(h_file);
13935+ if (IS_ERR(h_file))
13936+ goto out;
9dbd164d 13937+
5afbbe0d
AM
13938+ if (au_test_loopback_kthread()) {
13939+ au_warn_loopback(h_file->f_path.dentry->d_sb);
13940+ if (file->f_mapping != h_file->f_mapping) {
13941+ file->f_mapping = h_file->f_mapping;
13942+ smp_mb(); /* unnecessary? */
13943+ }
13944+ }
13945+ fi_read_unlock(file);
13946+
076b876e 13947+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
4a4d8108
AM
13948+ /* todo: necessary? */
13949+ /* file->f_ra = h_file->f_ra; */
b912730e 13950+ au_read_post(inode, h_file);
1facf9fc 13951+
4f0767ce 13952+out:
4a4d8108 13953+ si_read_unlock(sb);
1308ab2a 13954+ return err;
13955+}
1facf9fc 13956+
076b876e 13957+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1308ab2a 13958+{
4a4d8108 13959+ ssize_t err;
b912730e
AM
13960+ struct au_write_pre wpre;
13961+ struct inode *inode;
4a4d8108 13962+ struct file *file, *h_file;
1308ab2a 13963+
4a4d8108 13964+ file = kio->ki_filp;
b912730e 13965+ inode = file_inode(file);
e49829fe
JR
13966+ au_mtx_and_read_lock(inode);
13967+
b912730e
AM
13968+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13969+ err = PTR_ERR(h_file);
13970+ if (IS_ERR(h_file))
9dbd164d 13971+ goto out;
9dbd164d 13972+
076b876e 13973+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
b912730e 13974+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13975+
4f0767ce 13976+out:
b912730e 13977+ si_read_unlock(inode->i_sb);
febd17d6 13978+ inode_unlock(inode);
dece6358 13979+ return err;
1facf9fc 13980+}
13981+
4a4d8108
AM
13982+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
13983+ struct pipe_inode_info *pipe, size_t len,
13984+ unsigned int flags)
1facf9fc 13985+{
4a4d8108
AM
13986+ ssize_t err;
13987+ struct file *h_file;
b912730e 13988+ struct inode *inode;
dece6358 13989+ struct super_block *sb;
1facf9fc 13990+
b912730e
AM
13991+ inode = file_inode(file);
13992+ sb = inode->i_sb;
e49829fe 13993+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e 13994+
5afbbe0d 13995+ h_file = au_read_pre(file, /*keep_fi*/0);
b912730e
AM
13996+ err = PTR_ERR(h_file);
13997+ if (IS_ERR(h_file))
dece6358 13998+ goto out;
1facf9fc 13999+
4a4d8108
AM
14000+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
14001+ /* todo: necessary? */
14002+ /* file->f_ra = h_file->f_ra; */
b912730e 14003+ au_read_post(inode, h_file);
1facf9fc 14004+
4f0767ce 14005+out:
4a4d8108 14006+ si_read_unlock(sb);
dece6358 14007+ return err;
1facf9fc 14008+}
14009+
4a4d8108
AM
14010+static ssize_t
14011+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
14012+ size_t len, unsigned int flags)
1facf9fc 14013+{
4a4d8108 14014+ ssize_t err;
b912730e
AM
14015+ struct au_write_pre wpre;
14016+ struct inode *inode;
076b876e 14017+ struct file *h_file;
1facf9fc 14018+
b912730e 14019+ inode = file_inode(file);
e49829fe 14020+ au_mtx_and_read_lock(inode);
9dbd164d 14021+
b912730e
AM
14022+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14023+ err = PTR_ERR(h_file);
14024+ if (IS_ERR(h_file))
9dbd164d 14025+ goto out;
9dbd164d 14026+
4a4d8108 14027+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
b912730e 14028+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 14029+
4f0767ce 14030+out:
b912730e 14031+ si_read_unlock(inode->i_sb);
febd17d6 14032+ inode_unlock(inode);
4a4d8108
AM
14033+ return err;
14034+}
1facf9fc 14035+
38d290e6
JR
14036+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
14037+ loff_t len)
14038+{
14039+ long err;
b912730e 14040+ struct au_write_pre wpre;
38d290e6
JR
14041+ struct inode *inode;
14042+ struct file *h_file;
14043+
b912730e 14044+ inode = file_inode(file);
38d290e6
JR
14045+ au_mtx_and_read_lock(inode);
14046+
b912730e
AM
14047+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14048+ err = PTR_ERR(h_file);
14049+ if (IS_ERR(h_file))
38d290e6 14050+ goto out;
38d290e6
JR
14051+
14052+ lockdep_off();
03673fb0 14053+ err = vfs_fallocate(h_file, mode, offset, len);
38d290e6 14054+ lockdep_on();
b912730e 14055+ au_write_post(inode, h_file, &wpre, /*written*/1);
38d290e6
JR
14056+
14057+out:
b912730e 14058+ si_read_unlock(inode->i_sb);
febd17d6 14059+ inode_unlock(inode);
38d290e6
JR
14060+ return err;
14061+}
14062+
4a4d8108
AM
14063+/* ---------------------------------------------------------------------- */
14064+
9dbd164d
AM
14065+/*
14066+ * The locking order around current->mmap_sem.
14067+ * - in most and regular cases
14068+ * file I/O syscall -- aufs_read() or something
14069+ * -- si_rwsem for read -- mmap_sem
14070+ * (Note that [fdi]i_rwsem are released before mmap_sem).
14071+ * - in mmap case
14072+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
14073+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
14074+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in
14075+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
14076+ * It means that when aufs acquires si_rwsem for write, the process should never
14077+ * acquire mmap_sem.
14078+ *
392086de 14079+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
9dbd164d
AM
14080+ * problem either since no directory can be mmap-ed.
14081+ * A similar scenario applies to aufs_readlink() too.
14082+ */
14083+
38d290e6 14084+#if 0 /* stop calling security_file_mmap() */
2dfbb274
AM
14085+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
14086+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
14087+
14088+static unsigned long au_arch_prot_conv(unsigned long flags)
14089+{
14090+ /* currently ppc64 only */
14091+#ifdef CONFIG_PPC64
14092+ /* cf. linux/arch/powerpc/include/asm/mman.h */
14093+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
14094+ return AuConv_VM_PROT(flags, SAO);
14095+#else
14096+ AuDebugOn(arch_calc_vm_prot_bits(-1));
14097+ return 0;
14098+#endif
14099+}
14100+
14101+static unsigned long au_prot_conv(unsigned long flags)
14102+{
14103+ return AuConv_VM_PROT(flags, READ)
14104+ | AuConv_VM_PROT(flags, WRITE)
14105+ | AuConv_VM_PROT(flags, EXEC)
14106+ | au_arch_prot_conv(flags);
14107+}
14108+
14109+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
14110+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
14111+
14112+static unsigned long au_flag_conv(unsigned long flags)
14113+{
14114+ return AuConv_VM_MAP(flags, GROWSDOWN)
14115+ | AuConv_VM_MAP(flags, DENYWRITE)
2dfbb274
AM
14116+ | AuConv_VM_MAP(flags, LOCKED);
14117+}
38d290e6 14118+#endif
2dfbb274 14119+
9dbd164d 14120+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 14121+{
4a4d8108 14122+ int err;
4a4d8108 14123+ const unsigned char wlock
9dbd164d 14124+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
4a4d8108 14125+ struct super_block *sb;
9dbd164d 14126+ struct file *h_file;
b912730e 14127+ struct inode *inode;
9dbd164d
AM
14128+
14129+ AuDbgVmRegion(file, vma);
1308ab2a 14130+
b912730e
AM
14131+ inode = file_inode(file);
14132+ sb = inode->i_sb;
9dbd164d 14133+ lockdep_off();
e49829fe 14134+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108 14135+
b912730e 14136+ h_file = au_write_pre(file, wlock, /*wpre*/NULL);
9dbd164d 14137+ lockdep_on();
b912730e
AM
14138+ err = PTR_ERR(h_file);
14139+ if (IS_ERR(h_file))
14140+ goto out;
1308ab2a 14141+
b912730e
AM
14142+ err = 0;
14143+ au_set_mmapped(file);
9dbd164d 14144+ au_vm_file_reset(vma, h_file);
38d290e6
JR
14145+ /*
14146+ * we cannot call security_mmap_file() here since it may acquire
14147+ * mmap_sem or i_mutex.
14148+ *
14149+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
14150+ * au_flag_conv(vma->vm_flags));
14151+ */
9dbd164d
AM
14152+ if (!err)
14153+ err = h_file->f_op->mmap(h_file, vma);
b912730e
AM
14154+ if (!err) {
14155+ au_vm_prfile_set(vma, file);
14156+ fsstack_copy_attr_atime(inode, file_inode(h_file));
14157+ goto out_fput; /* success */
14158+ }
2cbb1c4b
JR
14159+ au_unset_mmapped(file);
14160+ au_vm_file_reset(vma, file);
b912730e 14161+
2cbb1c4b 14162+out_fput:
9dbd164d 14163+ lockdep_off();
b912730e
AM
14164+ ii_write_unlock(inode);
14165+ lockdep_on();
14166+ fput(h_file);
4f0767ce 14167+out:
b912730e 14168+ lockdep_off();
9dbd164d
AM
14169+ si_read_unlock(sb);
14170+ lockdep_on();
14171+ AuTraceErr(err);
4a4d8108
AM
14172+ return err;
14173+}
14174+
14175+/* ---------------------------------------------------------------------- */
14176+
1e00d052
AM
14177+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
14178+ int datasync)
4a4d8108
AM
14179+{
14180+ int err;
b912730e 14181+ struct au_write_pre wpre;
4a4d8108
AM
14182+ struct inode *inode;
14183+ struct file *h_file;
4a4d8108
AM
14184+
14185+ err = 0; /* -EBADF; */ /* posix? */
14186+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
b912730e 14187+ goto out;
4a4d8108 14188+
b912730e
AM
14189+ inode = file_inode(file);
14190+ au_mtx_and_read_lock(inode);
14191+
14192+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14193+ err = PTR_ERR(h_file);
14194+ if (IS_ERR(h_file))
4a4d8108 14195+ goto out_unlock;
4a4d8108 14196+
53392da6 14197+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
b912730e 14198+ au_write_post(inode, h_file, &wpre, /*written*/0);
4a4d8108 14199+
4f0767ce 14200+out_unlock:
b912730e 14201+ si_read_unlock(inode->i_sb);
febd17d6 14202+ inode_unlock(inode);
b912730e 14203+out:
4a4d8108 14204+ return err;
dece6358
AM
14205+}
14206+
4a4d8108 14207+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 14208+{
4a4d8108
AM
14209+ int err;
14210+ struct file *h_file;
4a4d8108 14211+ struct super_block *sb;
1308ab2a 14212+
b912730e 14213+ sb = file->f_path.dentry->d_sb;
e49829fe 14214+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
14215+
14216+ h_file = au_read_pre(file, /*keep_fi*/0);
14217+ err = PTR_ERR(h_file);
14218+ if (IS_ERR(h_file))
4a4d8108
AM
14219+ goto out;
14220+
523b37e3 14221+ if (h_file->f_op->fasync)
4a4d8108 14222+ err = h_file->f_op->fasync(fd, h_file, flag);
b912730e 14223+ fput(h_file); /* instead of au_read_post() */
1308ab2a 14224+
4f0767ce 14225+out:
4a4d8108 14226+ si_read_unlock(sb);
1308ab2a 14227+ return err;
dece6358 14228+}
4a4d8108 14229+
febd17d6
JR
14230+static int aufs_setfl(struct file *file, unsigned long arg)
14231+{
14232+ int err;
14233+ struct file *h_file;
14234+ struct super_block *sb;
14235+
14236+ sb = file->f_path.dentry->d_sb;
14237+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
14238+
14239+ h_file = au_read_pre(file, /*keep_fi*/0);
14240+ err = PTR_ERR(h_file);
14241+ if (IS_ERR(h_file))
14242+ goto out;
14243+
14244+ arg |= vfsub_file_flags(file) & FASYNC; /* stop calling h_file->fasync */
14245+ err = setfl(/*unused fd*/-1, h_file, arg);
14246+ fput(h_file); /* instead of au_read_post() */
14247+
14248+out:
14249+ si_read_unlock(sb);
14250+ return err;
14251+}
14252+
4a4d8108
AM
14253+/* ---------------------------------------------------------------------- */
14254+
14255+/* no one supports this operation, currently */
14256+#if 0
14257+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
2000de60 14258+ size_t len, loff_t *pos, int more)
4a4d8108
AM
14259+{
14260+}
14261+#endif
14262+
14263+/* ---------------------------------------------------------------------- */
14264+
14265+const struct file_operations aufs_file_fop = {
14266+ .owner = THIS_MODULE,
2cbb1c4b 14267+
027c5e7a 14268+ .llseek = default_llseek,
4a4d8108
AM
14269+
14270+ .read = aufs_read,
14271+ .write = aufs_write,
076b876e
AM
14272+ .read_iter = aufs_read_iter,
14273+ .write_iter = aufs_write_iter,
14274+
4a4d8108
AM
14275+#ifdef CONFIG_AUFS_POLL
14276+ .poll = aufs_poll,
14277+#endif
14278+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1 14279+#ifdef CONFIG_COMPAT
c2b27bf2 14280+ .compat_ioctl = aufs_compat_ioctl_nondir,
b752ccd1 14281+#endif
4a4d8108
AM
14282+ .mmap = aufs_mmap,
14283+ .open = aufs_open_nondir,
14284+ .flush = aufs_flush_nondir,
14285+ .release = aufs_release_nondir,
14286+ .fsync = aufs_fsync_nondir,
4a4d8108
AM
14287+ .fasync = aufs_fasync,
14288+ /* .sendpage = aufs_sendpage, */
febd17d6 14289+ .setfl = aufs_setfl,
4a4d8108
AM
14290+ .splice_write = aufs_splice_write,
14291+ .splice_read = aufs_splice_read,
14292+#if 0
14293+ .aio_splice_write = aufs_aio_splice_write,
38d290e6 14294+ .aio_splice_read = aufs_aio_splice_read,
4a4d8108 14295+#endif
38d290e6 14296+ .fallocate = aufs_fallocate
4a4d8108 14297+};
7f207e10
AM
14298diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
14299--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 14300+++ linux/fs/aufs/fstype.h 2016-10-09 16:55:36.492701639 +0200
b912730e 14301@@ -0,0 +1,400 @@
4a4d8108 14302+/*
8cdd5066 14303+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
14304+ *
14305+ * This program, aufs is free software; you can redistribute it and/or modify
14306+ * it under the terms of the GNU General Public License as published by
14307+ * the Free Software Foundation; either version 2 of the License, or
14308+ * (at your option) any later version.
14309+ *
14310+ * This program is distributed in the hope that it will be useful,
14311+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14312+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14313+ * GNU General Public License for more details.
14314+ *
14315+ * You should have received a copy of the GNU General Public License
523b37e3 14316+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
14317+ */
14318+
14319+/*
14320+ * judging filesystem type
14321+ */
14322+
14323+#ifndef __AUFS_FSTYPE_H__
14324+#define __AUFS_FSTYPE_H__
14325+
14326+#ifdef __KERNEL__
14327+
14328+#include <linux/fs.h>
14329+#include <linux/magic.h>
b912730e 14330+#include <linux/nfs_fs.h>
b95c5147 14331+#include <linux/romfs_fs.h>
4a4d8108
AM
14332+
14333+static inline int au_test_aufs(struct super_block *sb)
14334+{
14335+ return sb->s_magic == AUFS_SUPER_MAGIC;
14336+}
14337+
14338+static inline const char *au_sbtype(struct super_block *sb)
14339+{
14340+ return sb->s_type->name;
14341+}
1308ab2a 14342+
14343+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
14344+{
f0c0a007 14345+#if IS_ENABLED(CONFIG_ISO9660_FS)
2000de60 14346+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
14347+#else
14348+ return 0;
14349+#endif
14350+}
14351+
1308ab2a 14352+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 14353+{
f0c0a007 14354+#if IS_ENABLED(CONFIG_ROMFS_FS)
2000de60 14355+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
14356+#else
14357+ return 0;
14358+#endif
14359+}
14360+
1308ab2a 14361+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 14362+{
f0c0a007 14363+#if IS_ENABLED(CONFIG_CRAMFS)
1308ab2a 14364+ return sb->s_magic == CRAMFS_MAGIC;
14365+#endif
14366+ return 0;
14367+}
14368+
14369+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
14370+{
f0c0a007 14371+#if IS_ENABLED(CONFIG_NFS_FS)
1308ab2a 14372+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
14373+#else
14374+ return 0;
14375+#endif
14376+}
14377+
1308ab2a 14378+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 14379+{
f0c0a007 14380+#if IS_ENABLED(CONFIG_FUSE_FS)
1308ab2a 14381+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
14382+#else
14383+ return 0;
14384+#endif
14385+}
14386+
1308ab2a 14387+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 14388+{
f0c0a007 14389+#if IS_ENABLED(CONFIG_XFS_FS)
1308ab2a 14390+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
14391+#else
14392+ return 0;
14393+#endif
14394+}
14395+
1308ab2a 14396+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 14397+{
1308ab2a 14398+#ifdef CONFIG_TMPFS
14399+ return sb->s_magic == TMPFS_MAGIC;
14400+#else
14401+ return 0;
dece6358 14402+#endif
dece6358
AM
14403+}
14404+
1308ab2a 14405+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 14406+{
f0c0a007 14407+#if IS_ENABLED(CONFIG_ECRYPT_FS)
1308ab2a 14408+ return !strcmp(au_sbtype(sb), "ecryptfs");
14409+#else
14410+ return 0;
14411+#endif
1facf9fc 14412+}
14413+
1308ab2a 14414+static inline int au_test_ramfs(struct super_block *sb)
14415+{
14416+ return sb->s_magic == RAMFS_MAGIC;
14417+}
14418+
14419+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
14420+{
f0c0a007 14421+#if IS_ENABLED(CONFIG_UBIFS_FS)
1308ab2a 14422+ return sb->s_magic == UBIFS_SUPER_MAGIC;
14423+#else
14424+ return 0;
14425+#endif
14426+}
14427+
14428+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
14429+{
14430+#ifdef CONFIG_PROC_FS
14431+ return sb->s_magic == PROC_SUPER_MAGIC;
14432+#else
14433+ return 0;
14434+#endif
14435+}
14436+
14437+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
14438+{
14439+#ifdef CONFIG_SYSFS
14440+ return sb->s_magic == SYSFS_MAGIC;
14441+#else
14442+ return 0;
14443+#endif
14444+}
14445+
14446+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
14447+{
f0c0a007 14448+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
1308ab2a 14449+ return sb->s_magic == CONFIGFS_MAGIC;
14450+#else
14451+ return 0;
14452+#endif
14453+}
14454+
14455+static inline int au_test_minix(struct super_block *sb __maybe_unused)
14456+{
f0c0a007 14457+#if IS_ENABLED(CONFIG_MINIX_FS)
1308ab2a 14458+ return sb->s_magic == MINIX3_SUPER_MAGIC
14459+ || sb->s_magic == MINIX2_SUPER_MAGIC
14460+ || sb->s_magic == MINIX2_SUPER_MAGIC2
14461+ || sb->s_magic == MINIX_SUPER_MAGIC
14462+ || sb->s_magic == MINIX_SUPER_MAGIC2;
14463+#else
14464+ return 0;
14465+#endif
14466+}
14467+
1308ab2a 14468+static inline int au_test_fat(struct super_block *sb __maybe_unused)
14469+{
f0c0a007 14470+#if IS_ENABLED(CONFIG_FAT_FS)
1308ab2a 14471+ return sb->s_magic == MSDOS_SUPER_MAGIC;
14472+#else
14473+ return 0;
14474+#endif
14475+}
14476+
14477+static inline int au_test_msdos(struct super_block *sb)
14478+{
14479+ return au_test_fat(sb);
14480+}
14481+
14482+static inline int au_test_vfat(struct super_block *sb)
14483+{
14484+ return au_test_fat(sb);
14485+}
14486+
14487+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
14488+{
14489+#ifdef CONFIG_SECURITYFS
14490+ return sb->s_magic == SECURITYFS_MAGIC;
14491+#else
14492+ return 0;
14493+#endif
14494+}
14495+
14496+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
14497+{
f0c0a007 14498+#if IS_ENABLED(CONFIG_SQUASHFS)
1308ab2a 14499+ return sb->s_magic == SQUASHFS_MAGIC;
14500+#else
14501+ return 0;
14502+#endif
14503+}
14504+
14505+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
14506+{
f0c0a007 14507+#if IS_ENABLED(CONFIG_BTRFS_FS)
1308ab2a 14508+ return sb->s_magic == BTRFS_SUPER_MAGIC;
14509+#else
14510+ return 0;
14511+#endif
14512+}
14513+
14514+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
14515+{
f0c0a007 14516+#if IS_ENABLED(CONFIG_XENFS)
1308ab2a 14517+ return sb->s_magic == XENFS_SUPER_MAGIC;
14518+#else
14519+ return 0;
14520+#endif
14521+}
14522+
14523+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
14524+{
14525+#ifdef CONFIG_DEBUG_FS
14526+ return sb->s_magic == DEBUGFS_MAGIC;
14527+#else
14528+ return 0;
14529+#endif
14530+}
14531+
14532+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
14533+{
f0c0a007 14534+#if IS_ENABLED(CONFIG_NILFS2_FS) /* the Kconfig symbol of nilfs2 is NILFS2_FS */
1308ab2a 14535+ return sb->s_magic == NILFS_SUPER_MAGIC;
14536+#else
14537+ return 0;
14538+#endif
14539+}
14540+
4a4d8108
AM
14541+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
14542+{
f0c0a007 14543+#if IS_ENABLED(CONFIG_HFSPLUS_FS)
4a4d8108
AM
14544+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
14545+#else
14546+ return 0;
14547+#endif
14548+}
14549+
1308ab2a 14550+/* ---------------------------------------------------------------------- */
14551+/*
14552+ * they can't be an aufs branch.
14553+ */
14554+static inline int au_test_fs_unsuppoted(struct super_block *sb)
14555+{
14556+ return
14557+#ifndef CONFIG_AUFS_BR_RAMFS
14558+ au_test_ramfs(sb) ||
14559+#endif
14560+ au_test_procfs(sb)
14561+ || au_test_sysfs(sb)
14562+ || au_test_configfs(sb)
14563+ || au_test_debugfs(sb)
14564+ || au_test_securityfs(sb)
14565+ || au_test_xenfs(sb)
14566+ || au_test_ecryptfs(sb)
14567+ /* || !strcmp(au_sbtype(sb), "unionfs") */
14568+ || au_test_aufs(sb); /* will be supported in next version */
14569+}
14570+
1308ab2a 14571+static inline int au_test_fs_remote(struct super_block *sb)
14572+{
14573+ return !au_test_tmpfs(sb)
14574+#ifdef CONFIG_AUFS_BR_RAMFS
14575+ && !au_test_ramfs(sb)
14576+#endif
14577+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
14578+}
14579+
14580+/* ---------------------------------------------------------------------- */
14581+
14582+/*
14583+ * Note: these functions (below) are created after reading ->getattr() in all
14584+ * filesystems under linux/fs. it means we have to do so in every update...
14585+ */
14586+
14587+/*
14588+ * some filesystems require getattr to refresh the inode attributes before
14589+ * referencing.
14590+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
14591+ * and leave the work for d_revalidate()
14592+ */
14593+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
14594+{
14595+ return au_test_nfs(sb)
14596+ || au_test_fuse(sb)
1308ab2a 14597+ /* || au_test_btrfs(sb) */ /* untested */
1308ab2a 14598+ ;
14599+}
14600+
14601+/*
14602+ * filesystems which don't maintain i_size or i_blocks.
14603+ */
14604+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
14605+{
14606+ return au_test_xfs(sb)
4a4d8108
AM
14607+ || au_test_btrfs(sb)
14608+ || au_test_ubifs(sb)
14609+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 14610+ /* || au_test_minix(sb) */ /* untested */
14611+ ;
14612+}
14613+
14614+/*
14615+ * filesystems which don't store the correct value in some of their inode
14616+ * attributes.
14617+ */
14618+static inline int au_test_fs_bad_iattr(struct super_block *sb)
14619+{
14620+ return au_test_fs_bad_iattr_size(sb)
1308ab2a 14621+ || au_test_fat(sb)
14622+ || au_test_msdos(sb)
14623+ || au_test_vfat(sb);
1facf9fc 14624+}
14625+
14626+/* they don't check i_nlink in link(2) */
14627+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
14628+{
14629+ return au_test_tmpfs(sb)
14630+#ifdef CONFIG_AUFS_BR_RAMFS
14631+ || au_test_ramfs(sb)
14632+#endif
4a4d8108 14633+ || au_test_ubifs(sb)
4a4d8108 14634+ || au_test_hfsplus(sb);
1facf9fc 14635+}
14636+
14637+/*
14638+ * filesystems which set S_NOATIME and S_NOCMTIME.
14639+ */
14640+static inline int au_test_fs_notime(struct super_block *sb)
14641+{
14642+ return au_test_nfs(sb)
14643+ || au_test_fuse(sb)
dece6358 14644+ || au_test_ubifs(sb)
1facf9fc 14645+ ;
14646+}
14647+
1facf9fc 14648+/* temporary support for i#1 in cramfs */
14649+static inline int au_test_fs_unique_ino(struct inode *inode)
14650+{
14651+ if (au_test_cramfs(inode->i_sb))
14652+ return inode->i_ino != 1;
14653+ return 1;
14654+}
14655+
14656+/* ---------------------------------------------------------------------- */
14657+
14658+/*
14659+ * the filesystem where the xino files placed must support i/o after unlink and
14660+ * maintain i_size and i_blocks.
14661+ */
14662+static inline int au_test_fs_bad_xino(struct super_block *sb)
14663+{
14664+ return au_test_fs_remote(sb)
14665+ || au_test_fs_bad_iattr_size(sb)
1facf9fc 14666+ /* don't want unnecessary work for xino */
14667+ || au_test_aufs(sb)
1308ab2a 14668+ || au_test_ecryptfs(sb)
14669+ || au_test_nilfs(sb);
1facf9fc 14670+}
14671+
14672+static inline int au_test_fs_trunc_xino(struct super_block *sb)
14673+{
14674+ return au_test_tmpfs(sb)
14675+ || au_test_ramfs(sb);
14676+}
14677+
14678+/*
14679+ * test if the @sb is real-readonly.
14680+ */
14681+static inline int au_test_fs_rr(struct super_block *sb)
14682+{
14683+ return au_test_squashfs(sb)
14684+ || au_test_iso9660(sb)
14685+ || au_test_cramfs(sb)
14686+ || au_test_romfs(sb);
14687+}
14688+
b912730e
AM
14689+/*
14690+ * test if the @inode is nfs with 'noacl' option
14691+ * NFS always sets MS_POSIXACL regardless of its mount option 'noacl'.
14692+ */
14693+static inline int au_test_nfs_noacl(struct inode *inode)
14694+{
14695+ return au_test_nfs(inode->i_sb)
14696+ /* && IS_POSIXACL(inode) */
14697+ && !nfs_server_capable(inode, NFS_CAP_ACLS);
14698+}
14699+
1facf9fc 14700+#endif /* __KERNEL__ */
14701+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
14702diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
14703--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 14704+++ linux/fs/aufs/hfsnotify.c 2016-10-09 16:55:36.492701639 +0200
5afbbe0d 14705@@ -0,0 +1,287 @@
1facf9fc 14706+/*
8cdd5066 14707+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 14708+ *
14709+ * This program, aufs is free software; you can redistribute it and/or modify
14710+ * it under the terms of the GNU General Public License as published by
14711+ * the Free Software Foundation; either version 2 of the License, or
14712+ * (at your option) any later version.
dece6358
AM
14713+ *
14714+ * This program is distributed in the hope that it will be useful,
14715+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14716+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14717+ * GNU General Public License for more details.
14718+ *
14719+ * You should have received a copy of the GNU General Public License
523b37e3 14720+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 14721+ */
14722+
14723+/*
4a4d8108 14724+ * fsnotify for the lower directories
1facf9fc 14725+ */
14726+
14727+#include "aufs.h"
14728+
4a4d8108
AM
14729+/* FS_IN_IGNORED is unnecessary */
14730+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
14731+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 14732+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
7eafdf33 14733+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
1facf9fc 14734+
0c5527e5 14735+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 14736+{
0c5527e5
AM
14737+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
14738+ hn_mark);
5afbbe0d 14739+ /* AuDbg("here\n"); */
f0c0a007 14740+ au_cache_dfree_hnotify(hn);
076b876e 14741+ smp_mb__before_atomic();
1716fcea
AM
14742+ if (atomic64_dec_and_test(&au_hfsn_ifree))
14743+ wake_up(&au_hfsn_wq);
4a4d8108 14744+}
1facf9fc 14745+
027c5e7a 14746+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 14747+{
1716fcea 14748+ int err;
027c5e7a
AM
14749+ struct au_hnotify *hn;
14750+ struct super_block *sb;
14751+ struct au_branch *br;
0c5527e5 14752+ struct fsnotify_mark *mark;
027c5e7a 14753+ aufs_bindex_t bindex;
1facf9fc 14754+
027c5e7a
AM
14755+ hn = hinode->hi_notify;
14756+ sb = hn->hn_aufs_inode->i_sb;
14757+ bindex = au_br_index(sb, hinode->hi_id);
14758+ br = au_sbr(sb, bindex);
1716fcea
AM
14759+ AuDebugOn(!br->br_hfsn);
14760+
0c5527e5
AM
14761+ mark = &hn->hn_mark;
14762+ fsnotify_init_mark(mark, au_hfsn_free_mark);
14763+ mark->mask = AuHfsnMask;
7f207e10
AM
14764+ /*
14765+ * by udba rename or rmdir, aufs assign a new inode to the known
14766+ * h_inode, so specify 1 to allow dups.
14767+ */
c1595e42 14768+ lockdep_off();
1716fcea 14769+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
027c5e7a 14770+ /*mnt*/NULL, /*allow_dups*/1);
c1595e42 14771+ lockdep_on();
1716fcea
AM
14772+
14773+ return err;
1facf9fc 14774+}
14775+
7eafdf33 14776+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
1facf9fc 14777+{
0c5527e5 14778+ struct fsnotify_mark *mark;
7eafdf33 14779+ unsigned long long ull;
1716fcea 14780+ struct fsnotify_group *group;
7eafdf33
AM
14781+
14782+ ull = atomic64_inc_return(&au_hfsn_ifree);
14783+ BUG_ON(!ull);
953406b4 14784+
0c5527e5 14785+ mark = &hn->hn_mark;
1716fcea
AM
14786+ spin_lock(&mark->lock);
14787+ group = mark->group;
14788+ fsnotify_get_group(group);
14789+ spin_unlock(&mark->lock);
c1595e42 14790+ lockdep_off();
1716fcea 14791+ fsnotify_destroy_mark(mark, group);
5afbbe0d 14792+ fsnotify_put_mark(mark);
1716fcea 14793+ fsnotify_put_group(group);
c1595e42 14794+ lockdep_on();
7f207e10 14795+
7eafdf33
AM
14796+ /* free hn by myself */
14797+ return 0;
1facf9fc 14798+}
14799+
14800+/* ---------------------------------------------------------------------- */
14801+
4a4d8108 14802+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 14803+{
0c5527e5 14804+ struct fsnotify_mark *mark;
1facf9fc 14805+
0c5527e5
AM
14806+ mark = &hinode->hi_notify->hn_mark;
14807+ spin_lock(&mark->lock);
1facf9fc 14808+ if (do_set) {
0c5527e5
AM
14809+ AuDebugOn(mark->mask & AuHfsnMask);
14810+ mark->mask |= AuHfsnMask;
1facf9fc 14811+ } else {
0c5527e5
AM
14812+ AuDebugOn(!(mark->mask & AuHfsnMask));
14813+ mark->mask &= ~AuHfsnMask;
1facf9fc 14814+ }
0c5527e5 14815+ spin_unlock(&mark->lock);
4a4d8108 14816+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 14817+}
14818+
4a4d8108 14819+/* ---------------------------------------------------------------------- */
1facf9fc 14820+
4a4d8108
AM
14821+/* #define AuDbgHnotify */
14822+#ifdef AuDbgHnotify
14823+static char *au_hfsn_name(u32 mask)
14824+{
14825+#ifdef CONFIG_AUFS_DEBUG
c06a8ce3
AM
14826+#define test_ret(flag) \
14827+ do { \
14828+ if (mask & flag) \
14829+ return #flag; \
14830+ } while (0)
4a4d8108
AM
14831+ test_ret(FS_ACCESS);
14832+ test_ret(FS_MODIFY);
14833+ test_ret(FS_ATTRIB);
14834+ test_ret(FS_CLOSE_WRITE);
14835+ test_ret(FS_CLOSE_NOWRITE);
14836+ test_ret(FS_OPEN);
14837+ test_ret(FS_MOVED_FROM);
14838+ test_ret(FS_MOVED_TO);
14839+ test_ret(FS_CREATE);
14840+ test_ret(FS_DELETE);
14841+ test_ret(FS_DELETE_SELF);
14842+ test_ret(FS_MOVE_SELF);
14843+ test_ret(FS_UNMOUNT);
14844+ test_ret(FS_Q_OVERFLOW);
14845+ test_ret(FS_IN_IGNORED);
b912730e 14846+ test_ret(FS_ISDIR);
4a4d8108
AM
14847+ test_ret(FS_IN_ONESHOT);
14848+ test_ret(FS_EVENT_ON_CHILD);
14849+ return "";
14850+#undef test_ret
14851+#else
14852+ return "??";
14853+#endif
1facf9fc 14854+}
4a4d8108 14855+#endif
1facf9fc 14856+
14857+/* ---------------------------------------------------------------------- */
14858+
1716fcea
AM
14859+static void au_hfsn_free_group(struct fsnotify_group *group)
14860+{
14861+ struct au_br_hfsnotify *hfsn = group->private;
14862+
5afbbe0d 14863+ /* AuDbg("here\n"); */
f0c0a007 14864+ au_delayed_kfree(hfsn);
1716fcea
AM
14865+}
14866+
4a4d8108 14867+static int au_hfsn_handle_event(struct fsnotify_group *group,
fb47a38f 14868+ struct inode *inode,
0c5527e5
AM
14869+ struct fsnotify_mark *inode_mark,
14870+ struct fsnotify_mark *vfsmount_mark,
fb47a38f
JR
14871+ u32 mask, void *data, int data_type,
14872+ const unsigned char *file_name, u32 cookie)
1facf9fc 14873+{
14874+ int err;
4a4d8108
AM
14875+ struct au_hnotify *hnotify;
14876+ struct inode *h_dir, *h_inode;
fb47a38f 14877+ struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
4a4d8108 14878+
fb47a38f 14879+ AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 14880+
14881+ err = 0;
0c5527e5 14882+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108 14883+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 14884+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 14885+ goto out;
1facf9fc 14886+
fb47a38f
JR
14887+ h_dir = inode;
14888+ h_inode = NULL;
4a4d8108 14889+#ifdef AuDbgHnotify
392086de 14890+ au_debug_on();
4a4d8108
AM
14891+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
14892+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
14893+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
14894+ h_dir->i_ino, mask, au_hfsn_name(mask),
14895+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
14896+ /* WARN_ON(1); */
1facf9fc 14897+ }
392086de 14898+ au_debug_off();
1facf9fc 14899+#endif
4a4d8108 14900+
0c5527e5
AM
14901+ AuDebugOn(!inode_mark);
14902+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
14903+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 14904+
4a4d8108
AM
14905+out:
14906+ return err;
14907+}
1facf9fc 14908+
4a4d8108 14909+static struct fsnotify_ops au_hfsn_ops = {
1716fcea
AM
14910+ .handle_event = au_hfsn_handle_event,
14911+ .free_group_priv = au_hfsn_free_group
4a4d8108
AM
14912+};
14913+
14914+/* ---------------------------------------------------------------------- */
14915+
027c5e7a
AM
14916+static void au_hfsn_fin_br(struct au_branch *br)
14917+{
1716fcea 14918+ struct au_br_hfsnotify *hfsn;
027c5e7a 14919+
1716fcea 14920+ hfsn = br->br_hfsn;
c1595e42
JR
14921+ if (hfsn) {
14922+ lockdep_off();
1716fcea 14923+ fsnotify_put_group(hfsn->hfsn_group);
c1595e42
JR
14924+ lockdep_on();
14925+ }
027c5e7a
AM
14926+}
14927+
1716fcea 14928+static int au_hfsn_init_br(struct au_branch *br, int perm)
4a4d8108
AM
14929+{
14930+ int err;
1716fcea
AM
14931+ struct fsnotify_group *group;
14932+ struct au_br_hfsnotify *hfsn;
1facf9fc 14933+
4a4d8108 14934+ err = 0;
1716fcea
AM
14935+ br->br_hfsn = NULL;
14936+ if (!au_br_hnotifyable(perm))
027c5e7a 14937+ goto out;
027c5e7a 14938+
1716fcea
AM
14939+ err = -ENOMEM;
14940+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
14941+ if (unlikely(!hfsn))
027c5e7a
AM
14942+ goto out;
14943+
1716fcea
AM
14944+ err = 0;
14945+ group = fsnotify_alloc_group(&au_hfsn_ops);
14946+ if (IS_ERR(group)) {
14947+ err = PTR_ERR(group);
0c5527e5 14948+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
1716fcea 14949+ goto out_hfsn;
4a4d8108 14950+ }
1facf9fc 14951+
1716fcea
AM
14952+ group->private = hfsn;
14953+ hfsn->hfsn_group = group;
14954+ br->br_hfsn = hfsn;
14955+ goto out; /* success */
14956+
14957+out_hfsn:
f0c0a007 14958+ au_delayed_kfree(hfsn);
027c5e7a 14959+out:
1716fcea
AM
14960+ return err;
14961+}
14962+
14963+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
14964+{
14965+ int err;
14966+
14967+ err = 0;
14968+ if (!br->br_hfsn)
14969+ err = au_hfsn_init_br(br, perm);
14970+
1facf9fc 14971+ return err;
14972+}
14973+
7eafdf33
AM
14974+/* ---------------------------------------------------------------------- */
14975+
14976+static void au_hfsn_fin(void)
14977+{
14978+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
14979+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
14980+}
14981+
4a4d8108
AM
14982+const struct au_hnotify_op au_hnotify_op = {
14983+ .ctl = au_hfsn_ctl,
14984+ .alloc = au_hfsn_alloc,
14985+ .free = au_hfsn_free,
1facf9fc 14986+
7eafdf33
AM
14987+ .fin = au_hfsn_fin,
14988+
027c5e7a
AM
14989+ .reset_br = au_hfsn_reset_br,
14990+ .fin_br = au_hfsn_fin_br,
14991+ .init_br = au_hfsn_init_br
4a4d8108 14992+};
7f207e10
AM
14993diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
14994--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 14995+++ linux/fs/aufs/hfsplus.c 2016-10-09 16:55:36.492701639 +0200
523b37e3 14996@@ -0,0 +1,56 @@
4a4d8108 14997+/*
8cdd5066 14998+ * Copyright (C) 2010-2016 Junjiro R. Okajima
4a4d8108
AM
14999+ *
15000+ * This program, aufs is free software; you can redistribute it and/or modify
15001+ * it under the terms of the GNU General Public License as published by
15002+ * the Free Software Foundation; either version 2 of the License, or
15003+ * (at your option) any later version.
15004+ *
15005+ * This program is distributed in the hope that it will be useful,
15006+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15007+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15008+ * GNU General Public License for more details.
15009+ *
15010+ * You should have received a copy of the GNU General Public License
523b37e3 15011+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 15012+ */
1facf9fc 15013+
4a4d8108
AM
15014+/*
15015+ * special support for filesystems which acquire an inode mutex
15016+ * at the final close of a file, e.g. hfsplus.
15017+ *
15018+ * This trick is very simple and stupid: just open the file before the really
15019+ * necessary open, to tell hfsplus that this is not the final closing.
15020+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
15021+ * and au_h_open_post() after releasing it.
15022+ */
1facf9fc 15023+
4a4d8108 15024+#include "aufs.h"
1facf9fc 15025+
392086de
AM
15026+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
15027+ int force_wr)
4a4d8108
AM
15028+{
15029+ struct file *h_file;
15030+ struct dentry *h_dentry;
1facf9fc 15031+
4a4d8108
AM
15032+ h_dentry = au_h_dptr(dentry, bindex);
15033+ AuDebugOn(!h_dentry);
5527c038 15034+ AuDebugOn(d_is_negative(h_dentry));
4a4d8108
AM
15035+
15036+ h_file = NULL;
15037+ if (au_test_hfsplus(h_dentry->d_sb)
7e9cd9fe 15038+ && d_is_reg(h_dentry))
4a4d8108
AM
15039+ h_file = au_h_open(dentry, bindex,
15040+ O_RDONLY | O_NOATIME | O_LARGEFILE,
392086de 15041+ /*file*/NULL, force_wr);
4a4d8108 15042+ return h_file;
1facf9fc 15043+}
15044+
4a4d8108
AM
15045+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
15046+ struct file *h_file)
15047+{
15048+ if (h_file) {
15049+ fput(h_file);
15050+ au_sbr_put(dentry->d_sb, bindex);
15051+ }
15052+}
7f207e10
AM
15053diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
15054--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 15055+++ linux/fs/aufs/hnotify.c 2016-10-09 16:55:36.492701639 +0200
f0c0a007 15056@@ -0,0 +1,723 @@
e49829fe 15057+/*
8cdd5066 15058+ * Copyright (C) 2005-2016 Junjiro R. Okajima
e49829fe
JR
15059+ *
15060+ * This program, aufs is free software; you can redistribute it and/or modify
15061+ * it under the terms of the GNU General Public License as published by
15062+ * the Free Software Foundation; either version 2 of the License, or
15063+ * (at your option) any later version.
15064+ *
15065+ * This program is distributed in the hope that it will be useful,
15066+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15067+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15068+ * GNU General Public License for more details.
15069+ *
15070+ * You should have received a copy of the GNU General Public License
523b37e3 15071+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
15072+ */
15073+
15074+/*
7f207e10 15075+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
15076+ */
15077+
15078+#include "aufs.h"
15079+
027c5e7a 15080+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
15081+{
15082+ int err;
7f207e10 15083+ struct au_hnotify *hn;
1facf9fc 15084+
4a4d8108
AM
15085+ err = -ENOMEM;
15086+ hn = au_cache_alloc_hnotify();
15087+ if (hn) {
15088+ hn->hn_aufs_inode = inode;
027c5e7a
AM
15089+ hinode->hi_notify = hn;
15090+ err = au_hnotify_op.alloc(hinode);
15091+ AuTraceErr(err);
15092+ if (unlikely(err)) {
15093+ hinode->hi_notify = NULL;
f0c0a007 15094+ au_cache_dfree_hnotify(hn);
4a4d8108
AM
15095+ /*
15096+ * The upper dir was removed by udba, but a dir with the
15097+ * same name is left. In this case, aufs assigns a new inode
15098+ * number and sets the monitor again.
15099+ * For the lower dir, the old monitor is still left.
15100+ */
15101+ if (err == -EEXIST)
15102+ err = 0;
15103+ }
1308ab2a 15104+ }
1308ab2a 15105+
027c5e7a 15106+ AuTraceErr(err);
1308ab2a 15107+ return err;
dece6358 15108+}
1facf9fc 15109+
4a4d8108 15110+void au_hn_free(struct au_hinode *hinode)
dece6358 15111+{
4a4d8108 15112+ struct au_hnotify *hn;
1facf9fc 15113+
4a4d8108
AM
15114+ hn = hinode->hi_notify;
15115+ if (hn) {
4a4d8108 15116+ hinode->hi_notify = NULL;
7eafdf33 15117+ if (au_hnotify_op.free(hinode, hn))
f0c0a007 15118+ au_cache_dfree_hnotify(hn);
4a4d8108
AM
15119+ }
15120+}
dece6358 15121+
4a4d8108 15122+/* ---------------------------------------------------------------------- */
dece6358 15123+
4a4d8108
AM
15124+void au_hn_ctl(struct au_hinode *hinode, int do_set)
15125+{
15126+ if (hinode->hi_notify)
15127+ au_hnotify_op.ctl(hinode, do_set);
15128+}
15129+
15130+void au_hn_reset(struct inode *inode, unsigned int flags)
15131+{
5afbbe0d 15132+ aufs_bindex_t bindex, bbot;
4a4d8108
AM
15133+ struct inode *hi;
15134+ struct dentry *iwhdentry;
1facf9fc 15135+
5afbbe0d
AM
15136+ bbot = au_ibbot(inode);
15137+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
4a4d8108
AM
15138+ hi = au_h_iptr(inode, bindex);
15139+ if (!hi)
15140+ continue;
1308ab2a 15141+
febd17d6 15142+ /* inode_lock_nested(hi, AuLsc_I_CHILD); */
4a4d8108
AM
15143+ iwhdentry = au_hi_wh(inode, bindex);
15144+ if (iwhdentry)
15145+ dget(iwhdentry);
15146+ au_igrab(hi);
15147+ au_set_h_iptr(inode, bindex, NULL, 0);
15148+ au_set_h_iptr(inode, bindex, au_igrab(hi),
15149+ flags & ~AuHi_XINO);
15150+ iput(hi);
15151+ dput(iwhdentry);
febd17d6 15152+ /* inode_unlock(hi); */
1facf9fc 15153+ }
1facf9fc 15154+}
15155+
1308ab2a 15156+/* ---------------------------------------------------------------------- */
1facf9fc 15157+
4a4d8108 15158+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 15159+{
4a4d8108 15160+ int err;
5afbbe0d 15161+ aufs_bindex_t bindex, bbot, bfound, btop;
4a4d8108 15162+ struct inode *h_i;
1facf9fc 15163+
4a4d8108
AM
15164+ err = 0;
15165+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15166+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15167+ goto out;
15168+ }
1facf9fc 15169+
4a4d8108 15170+ bfound = -1;
5afbbe0d
AM
15171+ bbot = au_ibbot(inode);
15172+ btop = au_ibtop(inode);
4a4d8108 15173+#if 0 /* reserved for future use */
5afbbe0d 15174+ if (bindex == bbot) {
4a4d8108
AM
15175+ /* keep this ino in rename case */
15176+ goto out;
15177+ }
15178+#endif
5afbbe0d 15179+ for (bindex = btop; bindex <= bbot; bindex++)
4a4d8108
AM
15180+ if (au_h_iptr(inode, bindex) == h_inode) {
15181+ bfound = bindex;
15182+ break;
15183+ }
15184+ if (bfound < 0)
1308ab2a 15185+ goto out;
1facf9fc 15186+
5afbbe0d 15187+ for (bindex = btop; bindex <= bbot; bindex++) {
4a4d8108
AM
15188+ h_i = au_h_iptr(inode, bindex);
15189+ if (!h_i)
15190+ continue;
1facf9fc 15191+
4a4d8108
AM
15192+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
15193+ /* ignore this error */
15194+ /* bad action? */
1facf9fc 15195+ }
1facf9fc 15196+
4a4d8108 15197+ /* children inode number will be broken */
1facf9fc 15198+
4f0767ce 15199+out:
4a4d8108
AM
15200+ AuTraceErr(err);
15201+ return err;
1facf9fc 15202+}
15203+
4a4d8108 15204+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 15205+{
4a4d8108
AM
15206+ int err, i, j, ndentry;
15207+ struct au_dcsub_pages dpages;
15208+ struct au_dpage *dpage;
15209+ struct dentry **dentries;
1facf9fc 15210+
4a4d8108
AM
15211+ err = au_dpages_init(&dpages, GFP_NOFS);
15212+ if (unlikely(err))
15213+ goto out;
15214+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
15215+ if (unlikely(err))
15216+ goto out_dpages;
1facf9fc 15217+
4a4d8108
AM
15218+ for (i = 0; i < dpages.ndpage; i++) {
15219+ dpage = dpages.dpages + i;
15220+ dentries = dpage->dentries;
15221+ ndentry = dpage->ndentry;
15222+ for (j = 0; j < ndentry; j++) {
15223+ struct dentry *d;
15224+
15225+ d = dentries[j];
15226+ if (IS_ROOT(d))
15227+ continue;
15228+
4a4d8108 15229+ au_digen_dec(d);
5527c038 15230+ if (d_really_is_positive(d))
4a4d8108
AM
15231+ /* todo: reset children xino?
15232+ cached children only? */
5527c038 15233+ au_iigen_dec(d_inode(d));
1308ab2a 15234+ }
dece6358 15235+ }
1facf9fc 15236+
4f0767ce 15237+out_dpages:
4a4d8108 15238+ au_dpages_free(&dpages);
dece6358 15239+
027c5e7a 15240+#if 0
4a4d8108
AM
15241+ /* discard children */
15242+ dentry_unhash(dentry);
15243+ dput(dentry);
027c5e7a 15244+#endif
4f0767ce 15245+out:
dece6358
AM
15246+ return err;
15247+}
15248+
1308ab2a 15249+/*
4a4d8108 15250+ * return 0 if processed.
1308ab2a 15251+ */
4a4d8108
AM
15252+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
15253+ const unsigned int isdir)
dece6358 15254+{
1308ab2a 15255+ int err;
4a4d8108
AM
15256+ struct dentry *d;
15257+ struct qstr *dname;
1facf9fc 15258+
4a4d8108
AM
15259+ err = 1;
15260+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15261+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15262+ err = 0;
15263+ goto out;
15264+ }
dece6358 15265+
4a4d8108
AM
15266+ if (!isdir) {
15267+ AuDebugOn(!name);
15268+ au_iigen_dec(inode);
027c5e7a 15269+ spin_lock(&inode->i_lock);
c1595e42 15270+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 15271+ spin_lock(&d->d_lock);
4a4d8108
AM
15272+ dname = &d->d_name;
15273+ if (dname->len != nlen
027c5e7a
AM
15274+ || memcmp(dname->name, name, nlen)) {
15275+ spin_unlock(&d->d_lock);
4a4d8108 15276+ continue; /* not the alias named @name */
027c5e7a 15277+ }
4a4d8108 15278+ err = 0;
4a4d8108
AM
15279+ au_digen_dec(d);
15280+ spin_unlock(&d->d_lock);
15281+ break;
1facf9fc 15282+ }
027c5e7a 15283+ spin_unlock(&inode->i_lock);
1308ab2a 15284+ } else {
027c5e7a 15285+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
c1595e42 15286+ d = d_find_any_alias(inode);
4a4d8108
AM
15287+ if (!d) {
15288+ au_iigen_dec(inode);
15289+ goto out;
15290+ }
1facf9fc 15291+
027c5e7a 15292+ spin_lock(&d->d_lock);
4a4d8108 15293+ dname = &d->d_name;
027c5e7a
AM
15294+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
15295+ spin_unlock(&d->d_lock);
4a4d8108 15296+ err = hn_gen_tree(d);
027c5e7a
AM
15297+ spin_lock(&d->d_lock);
15298+ }
15299+ spin_unlock(&d->d_lock);
4a4d8108
AM
15300+ dput(d);
15301+ }
1facf9fc 15302+
4f0767ce 15303+out:
4a4d8108 15304+ AuTraceErr(err);
1308ab2a 15305+ return err;
15306+}
dece6358 15307+
4a4d8108 15308+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 15309+{
4a4d8108 15310+ int err;
1facf9fc 15311+
5527c038 15312+ if (IS_ROOT(dentry)) {
0c3ec466 15313+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15314+ return 0;
15315+ }
1308ab2a 15316+
4a4d8108
AM
15317+ err = 0;
15318+ if (!isdir) {
4a4d8108 15319+ au_digen_dec(dentry);
5527c038
JR
15320+ if (d_really_is_positive(dentry))
15321+ au_iigen_dec(d_inode(dentry));
4a4d8108 15322+ } else {
027c5e7a 15323+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
5527c038 15324+ if (d_really_is_positive(dentry))
4a4d8108
AM
15325+ err = hn_gen_tree(dentry);
15326+ }
15327+
15328+ AuTraceErr(err);
15329+ return err;
1facf9fc 15330+}
15331+
4a4d8108 15332+/* ---------------------------------------------------------------------- */
1facf9fc 15333+
4a4d8108
AM
15334+/* hnotify job flags */
15335+#define AuHnJob_XINO0 1
15336+#define AuHnJob_GEN (1 << 1)
15337+#define AuHnJob_DIRENT (1 << 2)
15338+#define AuHnJob_ISDIR (1 << 3)
15339+#define AuHnJob_TRYXINO0 (1 << 4)
15340+#define AuHnJob_MNTPNT (1 << 5)
15341+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
15342+#define au_fset_hnjob(flags, name) \
15343+ do { (flags) |= AuHnJob_##name; } while (0)
15344+#define au_fclr_hnjob(flags, name) \
15345+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 15346+
4a4d8108
AM
15347+enum {
15348+ AuHn_CHILD,
15349+ AuHn_PARENT,
15350+ AuHnLast
15351+};
1facf9fc 15352+
4a4d8108
AM
15353+struct au_hnotify_args {
15354+ struct inode *h_dir, *dir, *h_child_inode;
15355+ u32 mask;
15356+ unsigned int flags[AuHnLast];
15357+ unsigned int h_child_nlen;
15358+ char h_child_name[];
15359+};
1facf9fc 15360+
4a4d8108
AM
15361+struct hn_job_args {
15362+ unsigned int flags;
15363+ struct inode *inode, *h_inode, *dir, *h_dir;
15364+ struct dentry *dentry;
15365+ char *h_name;
15366+ int h_nlen;
15367+};
1308ab2a 15368+
4a4d8108
AM
15369+static int hn_job(struct hn_job_args *a)
15370+{
15371+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
076b876e 15372+ int e;
1308ab2a 15373+
4a4d8108
AM
15374+ /* reset xino */
15375+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
15376+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 15377+
4a4d8108
AM
15378+ if (au_ftest_hnjob(a->flags, TRYXINO0)
15379+ && a->inode
15380+ && a->h_inode) {
febd17d6 15381+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
38d290e6
JR
15382+ if (!a->h_inode->i_nlink
15383+ && !(a->h_inode->i_state & I_LINKABLE))
4a4d8108 15384+ hn_xino(a->inode, a->h_inode); /* ignore this error */
febd17d6 15385+ inode_unlock(a->h_inode);
1308ab2a 15386+ }
1facf9fc 15387+
4a4d8108
AM
15388+ /* make the generation obsolete */
15389+ if (au_ftest_hnjob(a->flags, GEN)) {
076b876e 15390+ e = -1;
4a4d8108 15391+ if (a->inode)
076b876e 15392+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
4a4d8108 15393+ isdir);
076b876e 15394+ if (e && a->dentry)
4a4d8108
AM
15395+ hn_gen_by_name(a->dentry, isdir);
15396+ /* ignore this error */
1facf9fc 15397+ }
1facf9fc 15398+
4a4d8108
AM
15399+ /* make dir entries obsolete */
15400+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
15401+ struct au_vdir *vdir;
1facf9fc 15402+
4a4d8108
AM
15403+ vdir = au_ivdir(a->inode);
15404+ if (vdir)
15405+ vdir->vd_jiffy = 0;
15406+ /* IMustLock(a->inode); */
15407+ /* a->inode->i_version++; */
15408+ }
1facf9fc 15409+
4a4d8108
AM
15410+ /* can do nothing but warn */
15411+ if (au_ftest_hnjob(a->flags, MNTPNT)
15412+ && a->dentry
15413+ && d_mountpoint(a->dentry))
523b37e3 15414+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
1facf9fc 15415+
4a4d8108 15416+ return 0;
1308ab2a 15417+}
1facf9fc 15418+
1308ab2a 15419+/* ---------------------------------------------------------------------- */
1facf9fc 15420+
4a4d8108
AM
15421+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
15422+ struct inode *dir)
1308ab2a 15423+{
4a4d8108
AM
15424+ struct dentry *dentry, *d, *parent;
15425+ struct qstr *dname;
1308ab2a 15426+
c1595e42 15427+ parent = d_find_any_alias(dir);
4a4d8108
AM
15428+ if (!parent)
15429+ return NULL;
1308ab2a 15430+
4a4d8108 15431+ dentry = NULL;
027c5e7a 15432+ spin_lock(&parent->d_lock);
c1595e42 15433+ list_for_each_entry(d, &parent->d_subdirs, d_child) {
523b37e3 15434+ /* AuDbg("%pd\n", d); */
027c5e7a 15435+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
15436+ dname = &d->d_name;
15437+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
15438+ goto cont_unlock;
15439+ if (au_di(d))
15440+ au_digen_dec(d);
15441+ else
15442+ goto cont_unlock;
c1595e42 15443+ if (au_dcount(d) > 0) {
027c5e7a 15444+ dentry = dget_dlock(d);
4a4d8108 15445+ spin_unlock(&d->d_lock);
027c5e7a 15446+ break;
dece6358 15447+ }
1facf9fc 15448+
f6b6e03d 15449+cont_unlock:
027c5e7a 15450+ spin_unlock(&d->d_lock);
1308ab2a 15451+ }
027c5e7a 15452+ spin_unlock(&parent->d_lock);
4a4d8108 15453+ dput(parent);
1facf9fc 15454+
4a4d8108
AM
15455+ if (dentry)
15456+ di_write_lock_child(dentry);
1308ab2a 15457+
4a4d8108
AM
15458+ return dentry;
15459+}
dece6358 15460+
4a4d8108
AM
15461+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
15462+ aufs_bindex_t bindex, ino_t h_ino)
15463+{
15464+ struct inode *inode;
15465+ ino_t ino;
15466+ int err;
15467+
15468+ inode = NULL;
15469+ err = au_xino_read(sb, bindex, h_ino, &ino);
15470+ if (!err && ino)
15471+ inode = ilookup(sb, ino);
15472+ if (!inode)
15473+ goto out;
15474+
15475+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15476+ pr_warn("wrong root branch\n");
4a4d8108
AM
15477+ iput(inode);
15478+ inode = NULL;
15479+ goto out;
1308ab2a 15480+ }
15481+
4a4d8108 15482+ ii_write_lock_child(inode);
1308ab2a 15483+
4f0767ce 15484+out:
4a4d8108 15485+ return inode;
dece6358
AM
15486+}
15487+
4a4d8108 15488+static void au_hn_bh(void *_args)
1facf9fc 15489+{
4a4d8108
AM
15490+ struct au_hnotify_args *a = _args;
15491+ struct super_block *sb;
5afbbe0d 15492+ aufs_bindex_t bindex, bbot, bfound;
4a4d8108 15493+ unsigned char xino, try_iput;
1facf9fc 15494+ int err;
1308ab2a 15495+ struct inode *inode;
4a4d8108
AM
15496+ ino_t h_ino;
15497+ struct hn_job_args args;
15498+ struct dentry *dentry;
15499+ struct au_sbinfo *sbinfo;
1facf9fc 15500+
4a4d8108
AM
15501+ AuDebugOn(!_args);
15502+ AuDebugOn(!a->h_dir);
15503+ AuDebugOn(!a->dir);
15504+ AuDebugOn(!a->mask);
15505+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
15506+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
15507+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 15508+
4a4d8108
AM
15509+ inode = NULL;
15510+ dentry = NULL;
15511+ /*
15512+ * do not lock a->dir->i_mutex here
15513+ * because d_revalidate() may cause a deadlock.
15514+ */
15515+ sb = a->dir->i_sb;
15516+ AuDebugOn(!sb);
15517+ sbinfo = au_sbi(sb);
15518+ AuDebugOn(!sbinfo);
7f207e10 15519+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 15520+
4a4d8108
AM
15521+ ii_read_lock_parent(a->dir);
15522+ bfound = -1;
5afbbe0d
AM
15523+ bbot = au_ibbot(a->dir);
15524+ for (bindex = au_ibtop(a->dir); bindex <= bbot; bindex++)
4a4d8108
AM
15525+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
15526+ bfound = bindex;
15527+ break;
15528+ }
15529+ ii_read_unlock(a->dir);
15530+ if (unlikely(bfound < 0))
15531+ goto out;
1facf9fc 15532+
4a4d8108
AM
15533+ xino = !!au_opt_test(au_mntflags(sb), XINO);
15534+ h_ino = 0;
15535+ if (a->h_child_inode)
15536+ h_ino = a->h_child_inode->i_ino;
1facf9fc 15537+
4a4d8108
AM
15538+ if (a->h_child_nlen
15539+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
15540+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
15541+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
15542+ a->dir);
15543+ try_iput = 0;
5527c038
JR
15544+ if (dentry && d_really_is_positive(dentry))
15545+ inode = d_inode(dentry);
4a4d8108
AM
15546+ if (xino && !inode && h_ino
15547+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
15548+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
15549+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
15550+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
15551+ try_iput = 1;
f0c0a007 15552+ }
1facf9fc 15553+
4a4d8108
AM
15554+ args.flags = a->flags[AuHn_CHILD];
15555+ args.dentry = dentry;
15556+ args.inode = inode;
15557+ args.h_inode = a->h_child_inode;
15558+ args.dir = a->dir;
15559+ args.h_dir = a->h_dir;
15560+ args.h_name = a->h_child_name;
15561+ args.h_nlen = a->h_child_nlen;
15562+ err = hn_job(&args);
15563+ if (dentry) {
027c5e7a 15564+ if (au_di(dentry))
4a4d8108
AM
15565+ di_write_unlock(dentry);
15566+ dput(dentry);
15567+ }
15568+ if (inode && try_iput) {
15569+ ii_write_unlock(inode);
15570+ iput(inode);
15571+ }
1facf9fc 15572+
4a4d8108
AM
15573+ ii_write_lock_parent(a->dir);
15574+ args.flags = a->flags[AuHn_PARENT];
15575+ args.dentry = NULL;
15576+ args.inode = a->dir;
15577+ args.h_inode = a->h_dir;
15578+ args.dir = NULL;
15579+ args.h_dir = NULL;
15580+ args.h_name = NULL;
15581+ args.h_nlen = 0;
15582+ err = hn_job(&args);
15583+ ii_write_unlock(a->dir);
1facf9fc 15584+
4f0767ce 15585+out:
4a4d8108
AM
15586+ iput(a->h_child_inode);
15587+ iput(a->h_dir);
15588+ iput(a->dir);
027c5e7a
AM
15589+ si_write_unlock(sb);
15590+ au_nwt_done(&sbinfo->si_nowait);
f0c0a007 15591+ au_delayed_kfree(a);
dece6358 15592+}
1facf9fc 15593+
4a4d8108
AM
15594+/* ---------------------------------------------------------------------- */
15595+
15596+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
15597+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 15598+{
4a4d8108 15599+ int err, len;
53392da6 15600+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
15601+ unsigned char isdir, isroot, wh;
15602+ struct inode *dir;
15603+ struct au_hnotify_args *args;
15604+ char *p, *h_child_name;
dece6358 15605+
1308ab2a 15606+ err = 0;
4a4d8108
AM
15607+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
15608+ dir = igrab(hnotify->hn_aufs_inode);
15609+ if (!dir)
15610+ goto out;
1facf9fc 15611+
4a4d8108
AM
15612+ isroot = (dir->i_ino == AUFS_ROOT_INO);
15613+ wh = 0;
15614+ h_child_name = (void *)h_child_qstr->name;
15615+ len = h_child_qstr->len;
15616+ if (h_child_name) {
15617+ if (len > AUFS_WH_PFX_LEN
15618+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
15619+ h_child_name += AUFS_WH_PFX_LEN;
15620+ len -= AUFS_WH_PFX_LEN;
15621+ wh = 1;
15622+ }
1facf9fc 15623+ }
dece6358 15624+
4a4d8108
AM
15625+ isdir = 0;
15626+ if (h_child_inode)
15627+ isdir = !!S_ISDIR(h_child_inode->i_mode);
15628+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
15629+ flags[AuHn_CHILD] = 0;
15630+ if (isdir)
15631+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
15632+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
15633+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
15634+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
15635+ case FS_MOVED_FROM:
15636+ case FS_MOVED_TO:
15637+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
15638+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15639+ /*FALLTHROUGH*/
15640+ case FS_CREATE:
fb47a38f 15641+ AuDebugOn(!h_child_name);
4a4d8108 15642+ break;
1facf9fc 15643+
4a4d8108
AM
15644+ case FS_DELETE:
15645+ /*
15646+ * aufs will never be able to get this child inode.
15647+ * revalidation should be in d_revalidate()
15648+ * by checking i_nlink, i_generation or d_unhashed().
15649+ */
15650+ AuDebugOn(!h_child_name);
15651+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
15652+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15653+ break;
dece6358 15654+
4a4d8108
AM
15655+ default:
15656+ AuDebugOn(1);
15657+ }
1308ab2a 15658+
4a4d8108
AM
15659+ if (wh)
15660+ h_child_inode = NULL;
1308ab2a 15661+
4a4d8108
AM
15662+ err = -ENOMEM;
15663+ /* iput() and kfree() will be called in au_hn_bh() */
4a4d8108 15664+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
15665+ if (unlikely(!args)) {
15666+ AuErr1("no memory\n");
15667+ iput(dir);
15668+ goto out;
15669+ }
15670+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
15671+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
15672+ args->mask = mask;
15673+ args->dir = dir;
15674+ args->h_dir = igrab(h_dir);
15675+ if (h_child_inode)
15676+ h_child_inode = igrab(h_child_inode); /* can be NULL */
15677+ args->h_child_inode = h_child_inode;
15678+ args->h_child_nlen = len;
15679+ if (len) {
15680+ p = (void *)args;
15681+ p += sizeof(*args);
15682+ memcpy(p, h_child_name, len);
15683+ p[len] = 0;
1308ab2a 15684+ }
1308ab2a 15685+
38d290e6 15686+ /* NFS fires the event for silly-renamed one from kworker */
53392da6 15687+ f = 0;
38d290e6
JR
15688+ if (!dir->i_nlink
15689+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
53392da6
AM
15690+ f = AuWkq_NEST;
15691+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
15692+ if (unlikely(err)) {
15693+ pr_err("wkq %d\n", err);
15694+ iput(args->h_child_inode);
15695+ iput(args->h_dir);
15696+ iput(args->dir);
f0c0a007 15697+ au_delayed_kfree(args);
1facf9fc 15698+ }
1facf9fc 15699+
4a4d8108 15700+out:
1facf9fc 15701+ return err;
15702+}
15703+
027c5e7a
AM
15704+/* ---------------------------------------------------------------------- */
15705+
15706+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
15707+{
15708+ int err;
15709+
15710+ AuDebugOn(!(udba & AuOptMask_UDBA));
15711+
15712+ err = 0;
15713+ if (au_hnotify_op.reset_br)
15714+ err = au_hnotify_op.reset_br(udba, br, perm);
15715+
15716+ return err;
15717+}
15718+
15719+int au_hnotify_init_br(struct au_branch *br, int perm)
15720+{
15721+ int err;
15722+
15723+ err = 0;
15724+ if (au_hnotify_op.init_br)
15725+ err = au_hnotify_op.init_br(br, perm);
15726+
15727+ return err;
15728+}
15729+
15730+void au_hnotify_fin_br(struct au_branch *br)
15731+{
15732+ if (au_hnotify_op.fin_br)
15733+ au_hnotify_op.fin_br(br);
15734+}
15735+
4a4d8108
AM
15736+static void au_hn_destroy_cache(void)
15737+{
f0c0a007
AM
15738+ struct au_cache *cp;
15739+
15740+ flush_delayed_work(&au_dfree.dwork);
15741+ cp = au_dfree.cache + AuCache_HNOTIFY;
15742+ AuDebugOn(!llist_empty(&cp->llist));
15743+ kmem_cache_destroy(cp->cache);
15744+ cp->cache = NULL;
4a4d8108 15745+}
1308ab2a 15746+
f0c0a007
AM
15747+AU_CACHE_DFREE_FUNC(hnotify, HNOTIFY, hn_lnode);
15748+
4a4d8108 15749+int __init au_hnotify_init(void)
1facf9fc 15750+{
1308ab2a 15751+ int err;
f0c0a007 15752+ struct au_cache *cp;
1308ab2a 15753+
4a4d8108 15754+ err = -ENOMEM;
f0c0a007
AM
15755+ cp = au_dfree.cache + AuCache_HNOTIFY;
15756+ cp->cache = AuCache(au_hnotify);
15757+ if (cp->cache) {
027c5e7a
AM
15758+ err = 0;
15759+ if (au_hnotify_op.init)
15760+ err = au_hnotify_op.init();
4a4d8108
AM
15761+ if (unlikely(err))
15762+ au_hn_destroy_cache();
1308ab2a 15763+ }
1308ab2a 15764+ AuTraceErr(err);
4a4d8108 15765+ return err;
1308ab2a 15766+}
15767+
4a4d8108 15768+void au_hnotify_fin(void)
1308ab2a 15769+{
f0c0a007
AM
15770+ struct au_cache *cp;
15771+
027c5e7a
AM
15772+ if (au_hnotify_op.fin)
15773+ au_hnotify_op.fin();
f0c0a007 15774+
4a4d8108 15775+ /* cf. au_cache_fin() */
f0c0a007
AM
15776+ cp = au_dfree.cache + AuCache_HNOTIFY;
15777+ if (cp->cache)
4a4d8108 15778+ au_hn_destroy_cache();
dece6358 15779+}
7f207e10
AM
15780diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
15781--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
15782+++ linux/fs/aufs/iinfo.c 2016-10-09 16:55:38.889431135 +0200
15783@@ -0,0 +1,285 @@
dece6358 15784+/*
8cdd5066 15785+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
15786+ *
15787+ * This program, aufs is free software; you can redistribute it and/or modify
15788+ * it under the terms of the GNU General Public License as published by
15789+ * the Free Software Foundation; either version 2 of the License, or
15790+ * (at your option) any later version.
15791+ *
15792+ * This program is distributed in the hope that it will be useful,
15793+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15794+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15795+ * GNU General Public License for more details.
15796+ *
15797+ * You should have received a copy of the GNU General Public License
523b37e3 15798+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 15799+ */
1facf9fc 15800+
dece6358 15801+/*
4a4d8108 15802+ * inode private data
dece6358 15803+ */
1facf9fc 15804+
1308ab2a 15805+#include "aufs.h"
1facf9fc 15806+
4a4d8108 15807+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 15808+{
4a4d8108 15809+ struct inode *h_inode;
5afbbe0d 15810+ struct au_hinode *hinode;
1facf9fc 15811+
4a4d8108 15812+ IiMustAnyLock(inode);
1facf9fc 15813+
5afbbe0d
AM
15814+ hinode = au_hinode(au_ii(inode), bindex);
15815+ h_inode = hinode->hi_inode;
4a4d8108
AM
15816+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15817+ return h_inode;
15818+}
1facf9fc 15819+
4a4d8108
AM
15820+/* todo: hard/soft set? */
15821+void au_hiput(struct au_hinode *hinode)
15822+{
15823+ au_hn_free(hinode);
15824+ dput(hinode->hi_whdentry);
15825+ iput(hinode->hi_inode);
15826+}
1facf9fc 15827+
4a4d8108
AM
15828+unsigned int au_hi_flags(struct inode *inode, int isdir)
15829+{
15830+ unsigned int flags;
15831+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 15832+
4a4d8108
AM
15833+ flags = 0;
15834+ if (au_opt_test(mnt_flags, XINO))
15835+ au_fset_hi(flags, XINO);
15836+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
15837+ au_fset_hi(flags, HNOTIFY);
15838+ return flags;
1facf9fc 15839+}
15840+
4a4d8108
AM
15841+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15842+ struct inode *h_inode, unsigned int flags)
1308ab2a 15843+{
4a4d8108
AM
15844+ struct au_hinode *hinode;
15845+ struct inode *hi;
15846+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 15847+
4a4d8108 15848+ IiMustWriteLock(inode);
dece6358 15849+
5afbbe0d 15850+ hinode = au_hinode(iinfo, bindex);
4a4d8108
AM
15851+ hi = hinode->hi_inode;
15852+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15853+
15854+ if (hi)
15855+ au_hiput(hinode);
15856+ hinode->hi_inode = h_inode;
15857+ if (h_inode) {
15858+ int err;
15859+ struct super_block *sb = inode->i_sb;
15860+ struct au_branch *br;
15861+
027c5e7a
AM
15862+ AuDebugOn(inode->i_mode
15863+ && (h_inode->i_mode & S_IFMT)
15864+ != (inode->i_mode & S_IFMT));
5afbbe0d 15865+ if (bindex == iinfo->ii_btop)
4a4d8108
AM
15866+ au_cpup_igen(inode, h_inode);
15867+ br = au_sbr(sb, bindex);
15868+ hinode->hi_id = br->br_id;
15869+ if (au_ftest_hi(flags, XINO)) {
15870+ err = au_xino_write(sb, bindex, h_inode->i_ino,
15871+ inode->i_ino);
15872+ if (unlikely(err))
15873+ AuIOErr1("failed au_xino_write() %d\n", err);
15874+ }
15875+
15876+ if (au_ftest_hi(flags, HNOTIFY)
15877+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 15878+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
15879+ if (unlikely(err))
15880+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 15881+ }
15882+ }
4a4d8108 15883+}
dece6358 15884+
4a4d8108
AM
15885+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15886+ struct dentry *h_wh)
15887+{
15888+ struct au_hinode *hinode;
dece6358 15889+
4a4d8108
AM
15890+ IiMustWriteLock(inode);
15891+
5afbbe0d 15892+ hinode = au_hinode(au_ii(inode), bindex);
4a4d8108
AM
15893+ AuDebugOn(hinode->hi_whdentry);
15894+ hinode->hi_whdentry = h_wh;
1facf9fc 15895+}
15896+
537831f9 15897+void au_update_iigen(struct inode *inode, int half)
1308ab2a 15898+{
537831f9
AM
15899+ struct au_iinfo *iinfo;
15900+ struct au_iigen *iigen;
15901+ unsigned int sigen;
15902+
15903+ sigen = au_sigen(inode->i_sb);
15904+ iinfo = au_ii(inode);
15905+ iigen = &iinfo->ii_generation;
be52b249 15906+ spin_lock(&iigen->ig_spin);
537831f9
AM
15907+ iigen->ig_generation = sigen;
15908+ if (half)
15909+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
15910+ else
15911+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
be52b249 15912+ spin_unlock(&iigen->ig_spin);
4a4d8108 15913+}
1facf9fc 15914+
4a4d8108
AM
15915+/* it may be called at remount time, too */
15916+void au_update_ibrange(struct inode *inode, int do_put_zero)
15917+{
15918+ struct au_iinfo *iinfo;
5afbbe0d 15919+ aufs_bindex_t bindex, bbot;
1facf9fc 15920+
5afbbe0d 15921+ AuDebugOn(au_is_bad_inode(inode));
4a4d8108 15922+ IiMustWriteLock(inode);
1facf9fc 15923+
5afbbe0d
AM
15924+ iinfo = au_ii(inode);
15925+ if (do_put_zero && iinfo->ii_btop >= 0) {
15926+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
4a4d8108
AM
15927+ bindex++) {
15928+ struct inode *h_i;
1facf9fc 15929+
5afbbe0d 15930+ h_i = au_hinode(iinfo, bindex)->hi_inode;
38d290e6
JR
15931+ if (h_i
15932+ && !h_i->i_nlink
15933+ && !(h_i->i_state & I_LINKABLE))
027c5e7a
AM
15934+ au_set_h_iptr(inode, bindex, NULL, 0);
15935+ }
4a4d8108
AM
15936+ }
15937+
5afbbe0d
AM
15938+ iinfo->ii_btop = -1;
15939+ iinfo->ii_bbot = -1;
15940+ bbot = au_sbbot(inode->i_sb);
15941+ for (bindex = 0; bindex <= bbot; bindex++)
15942+ if (au_hinode(iinfo, bindex)->hi_inode) {
15943+ iinfo->ii_btop = bindex;
4a4d8108 15944+ break;
027c5e7a 15945+ }
5afbbe0d
AM
15946+ if (iinfo->ii_btop >= 0)
15947+ for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--)
15948+ if (au_hinode(iinfo, bindex)->hi_inode) {
15949+ iinfo->ii_bbot = bindex;
027c5e7a
AM
15950+ break;
15951+ }
5afbbe0d 15952+ AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot);
1308ab2a 15953+}
1facf9fc 15954+
dece6358 15955+/* ---------------------------------------------------------------------- */
1facf9fc 15956+
4a4d8108 15957+void au_icntnr_init_once(void *_c)
dece6358 15958+{
4a4d8108
AM
15959+ struct au_icntnr *c = _c;
15960+ struct au_iinfo *iinfo = &c->iinfo;
1facf9fc 15961+
be52b249 15962+ spin_lock_init(&iinfo->ii_generation.ig_spin);
4a4d8108
AM
15963+ au_rw_init(&iinfo->ii_rwsem);
15964+ inode_init_once(&c->vfs_inode);
15965+}
1facf9fc 15966+
5afbbe0d
AM
15967+void au_hinode_init(struct au_hinode *hinode)
15968+{
15969+ hinode->hi_inode = NULL;
15970+ hinode->hi_id = -1;
15971+ au_hn_init(hinode);
15972+ hinode->hi_whdentry = NULL;
15973+}
15974+
4a4d8108
AM
15975+int au_iinfo_init(struct inode *inode)
15976+{
15977+ struct au_iinfo *iinfo;
15978+ struct super_block *sb;
5afbbe0d 15979+ struct au_hinode *hi;
4a4d8108 15980+ int nbr, i;
1facf9fc 15981+
4a4d8108
AM
15982+ sb = inode->i_sb;
15983+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
5afbbe0d 15984+ nbr = au_sbbot(sb) + 1;
4a4d8108
AM
15985+ if (unlikely(nbr <= 0))
15986+ nbr = 1;
5afbbe0d
AM
15987+ hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
15988+ if (hi) {
7f207e10 15989+ au_ninodes_inc(sb);
5afbbe0d
AM
15990+
15991+ iinfo->ii_hinode = hi;
15992+ for (i = 0; i < nbr; i++, hi++)
15993+ au_hinode_init(hi);
1facf9fc 15994+
537831f9 15995+ iinfo->ii_generation.ig_generation = au_sigen(sb);
5afbbe0d
AM
15996+ iinfo->ii_btop = -1;
15997+ iinfo->ii_bbot = -1;
4a4d8108
AM
15998+ iinfo->ii_vdir = NULL;
15999+ return 0;
1308ab2a 16000+ }
4a4d8108
AM
16001+ return -ENOMEM;
16002+}
1facf9fc 16003+
e2f27e51 16004+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink)
4a4d8108 16005+{
5afbbe0d 16006+ int err, i;
4a4d8108 16007+ struct au_hinode *hip;
1facf9fc 16008+
4a4d8108
AM
16009+ AuRwMustWriteLock(&iinfo->ii_rwsem);
16010+
16011+ err = -ENOMEM;
e2f27e51
AM
16012+ hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS,
16013+ may_shrink);
4a4d8108
AM
16014+ if (hip) {
16015+ iinfo->ii_hinode = hip;
5afbbe0d
AM
16016+ i = iinfo->ii_bbot + 1;
16017+ hip += i;
16018+ for (; i < nbr; i++, hip++)
16019+ au_hinode_init(hip);
4a4d8108 16020+ err = 0;
1308ab2a 16021+ }
4a4d8108 16022+
1308ab2a 16023+ return err;
1facf9fc 16024+}
16025+
4a4d8108 16026+void au_iinfo_fin(struct inode *inode)
1facf9fc 16027+{
4a4d8108
AM
16028+ struct au_iinfo *iinfo;
16029+ struct au_hinode *hi;
16030+ struct super_block *sb;
5afbbe0d 16031+ aufs_bindex_t bindex, bbot;
b752ccd1 16032+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 16033+
5afbbe0d 16034+ AuDebugOn(au_is_bad_inode(inode));
1308ab2a 16035+
b752ccd1 16036+ sb = inode->i_sb;
7f207e10 16037+ au_ninodes_dec(sb);
b752ccd1
AM
16038+ if (si_pid_test(sb))
16039+ au_xino_delete_inode(inode, unlinked);
16040+ else {
16041+ /*
16042+ * it is safe to hide the dependency between sbinfo and
16043+ * sb->s_umount.
16044+ */
16045+ lockdep_off();
16046+ si_noflush_read_lock(sb);
16047+ au_xino_delete_inode(inode, unlinked);
16048+ si_read_unlock(sb);
16049+ lockdep_on();
16050+ }
16051+
5afbbe0d 16052+ iinfo = au_ii(inode);
4a4d8108 16053+ if (iinfo->ii_vdir)
f0c0a007 16054+ au_vdir_free(iinfo->ii_vdir, /*atonce*/0);
1308ab2a 16055+
5afbbe0d 16056+ bindex = iinfo->ii_btop;
b752ccd1 16057+ if (bindex >= 0) {
5afbbe0d
AM
16058+ hi = au_hinode(iinfo, bindex);
16059+ bbot = iinfo->ii_bbot;
16060+ while (bindex++ <= bbot) {
b752ccd1 16061+ if (hi->hi_inode)
4a4d8108 16062+ au_hiput(hi);
4a4d8108
AM
16063+ hi++;
16064+ }
16065+ }
f0c0a007 16066+ au_delayed_kfree(iinfo->ii_hinode);
4a4d8108 16067+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 16068+}
7f207e10
AM
16069diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
16070--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
16071+++ linux/fs/aufs/inode.c 2016-10-09 16:55:38.889431135 +0200
16072@@ -0,0 +1,519 @@
4a4d8108 16073+/*
8cdd5066 16074+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
16075+ *
16076+ * This program, aufs is free software; you can redistribute it and/or modify
16077+ * it under the terms of the GNU General Public License as published by
16078+ * the Free Software Foundation; either version 2 of the License, or
16079+ * (at your option) any later version.
16080+ *
16081+ * This program is distributed in the hope that it will be useful,
16082+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16083+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16084+ * GNU General Public License for more details.
16085+ *
16086+ * You should have received a copy of the GNU General Public License
523b37e3 16087+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 16088+ */
1facf9fc 16089+
4a4d8108
AM
16090+/*
16091+ * inode functions
16092+ */
1facf9fc 16093+
4a4d8108 16094+#include "aufs.h"
1308ab2a 16095+
4a4d8108
AM
16096+struct inode *au_igrab(struct inode *inode)
16097+{
16098+ if (inode) {
16099+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 16100+ ihold(inode);
1facf9fc 16101+ }
4a4d8108
AM
16102+ return inode;
16103+}
1facf9fc 16104+
4a4d8108
AM
16105+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
16106+{
16107+ au_cpup_attr_all(inode, /*force*/0);
537831f9 16108+ au_update_iigen(inode, /*half*/1);
4a4d8108
AM
16109+ if (do_version)
16110+ inode->i_version++;
dece6358 16111+}
1facf9fc 16112+
027c5e7a 16113+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 16114+{
e2f27e51 16115+ int err, e, nbr;
027c5e7a 16116+ umode_t type;
4a4d8108 16117+ aufs_bindex_t bindex, new_bindex;
1308ab2a 16118+ struct super_block *sb;
4a4d8108 16119+ struct au_iinfo *iinfo;
027c5e7a 16120+ struct au_hinode *p, *q, tmp;
1facf9fc 16121+
5afbbe0d 16122+ AuDebugOn(au_is_bad_inode(inode));
4a4d8108 16123+ IiMustWriteLock(inode);
1facf9fc 16124+
027c5e7a 16125+ *update = 0;
4a4d8108 16126+ sb = inode->i_sb;
e2f27e51 16127+ nbr = au_sbbot(sb) + 1;
027c5e7a 16128+ type = inode->i_mode & S_IFMT;
4a4d8108 16129+ iinfo = au_ii(inode);
e2f27e51 16130+ err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
4a4d8108 16131+ if (unlikely(err))
1308ab2a 16132+ goto out;
1facf9fc 16133+
5afbbe0d
AM
16134+ AuDebugOn(iinfo->ii_btop < 0);
16135+ p = au_hinode(iinfo, iinfo->ii_btop);
16136+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
4a4d8108
AM
16137+ bindex++, p++) {
16138+ if (!p->hi_inode)
16139+ continue;
1facf9fc 16140+
027c5e7a 16141+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
16142+ new_bindex = au_br_index(sb, p->hi_id);
16143+ if (new_bindex == bindex)
16144+ continue;
1facf9fc 16145+
4a4d8108 16146+ if (new_bindex < 0) {
027c5e7a 16147+ *update = 1;
4a4d8108
AM
16148+ au_hiput(p);
16149+ p->hi_inode = NULL;
16150+ continue;
1308ab2a 16151+ }
4a4d8108 16152+
5afbbe0d
AM
16153+ if (new_bindex < iinfo->ii_btop)
16154+ iinfo->ii_btop = new_bindex;
16155+ if (iinfo->ii_bbot < new_bindex)
16156+ iinfo->ii_bbot = new_bindex;
4a4d8108 16157+ /* swap the two lower inodes, and loop again */
5afbbe0d 16158+ q = au_hinode(iinfo, new_bindex);
4a4d8108
AM
16159+ tmp = *q;
16160+ *q = *p;
16161+ *p = tmp;
16162+ if (tmp.hi_inode) {
16163+ bindex--;
16164+ p--;
1308ab2a 16165+ }
16166+ }
4a4d8108 16167+ au_update_ibrange(inode, /*do_put_zero*/0);
e2f27e51 16168+ au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
4a4d8108
AM
16169+ e = au_dy_irefresh(inode);
16170+ if (unlikely(e && !err))
16171+ err = e;
1facf9fc 16172+
4f0767ce 16173+out:
027c5e7a
AM
16174+ AuTraceErr(err);
16175+ return err;
16176+}
16177+
b95c5147
AM
16178+void au_refresh_iop(struct inode *inode, int force_getattr)
16179+{
16180+ int type;
16181+ struct au_sbinfo *sbi = au_sbi(inode->i_sb);
16182+ const struct inode_operations *iop
16183+ = force_getattr ? aufs_iop : sbi->si_iop_array;
16184+
16185+ if (inode->i_op == iop)
16186+ return;
16187+
16188+ switch (inode->i_mode & S_IFMT) {
16189+ case S_IFDIR:
16190+ type = AuIop_DIR;
16191+ break;
16192+ case S_IFLNK:
16193+ type = AuIop_SYMLINK;
16194+ break;
16195+ default:
16196+ type = AuIop_OTHER;
16197+ break;
16198+ }
16199+
16200+ inode->i_op = iop + type;
16201+ /* unnecessary smp_wmb() */
16202+}
16203+
027c5e7a
AM
16204+int au_refresh_hinode_self(struct inode *inode)
16205+{
16206+ int err, update;
16207+
16208+ err = au_ii_refresh(inode, &update);
16209+ if (!err)
16210+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
16211+
16212+ AuTraceErr(err);
4a4d8108
AM
16213+ return err;
16214+}
1facf9fc 16215+
4a4d8108
AM
16216+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
16217+{
027c5e7a 16218+ int err, e, update;
4a4d8108 16219+ unsigned int flags;
027c5e7a 16220+ umode_t mode;
5afbbe0d 16221+ aufs_bindex_t bindex, bbot;
027c5e7a 16222+ unsigned char isdir;
4a4d8108
AM
16223+ struct au_hinode *p;
16224+ struct au_iinfo *iinfo;
1facf9fc 16225+
027c5e7a 16226+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
16227+ if (unlikely(err))
16228+ goto out;
16229+
16230+ update = 0;
16231+ iinfo = au_ii(inode);
5afbbe0d 16232+ p = au_hinode(iinfo, iinfo->ii_btop);
027c5e7a
AM
16233+ mode = (inode->i_mode & S_IFMT);
16234+ isdir = S_ISDIR(mode);
4a4d8108 16235+ flags = au_hi_flags(inode, isdir);
5afbbe0d
AM
16236+ bbot = au_dbbot(dentry);
16237+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
5527c038 16238+ struct inode *h_i, *h_inode;
4a4d8108
AM
16239+ struct dentry *h_d;
16240+
16241+ h_d = au_h_dptr(dentry, bindex);
5527c038 16242+ if (!h_d || d_is_negative(h_d))
4a4d8108
AM
16243+ continue;
16244+
5527c038
JR
16245+ h_inode = d_inode(h_d);
16246+ AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
5afbbe0d 16247+ if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
4a4d8108
AM
16248+ h_i = au_h_iptr(inode, bindex);
16249+ if (h_i) {
5527c038 16250+ if (h_i == h_inode)
4a4d8108
AM
16251+ continue;
16252+ err = -EIO;
16253+ break;
16254+ }
16255+ }
5afbbe0d
AM
16256+ if (bindex < iinfo->ii_btop)
16257+ iinfo->ii_btop = bindex;
16258+ if (iinfo->ii_bbot < bindex)
16259+ iinfo->ii_bbot = bindex;
5527c038 16260+ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
4a4d8108 16261+ update = 1;
1308ab2a 16262+ }
4a4d8108
AM
16263+ au_update_ibrange(inode, /*do_put_zero*/0);
16264+ e = au_dy_irefresh(inode);
16265+ if (unlikely(e && !err))
16266+ err = e;
027c5e7a
AM
16267+ if (!err)
16268+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 16269+
4f0767ce 16270+out:
4a4d8108 16271+ AuTraceErr(err);
1308ab2a 16272+ return err;
dece6358
AM
16273+}
16274+
4a4d8108 16275+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 16276+{
4a4d8108
AM
16277+ int err;
16278+ unsigned int flags;
16279+ umode_t mode;
5afbbe0d 16280+ aufs_bindex_t bindex, btop, btail;
4a4d8108
AM
16281+ unsigned char isdir;
16282+ struct dentry *h_dentry;
16283+ struct inode *h_inode;
16284+ struct au_iinfo *iinfo;
b95c5147 16285+ struct inode_operations *iop;
dece6358 16286+
4a4d8108 16287+ IiMustWriteLock(inode);
dece6358 16288+
4a4d8108
AM
16289+ err = 0;
16290+ isdir = 0;
b95c5147 16291+ iop = au_sbi(inode->i_sb)->si_iop_array;
5afbbe0d
AM
16292+ btop = au_dbtop(dentry);
16293+ h_dentry = au_h_dptr(dentry, btop);
5527c038 16294+ h_inode = d_inode(h_dentry);
4a4d8108
AM
16295+ mode = h_inode->i_mode;
16296+ switch (mode & S_IFMT) {
16297+ case S_IFREG:
16298+ btail = au_dbtail(dentry);
b95c5147 16299+ inode->i_op = iop + AuIop_OTHER;
4a4d8108 16300+ inode->i_fop = &aufs_file_fop;
5afbbe0d 16301+ err = au_dy_iaop(inode, btop, h_inode);
4a4d8108
AM
16302+ if (unlikely(err))
16303+ goto out;
16304+ break;
16305+ case S_IFDIR:
16306+ isdir = 1;
16307+ btail = au_dbtaildir(dentry);
b95c5147 16308+ inode->i_op = iop + AuIop_DIR;
4a4d8108
AM
16309+ inode->i_fop = &aufs_dir_fop;
16310+ break;
16311+ case S_IFLNK:
16312+ btail = au_dbtail(dentry);
b95c5147 16313+ inode->i_op = iop + AuIop_SYMLINK;
4a4d8108
AM
16314+ break;
16315+ case S_IFBLK:
16316+ case S_IFCHR:
16317+ case S_IFIFO:
16318+ case S_IFSOCK:
16319+ btail = au_dbtail(dentry);
b95c5147 16320+ inode->i_op = iop + AuIop_OTHER;
38d290e6 16321+ init_special_inode(inode, mode, h_inode->i_rdev);
4a4d8108
AM
16322+ break;
16323+ default:
16324+ AuIOErr("Unknown file type 0%o\n", mode);
16325+ err = -EIO;
1308ab2a 16326+ goto out;
4a4d8108 16327+ }
dece6358 16328+
4a4d8108
AM
16329+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
16330+ flags = au_hi_flags(inode, isdir);
16331+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
16332+ && au_ftest_hi(flags, HNOTIFY)
16333+ && dentry->d_name.len > AUFS_WH_PFX_LEN
16334+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
16335+ au_fclr_hi(flags, HNOTIFY);
16336+ iinfo = au_ii(inode);
5afbbe0d
AM
16337+ iinfo->ii_btop = btop;
16338+ iinfo->ii_bbot = btail;
16339+ for (bindex = btop; bindex <= btail; bindex++) {
4a4d8108
AM
16340+ h_dentry = au_h_dptr(dentry, bindex);
16341+ if (h_dentry)
16342+ au_set_h_iptr(inode, bindex,
5527c038 16343+ au_igrab(d_inode(h_dentry)), flags);
4a4d8108
AM
16344+ }
16345+ au_cpup_attr_all(inode, /*force*/1);
c1595e42
JR
16346+ /*
16347+ * to force calling aufs_get_acl() every time,
16348+ * do not call cache_no_acl() for aufs inode.
16349+ */
dece6358 16350+
4f0767ce 16351+out:
4a4d8108
AM
16352+ return err;
16353+}
dece6358 16354+
027c5e7a
AM
16355+/*
16356+ * returns with iinfo write_locked only when zero is returned
16357+ * minus: errno
16358+ * zero: success, matched
16359+ * plus: no error, but unmatched
16360+ */
16361+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
16362+{
16363+ int err;
cfc41e69 16364+ unsigned int gen, igflags;
5afbbe0d 16365+ aufs_bindex_t bindex, bbot;
4a4d8108 16366+ struct inode *h_inode, *h_dinode;
5527c038 16367+ struct dentry *h_dentry;
dece6358 16368+
4a4d8108
AM
16369+ /*
16370+ * before this function, if aufs got any iinfo lock, it must be only
16371+ * one, the parent dir.
16372+ * it can happen by UDBA and the obsoleted inode number.
16373+ */
16374+ err = -EIO;
16375+ if (unlikely(inode->i_ino == parent_ino(dentry)))
16376+ goto out;
16377+
027c5e7a 16378+ err = 1;
4a4d8108 16379+ ii_write_lock_new_child(inode);
5afbbe0d 16380+ h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
5527c038 16381+ h_dinode = d_inode(h_dentry);
5afbbe0d
AM
16382+ bbot = au_ibbot(inode);
16383+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
4a4d8108 16384+ h_inode = au_h_iptr(inode, bindex);
537831f9
AM
16385+ if (!h_inode || h_inode != h_dinode)
16386+ continue;
16387+
16388+ err = 0;
cfc41e69 16389+ gen = au_iigen(inode, &igflags);
537831f9 16390+ if (gen == au_digen(dentry)
cfc41e69 16391+ && !au_ig_ftest(igflags, HALF_REFRESHED))
4a4d8108 16392+ break;
537831f9
AM
16393+
16394+ /* fully refresh inode using dentry */
16395+ err = au_refresh_hinode(inode, dentry);
16396+ if (!err)
16397+ au_update_iigen(inode, /*half*/0);
16398+ break;
1facf9fc 16399+ }
dece6358 16400+
4a4d8108
AM
16401+ if (unlikely(err))
16402+ ii_write_unlock(inode);
4f0767ce 16403+out:
1facf9fc 16404+ return err;
16405+}
1facf9fc 16406+
4a4d8108
AM
16407+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16408+ unsigned int d_type, ino_t *ino)
1facf9fc 16409+{
4a4d8108
AM
16410+ int err;
16411+ struct mutex *mtx;
1facf9fc 16412+
b752ccd1 16413+ /* prevent hardlinked inode number from race condition */
4a4d8108 16414+ mtx = NULL;
b752ccd1 16415+ if (d_type != DT_DIR) {
4a4d8108
AM
16416+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
16417+ mutex_lock(mtx);
16418+ }
16419+ err = au_xino_read(sb, bindex, h_ino, ino);
16420+ if (unlikely(err))
16421+ goto out;
1308ab2a 16422+
4a4d8108
AM
16423+ if (!*ino) {
16424+ err = -EIO;
16425+ *ino = au_xino_new_ino(sb);
16426+ if (unlikely(!*ino))
1facf9fc 16427+ goto out;
4a4d8108
AM
16428+ err = au_xino_write(sb, bindex, h_ino, *ino);
16429+ if (unlikely(err))
1308ab2a 16430+ goto out;
1308ab2a 16431+ }
1facf9fc 16432+
4f0767ce 16433+out:
b752ccd1 16434+ if (mtx)
4a4d8108 16435+ mutex_unlock(mtx);
1facf9fc 16436+ return err;
16437+}
16438+
4a4d8108
AM
16439+/* successful returns with iinfo write_locked */
16440+/* todo: return with unlocked? */
16441+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 16442+{
5527c038 16443+ struct inode *inode, *h_inode;
4a4d8108
AM
16444+ struct dentry *h_dentry;
16445+ struct super_block *sb;
b752ccd1 16446+ struct mutex *mtx;
4a4d8108 16447+ ino_t h_ino, ino;
1716fcea 16448+ int err;
5afbbe0d 16449+ aufs_bindex_t btop;
1facf9fc 16450+
4a4d8108 16451+ sb = dentry->d_sb;
5afbbe0d
AM
16452+ btop = au_dbtop(dentry);
16453+ h_dentry = au_h_dptr(dentry, btop);
5527c038
JR
16454+ h_inode = d_inode(h_dentry);
16455+ h_ino = h_inode->i_ino;
b752ccd1
AM
16456+
16457+ /*
16458+ * stop 'race'-ing between hardlinks under different
16459+ * parents.
16460+ */
16461+ mtx = NULL;
2000de60 16462+ if (!d_is_dir(h_dentry))
5afbbe0d 16463+ mtx = &au_sbr(sb, btop)->br_xino.xi_nondir_mtx;
b752ccd1 16464+
4f0767ce 16465+new_ino:
b752ccd1
AM
16466+ if (mtx)
16467+ mutex_lock(mtx);
5afbbe0d 16468+ err = au_xino_read(sb, btop, h_ino, &ino);
4a4d8108
AM
16469+ inode = ERR_PTR(err);
16470+ if (unlikely(err))
16471+ goto out;
b752ccd1 16472+
4a4d8108
AM
16473+ if (!ino) {
16474+ ino = au_xino_new_ino(sb);
16475+ if (unlikely(!ino)) {
16476+ inode = ERR_PTR(-EIO);
dece6358
AM
16477+ goto out;
16478+ }
16479+ }
1facf9fc 16480+
4a4d8108
AM
16481+ AuDbg("i%lu\n", (unsigned long)ino);
16482+ inode = au_iget_locked(sb, ino);
16483+ err = PTR_ERR(inode);
16484+ if (IS_ERR(inode))
1facf9fc 16485+ goto out;
1facf9fc 16486+
4a4d8108
AM
16487+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
16488+ if (inode->i_state & I_NEW) {
16489+ ii_write_lock_new_child(inode);
16490+ err = set_inode(inode, dentry);
16491+ if (!err) {
16492+ unlock_new_inode(inode);
16493+ goto out; /* success */
16494+ }
1308ab2a 16495+
027c5e7a
AM
16496+ /*
16497+ * iget_failed() calls iput(), but we need to call
16498+ * ii_write_unlock() after iget_failed(). so dirty hack for
16499+ * i_count.
16500+ */
16501+ atomic_inc(&inode->i_count);
4a4d8108 16502+ iget_failed(inode);
027c5e7a 16503+ ii_write_unlock(inode);
5afbbe0d 16504+ au_xino_write(sb, btop, h_ino, /*ino*/0);
027c5e7a
AM
16505+ /* ignore this error */
16506+ goto out_iput;
16507+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
b752ccd1
AM
16508+ /*
16509+ * horrible race condition between lookup, readdir and copyup
16510+ * (or something).
16511+ */
16512+ if (mtx)
16513+ mutex_unlock(mtx);
027c5e7a
AM
16514+ err = reval_inode(inode, dentry);
16515+ if (unlikely(err < 0)) {
16516+ mtx = NULL;
16517+ goto out_iput;
16518+ }
16519+
b752ccd1
AM
16520+ if (!err) {
16521+ mtx = NULL;
4a4d8108 16522+ goto out; /* success */
b752ccd1
AM
16523+ } else if (mtx)
16524+ mutex_lock(mtx);
4a4d8108
AM
16525+ }
16526+
5527c038 16527+ if (unlikely(au_test_fs_unique_ino(h_inode)))
4a4d8108 16528+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
523b37e3 16529+ " b%d, %s, %pd, hi%lu, i%lu.\n",
5afbbe0d 16530+ btop, au_sbtype(h_dentry->d_sb), dentry,
4a4d8108
AM
16531+ (unsigned long)h_ino, (unsigned long)ino);
16532+ ino = 0;
5afbbe0d 16533+ err = au_xino_write(sb, btop, h_ino, /*ino*/0);
4a4d8108
AM
16534+ if (!err) {
16535+ iput(inode);
b752ccd1
AM
16536+ if (mtx)
16537+ mutex_unlock(mtx);
4a4d8108
AM
16538+ goto new_ino;
16539+ }
1308ab2a 16540+
4f0767ce 16541+out_iput:
4a4d8108 16542+ iput(inode);
4a4d8108 16543+ inode = ERR_PTR(err);
4f0767ce 16544+out:
b752ccd1
AM
16545+ if (mtx)
16546+ mutex_unlock(mtx);
4a4d8108 16547+ return inode;
1facf9fc 16548+}
16549+
4a4d8108 16550+/* ---------------------------------------------------------------------- */
1facf9fc 16551+
4a4d8108
AM
16552+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16553+ struct inode *inode)
16554+{
16555+ int err;
076b876e 16556+ struct inode *hi;
1facf9fc 16557+
4a4d8108 16558+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 16559+
4a4d8108
AM
16560+ /* pseudo-link after flushed may happen out of bounds */
16561+ if (!err
16562+ && inode
5afbbe0d
AM
16563+ && au_ibtop(inode) <= bindex
16564+ && bindex <= au_ibbot(inode)) {
4a4d8108
AM
16565+ /*
16566+ * permission check is unnecessary since vfsub routine
16567+ * will be called later
16568+ */
076b876e 16569+ hi = au_h_iptr(inode, bindex);
4a4d8108
AM
16570+ if (hi)
16571+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 16572+ }
16573+
4a4d8108
AM
16574+ return err;
16575+}
dece6358 16576+
4a4d8108
AM
16577+int au_test_h_perm(struct inode *h_inode, int mask)
16578+{
2dfbb274 16579+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
4a4d8108
AM
16580+ return 0;
16581+ return inode_permission(h_inode, mask);
16582+}
1facf9fc 16583+
4a4d8108
AM
16584+int au_test_h_perm_sio(struct inode *h_inode, int mask)
16585+{
16586+ if (au_test_nfs(h_inode->i_sb)
16587+ && (mask & MAY_WRITE)
16588+ && S_ISDIR(h_inode->i_mode))
16589+ mask |= MAY_READ; /* force permission check */
16590+ return au_test_h_perm(h_inode, mask);
1facf9fc 16591+}
7f207e10
AM
16592diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
16593--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
16594+++ linux/fs/aufs/inode.h 2016-12-17 12:28:17.595211562 +0100
16595@@ -0,0 +1,691 @@
4a4d8108 16596+/*
8cdd5066 16597+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
16598+ *
16599+ * This program, aufs is free software; you can redistribute it and/or modify
16600+ * it under the terms of the GNU General Public License as published by
16601+ * the Free Software Foundation; either version 2 of the License, or
16602+ * (at your option) any later version.
16603+ *
16604+ * This program is distributed in the hope that it will be useful,
16605+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16606+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16607+ * GNU General Public License for more details.
16608+ *
16609+ * You should have received a copy of the GNU General Public License
523b37e3 16610+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 16611+ */
1facf9fc 16612+
1308ab2a 16613+/*
4a4d8108 16614+ * inode operations
1308ab2a 16615+ */
dece6358 16616+
4a4d8108
AM
16617+#ifndef __AUFS_INODE_H__
16618+#define __AUFS_INODE_H__
dece6358 16619+
4a4d8108 16620+#ifdef __KERNEL__
1308ab2a 16621+
4a4d8108 16622+#include <linux/fsnotify.h>
4a4d8108 16623+#include "rwsem.h"
1308ab2a 16624+
4a4d8108 16625+struct vfsmount;
1facf9fc 16626+
4a4d8108
AM
16627+struct au_hnotify {
16628+#ifdef CONFIG_AUFS_HNOTIFY
16629+#ifdef CONFIG_AUFS_HFSNOTIFY
7f207e10 16630+ /* never use fsnotify_add_vfsmount_mark() */
0c5527e5 16631+ struct fsnotify_mark hn_mark;
4a4d8108 16632+#endif
f0c0a007
AM
16633+ union {
16634+ struct inode *hn_aufs_inode; /* no get/put */
16635+ struct llist_node hn_lnode; /* delayed free */
16636+ };
4a4d8108
AM
16637+#endif
16638+} ____cacheline_aligned_in_smp;
1facf9fc 16639+
4a4d8108
AM
16640+struct au_hinode {
16641+ struct inode *hi_inode;
16642+ aufs_bindex_t hi_id;
16643+#ifdef CONFIG_AUFS_HNOTIFY
16644+ struct au_hnotify *hi_notify;
16645+#endif
dece6358 16646+
4a4d8108
AM
16647+ /* reference to the copied-up whiteout with get/put */
16648+ struct dentry *hi_whdentry;
16649+};
dece6358 16650+
537831f9
AM
16651+/* ig_flags */
16652+#define AuIG_HALF_REFRESHED 1
16653+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
16654+#define au_ig_fset(flags, name) \
16655+ do { (flags) |= AuIG_##name; } while (0)
16656+#define au_ig_fclr(flags, name) \
16657+ do { (flags) &= ~AuIG_##name; } while (0)
16658+
16659+struct au_iigen {
be52b249 16660+ spinlock_t ig_spin;
537831f9
AM
16661+ __u32 ig_generation, ig_flags;
16662+};
16663+
4a4d8108
AM
16664+struct au_vdir;
16665+struct au_iinfo {
7a9e40b8 16666+ struct au_iigen ii_generation;
4a4d8108 16667+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 16668+
4a4d8108 16669+ struct au_rwsem ii_rwsem;
5afbbe0d 16670+ aufs_bindex_t ii_btop, ii_bbot;
4a4d8108
AM
16671+ __u32 ii_higen;
16672+ struct au_hinode *ii_hinode;
16673+ struct au_vdir *ii_vdir;
16674+};
1facf9fc 16675+
4a4d8108
AM
16676+struct au_icntnr {
16677+ struct au_iinfo iinfo;
16678+ struct inode vfs_inode;
f0c0a007
AM
16679+ union {
16680+ struct hlist_node plink;
16681+ struct llist_node lnode; /* delayed free */
16682+ };
4a4d8108 16683+} ____cacheline_aligned_in_smp;
1308ab2a 16684+
4a4d8108
AM
16685+/* au_pin flags */
16686+#define AuPin_DI_LOCKED 1
16687+#define AuPin_MNT_WRITE (1 << 1)
16688+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
7f207e10
AM
16689+#define au_fset_pin(flags, name) \
16690+ do { (flags) |= AuPin_##name; } while (0)
16691+#define au_fclr_pin(flags, name) \
16692+ do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
16693+
16694+struct au_pin {
16695+ /* input */
16696+ struct dentry *dentry;
16697+ unsigned int udba;
16698+ unsigned char lsc_di, lsc_hi, flags;
16699+ aufs_bindex_t bindex;
16700+
16701+ /* output */
16702+ struct dentry *parent;
16703+ struct au_hinode *hdir;
16704+ struct vfsmount *h_mnt;
86dc4139
AM
16705+
16706+ /* temporary unlock/relock for copyup */
16707+ struct dentry *h_dentry, *h_parent;
16708+ struct au_branch *br;
16709+ struct task_struct *task;
4a4d8108 16710+};
1facf9fc 16711+
86dc4139 16712+void au_pin_hdir_unlock(struct au_pin *p);
c1595e42 16713+int au_pin_hdir_lock(struct au_pin *p);
86dc4139 16714+int au_pin_hdir_relock(struct au_pin *p);
86dc4139
AM
16715+void au_pin_hdir_acquire_nest(struct au_pin *p);
16716+void au_pin_hdir_release(struct au_pin *p);
16717+
1308ab2a 16718+/* ---------------------------------------------------------------------- */
16719+
4a4d8108 16720+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 16721+{
5afbbe0d
AM
16722+ BUG_ON(is_bad_inode(inode));
16723+ return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
4a4d8108 16724+}
1facf9fc 16725+
4a4d8108 16726+/* ---------------------------------------------------------------------- */
1facf9fc 16727+
4a4d8108
AM
16728+/* inode.c */
16729+struct inode *au_igrab(struct inode *inode);
b95c5147 16730+void au_refresh_iop(struct inode *inode, int force_getattr);
027c5e7a 16731+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
16732+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
16733+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16734+ unsigned int d_type, ino_t *ino);
16735+struct inode *au_new_inode(struct dentry *dentry, int must_new);
16736+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16737+ struct inode *inode);
16738+int au_test_h_perm(struct inode *h_inode, int mask);
16739+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 16740+
4a4d8108
AM
16741+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
16742+ ino_t h_ino, unsigned int d_type, ino_t *ino)
16743+{
16744+#ifdef CONFIG_AUFS_SHWH
16745+ return au_ino(sb, bindex, h_ino, d_type, ino);
16746+#else
16747+ return 0;
16748+#endif
16749+}
1facf9fc 16750+
4a4d8108 16751+/* i_op.c */
b95c5147
AM
16752+enum {
16753+ AuIop_SYMLINK,
16754+ AuIop_DIR,
16755+ AuIop_OTHER,
16756+ AuIop_Last
16757+};
16758+extern struct inode_operations aufs_iop[AuIop_Last],
16759+ aufs_iop_nogetattr[AuIop_Last];
1308ab2a 16760+
4a4d8108
AM
16761+/* au_wr_dir flags */
16762+#define AuWrDir_ADD_ENTRY 1
7e9cd9fe
AM
16763+#define AuWrDir_ISDIR (1 << 1)
16764+#define AuWrDir_TMPFILE (1 << 2)
4a4d8108 16765+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
7f207e10
AM
16766+#define au_fset_wrdir(flags, name) \
16767+ do { (flags) |= AuWrDir_##name; } while (0)
16768+#define au_fclr_wrdir(flags, name) \
16769+ do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 16770+
4a4d8108
AM
16771+struct au_wr_dir_args {
16772+ aufs_bindex_t force_btgt;
16773+ unsigned char flags;
16774+};
16775+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16776+ struct au_wr_dir_args *args);
dece6358 16777+
4a4d8108
AM
16778+struct dentry *au_pinned_h_parent(struct au_pin *pin);
16779+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
16780+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16781+ unsigned int udba, unsigned char flags);
16782+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16783+ unsigned int udba, unsigned char flags) __must_check;
16784+int au_do_pin(struct au_pin *pin) __must_check;
16785+void au_unpin(struct au_pin *pin);
c1595e42
JR
16786+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
16787+
16788+#define AuIcpup_DID_CPUP 1
16789+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
16790+#define au_fset_icpup(flags, name) \
16791+ do { (flags) |= AuIcpup_##name; } while (0)
16792+#define au_fclr_icpup(flags, name) \
16793+ do { (flags) &= ~AuIcpup_##name; } while (0)
16794+
16795+struct au_icpup_args {
16796+ unsigned char flags;
16797+ unsigned char pin_flags;
16798+ aufs_bindex_t btgt;
16799+ unsigned int udba;
16800+ struct au_pin pin;
16801+ struct path h_path;
16802+ struct inode *h_inode;
16803+};
16804+
16805+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16806+ struct au_icpup_args *a);
16807+
16808+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path);
1facf9fc 16809+
4a4d8108
AM
16810+/* i_op_add.c */
16811+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
16812+ struct dentry *h_parent, int isdir);
7eafdf33
AM
16813+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
16814+ dev_t dev);
4a4d8108 16815+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
7eafdf33 16816+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 16817+ bool want_excl);
b912730e
AM
16818+struct vfsub_aopen_args;
16819+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
16820+ struct vfsub_aopen_args *args);
38d290e6 16821+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
4a4d8108
AM
16822+int aufs_link(struct dentry *src_dentry, struct inode *dir,
16823+ struct dentry *dentry);
7eafdf33 16824+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
1facf9fc 16825+
4a4d8108
AM
16826+/* i_op_del.c */
16827+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
16828+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16829+ struct dentry *h_parent, int isdir);
16830+int aufs_unlink(struct inode *dir, struct dentry *dentry);
16831+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 16832+
4a4d8108
AM
16833+/* i_op_ren.c */
16834+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
16835+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
f2c43d5f
AM
16836+ struct inode *dir, struct dentry *dentry,
16837+ unsigned int flags);
1facf9fc 16838+
4a4d8108
AM
16839+/* iinfo.c */
16840+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
16841+void au_hiput(struct au_hinode *hinode);
16842+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
16843+ struct dentry *h_wh);
16844+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 16845+
4a4d8108
AM
16846+/* hinode flags */
16847+#define AuHi_XINO 1
16848+#define AuHi_HNOTIFY (1 << 1)
16849+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
16850+#define au_fset_hi(flags, name) \
16851+ do { (flags) |= AuHi_##name; } while (0)
16852+#define au_fclr_hi(flags, name) \
16853+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 16854+
4a4d8108
AM
16855+#ifndef CONFIG_AUFS_HNOTIFY
16856+#undef AuHi_HNOTIFY
16857+#define AuHi_HNOTIFY 0
16858+#endif
1facf9fc 16859+
4a4d8108
AM
16860+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
16861+ struct inode *h_inode, unsigned int flags);
1facf9fc 16862+
537831f9 16863+void au_update_iigen(struct inode *inode, int half);
4a4d8108 16864+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 16865+
4a4d8108 16866+void au_icntnr_init_once(void *_c);
5afbbe0d 16867+void au_hinode_init(struct au_hinode *hinode);
4a4d8108
AM
16868+int au_iinfo_init(struct inode *inode);
16869+void au_iinfo_fin(struct inode *inode);
e2f27e51 16870+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink);
1308ab2a 16871+
e49829fe 16872+#ifdef CONFIG_PROC_FS
4a4d8108 16873+/* plink.c */
e49829fe 16874+int au_plink_maint(struct super_block *sb, int flags);
7e9cd9fe 16875+struct au_sbinfo;
e49829fe
JR
16876+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
16877+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
16878+#ifdef CONFIG_AUFS_DEBUG
16879+void au_plink_list(struct super_block *sb);
16880+#else
16881+AuStubVoid(au_plink_list, struct super_block *sb)
16882+#endif
16883+int au_plink_test(struct inode *inode);
16884+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
16885+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
16886+ struct dentry *h_dentry);
e49829fe
JR
16887+void au_plink_put(struct super_block *sb, int verbose);
16888+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 16889+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
16890+#else
16891+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
16892+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
16893+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
16894+AuStubVoid(au_plink_list, struct super_block *sb);
16895+AuStubInt0(au_plink_test, struct inode *inode);
16896+AuStub(struct dentry *, au_plink_lkup, return NULL,
16897+ struct inode *inode, aufs_bindex_t bindex);
16898+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
16899+ struct dentry *h_dentry);
16900+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
16901+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
16902+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
16903+#endif /* CONFIG_PROC_FS */
1facf9fc 16904+
c1595e42
JR
16905+#ifdef CONFIG_AUFS_XATTR
16906+/* xattr.c */
7e9cd9fe
AM
16907+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
16908+ unsigned int verbose);
c1595e42 16909+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
f2c43d5f 16910+void au_xattr_init(struct super_block *sb);
c1595e42
JR
16911+#else
16912+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe 16913+ int ignore_flags, unsigned int verbose);
f2c43d5f 16914+AuStubVoid(au_xattr_init, struct super_block *sb);
c1595e42
JR
16915+#endif
16916+
16917+#ifdef CONFIG_FS_POSIX_ACL
16918+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
16919+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
16920+#endif
16921+
16922+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
16923+enum {
16924+ AU_XATTR_SET,
c1595e42
JR
16925+ AU_ACL_SET
16926+};
16927+
f2c43d5f 16928+struct au_sxattr {
c1595e42
JR
16929+ int type;
16930+ union {
16931+ struct {
16932+ const char *name;
16933+ const void *value;
16934+ size_t size;
16935+ int flags;
16936+ } set;
16937+ struct {
c1595e42
JR
16938+ struct posix_acl *acl;
16939+ int type;
16940+ } acl_set;
16941+ } u;
16942+};
f2c43d5f
AM
16943+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
16944+ struct au_sxattr *arg);
c1595e42
JR
16945+#endif
16946+
4a4d8108 16947+/* ---------------------------------------------------------------------- */
1308ab2a 16948+
4a4d8108
AM
16949+/* lock subclass for iinfo */
16950+enum {
16951+ AuLsc_II_CHILD, /* child first */
16952+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
16953+ AuLsc_II_CHILD3, /* copyup dirs */
16954+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
16955+ AuLsc_II_PARENT2,
16956+ AuLsc_II_PARENT3, /* copyup dirs */
16957+ AuLsc_II_NEW_CHILD
16958+};
1308ab2a 16959+
1facf9fc 16960+/*
4a4d8108
AM
16961+ * ii_read_lock_child, ii_write_lock_child,
16962+ * ii_read_lock_child2, ii_write_lock_child2,
16963+ * ii_read_lock_child3, ii_write_lock_child3,
16964+ * ii_read_lock_parent, ii_write_lock_parent,
16965+ * ii_read_lock_parent2, ii_write_lock_parent2,
16966+ * ii_read_lock_parent3, ii_write_lock_parent3,
16967+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 16968+ */
4a4d8108
AM
16969+#define AuReadLockFunc(name, lsc) \
16970+static inline void ii_read_lock_##name(struct inode *i) \
16971+{ \
16972+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16973+}
16974+
16975+#define AuWriteLockFunc(name, lsc) \
16976+static inline void ii_write_lock_##name(struct inode *i) \
16977+{ \
16978+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16979+}
16980+
16981+#define AuRWLockFuncs(name, lsc) \
16982+ AuReadLockFunc(name, lsc) \
16983+ AuWriteLockFunc(name, lsc)
16984+
16985+AuRWLockFuncs(child, CHILD);
16986+AuRWLockFuncs(child2, CHILD2);
16987+AuRWLockFuncs(child3, CHILD3);
16988+AuRWLockFuncs(parent, PARENT);
16989+AuRWLockFuncs(parent2, PARENT2);
16990+AuRWLockFuncs(parent3, PARENT3);
16991+AuRWLockFuncs(new_child, NEW_CHILD);
16992+
16993+#undef AuReadLockFunc
16994+#undef AuWriteLockFunc
16995+#undef AuRWLockFuncs
1facf9fc 16996+
16997+/*
4a4d8108 16998+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 16999+ */
4a4d8108 17000+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 17001+
4a4d8108
AM
17002+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
17003+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
17004+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 17005+
4a4d8108 17006+/* ---------------------------------------------------------------------- */
1308ab2a 17007+
027c5e7a
AM
17008+static inline void au_icntnr_init(struct au_icntnr *c)
17009+{
17010+#ifdef CONFIG_AUFS_DEBUG
17011+ c->vfs_inode.i_mode = 0;
17012+#endif
17013+}
17014+
cfc41e69 17015+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
4a4d8108 17016+{
537831f9
AM
17017+ unsigned int gen;
17018+ struct au_iinfo *iinfo;
be52b249 17019+ struct au_iigen *iigen;
537831f9
AM
17020+
17021+ iinfo = au_ii(inode);
be52b249
AM
17022+ iigen = &iinfo->ii_generation;
17023+ spin_lock(&iigen->ig_spin);
cfc41e69
AM
17024+ if (igflags)
17025+ *igflags = iigen->ig_flags;
be52b249
AM
17026+ gen = iigen->ig_generation;
17027+ spin_unlock(&iigen->ig_spin);
537831f9
AM
17028+
17029+ return gen;
4a4d8108 17030+}
1308ab2a 17031+
4a4d8108
AM
17032+/* tiny test for inode number */
17033+/* tmpfs generation is too rough */
17034+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
17035+{
17036+ struct au_iinfo *iinfo;
1308ab2a 17037+
4a4d8108
AM
17038+ iinfo = au_ii(inode);
17039+ AuRwMustAnyLock(&iinfo->ii_rwsem);
17040+ return !(iinfo->ii_hsb1 == h_inode->i_sb
17041+ && iinfo->ii_higen == h_inode->i_generation);
17042+}
1308ab2a 17043+
4a4d8108
AM
17044+static inline void au_iigen_dec(struct inode *inode)
17045+{
537831f9 17046+ struct au_iinfo *iinfo;
be52b249 17047+ struct au_iigen *iigen;
537831f9
AM
17048+
17049+ iinfo = au_ii(inode);
be52b249
AM
17050+ iigen = &iinfo->ii_generation;
17051+ spin_lock(&iigen->ig_spin);
17052+ iigen->ig_generation--;
17053+ spin_unlock(&iigen->ig_spin);
027c5e7a
AM
17054+}
17055+
17056+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
17057+{
17058+ int err;
17059+
17060+ err = 0;
537831f9 17061+ if (unlikely(inode && au_iigen(inode, NULL) != sigen))
027c5e7a
AM
17062+ err = -EIO;
17063+
17064+ return err;
4a4d8108 17065+}
1308ab2a 17066+
4a4d8108 17067+/* ---------------------------------------------------------------------- */
1308ab2a 17068+
5afbbe0d
AM
17069+static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo,
17070+ aufs_bindex_t bindex)
17071+{
17072+ return iinfo->ii_hinode + bindex;
17073+}
17074+
17075+static inline int au_is_bad_inode(struct inode *inode)
17076+{
17077+ return !!(is_bad_inode(inode) || !au_hinode(au_ii(inode), 0));
17078+}
17079+
4a4d8108
AM
17080+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
17081+ aufs_bindex_t bindex)
17082+{
17083+ IiMustAnyLock(inode);
5afbbe0d 17084+ return au_hinode(au_ii(inode), bindex)->hi_id;
4a4d8108 17085+}
1308ab2a 17086+
5afbbe0d 17087+static inline aufs_bindex_t au_ibtop(struct inode *inode)
4a4d8108
AM
17088+{
17089+ IiMustAnyLock(inode);
5afbbe0d 17090+ return au_ii(inode)->ii_btop;
4a4d8108 17091+}
1308ab2a 17092+
5afbbe0d 17093+static inline aufs_bindex_t au_ibbot(struct inode *inode)
4a4d8108
AM
17094+{
17095+ IiMustAnyLock(inode);
5afbbe0d 17096+ return au_ii(inode)->ii_bbot;
4a4d8108 17097+}
1308ab2a 17098+
4a4d8108
AM
17099+static inline struct au_vdir *au_ivdir(struct inode *inode)
17100+{
17101+ IiMustAnyLock(inode);
17102+ return au_ii(inode)->ii_vdir;
17103+}
1308ab2a 17104+
4a4d8108
AM
17105+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
17106+{
17107+ IiMustAnyLock(inode);
5afbbe0d 17108+ return au_hinode(au_ii(inode), bindex)->hi_whdentry;
4a4d8108 17109+}
1308ab2a 17110+
5afbbe0d 17111+static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 17112+{
4a4d8108 17113+ IiMustWriteLock(inode);
5afbbe0d 17114+ au_ii(inode)->ii_btop = bindex;
4a4d8108 17115+}
1308ab2a 17116+
5afbbe0d 17117+static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex)
4a4d8108
AM
17118+{
17119+ IiMustWriteLock(inode);
5afbbe0d 17120+ au_ii(inode)->ii_bbot = bindex;
1308ab2a 17121+}
17122+
4a4d8108
AM
17123+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
17124+{
17125+ IiMustWriteLock(inode);
17126+ au_ii(inode)->ii_vdir = vdir;
17127+}
1facf9fc 17128+
4a4d8108 17129+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 17130+{
4a4d8108 17131+ IiMustAnyLock(inode);
5afbbe0d 17132+ return au_hinode(au_ii(inode), bindex);
4a4d8108 17133+}
dece6358 17134+
4a4d8108 17135+/* ---------------------------------------------------------------------- */
1facf9fc 17136+
4a4d8108
AM
17137+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
17138+{
17139+ if (pin)
17140+ return pin->parent;
17141+ return NULL;
1facf9fc 17142+}
17143+
4a4d8108 17144+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 17145+{
4a4d8108
AM
17146+ if (pin && pin->hdir)
17147+ return pin->hdir->hi_inode;
17148+ return NULL;
1308ab2a 17149+}
1facf9fc 17150+
4a4d8108
AM
17151+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
17152+{
17153+ if (pin)
17154+ return pin->hdir;
17155+ return NULL;
17156+}
1facf9fc 17157+
4a4d8108 17158+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 17159+{
4a4d8108
AM
17160+ if (pin)
17161+ pin->dentry = dentry;
17162+}
1308ab2a 17163+
4a4d8108
AM
17164+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
17165+ unsigned char lflag)
17166+{
17167+ if (pin) {
7f207e10 17168+ if (lflag)
4a4d8108 17169+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 17170+ else
4a4d8108 17171+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 17172+ }
4a4d8108
AM
17173+}
17174+
7e9cd9fe 17175+#if 0 /* reserved */
4a4d8108
AM
17176+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
17177+{
17178+ if (pin) {
17179+ dput(pin->parent);
17180+ pin->parent = dget(parent);
1facf9fc 17181+ }
4a4d8108 17182+}
7e9cd9fe 17183+#endif
1facf9fc 17184+
4a4d8108
AM
17185+/* ---------------------------------------------------------------------- */
17186+
027c5e7a 17187+struct au_branch;
4a4d8108
AM
17188+#ifdef CONFIG_AUFS_HNOTIFY
17189+struct au_hnotify_op {
17190+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a 17191+ int (*alloc)(struct au_hinode *hinode);
7eafdf33
AM
17192+
17193+ /*
17194+ * if it returns true, the caller should free hinode->hi_notify,
17195+ * otherwise ->free() frees it.
17196+ */
17197+ int (*free)(struct au_hinode *hinode,
17198+ struct au_hnotify *hn) __must_check;
4a4d8108
AM
17199+
17200+ void (*fin)(void);
17201+ int (*init)(void);
027c5e7a
AM
17202+
17203+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
17204+ void (*fin_br)(struct au_branch *br);
17205+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
17206+};
17207+
17208+/* hnotify.c */
027c5e7a 17209+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
17210+void au_hn_free(struct au_hinode *hinode);
17211+void au_hn_ctl(struct au_hinode *hinode, int do_set);
17212+void au_hn_reset(struct inode *inode, unsigned int flags);
17213+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
17214+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
17215+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
17216+int au_hnotify_init_br(struct au_branch *br, int perm);
17217+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
17218+int __init au_hnotify_init(void);
17219+void au_hnotify_fin(void);
17220+
7f207e10 17221+/* hfsnotify.c */
4a4d8108
AM
17222+extern const struct au_hnotify_op au_hnotify_op;
17223+
17224+static inline
17225+void au_hn_init(struct au_hinode *hinode)
17226+{
17227+ hinode->hi_notify = NULL;
1308ab2a 17228+}
17229+
53392da6
AM
17230+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
17231+{
17232+ return hinode->hi_notify;
17233+}
17234+
4a4d8108 17235+#else
c1595e42
JR
17236+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
17237+ struct au_hinode *hinode __maybe_unused,
17238+ struct inode *inode __maybe_unused)
17239+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
4a4d8108
AM
17240+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
17241+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
17242+ int do_set __maybe_unused)
17243+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
17244+ unsigned int flags __maybe_unused)
027c5e7a
AM
17245+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
17246+ struct au_branch *br __maybe_unused,
17247+ int perm __maybe_unused)
17248+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
17249+ int perm __maybe_unused)
17250+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
17251+AuStubInt0(__init au_hnotify_init, void)
17252+AuStubVoid(au_hnotify_fin, void)
17253+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
17254+#endif /* CONFIG_AUFS_HNOTIFY */
17255+
17256+static inline void au_hn_suspend(struct au_hinode *hdir)
17257+{
17258+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 17259+}
17260+
4a4d8108 17261+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 17262+{
4a4d8108
AM
17263+ au_hn_ctl(hdir, /*do_set*/1);
17264+}
1308ab2a 17265+
5afbbe0d 17266+static inline void au_hn_inode_lock(struct au_hinode *hdir)
4a4d8108 17267+{
febd17d6 17268+ inode_lock(hdir->hi_inode);
4a4d8108
AM
17269+ au_hn_suspend(hdir);
17270+}
dece6358 17271+
5afbbe0d 17272+static inline void au_hn_inode_lock_nested(struct au_hinode *hdir,
4a4d8108
AM
17273+ unsigned int sc __maybe_unused)
17274+{
febd17d6 17275+ inode_lock_nested(hdir->hi_inode, sc);
4a4d8108 17276+ au_hn_suspend(hdir);
1facf9fc 17277+}
1facf9fc 17278+
5afbbe0d 17279+static inline void au_hn_inode_unlock(struct au_hinode *hdir)
4a4d8108
AM
17280+{
17281+ au_hn_resume(hdir);
febd17d6 17282+ inode_unlock(hdir->hi_inode);
4a4d8108
AM
17283+}
17284+
17285+#endif /* __KERNEL__ */
17286+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
17287diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
17288--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 17289+++ linux/fs/aufs/ioctl.c 2016-10-09 16:55:36.492701639 +0200
c1595e42 17290@@ -0,0 +1,219 @@
4a4d8108 17291+/*
8cdd5066 17292+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
17293+ *
17294+ * This program, aufs is free software; you can redistribute it and/or modify
17295+ * it under the terms of the GNU General Public License as published by
17296+ * the Free Software Foundation; either version 2 of the License, or
17297+ * (at your option) any later version.
17298+ *
17299+ * This program is distributed in the hope that it will be useful,
17300+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17301+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17302+ * GNU General Public License for more details.
17303+ *
17304+ * You should have received a copy of the GNU General Public License
523b37e3 17305+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17306+ */
17307+
17308+/*
17309+ * ioctl
17310+ * plink-management and readdir in userspace.
17311+ * assist the pathconf(3) wrapper library.
c2b27bf2 17312+ * move-down
076b876e 17313+ * File-based Hierarchical Storage Management.
4a4d8108
AM
17314+ */
17315+
c2b27bf2
AM
17316+#include <linux/compat.h>
17317+#include <linux/file.h>
4a4d8108
AM
17318+#include "aufs.h"
17319+
1e00d052 17320+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
4a4d8108
AM
17321+{
17322+ int err, fd;
5afbbe0d 17323+ aufs_bindex_t wbi, bindex, bbot;
4a4d8108
AM
17324+ struct file *h_file;
17325+ struct super_block *sb;
17326+ struct dentry *root;
1e00d052
AM
17327+ struct au_branch *br;
17328+ struct aufs_wbr_fd wbrfd = {
17329+ .oflags = au_dir_roflags,
17330+ .brid = -1
17331+ };
17332+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
17333+ | O_NOATIME | O_CLOEXEC;
4a4d8108 17334+
1e00d052
AM
17335+ AuDebugOn(wbrfd.oflags & ~valid);
17336+
17337+ if (arg) {
17338+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
17339+ if (unlikely(err)) {
17340+ err = -EFAULT;
17341+ goto out;
17342+ }
17343+
17344+ err = -EINVAL;
17345+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
17346+ wbrfd.oflags |= au_dir_roflags;
17347+ AuDbg("0%o\n", wbrfd.oflags);
17348+ if (unlikely(wbrfd.oflags & ~valid))
17349+ goto out;
17350+ }
17351+
2000de60 17352+ fd = get_unused_fd_flags(0);
1e00d052
AM
17353+ err = fd;
17354+ if (unlikely(fd < 0))
4a4d8108 17355+ goto out;
4a4d8108 17356+
1e00d052 17357+ h_file = ERR_PTR(-EINVAL);
4a4d8108 17358+ wbi = 0;
1e00d052 17359+ br = NULL;
4a4d8108
AM
17360+ sb = path->dentry->d_sb;
17361+ root = sb->s_root;
17362+ aufs_read_lock(root, AuLock_IR);
5afbbe0d 17363+ bbot = au_sbbot(sb);
1e00d052
AM
17364+ if (wbrfd.brid >= 0) {
17365+ wbi = au_br_index(sb, wbrfd.brid);
5afbbe0d 17366+ if (unlikely(wbi < 0 || wbi > bbot))
1e00d052
AM
17367+ goto out_unlock;
17368+ }
17369+
17370+ h_file = ERR_PTR(-ENOENT);
17371+ br = au_sbr(sb, wbi);
17372+ if (!au_br_writable(br->br_perm)) {
17373+ if (arg)
17374+ goto out_unlock;
17375+
17376+ bindex = wbi + 1;
17377+ wbi = -1;
5afbbe0d 17378+ for (; bindex <= bbot; bindex++) {
1e00d052
AM
17379+ br = au_sbr(sb, bindex);
17380+ if (au_br_writable(br->br_perm)) {
4a4d8108 17381+ wbi = bindex;
1e00d052 17382+ br = au_sbr(sb, wbi);
4a4d8108
AM
17383+ break;
17384+ }
17385+ }
4a4d8108
AM
17386+ }
17387+ AuDbg("wbi %d\n", wbi);
1e00d052 17388+ if (wbi >= 0)
392086de
AM
17389+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
17390+ /*force_wr*/0);
1e00d052
AM
17391+
17392+out_unlock:
4a4d8108
AM
17393+ aufs_read_unlock(root, AuLock_IR);
17394+ err = PTR_ERR(h_file);
17395+ if (IS_ERR(h_file))
17396+ goto out_fd;
17397+
5afbbe0d 17398+ au_br_put(br); /* cf. au_h_open() */
4a4d8108
AM
17399+ fd_install(fd, h_file);
17400+ err = fd;
17401+ goto out; /* success */
17402+
4f0767ce 17403+out_fd:
4a4d8108 17404+ put_unused_fd(fd);
4f0767ce 17405+out:
1e00d052 17406+ AuTraceErr(err);
4a4d8108
AM
17407+ return err;
17408+}
17409+
17410+/* ---------------------------------------------------------------------- */
17411+
17412+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
17413+{
17414+ long err;
c1595e42 17415+ struct dentry *dentry;
4a4d8108
AM
17416+
17417+ switch (cmd) {
4a4d8108
AM
17418+ case AUFS_CTL_RDU:
17419+ case AUFS_CTL_RDU_INO:
17420+ err = au_rdu_ioctl(file, cmd, arg);
17421+ break;
17422+
17423+ case AUFS_CTL_WBR_FD:
1e00d052 17424+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17425+ break;
17426+
027c5e7a
AM
17427+ case AUFS_CTL_IBUSY:
17428+ err = au_ibusy_ioctl(file, arg);
17429+ break;
17430+
076b876e
AM
17431+ case AUFS_CTL_BRINFO:
17432+ err = au_brinfo_ioctl(file, arg);
17433+ break;
17434+
17435+ case AUFS_CTL_FHSM_FD:
2000de60 17436+ dentry = file->f_path.dentry;
c1595e42
JR
17437+ if (IS_ROOT(dentry))
17438+ err = au_fhsm_fd(dentry->d_sb, arg);
17439+ else
17440+ err = -ENOTTY;
076b876e
AM
17441+ break;
17442+
4a4d8108
AM
17443+ default:
17444+ /* do not call the lower */
17445+ AuDbg("0x%x\n", cmd);
17446+ err = -ENOTTY;
17447+ }
17448+
17449+ AuTraceErr(err);
17450+ return err;
17451+}
17452+
17453+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
17454+{
17455+ long err;
17456+
17457+ switch (cmd) {
c2b27bf2 17458+ case AUFS_CTL_MVDOWN:
2000de60 17459+ err = au_mvdown(file->f_path.dentry, (void __user *)arg);
c2b27bf2
AM
17460+ break;
17461+
4a4d8108 17462+ case AUFS_CTL_WBR_FD:
1e00d052 17463+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17464+ break;
17465+
17466+ default:
17467+ /* do not call the lower */
17468+ AuDbg("0x%x\n", cmd);
17469+ err = -ENOTTY;
17470+ }
17471+
17472+ AuTraceErr(err);
17473+ return err;
17474+}
b752ccd1
AM
17475+
17476+#ifdef CONFIG_COMPAT
17477+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
17478+ unsigned long arg)
17479+{
17480+ long err;
17481+
17482+ switch (cmd) {
17483+ case AUFS_CTL_RDU:
17484+ case AUFS_CTL_RDU_INO:
17485+ err = au_rdu_compat_ioctl(file, cmd, arg);
17486+ break;
17487+
027c5e7a
AM
17488+ case AUFS_CTL_IBUSY:
17489+ err = au_ibusy_compat_ioctl(file, arg);
17490+ break;
17491+
076b876e
AM
17492+ case AUFS_CTL_BRINFO:
17493+ err = au_brinfo_compat_ioctl(file, arg);
17494+ break;
17495+
b752ccd1
AM
17496+ default:
17497+ err = aufs_ioctl_dir(file, cmd, arg);
17498+ }
17499+
17500+ AuTraceErr(err);
17501+ return err;
17502+}
17503+
b752ccd1
AM
17504+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
17505+ unsigned long arg)
17506+{
17507+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
17508+}
17509+#endif
7f207e10
AM
17510diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
17511--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
17512+++ linux/fs/aufs/i_op_add.c 2016-12-17 12:28:17.595211562 +0100
17513@@ -0,0 +1,928 @@
4a4d8108 17514+/*
8cdd5066 17515+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
17516+ *
17517+ * This program, aufs is free software; you can redistribute it and/or modify
17518+ * it under the terms of the GNU General Public License as published by
17519+ * the Free Software Foundation; either version 2 of the License, or
17520+ * (at your option) any later version.
17521+ *
17522+ * This program is distributed in the hope that it will be useful,
17523+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17524+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17525+ * GNU General Public License for more details.
17526+ *
17527+ * You should have received a copy of the GNU General Public License
523b37e3 17528+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17529+ */
17530+
17531+/*
17532+ * inode operations (add entry)
17533+ */
17534+
17535+#include "aufs.h"
17536+
17537+/*
17538+ * final procedure of adding a new entry, except link(2).
17539+ * remove whiteout, instantiate, copyup the parent dir's times and size
17540+ * and update version.
17541+ * if it failed, re-create the removed whiteout.
17542+ */
17543+static int epilog(struct inode *dir, aufs_bindex_t bindex,
17544+ struct dentry *wh_dentry, struct dentry *dentry)
17545+{
17546+ int err, rerr;
17547+ aufs_bindex_t bwh;
17548+ struct path h_path;
076b876e 17549+ struct super_block *sb;
4a4d8108
AM
17550+ struct inode *inode, *h_dir;
17551+ struct dentry *wh;
17552+
17553+ bwh = -1;
076b876e 17554+ sb = dir->i_sb;
4a4d8108 17555+ if (wh_dentry) {
5527c038 17556+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
4a4d8108
AM
17557+ IMustLock(h_dir);
17558+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
17559+ bwh = au_dbwh(dentry);
17560+ h_path.dentry = wh_dentry;
076b876e 17561+ h_path.mnt = au_sbr_mnt(sb, bindex);
4a4d8108
AM
17562+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
17563+ dentry);
17564+ if (unlikely(err))
17565+ goto out;
17566+ }
17567+
17568+ inode = au_new_inode(dentry, /*must_new*/1);
17569+ if (!IS_ERR(inode)) {
17570+ d_instantiate(dentry, inode);
5527c038 17571+ dir = d_inode(dentry->d_parent); /* dir inode is locked */
4a4d8108 17572+ IMustLock(dir);
b912730e 17573+ au_dir_ts(dir, bindex);
4a4d8108 17574+ dir->i_version++;
076b876e 17575+ au_fhsm_wrote(sb, bindex, /*force*/0);
4a4d8108
AM
17576+ return 0; /* success */
17577+ }
17578+
17579+ err = PTR_ERR(inode);
17580+ if (!wh_dentry)
17581+ goto out;
17582+
17583+ /* revert */
17584+ /* dir inode is locked */
17585+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
17586+ rerr = PTR_ERR(wh);
17587+ if (IS_ERR(wh)) {
523b37e3
AM
17588+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
17589+ dentry, err, rerr);
4a4d8108
AM
17590+ err = -EIO;
17591+ } else
17592+ dput(wh);
17593+
4f0767ce 17594+out:
4a4d8108
AM
17595+ return err;
17596+}
17597+
027c5e7a
AM
17598+static int au_d_may_add(struct dentry *dentry)
17599+{
17600+ int err;
17601+
17602+ err = 0;
17603+ if (unlikely(d_unhashed(dentry)))
17604+ err = -ENOENT;
5527c038 17605+ if (unlikely(d_really_is_positive(dentry)))
027c5e7a
AM
17606+ err = -EEXIST;
17607+ return err;
17608+}
17609+
4a4d8108
AM
17610+/*
17611+ * simple tests for the adding inode operations.
17612+ * following the checks in vfs, plus the parent-child relationship.
17613+ */
17614+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
17615+ struct dentry *h_parent, int isdir)
17616+{
17617+ int err;
17618+ umode_t h_mode;
17619+ struct dentry *h_dentry;
17620+ struct inode *h_inode;
17621+
17622+ err = -ENAMETOOLONG;
17623+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17624+ goto out;
17625+
17626+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 17627+ if (d_really_is_negative(dentry)) {
4a4d8108 17628+ err = -EEXIST;
5527c038 17629+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
17630+ goto out;
17631+ } else {
17632+ /* rename(2) case */
17633+ err = -EIO;
5527c038
JR
17634+ if (unlikely(d_is_negative(h_dentry)))
17635+ goto out;
17636+ h_inode = d_inode(h_dentry);
17637+ if (unlikely(!h_inode->i_nlink))
4a4d8108
AM
17638+ goto out;
17639+
17640+ h_mode = h_inode->i_mode;
17641+ if (!isdir) {
17642+ err = -EISDIR;
17643+ if (unlikely(S_ISDIR(h_mode)))
17644+ goto out;
17645+ } else if (unlikely(!S_ISDIR(h_mode))) {
17646+ err = -ENOTDIR;
17647+ goto out;
17648+ }
17649+ }
17650+
17651+ err = 0;
17652+ /* expected parent dir is locked */
17653+ if (unlikely(h_parent != h_dentry->d_parent))
17654+ err = -EIO;
17655+
4f0767ce 17656+out:
4a4d8108
AM
17657+ AuTraceErr(err);
17658+ return err;
17659+}
17660+
17661+/*
17662+ * initial procedure of adding a new entry.
17663+ * prepare writable branch and the parent dir, lock it,
17664+ * and lookup whiteout for the new entry.
17665+ */
17666+static struct dentry*
17667+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
17668+ struct dentry *src_dentry, struct au_pin *pin,
17669+ struct au_wr_dir_args *wr_dir_args)
17670+{
17671+ struct dentry *wh_dentry, *h_parent;
17672+ struct super_block *sb;
17673+ struct au_branch *br;
17674+ int err;
17675+ unsigned int udba;
17676+ aufs_bindex_t bcpup;
17677+
523b37e3 17678+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17679+
17680+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
17681+ bcpup = err;
17682+ wh_dentry = ERR_PTR(err);
17683+ if (unlikely(err < 0))
17684+ goto out;
17685+
17686+ sb = dentry->d_sb;
17687+ udba = au_opt_udba(sb);
17688+ err = au_pin(pin, dentry, bcpup, udba,
17689+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17690+ wh_dentry = ERR_PTR(err);
17691+ if (unlikely(err))
17692+ goto out;
17693+
17694+ h_parent = au_pinned_h_parent(pin);
17695+ if (udba != AuOpt_UDBA_NONE
5afbbe0d 17696+ && au_dbtop(dentry) == bcpup)
4a4d8108
AM
17697+ err = au_may_add(dentry, bcpup, h_parent,
17698+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
17699+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17700+ err = -ENAMETOOLONG;
17701+ wh_dentry = ERR_PTR(err);
17702+ if (unlikely(err))
17703+ goto out_unpin;
17704+
17705+ br = au_sbr(sb, bcpup);
17706+ if (dt) {
17707+ struct path tmp = {
17708+ .dentry = h_parent,
86dc4139 17709+ .mnt = au_br_mnt(br)
4a4d8108
AM
17710+ };
17711+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
17712+ }
17713+
17714+ wh_dentry = NULL;
17715+ if (bcpup != au_dbwh(dentry))
17716+ goto out; /* success */
17717+
2000de60
JR
17718+ /*
17719+ * ENAMETOOLONG here means that if we allowed creating such a name, it
17720+ * could not be removed in the future. So we don't allow such a
17721+ * name here and we don't handle ENAMETOOLONG differently here.
17722+ */
4a4d8108
AM
17723+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
17724+
4f0767ce 17725+out_unpin:
4a4d8108
AM
17726+ if (IS_ERR(wh_dentry))
17727+ au_unpin(pin);
4f0767ce 17728+out:
4a4d8108
AM
17729+ return wh_dentry;
17730+}
17731+
17732+/* ---------------------------------------------------------------------- */
17733+
17734+enum { Mknod, Symlink, Creat };
17735+struct simple_arg {
17736+ int type;
17737+ union {
17738+ struct {
b912730e
AM
17739+ umode_t mode;
17740+ bool want_excl;
17741+ bool try_aopen;
17742+ struct vfsub_aopen_args *aopen;
4a4d8108
AM
17743+ } c;
17744+ struct {
17745+ const char *symname;
17746+ } s;
17747+ struct {
7eafdf33 17748+ umode_t mode;
4a4d8108
AM
17749+ dev_t dev;
17750+ } m;
17751+ } u;
17752+};
17753+
17754+static int add_simple(struct inode *dir, struct dentry *dentry,
17755+ struct simple_arg *arg)
17756+{
076b876e 17757+ int err, rerr;
5afbbe0d 17758+ aufs_bindex_t btop;
4a4d8108 17759+ unsigned char created;
b912730e
AM
17760+ const unsigned char try_aopen
17761+ = (arg->type == Creat && arg->u.c.try_aopen);
4a4d8108
AM
17762+ struct dentry *wh_dentry, *parent;
17763+ struct inode *h_dir;
b912730e
AM
17764+ struct super_block *sb;
17765+ struct au_branch *br;
c2b27bf2
AM
17766+ /* to reduce stack size */
17767+ struct {
17768+ struct au_dtime dt;
17769+ struct au_pin pin;
17770+ struct path h_path;
17771+ struct au_wr_dir_args wr_dir_args;
17772+ } *a;
4a4d8108 17773+
523b37e3 17774+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17775+ IMustLock(dir);
17776+
c2b27bf2
AM
17777+ err = -ENOMEM;
17778+ a = kmalloc(sizeof(*a), GFP_NOFS);
17779+ if (unlikely(!a))
17780+ goto out;
17781+ a->wr_dir_args.force_btgt = -1;
17782+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
17783+
4a4d8108 17784+ parent = dentry->d_parent; /* dir inode is locked */
b912730e
AM
17785+ if (!try_aopen) {
17786+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17787+ if (unlikely(err))
17788+ goto out_free;
17789+ }
027c5e7a
AM
17790+ err = au_d_may_add(dentry);
17791+ if (unlikely(err))
17792+ goto out_unlock;
b912730e
AM
17793+ if (!try_aopen)
17794+ di_write_lock_parent(parent);
c2b27bf2
AM
17795+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
17796+ &a->pin, &a->wr_dir_args);
4a4d8108
AM
17797+ err = PTR_ERR(wh_dentry);
17798+ if (IS_ERR(wh_dentry))
027c5e7a 17799+ goto out_parent;
4a4d8108 17800+
5afbbe0d 17801+ btop = au_dbtop(dentry);
b912730e 17802+ sb = dentry->d_sb;
5afbbe0d
AM
17803+ br = au_sbr(sb, btop);
17804+ a->h_path.dentry = au_h_dptr(dentry, btop);
b912730e 17805+ a->h_path.mnt = au_br_mnt(br);
c2b27bf2 17806+ h_dir = au_pinned_h_dir(&a->pin);
4a4d8108
AM
17807+ switch (arg->type) {
17808+ case Creat:
b912730e
AM
17809+ err = 0;
17810+ if (!try_aopen || !h_dir->i_op->atomic_open)
17811+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
17812+ arg->u.c.want_excl);
17813+ else
17814+ err = vfsub_atomic_open(h_dir, a->h_path.dentry,
17815+ arg->u.c.aopen, br);
4a4d8108
AM
17816+ break;
17817+ case Symlink:
c2b27bf2 17818+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
4a4d8108
AM
17819+ break;
17820+ case Mknod:
c2b27bf2
AM
17821+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
17822+ arg->u.m.dev);
4a4d8108
AM
17823+ break;
17824+ default:
17825+ BUG();
17826+ }
17827+ created = !err;
17828+ if (!err)
5afbbe0d 17829+ err = epilog(dir, btop, wh_dentry, dentry);
4a4d8108
AM
17830+
17831+ /* revert */
5527c038 17832+ if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
523b37e3
AM
17833+ /* no delegation since it is just created */
17834+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
17835+ /*force*/0);
4a4d8108 17836+ if (rerr) {
523b37e3
AM
17837+ AuIOErr("%pd revert failure(%d, %d)\n",
17838+ dentry, err, rerr);
4a4d8108
AM
17839+ err = -EIO;
17840+ }
c2b27bf2 17841+ au_dtime_revert(&a->dt);
4a4d8108
AM
17842+ }
17843+
b912730e
AM
17844+ if (!err && try_aopen && !h_dir->i_op->atomic_open)
17845+ *arg->u.c.aopen->opened |= FILE_CREATED;
17846+
c2b27bf2 17847+ au_unpin(&a->pin);
4a4d8108
AM
17848+ dput(wh_dentry);
17849+
027c5e7a 17850+out_parent:
b912730e
AM
17851+ if (!try_aopen)
17852+ di_write_unlock(parent);
027c5e7a 17853+out_unlock:
4a4d8108 17854+ if (unlikely(err)) {
5afbbe0d 17855+ au_update_dbtop(dentry);
4a4d8108
AM
17856+ d_drop(dentry);
17857+ }
b912730e
AM
17858+ if (!try_aopen)
17859+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2 17860+out_free:
f0c0a007 17861+ au_delayed_kfree(a);
027c5e7a 17862+out:
4a4d8108
AM
17863+ return err;
17864+}
17865+
7eafdf33
AM
17866+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
17867+ dev_t dev)
4a4d8108
AM
17868+{
17869+ struct simple_arg arg = {
17870+ .type = Mknod,
17871+ .u.m = {
17872+ .mode = mode,
17873+ .dev = dev
17874+ }
17875+ };
17876+ return add_simple(dir, dentry, &arg);
17877+}
17878+
17879+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
17880+{
17881+ struct simple_arg arg = {
17882+ .type = Symlink,
17883+ .u.s.symname = symname
17884+ };
17885+ return add_simple(dir, dentry, &arg);
17886+}
17887+
7eafdf33 17888+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 17889+ bool want_excl)
4a4d8108
AM
17890+{
17891+ struct simple_arg arg = {
17892+ .type = Creat,
17893+ .u.c = {
b4510431
AM
17894+ .mode = mode,
17895+ .want_excl = want_excl
4a4d8108
AM
17896+ }
17897+ };
17898+ return add_simple(dir, dentry, &arg);
17899+}
17900+
b912730e
AM
17901+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
17902+ struct vfsub_aopen_args *aopen_args)
17903+{
17904+ struct simple_arg arg = {
17905+ .type = Creat,
17906+ .u.c = {
17907+ .mode = aopen_args->create_mode,
17908+ .want_excl = aopen_args->open_flag & O_EXCL,
17909+ .try_aopen = true,
17910+ .aopen = aopen_args
17911+ }
17912+ };
17913+ return add_simple(dir, dentry, &arg);
17914+}
17915+
38d290e6
JR
17916+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
17917+{
17918+ int err;
17919+ aufs_bindex_t bindex;
17920+ struct super_block *sb;
17921+ struct dentry *parent, *h_parent, *h_dentry;
17922+ struct inode *h_dir, *inode;
17923+ struct vfsmount *h_mnt;
17924+ struct au_wr_dir_args wr_dir_args = {
17925+ .force_btgt = -1,
17926+ .flags = AuWrDir_TMPFILE
17927+ };
17928+
17929+ /* copy-up may happen */
febd17d6 17930+ inode_lock(dir);
38d290e6
JR
17931+
17932+ sb = dir->i_sb;
17933+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
17934+ if (unlikely(err))
17935+ goto out;
17936+
17937+ err = au_di_init(dentry);
17938+ if (unlikely(err))
17939+ goto out_si;
17940+
17941+ err = -EBUSY;
17942+ parent = d_find_any_alias(dir);
17943+ AuDebugOn(!parent);
17944+ di_write_lock_parent(parent);
5527c038 17945+ if (unlikely(d_inode(parent) != dir))
38d290e6
JR
17946+ goto out_parent;
17947+
17948+ err = au_digen_test(parent, au_sigen(sb));
17949+ if (unlikely(err))
17950+ goto out_parent;
17951+
5afbbe0d
AM
17952+ bindex = au_dbtop(parent);
17953+ au_set_dbtop(dentry, bindex);
17954+ au_set_dbbot(dentry, bindex);
38d290e6
JR
17955+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
17956+ bindex = err;
17957+ if (unlikely(err < 0))
17958+ goto out_parent;
17959+
17960+ err = -EOPNOTSUPP;
17961+ h_dir = au_h_iptr(dir, bindex);
17962+ if (unlikely(!h_dir->i_op->tmpfile))
17963+ goto out_parent;
17964+
17965+ h_mnt = au_sbr_mnt(sb, bindex);
17966+ err = vfsub_mnt_want_write(h_mnt);
17967+ if (unlikely(err))
17968+ goto out_parent;
17969+
17970+ h_parent = au_h_dptr(parent, bindex);
5527c038 17971+ err = inode_permission(d_inode(h_parent), MAY_WRITE | MAY_EXEC);
38d290e6
JR
17972+ if (unlikely(err))
17973+ goto out_mnt;
17974+
17975+ err = -ENOMEM;
17976+ h_dentry = d_alloc(h_parent, &dentry->d_name);
17977+ if (unlikely(!h_dentry))
17978+ goto out_mnt;
17979+
17980+ err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode);
17981+ if (unlikely(err))
17982+ goto out_dentry;
17983+
5afbbe0d
AM
17984+ au_set_dbtop(dentry, bindex);
17985+ au_set_dbbot(dentry, bindex);
38d290e6
JR
17986+ au_set_h_dptr(dentry, bindex, dget(h_dentry));
17987+ inode = au_new_inode(dentry, /*must_new*/1);
17988+ if (IS_ERR(inode)) {
17989+ err = PTR_ERR(inode);
17990+ au_set_h_dptr(dentry, bindex, NULL);
5afbbe0d
AM
17991+ au_set_dbtop(dentry, -1);
17992+ au_set_dbbot(dentry, -1);
38d290e6
JR
17993+ } else {
17994+ if (!inode->i_nlink)
17995+ set_nlink(inode, 1);
17996+ d_tmpfile(dentry, inode);
17997+ au_di(dentry)->di_tmpfile = 1;
17998+
17999+ /* update without i_mutex */
5afbbe0d 18000+ if (au_ibtop(dir) == au_dbtop(dentry))
38d290e6
JR
18001+ au_cpup_attr_timesizes(dir);
18002+ }
18003+
18004+out_dentry:
18005+ dput(h_dentry);
18006+out_mnt:
18007+ vfsub_mnt_drop_write(h_mnt);
18008+out_parent:
18009+ di_write_unlock(parent);
18010+ dput(parent);
18011+ di_write_unlock(dentry);
5afbbe0d 18012+ if (unlikely(err)) {
38d290e6
JR
18013+ au_di_fin(dentry);
18014+ dentry->d_fsdata = NULL;
18015+ }
18016+out_si:
18017+ si_read_unlock(sb);
18018+out:
febd17d6 18019+ inode_unlock(dir);
38d290e6
JR
18020+ return err;
18021+}
18022+
4a4d8108
AM
18023+/* ---------------------------------------------------------------------- */
18024+
18025+struct au_link_args {
18026+ aufs_bindex_t bdst, bsrc;
18027+ struct au_pin pin;
18028+ struct path h_path;
18029+ struct dentry *src_parent, *parent;
18030+};
18031+
18032+static int au_cpup_before_link(struct dentry *src_dentry,
18033+ struct au_link_args *a)
18034+{
18035+ int err;
18036+ struct dentry *h_src_dentry;
c2b27bf2
AM
18037+ struct au_cp_generic cpg = {
18038+ .dentry = src_dentry,
18039+ .bdst = a->bdst,
18040+ .bsrc = a->bsrc,
18041+ .len = -1,
18042+ .pin = &a->pin,
18043+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
18044+ };
4a4d8108
AM
18045+
18046+ di_read_lock_parent(a->src_parent, AuLock_IR);
18047+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
18048+ if (unlikely(err))
18049+ goto out;
18050+
18051+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
4a4d8108
AM
18052+ err = au_pin(&a->pin, src_dentry, a->bdst,
18053+ au_opt_udba(src_dentry->d_sb),
18054+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
18055+ if (unlikely(err))
18056+ goto out;
367653fa 18057+
c2b27bf2 18058+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
18059+ au_unpin(&a->pin);
18060+
4f0767ce 18061+out:
4a4d8108
AM
18062+ di_read_unlock(a->src_parent, AuLock_IR);
18063+ return err;
18064+}
18065+
86dc4139
AM
18066+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
18067+ struct au_link_args *a)
4a4d8108
AM
18068+{
18069+ int err;
18070+ unsigned char plink;
5afbbe0d 18071+ aufs_bindex_t bbot;
4a4d8108 18072+ struct dentry *h_src_dentry;
523b37e3 18073+ struct inode *h_inode, *inode, *delegated;
4a4d8108
AM
18074+ struct super_block *sb;
18075+ struct file *h_file;
18076+
18077+ plink = 0;
18078+ h_inode = NULL;
18079+ sb = src_dentry->d_sb;
5527c038 18080+ inode = d_inode(src_dentry);
5afbbe0d 18081+ if (au_ibtop(inode) <= a->bdst)
4a4d8108
AM
18082+ h_inode = au_h_iptr(inode, a->bdst);
18083+ if (!h_inode || !h_inode->i_nlink) {
18084+ /* copyup src_dentry as the name of dentry. */
5afbbe0d
AM
18085+ bbot = au_dbbot(dentry);
18086+ if (bbot < a->bsrc)
18087+ au_set_dbbot(dentry, a->bsrc);
86dc4139
AM
18088+ au_set_h_dptr(dentry, a->bsrc,
18089+ dget(au_h_dptr(src_dentry, a->bsrc)));
18090+ dget(a->h_path.dentry);
18091+ au_set_h_dptr(dentry, a->bdst, NULL);
c1595e42
JR
18092+ AuDbg("temporary d_inode...\n");
18093+ spin_lock(&dentry->d_lock);
5527c038 18094+ dentry->d_inode = d_inode(src_dentry); /* tmp */
c1595e42 18095+ spin_unlock(&dentry->d_lock);
392086de 18096+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
86dc4139 18097+ if (IS_ERR(h_file))
4a4d8108 18098+ err = PTR_ERR(h_file);
86dc4139 18099+ else {
c2b27bf2
AM
18100+ struct au_cp_generic cpg = {
18101+ .dentry = dentry,
18102+ .bdst = a->bdst,
18103+ .bsrc = -1,
18104+ .len = -1,
18105+ .pin = &a->pin,
18106+ .flags = AuCpup_KEEPLINO
18107+ };
18108+ err = au_sio_cpup_simple(&cpg);
86dc4139
AM
18109+ au_h_open_post(dentry, a->bsrc, h_file);
18110+ if (!err) {
18111+ dput(a->h_path.dentry);
18112+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
18113+ } else
18114+ au_set_h_dptr(dentry, a->bdst,
18115+ a->h_path.dentry);
18116+ }
c1595e42 18117+ spin_lock(&dentry->d_lock);
86dc4139 18118+ dentry->d_inode = NULL; /* restore */
c1595e42
JR
18119+ spin_unlock(&dentry->d_lock);
18120+ AuDbg("temporary d_inode...done\n");
86dc4139 18121+ au_set_h_dptr(dentry, a->bsrc, NULL);
5afbbe0d 18122+ au_set_dbbot(dentry, bbot);
4a4d8108
AM
18123+ } else {
18124+ /* the inode of src_dentry already exists on a.bdst branch */
18125+ h_src_dentry = d_find_alias(h_inode);
18126+ if (!h_src_dentry && au_plink_test(inode)) {
18127+ plink = 1;
18128+ h_src_dentry = au_plink_lkup(inode, a->bdst);
18129+ err = PTR_ERR(h_src_dentry);
18130+ if (IS_ERR(h_src_dentry))
18131+ goto out;
18132+
5527c038 18133+ if (unlikely(d_is_negative(h_src_dentry))) {
4a4d8108
AM
18134+ dput(h_src_dentry);
18135+ h_src_dentry = NULL;
18136+ }
18137+
18138+ }
18139+ if (h_src_dentry) {
523b37e3 18140+ delegated = NULL;
4a4d8108 18141+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
18142+ &a->h_path, &delegated);
18143+ if (unlikely(err == -EWOULDBLOCK)) {
18144+ pr_warn("cannot retry for NFSv4 delegation"
18145+ " for an internal link\n");
18146+ iput(delegated);
18147+ }
4a4d8108
AM
18148+ dput(h_src_dentry);
18149+ } else {
18150+ AuIOErr("no dentry found for hi%lu on b%d\n",
18151+ h_inode->i_ino, a->bdst);
18152+ err = -EIO;
18153+ }
18154+ }
18155+
18156+ if (!err && !plink)
18157+ au_plink_append(inode, a->bdst, a->h_path.dentry);
18158+
18159+out:
2cbb1c4b 18160+ AuTraceErr(err);
4a4d8108
AM
18161+ return err;
18162+}
18163+
18164+int aufs_link(struct dentry *src_dentry, struct inode *dir,
18165+ struct dentry *dentry)
18166+{
18167+ int err, rerr;
18168+ struct au_dtime dt;
18169+ struct au_link_args *a;
18170+ struct dentry *wh_dentry, *h_src_dentry;
523b37e3 18171+ struct inode *inode, *delegated;
4a4d8108
AM
18172+ struct super_block *sb;
18173+ struct au_wr_dir_args wr_dir_args = {
18174+ /* .force_btgt = -1, */
18175+ .flags = AuWrDir_ADD_ENTRY
18176+ };
18177+
18178+ IMustLock(dir);
5527c038 18179+ inode = d_inode(src_dentry);
4a4d8108
AM
18180+ IMustLock(inode);
18181+
4a4d8108
AM
18182+ err = -ENOMEM;
18183+ a = kzalloc(sizeof(*a), GFP_NOFS);
18184+ if (unlikely(!a))
18185+ goto out;
18186+
18187+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
18188+ err = aufs_read_and_write_lock2(dentry, src_dentry,
18189+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
18190+ if (unlikely(err))
18191+ goto out_kfree;
38d290e6 18192+ err = au_d_linkable(src_dentry);
027c5e7a
AM
18193+ if (unlikely(err))
18194+ goto out_unlock;
18195+ err = au_d_may_add(dentry);
18196+ if (unlikely(err))
18197+ goto out_unlock;
e49829fe 18198+
4a4d8108 18199+ a->src_parent = dget_parent(src_dentry);
5afbbe0d 18200+ wr_dir_args.force_btgt = au_ibtop(inode);
4a4d8108
AM
18201+
18202+ di_write_lock_parent(a->parent);
18203+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
18204+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
18205+ &wr_dir_args);
18206+ err = PTR_ERR(wh_dentry);
18207+ if (IS_ERR(wh_dentry))
027c5e7a 18208+ goto out_parent;
4a4d8108
AM
18209+
18210+ err = 0;
18211+ sb = dentry->d_sb;
5afbbe0d 18212+ a->bdst = au_dbtop(dentry);
4a4d8108
AM
18213+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
18214+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
5afbbe0d 18215+ a->bsrc = au_ibtop(inode);
2cbb1c4b 18216+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
38d290e6
JR
18217+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
18218+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
2cbb1c4b 18219+ if (!h_src_dentry) {
5afbbe0d 18220+ a->bsrc = au_dbtop(src_dentry);
2cbb1c4b
JR
18221+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
18222+ AuDebugOn(!h_src_dentry);
38d290e6
JR
18223+ } else if (IS_ERR(h_src_dentry)) {
18224+ err = PTR_ERR(h_src_dentry);
2cbb1c4b 18225+ goto out_parent;
38d290e6 18226+ }
2cbb1c4b 18227+
f2c43d5f
AM
18228+ /*
18229+ * aufs doesn't touch the credential so
18230+ * security_dentry_create_files_as() is unnecessary.
18231+ */
4a4d8108
AM
18232+ if (au_opt_test(au_mntflags(sb), PLINK)) {
18233+ if (a->bdst < a->bsrc
18234+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
86dc4139 18235+ err = au_cpup_or_link(src_dentry, dentry, a);
523b37e3
AM
18236+ else {
18237+ delegated = NULL;
4a4d8108 18238+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
18239+ &a->h_path, &delegated);
18240+ if (unlikely(err == -EWOULDBLOCK)) {
18241+ pr_warn("cannot retry for NFSv4 delegation"
18242+ " for an internal link\n");
18243+ iput(delegated);
18244+ }
18245+ }
2cbb1c4b 18246+ dput(h_src_dentry);
4a4d8108
AM
18247+ } else {
18248+ /*
18249+ * copyup src_dentry to the branch we process,
18250+ * and then link(2) to it.
18251+ */
2cbb1c4b 18252+ dput(h_src_dentry);
4a4d8108
AM
18253+ if (a->bdst < a->bsrc
18254+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
18255+ au_unpin(&a->pin);
18256+ di_write_unlock(a->parent);
18257+ err = au_cpup_before_link(src_dentry, a);
18258+ di_write_lock_parent(a->parent);
18259+ if (!err)
18260+ err = au_pin(&a->pin, dentry, a->bdst,
18261+ au_opt_udba(sb),
18262+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
18263+ if (unlikely(err))
18264+ goto out_wh;
18265+ }
18266+ if (!err) {
18267+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
18268+ err = -ENOENT;
5527c038 18269+ if (h_src_dentry && d_is_positive(h_src_dentry)) {
523b37e3 18270+ delegated = NULL;
4a4d8108
AM
18271+ err = vfsub_link(h_src_dentry,
18272+ au_pinned_h_dir(&a->pin),
523b37e3
AM
18273+ &a->h_path, &delegated);
18274+ if (unlikely(err == -EWOULDBLOCK)) {
18275+ pr_warn("cannot retry"
18276+ " for NFSv4 delegation"
18277+ " for an internal link\n");
18278+ iput(delegated);
18279+ }
18280+ }
4a4d8108
AM
18281+ }
18282+ }
18283+ if (unlikely(err))
18284+ goto out_unpin;
18285+
18286+ if (wh_dentry) {
18287+ a->h_path.dentry = wh_dentry;
18288+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
18289+ dentry);
18290+ if (unlikely(err))
18291+ goto out_revert;
18292+ }
18293+
b912730e 18294+ au_dir_ts(dir, a->bdst);
4a4d8108 18295+ dir->i_version++;
4a4d8108
AM
18296+ inc_nlink(inode);
18297+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
18298+ d_instantiate(dentry, au_igrab(inode));
18299+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
18300+ /* some filesystem calls d_drop() */
18301+ d_drop(dentry);
076b876e
AM
18302+ /* some filesystems consume an inode even for a hardlink */
18303+ au_fhsm_wrote(sb, a->bdst, /*force*/0);
4a4d8108
AM
18304+ goto out_unpin; /* success */
18305+
4f0767ce 18306+out_revert:
523b37e3
AM
18307+ /* no delegation since it is just created */
18308+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
18309+ /*delegated*/NULL, /*force*/0);
027c5e7a 18310+ if (unlikely(rerr)) {
523b37e3 18311+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
027c5e7a
AM
18312+ err = -EIO;
18313+ }
4a4d8108 18314+ au_dtime_revert(&dt);
4f0767ce 18315+out_unpin:
4a4d8108 18316+ au_unpin(&a->pin);
4f0767ce 18317+out_wh:
4a4d8108 18318+ dput(wh_dentry);
027c5e7a
AM
18319+out_parent:
18320+ di_write_unlock(a->parent);
18321+ dput(a->src_parent);
4f0767ce 18322+out_unlock:
4a4d8108 18323+ if (unlikely(err)) {
5afbbe0d 18324+ au_update_dbtop(dentry);
4a4d8108
AM
18325+ d_drop(dentry);
18326+ }
4a4d8108 18327+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 18328+out_kfree:
f0c0a007 18329+ au_delayed_kfree(a);
4f0767ce 18330+out:
86dc4139 18331+ AuTraceErr(err);
4a4d8108
AM
18332+ return err;
18333+}
18334+
7eafdf33 18335+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4a4d8108
AM
18336+{
18337+ int err, rerr;
18338+ aufs_bindex_t bindex;
18339+ unsigned char diropq;
18340+ struct path h_path;
18341+ struct dentry *wh_dentry, *parent, *opq_dentry;
febd17d6 18342+ struct inode *h_inode;
4a4d8108
AM
18343+ struct super_block *sb;
18344+ struct {
18345+ struct au_pin pin;
18346+ struct au_dtime dt;
18347+ } *a; /* reduce the stack usage */
18348+ struct au_wr_dir_args wr_dir_args = {
18349+ .force_btgt = -1,
18350+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
18351+ };
18352+
18353+ IMustLock(dir);
18354+
18355+ err = -ENOMEM;
18356+ a = kmalloc(sizeof(*a), GFP_NOFS);
18357+ if (unlikely(!a))
18358+ goto out;
18359+
027c5e7a
AM
18360+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
18361+ if (unlikely(err))
18362+ goto out_free;
18363+ err = au_d_may_add(dentry);
18364+ if (unlikely(err))
18365+ goto out_unlock;
18366+
4a4d8108
AM
18367+ parent = dentry->d_parent; /* dir inode is locked */
18368+ di_write_lock_parent(parent);
18369+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
18370+ &a->pin, &wr_dir_args);
18371+ err = PTR_ERR(wh_dentry);
18372+ if (IS_ERR(wh_dentry))
027c5e7a 18373+ goto out_parent;
4a4d8108
AM
18374+
18375+ sb = dentry->d_sb;
5afbbe0d 18376+ bindex = au_dbtop(dentry);
4a4d8108
AM
18377+ h_path.dentry = au_h_dptr(dentry, bindex);
18378+ h_path.mnt = au_sbr_mnt(sb, bindex);
18379+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
18380+ if (unlikely(err))
027c5e7a 18381+ goto out_unpin;
4a4d8108
AM
18382+
18383+ /* make the dir opaque */
18384+ diropq = 0;
febd17d6 18385+ h_inode = d_inode(h_path.dentry);
4a4d8108
AM
18386+ if (wh_dentry
18387+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
febd17d6 18388+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 18389+ opq_dentry = au_diropq_create(dentry, bindex);
febd17d6 18390+ inode_unlock(h_inode);
4a4d8108
AM
18391+ err = PTR_ERR(opq_dentry);
18392+ if (IS_ERR(opq_dentry))
18393+ goto out_dir;
18394+ dput(opq_dentry);
18395+ diropq = 1;
18396+ }
18397+
18398+ err = epilog(dir, bindex, wh_dentry, dentry);
18399+ if (!err) {
18400+ inc_nlink(dir);
027c5e7a 18401+ goto out_unpin; /* success */
4a4d8108
AM
18402+ }
18403+
18404+ /* revert */
18405+ if (diropq) {
18406+ AuLabel(revert opq);
febd17d6 18407+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 18408+ rerr = au_diropq_remove(dentry, bindex);
febd17d6 18409+ inode_unlock(h_inode);
4a4d8108 18410+ if (rerr) {
523b37e3
AM
18411+ AuIOErr("%pd reverting diropq failed(%d, %d)\n",
18412+ dentry, err, rerr);
4a4d8108
AM
18413+ err = -EIO;
18414+ }
18415+ }
18416+
4f0767ce 18417+out_dir:
4a4d8108
AM
18418+ AuLabel(revert dir);
18419+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
18420+ if (rerr) {
523b37e3
AM
18421+ AuIOErr("%pd reverting dir failed(%d, %d)\n",
18422+ dentry, err, rerr);
4a4d8108
AM
18423+ err = -EIO;
18424+ }
4a4d8108 18425+ au_dtime_revert(&a->dt);
027c5e7a 18426+out_unpin:
4a4d8108
AM
18427+ au_unpin(&a->pin);
18428+ dput(wh_dentry);
027c5e7a
AM
18429+out_parent:
18430+ di_write_unlock(parent);
18431+out_unlock:
4a4d8108 18432+ if (unlikely(err)) {
5afbbe0d 18433+ au_update_dbtop(dentry);
4a4d8108
AM
18434+ d_drop(dentry);
18435+ }
4a4d8108 18436+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 18437+out_free:
f0c0a007 18438+ au_delayed_kfree(a);
4f0767ce 18439+out:
4a4d8108
AM
18440+ return err;
18441+}
7f207e10
AM
18442diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
18443--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
18444+++ linux/fs/aufs/i_op.c 2016-12-17 12:28:17.595211562 +0100
18445@@ -0,0 +1,1444 @@
4a4d8108 18446+/*
8cdd5066 18447+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
18448+ *
18449+ * This program, aufs is free software; you can redistribute it and/or modify
18450+ * it under the terms of the GNU General Public License as published by
18451+ * the Free Software Foundation; either version 2 of the License, or
18452+ * (at your option) any later version.
18453+ *
18454+ * This program is distributed in the hope that it will be useful,
18455+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18456+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18457+ * GNU General Public License for more details.
18458+ *
18459+ * You should have received a copy of the GNU General Public License
523b37e3 18460+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 18461+ */
1facf9fc 18462+
1308ab2a 18463+/*
4a4d8108 18464+ * inode operations (except add/del/rename)
1308ab2a 18465+ */
4a4d8108
AM
18466+
18467+#include <linux/device_cgroup.h>
18468+#include <linux/fs_stack.h>
4a4d8108
AM
18469+#include <linux/namei.h>
18470+#include <linux/security.h>
4a4d8108
AM
18471+#include "aufs.h"
18472+
1e00d052 18473+static int h_permission(struct inode *h_inode, int mask,
79b8bda9 18474+ struct path *h_path, int brperm)
1facf9fc 18475+{
1308ab2a 18476+ int err;
4a4d8108 18477+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 18478+
e2f27e51
AM
18479+ err = -EPERM;
18480+ if (write_mask && IS_IMMUTABLE(h_inode))
18481+ goto out;
18482+
4a4d8108 18483+ err = -EACCES;
e2f27e51
AM
18484+ if (((mask & MAY_EXEC)
18485+ && S_ISREG(h_inode->i_mode)
18486+ && (path_noexec(h_path)
18487+ || !(h_inode->i_mode & S_IXUGO))))
4a4d8108
AM
18488+ goto out;
18489+
18490+ /*
18491+ * - skip the lower fs test in the case of write to ro branch.
18492+ * - nfs dir permission write check is optimized, but a policy for
18493+ * link/rename requires a real check.
b912730e
AM
18494+ * - nfs always sets MS_POSIXACL regardless of its mount option 'noacl'.
18495+ * in this case, generic_permission() returns -EOPNOTSUPP.
4a4d8108
AM
18496+ */
18497+ if ((write_mask && !au_br_writable(brperm))
18498+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
18499+ && write_mask && !(mask & MAY_READ))
18500+ || !h_inode->i_op->permission) {
18501+ /* AuLabel(generic_permission); */
b912730e 18502+ /* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
1e00d052 18503+ err = generic_permission(h_inode, mask);
b912730e
AM
18504+ if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
18505+ err = h_inode->i_op->permission(h_inode, mask);
18506+ AuTraceErr(err);
1308ab2a 18507+ } else {
4a4d8108 18508+ /* AuLabel(h_inode->permission); */
1e00d052 18509+ err = h_inode->i_op->permission(h_inode, mask);
4a4d8108
AM
18510+ AuTraceErr(err);
18511+ }
1facf9fc 18512+
4a4d8108
AM
18513+ if (!err)
18514+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 18515+ if (!err)
4a4d8108 18516+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
18517+
18518+#if 0
18519+ if (!err) {
18520+ /* todo: do we need to call ima_path_check()? */
18521+ struct path h_path = {
18522+ .dentry =
18523+ .mnt = h_mnt
18524+ };
18525+ err = ima_path_check(&h_path,
18526+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
18527+ IMA_COUNT_LEAVE);
1308ab2a 18528+ }
4a4d8108 18529+#endif
dece6358 18530+
4f0767ce 18531+out:
1308ab2a 18532+ return err;
18533+}
dece6358 18534+
1e00d052 18535+static int aufs_permission(struct inode *inode, int mask)
1308ab2a 18536+{
18537+ int err;
5afbbe0d 18538+ aufs_bindex_t bindex, bbot;
4a4d8108
AM
18539+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
18540+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
18541+ struct inode *h_inode;
18542+ struct super_block *sb;
18543+ struct au_branch *br;
1facf9fc 18544+
027c5e7a 18545+ /* todo: support rcu-walk? */
1e00d052 18546+ if (mask & MAY_NOT_BLOCK)
027c5e7a
AM
18547+ return -ECHILD;
18548+
4a4d8108
AM
18549+ sb = inode->i_sb;
18550+ si_read_lock(sb, AuLock_FLUSH);
18551+ ii_read_lock_child(inode);
027c5e7a
AM
18552+#if 0
18553+ err = au_iigen_test(inode, au_sigen(sb));
18554+ if (unlikely(err))
18555+ goto out;
18556+#endif
dece6358 18557+
076b876e
AM
18558+ if (!isdir
18559+ || write_mask
18560+ || au_opt_test(au_mntflags(sb), DIRPERM1)) {
4a4d8108 18561+ err = au_busy_or_stale();
5afbbe0d 18562+ h_inode = au_h_iptr(inode, au_ibtop(inode));
4a4d8108
AM
18563+ if (unlikely(!h_inode
18564+ || (h_inode->i_mode & S_IFMT)
18565+ != (inode->i_mode & S_IFMT)))
18566+ goto out;
1facf9fc 18567+
4a4d8108 18568+ err = 0;
5afbbe0d 18569+ bindex = au_ibtop(inode);
4a4d8108 18570+ br = au_sbr(sb, bindex);
79b8bda9 18571+ err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
4a4d8108
AM
18572+ if (write_mask
18573+ && !err
18574+ && !special_file(h_inode->i_mode)) {
18575+ /* test whether the upper writable branch exists */
18576+ err = -EROFS;
18577+ for (; bindex >= 0; bindex--)
18578+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
18579+ err = 0;
18580+ break;
18581+ }
18582+ }
18583+ goto out;
18584+ }
dece6358 18585+
4a4d8108 18586+ /* non-write to dir */
1308ab2a 18587+ err = 0;
5afbbe0d
AM
18588+ bbot = au_ibbot(inode);
18589+ for (bindex = au_ibtop(inode); !err && bindex <= bbot; bindex++) {
4a4d8108
AM
18590+ h_inode = au_h_iptr(inode, bindex);
18591+ if (h_inode) {
18592+ err = au_busy_or_stale();
18593+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
18594+ break;
18595+
18596+ br = au_sbr(sb, bindex);
79b8bda9 18597+ err = h_permission(h_inode, mask, &br->br_path,
4a4d8108
AM
18598+ br->br_perm);
18599+ }
18600+ }
1308ab2a 18601+
4f0767ce 18602+out:
4a4d8108
AM
18603+ ii_read_unlock(inode);
18604+ si_read_unlock(sb);
1308ab2a 18605+ return err;
18606+}
18607+
4a4d8108 18608+/* ---------------------------------------------------------------------- */
1facf9fc 18609+
4a4d8108 18610+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
b4510431 18611+ unsigned int flags)
4a4d8108
AM
18612+{
18613+ struct dentry *ret, *parent;
b752ccd1 18614+ struct inode *inode;
4a4d8108 18615+ struct super_block *sb;
1716fcea 18616+ int err, npositive;
dece6358 18617+
4a4d8108 18618+ IMustLock(dir);
1308ab2a 18619+
537831f9
AM
18620+ /* todo: support rcu-walk? */
18621+ ret = ERR_PTR(-ECHILD);
18622+ if (flags & LOOKUP_RCU)
18623+ goto out;
18624+
18625+ ret = ERR_PTR(-ENAMETOOLONG);
18626+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
18627+ goto out;
18628+
4a4d8108 18629+ sb = dir->i_sb;
7f207e10
AM
18630+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18631+ ret = ERR_PTR(err);
18632+ if (unlikely(err))
18633+ goto out;
18634+
4a4d8108
AM
18635+ err = au_di_init(dentry);
18636+ ret = ERR_PTR(err);
18637+ if (unlikely(err))
7f207e10 18638+ goto out_si;
1308ab2a 18639+
9dbd164d 18640+ inode = NULL;
027c5e7a 18641+ npositive = 0; /* suppress a warning */
4a4d8108
AM
18642+ parent = dentry->d_parent; /* dir inode is locked */
18643+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
18644+ err = au_alive_dir(parent);
18645+ if (!err)
18646+ err = au_digen_test(parent, au_sigen(sb));
18647+ if (!err) {
5afbbe0d
AM
18648+ /* regardless LOOKUP_CREATE, always ALLOW_NEG */
18649+ npositive = au_lkup_dentry(dentry, au_dbtop(parent),
18650+ AuLkup_ALLOW_NEG);
027c5e7a
AM
18651+ err = npositive;
18652+ }
4a4d8108 18653+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
18654+ ret = ERR_PTR(err);
18655+ if (unlikely(err < 0))
18656+ goto out_unlock;
1308ab2a 18657+
4a4d8108 18658+ if (npositive) {
b752ccd1 18659+ inode = au_new_inode(dentry, /*must_new*/0);
c1595e42
JR
18660+ if (IS_ERR(inode)) {
18661+ ret = (void *)inode;
18662+ inode = NULL;
18663+ goto out_unlock;
18664+ }
9dbd164d 18665+ }
4a4d8108 18666+
c1595e42
JR
18667+ if (inode)
18668+ atomic_inc(&inode->i_count);
4a4d8108 18669+ ret = d_splice_alias(inode, dentry);
537831f9
AM
18670+#if 0
18671+ if (unlikely(d_need_lookup(dentry))) {
18672+ spin_lock(&dentry->d_lock);
18673+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
18674+ spin_unlock(&dentry->d_lock);
18675+ } else
18676+#endif
c1595e42 18677+ if (inode) {
2000de60 18678+ if (!IS_ERR(ret)) {
c1595e42 18679+ iput(inode);
2000de60
JR
18680+ if (ret && ret != dentry)
18681+ ii_write_unlock(inode);
18682+ } else {
c1595e42
JR
18683+ ii_write_unlock(inode);
18684+ iput(inode);
18685+ inode = NULL;
18686+ }
7f207e10 18687+ }
1facf9fc 18688+
4f0767ce 18689+out_unlock:
4a4d8108 18690+ di_write_unlock(dentry);
7f207e10 18691+out_si:
4a4d8108 18692+ si_read_unlock(sb);
7f207e10 18693+out:
4a4d8108
AM
18694+ return ret;
18695+}
1facf9fc 18696+
4a4d8108 18697+/* ---------------------------------------------------------------------- */
1facf9fc 18698+
b912730e
AM
18699+struct aopen_node {
18700+ struct hlist_node hlist;
18701+ struct file *file, *h_file;
18702+};
18703+
18704+static int au_do_aopen(struct inode *inode, struct file *file)
18705+{
18706+ struct au_sphlhead *aopen;
18707+ struct aopen_node *node;
18708+ struct au_do_open_args args = {
18709+ .no_lock = 1,
18710+ .open = au_do_open_nondir
18711+ };
18712+
18713+ aopen = &au_sbi(inode->i_sb)->si_aopen;
18714+ spin_lock(&aopen->spin);
18715+ hlist_for_each_entry(node, &aopen->head, hlist)
18716+ if (node->file == file) {
18717+ args.h_file = node->h_file;
18718+ break;
18719+ }
18720+ spin_unlock(&aopen->spin);
18721+ /* AuDebugOn(!args.h_file); */
18722+
18723+ return au_do_open(file, &args);
18724+}
18725+
18726+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
18727+ struct file *file, unsigned int open_flag,
18728+ umode_t create_mode, int *opened)
18729+{
18730+ int err, h_opened = *opened;
5afbbe0d 18731+ unsigned int lkup_flags;
f0c0a007 18732+ struct dentry *parent, *d;
b912730e
AM
18733+ struct au_sphlhead *aopen;
18734+ struct vfsub_aopen_args args = {
18735+ .open_flag = open_flag,
18736+ .create_mode = create_mode,
18737+ .opened = &h_opened
18738+ };
18739+ struct aopen_node aopen_node = {
18740+ .file = file
18741+ };
18742+
18743+ IMustLock(dir);
5afbbe0d 18744+ AuDbg("open_flag 0%o\n", open_flag);
b912730e
AM
18745+ AuDbgDentry(dentry);
18746+
18747+ err = 0;
18748+ if (!au_di(dentry)) {
5afbbe0d
AM
18749+ lkup_flags = LOOKUP_OPEN;
18750+ if (open_flag & O_CREAT)
18751+ lkup_flags |= LOOKUP_CREATE;
18752+ d = aufs_lookup(dir, dentry, lkup_flags);
b912730e
AM
18753+ if (IS_ERR(d)) {
18754+ err = PTR_ERR(d);
5afbbe0d 18755+ AuTraceErr(err);
b912730e
AM
18756+ goto out;
18757+ } else if (d) {
18758+ /*
18759+ * obsoleted dentry found.
18760+ * another error will be returned later.
18761+ */
18762+ d_drop(d);
b912730e 18763+ AuDbgDentry(d);
5afbbe0d 18764+ dput(d);
b912730e
AM
18765+ }
18766+ AuDbgDentry(dentry);
18767+ }
18768+
18769+ if (d_is_positive(dentry)
18770+ || d_unhashed(dentry)
18771+ || d_unlinked(dentry)
18772+ || !(open_flag & O_CREAT))
18773+ goto out_no_open;
18774+
18775+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
18776+ if (unlikely(err))
18777+ goto out;
18778+
18779+ parent = dentry->d_parent; /* dir is locked */
18780+ di_write_lock_parent(parent);
5afbbe0d 18781+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
b912730e
AM
18782+ if (unlikely(err))
18783+ goto out_unlock;
18784+
18785+ AuDbgDentry(dentry);
18786+ if (d_is_positive(dentry))
18787+ goto out_unlock;
18788+
18789+ args.file = get_empty_filp();
18790+ err = PTR_ERR(args.file);
18791+ if (IS_ERR(args.file))
18792+ goto out_unlock;
18793+
18794+ args.file->f_flags = file->f_flags;
18795+ err = au_aopen_or_create(dir, dentry, &args);
18796+ AuTraceErr(err);
18797+ AuDbgFile(args.file);
18798+ if (unlikely(err < 0)) {
18799+ if (h_opened & FILE_OPENED)
18800+ fput(args.file);
18801+ else
18802+ put_filp(args.file);
18803+ goto out_unlock;
18804+ }
18805+
18806+ /* some filesystems don't set FILE_CREATED even when they succeed? */
18807+ *opened |= FILE_CREATED;
18808+ if (h_opened & FILE_OPENED)
18809+ aopen_node.h_file = args.file;
18810+ else {
18811+ put_filp(args.file);
18812+ args.file = NULL;
18813+ }
18814+ aopen = &au_sbi(dir->i_sb)->si_aopen;
18815+ au_sphl_add(&aopen_node.hlist, aopen);
18816+ err = finish_open(file, dentry, au_do_aopen, opened);
18817+ au_sphl_del(&aopen_node.hlist, aopen);
18818+ AuTraceErr(err);
18819+ AuDbgFile(file);
18820+ if (aopen_node.h_file)
18821+ fput(aopen_node.h_file);
18822+
18823+out_unlock:
18824+ di_write_unlock(parent);
18825+ aufs_read_unlock(dentry, AuLock_DW);
18826+ AuDbgDentry(dentry);
f0c0a007 18827+ if (unlikely(err < 0))
b912730e
AM
18828+ goto out;
18829+out_no_open:
f0c0a007 18830+ if (err >= 0 && !(*opened & FILE_CREATED)) {
b912730e
AM
18831+ AuLabel(out_no_open);
18832+ dget(dentry);
18833+ err = finish_no_open(file, dentry);
18834+ }
18835+out:
18836+ AuDbg("%pd%s%s\n", dentry,
18837+ (*opened & FILE_CREATED) ? " created" : "",
18838+ (*opened & FILE_OPENED) ? " opened" : "");
18839+ AuTraceErr(err);
18840+ return err;
18841+}
18842+
18843+
18844+/* ---------------------------------------------------------------------- */
18845+
4a4d8108
AM
18846+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
18847+ const unsigned char add_entry, aufs_bindex_t bcpup,
5afbbe0d 18848+ aufs_bindex_t btop)
4a4d8108
AM
18849+{
18850+ int err;
18851+ struct dentry *h_parent;
18852+ struct inode *h_dir;
1facf9fc 18853+
027c5e7a 18854+ if (add_entry)
5527c038 18855+ IMustLock(d_inode(parent));
027c5e7a 18856+ else
4a4d8108
AM
18857+ di_write_lock_parent(parent);
18858+
18859+ err = 0;
18860+ if (!au_h_dptr(parent, bcpup)) {
5afbbe0d 18861+ if (btop > bcpup)
c2b27bf2 18862+ err = au_cpup_dirs(dentry, bcpup);
5afbbe0d 18863+ else if (btop < bcpup)
4a4d8108
AM
18864+ err = au_cpdown_dirs(dentry, bcpup);
18865+ else
c2b27bf2 18866+ BUG();
4a4d8108 18867+ }
38d290e6 18868+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
4a4d8108 18869+ h_parent = au_h_dptr(parent, bcpup);
5527c038 18870+ h_dir = d_inode(h_parent);
febd17d6 18871+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
7e9cd9fe 18872+ err = au_lkup_neg(dentry, bcpup, /*wh*/0);
4a4d8108 18873+ /* todo: no unlock here */
febd17d6 18874+ inode_unlock(h_dir);
027c5e7a
AM
18875+
18876+ AuDbg("bcpup %d\n", bcpup);
18877+ if (!err) {
5527c038 18878+ if (d_really_is_negative(dentry))
5afbbe0d 18879+ au_set_h_dptr(dentry, btop, NULL);
4a4d8108
AM
18880+ au_update_dbrange(dentry, /*do_put_zero*/0);
18881+ }
1308ab2a 18882+ }
1facf9fc 18883+
4a4d8108
AM
18884+ if (!add_entry)
18885+ di_write_unlock(parent);
18886+ if (!err)
18887+ err = bcpup; /* success */
1308ab2a 18888+
027c5e7a 18889+ AuTraceErr(err);
4a4d8108
AM
18890+ return err;
18891+}
1facf9fc 18892+
4a4d8108
AM
18893+/*
18894+ * decide the branch and the parent dir where we will create a new entry.
18895+ * returns new bindex or an error.
18896+ * copyup the parent dir if needed.
18897+ */
18898+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
18899+ struct au_wr_dir_args *args)
18900+{
18901+ int err;
392086de 18902+ unsigned int flags;
5afbbe0d 18903+ aufs_bindex_t bcpup, btop, src_btop;
86dc4139
AM
18904+ const unsigned char add_entry
18905+ = au_ftest_wrdir(args->flags, ADD_ENTRY)
38d290e6 18906+ | au_ftest_wrdir(args->flags, TMPFILE);
4a4d8108
AM
18907+ struct super_block *sb;
18908+ struct dentry *parent;
18909+ struct au_sbinfo *sbinfo;
1facf9fc 18910+
4a4d8108
AM
18911+ sb = dentry->d_sb;
18912+ sbinfo = au_sbi(sb);
18913+ parent = dget_parent(dentry);
5afbbe0d
AM
18914+ btop = au_dbtop(dentry);
18915+ bcpup = btop;
4a4d8108
AM
18916+ if (args->force_btgt < 0) {
18917+ if (src_dentry) {
5afbbe0d
AM
18918+ src_btop = au_dbtop(src_dentry);
18919+ if (src_btop < btop)
18920+ bcpup = src_btop;
4a4d8108 18921+ } else if (add_entry) {
392086de
AM
18922+ flags = 0;
18923+ if (au_ftest_wrdir(args->flags, ISDIR))
18924+ au_fset_wbr(flags, DIR);
18925+ err = AuWbrCreate(sbinfo, dentry, flags);
4a4d8108
AM
18926+ bcpup = err;
18927+ }
1facf9fc 18928+
5527c038 18929+ if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
4a4d8108
AM
18930+ if (add_entry)
18931+ err = AuWbrCopyup(sbinfo, dentry);
18932+ else {
18933+ if (!IS_ROOT(dentry)) {
18934+ di_read_lock_parent(parent, !AuLock_IR);
18935+ err = AuWbrCopyup(sbinfo, dentry);
18936+ di_read_unlock(parent, !AuLock_IR);
18937+ } else
18938+ err = AuWbrCopyup(sbinfo, dentry);
18939+ }
18940+ bcpup = err;
18941+ if (unlikely(err < 0))
18942+ goto out;
18943+ }
18944+ } else {
18945+ bcpup = args->force_btgt;
5527c038 18946+ AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
1308ab2a 18947+ }
027c5e7a 18948+
5afbbe0d 18949+ AuDbg("btop %d, bcpup %d\n", btop, bcpup);
4a4d8108 18950+ err = bcpup;
5afbbe0d 18951+ if (bcpup == btop)
4a4d8108 18952+ goto out; /* success */
4a4d8108
AM
18953+
18954+ /* copyup the new parent into the branch we process */
5afbbe0d 18955+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, btop);
027c5e7a 18956+ if (err >= 0) {
5527c038 18957+ if (d_really_is_negative(dentry)) {
5afbbe0d
AM
18958+ au_set_h_dptr(dentry, btop, NULL);
18959+ au_set_dbtop(dentry, bcpup);
18960+ au_set_dbbot(dentry, bcpup);
027c5e7a 18961+ }
38d290e6
JR
18962+ AuDebugOn(add_entry
18963+ && !au_ftest_wrdir(args->flags, TMPFILE)
18964+ && !au_h_dptr(dentry, bcpup));
027c5e7a 18965+ }
86dc4139
AM
18966+
18967+out:
18968+ dput(parent);
18969+ return err;
18970+}
18971+
18972+/* ---------------------------------------------------------------------- */
18973+
18974+void au_pin_hdir_unlock(struct au_pin *p)
18975+{
18976+ if (p->hdir)
5afbbe0d 18977+ au_hn_inode_unlock(p->hdir);
86dc4139
AM
18978+}
18979+
c1595e42 18980+int au_pin_hdir_lock(struct au_pin *p)
86dc4139
AM
18981+{
18982+ int err;
18983+
18984+ err = 0;
18985+ if (!p->hdir)
18986+ goto out;
18987+
18988+ /* even if an error happens later, keep this lock */
5afbbe0d 18989+ au_hn_inode_lock_nested(p->hdir, p->lsc_hi);
86dc4139
AM
18990+
18991+ err = -EBUSY;
5527c038 18992+ if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
86dc4139
AM
18993+ goto out;
18994+
18995+ err = 0;
18996+ if (p->h_dentry)
18997+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
18998+ p->h_parent, p->br);
18999+
19000+out:
19001+ return err;
19002+}
19003+
19004+int au_pin_hdir_relock(struct au_pin *p)
19005+{
19006+ int err, i;
19007+ struct inode *h_i;
19008+ struct dentry *h_d[] = {
19009+ p->h_dentry,
19010+ p->h_parent
19011+ };
19012+
19013+ err = au_pin_hdir_lock(p);
19014+ if (unlikely(err))
19015+ goto out;
19016+
19017+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
19018+ if (!h_d[i])
19019+ continue;
5527c038
JR
19020+ if (d_is_positive(h_d[i])) {
19021+ h_i = d_inode(h_d[i]);
86dc4139 19022+ err = !h_i->i_nlink;
5527c038 19023+ }
86dc4139
AM
19024+ }
19025+
19026+out:
19027+ return err;
19028+}
19029+
5afbbe0d 19030+static void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
86dc4139 19031+{
5afbbe0d
AM
19032+#if !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) && defined(CONFIG_RWSEM_SPIN_ON_OWNER)
19033+ p->hdir->hi_inode->i_rwsem.owner = task;
86dc4139
AM
19034+#endif
19035+}
19036+
19037+void au_pin_hdir_acquire_nest(struct au_pin *p)
19038+{
19039+ if (p->hdir) {
5afbbe0d 19040+ rwsem_acquire_nest(&p->hdir->hi_inode->i_rwsem.dep_map,
86dc4139
AM
19041+ p->lsc_hi, 0, NULL, _RET_IP_);
19042+ au_pin_hdir_set_owner(p, current);
19043+ }
dece6358 19044+}
1facf9fc 19045+
86dc4139
AM
19046+void au_pin_hdir_release(struct au_pin *p)
19047+{
19048+ if (p->hdir) {
19049+ au_pin_hdir_set_owner(p, p->task);
5afbbe0d 19050+ rwsem_release(&p->hdir->hi_inode->i_rwsem.dep_map, 1, _RET_IP_);
86dc4139
AM
19051+ }
19052+}
1308ab2a 19053+
4a4d8108 19054+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 19055+{
4a4d8108
AM
19056+ if (pin && pin->parent)
19057+ return au_h_dptr(pin->parent, pin->bindex);
19058+ return NULL;
dece6358 19059+}
1facf9fc 19060+
4a4d8108 19061+void au_unpin(struct au_pin *p)
dece6358 19062+{
86dc4139
AM
19063+ if (p->hdir)
19064+ au_pin_hdir_unlock(p);
e49829fe 19065+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
b4510431 19066+ vfsub_mnt_drop_write(p->h_mnt);
4a4d8108
AM
19067+ if (!p->hdir)
19068+ return;
1facf9fc 19069+
4a4d8108
AM
19070+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19071+ di_read_unlock(p->parent, AuLock_IR);
19072+ iput(p->hdir->hi_inode);
19073+ dput(p->parent);
19074+ p->parent = NULL;
19075+ p->hdir = NULL;
19076+ p->h_mnt = NULL;
86dc4139 19077+ /* do not clear p->task */
4a4d8108 19078+}
1308ab2a 19079+
4a4d8108
AM
19080+int au_do_pin(struct au_pin *p)
19081+{
19082+ int err;
19083+ struct super_block *sb;
4a4d8108
AM
19084+ struct inode *h_dir;
19085+
19086+ err = 0;
19087+ sb = p->dentry->d_sb;
86dc4139 19088+ p->br = au_sbr(sb, p->bindex);
4a4d8108
AM
19089+ if (IS_ROOT(p->dentry)) {
19090+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 19091+ p->h_mnt = au_br_mnt(p->br);
b4510431 19092+ err = vfsub_mnt_want_write(p->h_mnt);
4a4d8108
AM
19093+ if (unlikely(err)) {
19094+ au_fclr_pin(p->flags, MNT_WRITE);
19095+ goto out_err;
19096+ }
19097+ }
dece6358 19098+ goto out;
1facf9fc 19099+ }
19100+
86dc4139 19101+ p->h_dentry = NULL;
5afbbe0d 19102+ if (p->bindex <= au_dbbot(p->dentry))
86dc4139 19103+ p->h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 19104+
4a4d8108
AM
19105+ p->parent = dget_parent(p->dentry);
19106+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19107+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 19108+
4a4d8108 19109+ h_dir = NULL;
86dc4139 19110+ p->h_parent = au_h_dptr(p->parent, p->bindex);
5527c038 19111+ p->hdir = au_hi(d_inode(p->parent), p->bindex);
4a4d8108
AM
19112+ if (p->hdir)
19113+ h_dir = p->hdir->hi_inode;
dece6358 19114+
b752ccd1
AM
19115+ /*
19116+ * udba case, or
19117+ * if DI_LOCKED is not set, then p->parent may be different
19118+ * and h_parent can be NULL.
19119+ */
86dc4139 19120+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
e49829fe 19121+ err = -EBUSY;
4a4d8108
AM
19122+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19123+ di_read_unlock(p->parent, AuLock_IR);
19124+ dput(p->parent);
19125+ p->parent = NULL;
19126+ goto out_err;
19127+ }
1308ab2a 19128+
4a4d8108 19129+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 19130+ p->h_mnt = au_br_mnt(p->br);
b4510431 19131+ err = vfsub_mnt_want_write(p->h_mnt);
dece6358 19132+ if (unlikely(err)) {
4a4d8108 19133+ au_fclr_pin(p->flags, MNT_WRITE);
86dc4139
AM
19134+ if (!au_ftest_pin(p->flags, DI_LOCKED))
19135+ di_read_unlock(p->parent, AuLock_IR);
19136+ dput(p->parent);
19137+ p->parent = NULL;
19138+ goto out_err;
dece6358
AM
19139+ }
19140+ }
4a4d8108 19141+
86dc4139
AM
19142+ au_igrab(h_dir);
19143+ err = au_pin_hdir_lock(p);
19144+ if (!err)
19145+ goto out; /* success */
19146+
076b876e
AM
19147+ au_unpin(p);
19148+
4f0767ce 19149+out_err:
4a4d8108
AM
19150+ pr_err("err %d\n", err);
19151+ err = au_busy_or_stale();
4f0767ce 19152+out:
1facf9fc 19153+ return err;
19154+}
19155+
4a4d8108
AM
19156+void au_pin_init(struct au_pin *p, struct dentry *dentry,
19157+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
19158+ unsigned int udba, unsigned char flags)
19159+{
19160+ p->dentry = dentry;
19161+ p->udba = udba;
19162+ p->lsc_di = lsc_di;
19163+ p->lsc_hi = lsc_hi;
19164+ p->flags = flags;
19165+ p->bindex = bindex;
19166+
19167+ p->parent = NULL;
19168+ p->hdir = NULL;
19169+ p->h_mnt = NULL;
86dc4139
AM
19170+
19171+ p->h_dentry = NULL;
19172+ p->h_parent = NULL;
19173+ p->br = NULL;
19174+ p->task = current;
4a4d8108
AM
19175+}
19176+
19177+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
19178+ unsigned int udba, unsigned char flags)
19179+{
19180+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
19181+ udba, flags);
19182+ return au_do_pin(pin);
19183+}
19184+
dece6358
AM
19185+/* ---------------------------------------------------------------------- */
19186+
1308ab2a 19187+/*
4a4d8108
AM
19188+ * ->setattr() and ->getattr() are called in various cases.
19189+ * chmod, stat: dentry is revalidated.
19190+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
19191+ * unhashed.
19192+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 19193+ */
027c5e7a 19194+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
c1595e42 19195+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 19196+{
4a4d8108 19197+ int err;
4a4d8108 19198+ struct dentry *parent;
1facf9fc 19199+
1308ab2a 19200+ err = 0;
027c5e7a 19201+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
19202+ parent = dget_parent(dentry);
19203+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 19204+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
19205+ di_read_unlock(parent, AuLock_IR);
19206+ dput(parent);
dece6358 19207+ }
1facf9fc 19208+
4a4d8108 19209+ AuTraceErr(err);
1308ab2a 19210+ return err;
19211+}
dece6358 19212+
c1595e42
JR
19213+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
19214+ struct au_icpup_args *a)
1308ab2a 19215+{
19216+ int err;
4a4d8108 19217+ loff_t sz;
5afbbe0d 19218+ aufs_bindex_t btop, ibtop;
4a4d8108
AM
19219+ struct dentry *hi_wh, *parent;
19220+ struct inode *inode;
4a4d8108
AM
19221+ struct au_wr_dir_args wr_dir_args = {
19222+ .force_btgt = -1,
19223+ .flags = 0
19224+ };
19225+
2000de60 19226+ if (d_is_dir(dentry))
4a4d8108
AM
19227+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
19228+ /* plink or hi_wh() case */
5afbbe0d 19229+ btop = au_dbtop(dentry);
5527c038 19230+ inode = d_inode(dentry);
5afbbe0d
AM
19231+ ibtop = au_ibtop(inode);
19232+ if (btop != ibtop && !au_test_ro(inode->i_sb, ibtop, inode))
19233+ wr_dir_args.force_btgt = ibtop;
4a4d8108
AM
19234+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
19235+ if (unlikely(err < 0))
19236+ goto out;
19237+ a->btgt = err;
5afbbe0d 19238+ if (err != btop)
4a4d8108
AM
19239+ au_fset_icpup(a->flags, DID_CPUP);
19240+
19241+ err = 0;
19242+ a->pin_flags = AuPin_MNT_WRITE;
19243+ parent = NULL;
19244+ if (!IS_ROOT(dentry)) {
19245+ au_fset_pin(a->pin_flags, DI_LOCKED);
19246+ parent = dget_parent(dentry);
19247+ di_write_lock_parent(parent);
19248+ }
19249+
19250+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
19251+ if (unlikely(err))
19252+ goto out_parent;
19253+
4a4d8108 19254+ sz = -1;
5afbbe0d 19255+ a->h_path.dentry = au_h_dptr(dentry, btop);
5527c038 19256+ a->h_inode = d_inode(a->h_path.dentry);
c1595e42 19257+ if (ia && (ia->ia_valid & ATTR_SIZE)) {
febd17d6 19258+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
c1595e42
JR
19259+ if (ia->ia_size < i_size_read(a->h_inode))
19260+ sz = ia->ia_size;
febd17d6 19261+ inode_unlock(a->h_inode);
c1595e42 19262+ }
4a4d8108 19263+
4a4d8108 19264+ hi_wh = NULL;
027c5e7a 19265+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
19266+ hi_wh = au_hi_wh(inode, a->btgt);
19267+ if (!hi_wh) {
c2b27bf2
AM
19268+ struct au_cp_generic cpg = {
19269+ .dentry = dentry,
19270+ .bdst = a->btgt,
19271+ .bsrc = -1,
19272+ .len = sz,
19273+ .pin = &a->pin
19274+ };
19275+ err = au_sio_cpup_wh(&cpg, /*file*/NULL);
4a4d8108
AM
19276+ if (unlikely(err))
19277+ goto out_unlock;
19278+ hi_wh = au_hi_wh(inode, a->btgt);
19279+ /* todo: revalidate hi_wh? */
19280+ }
19281+ }
19282+
19283+ if (parent) {
19284+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
19285+ di_downgrade_lock(parent, AuLock_IR);
19286+ dput(parent);
19287+ parent = NULL;
19288+ }
19289+ if (!au_ftest_icpup(a->flags, DID_CPUP))
19290+ goto out; /* success */
19291+
19292+ if (!d_unhashed(dentry)) {
c2b27bf2
AM
19293+ struct au_cp_generic cpg = {
19294+ .dentry = dentry,
19295+ .bdst = a->btgt,
5afbbe0d 19296+ .bsrc = btop,
c2b27bf2
AM
19297+ .len = sz,
19298+ .pin = &a->pin,
19299+ .flags = AuCpup_DTIME | AuCpup_HOPEN
19300+ };
19301+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
19302+ if (!err)
19303+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19304+ } else if (!hi_wh)
19305+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19306+ else
19307+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 19308+
4f0767ce 19309+out_unlock:
5527c038 19310+ a->h_inode = d_inode(a->h_path.dentry);
86dc4139 19311+ if (!err)
dece6358 19312+ goto out; /* success */
4a4d8108 19313+ au_unpin(&a->pin);
4f0767ce 19314+out_parent:
4a4d8108
AM
19315+ if (parent) {
19316+ di_write_unlock(parent);
19317+ dput(parent);
19318+ }
4f0767ce 19319+out:
86dc4139 19320+ if (!err)
febd17d6 19321+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
1facf9fc 19322+ return err;
19323+}
19324+
4a4d8108 19325+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 19326+{
4a4d8108 19327+ int err;
523b37e3 19328+ struct inode *inode, *delegated;
4a4d8108
AM
19329+ struct super_block *sb;
19330+ struct file *file;
19331+ struct au_icpup_args *a;
1facf9fc 19332+
5527c038 19333+ inode = d_inode(dentry);
4a4d8108 19334+ IMustLock(inode);
dece6358 19335+
f2c43d5f
AM
19336+ err = setattr_prepare(dentry, ia);
19337+ if (unlikely(err))
19338+ goto out;
19339+
4a4d8108
AM
19340+ err = -ENOMEM;
19341+ a = kzalloc(sizeof(*a), GFP_NOFS);
19342+ if (unlikely(!a))
19343+ goto out;
1facf9fc 19344+
4a4d8108
AM
19345+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
19346+ ia->ia_valid &= ~ATTR_MODE;
dece6358 19347+
4a4d8108
AM
19348+ file = NULL;
19349+ sb = dentry->d_sb;
e49829fe
JR
19350+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19351+ if (unlikely(err))
19352+ goto out_kfree;
19353+
4a4d8108
AM
19354+ if (ia->ia_valid & ATTR_FILE) {
19355+ /* currently ftruncate(2) only */
7e9cd9fe 19356+ AuDebugOn(!d_is_reg(dentry));
4a4d8108
AM
19357+ file = ia->ia_file;
19358+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
19359+ if (unlikely(err))
19360+ goto out_si;
19361+ ia->ia_file = au_hf_top(file);
19362+ a->udba = AuOpt_UDBA_NONE;
19363+ } else {
19364+ /* fchmod() doesn't pass ia_file */
19365+ a->udba = au_opt_udba(sb);
027c5e7a
AM
19366+ di_write_lock_child(dentry);
19367+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
19368+ if (d_unhashed(dentry))
19369+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
19370+ if (a->udba != AuOpt_UDBA_NONE) {
19371+ AuDebugOn(IS_ROOT(dentry));
19372+ err = au_reval_for_attr(dentry, au_sigen(sb));
19373+ if (unlikely(err))
19374+ goto out_dentry;
19375+ }
dece6358 19376+ }
dece6358 19377+
4a4d8108
AM
19378+ err = au_pin_and_icpup(dentry, ia, a);
19379+ if (unlikely(err < 0))
19380+ goto out_dentry;
19381+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
19382+ ia->ia_file = NULL;
19383+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 19384+ }
dece6358 19385+
4a4d8108
AM
19386+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
19387+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
19388+ == (ATTR_MODE | ATTR_CTIME)) {
7eafdf33 19389+ err = security_path_chmod(&a->h_path, ia->ia_mode);
4a4d8108
AM
19390+ if (unlikely(err))
19391+ goto out_unlock;
19392+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
19393+ && (ia->ia_valid & ATTR_CTIME)) {
86dc4139 19394+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
4a4d8108
AM
19395+ if (unlikely(err))
19396+ goto out_unlock;
19397+ }
dece6358 19398+
4a4d8108
AM
19399+ if (ia->ia_valid & ATTR_SIZE) {
19400+ struct file *f;
1308ab2a 19401+
953406b4 19402+ if (ia->ia_size < i_size_read(inode))
4a4d8108 19403+ /* unmap only */
953406b4 19404+ truncate_setsize(inode, ia->ia_size);
1308ab2a 19405+
4a4d8108
AM
19406+ f = NULL;
19407+ if (ia->ia_valid & ATTR_FILE)
19408+ f = ia->ia_file;
febd17d6 19409+ inode_unlock(a->h_inode);
4a4d8108 19410+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
febd17d6 19411+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
523b37e3
AM
19412+ } else {
19413+ delegated = NULL;
19414+ while (1) {
19415+ err = vfsub_notify_change(&a->h_path, ia, &delegated);
19416+ if (delegated) {
19417+ err = break_deleg_wait(&delegated);
19418+ if (!err)
19419+ continue;
19420+ }
19421+ break;
19422+ }
19423+ }
8cdd5066
JR
19424+ /*
19425+ * regardless aufs 'acl' option setting.
19426+ * why don't all acl-aware fs call this func from their ->setattr()?
19427+ */
19428+ if (!err && (ia->ia_valid & ATTR_MODE))
19429+ err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
4a4d8108
AM
19430+ if (!err)
19431+ au_cpup_attr_changeable(inode);
1308ab2a 19432+
4f0767ce 19433+out_unlock:
febd17d6 19434+ inode_unlock(a->h_inode);
4a4d8108 19435+ au_unpin(&a->pin);
027c5e7a 19436+ if (unlikely(err))
5afbbe0d 19437+ au_update_dbtop(dentry);
4f0767ce 19438+out_dentry:
4a4d8108
AM
19439+ di_write_unlock(dentry);
19440+ if (file) {
19441+ fi_write_unlock(file);
19442+ ia->ia_file = file;
19443+ ia->ia_valid |= ATTR_FILE;
19444+ }
4f0767ce 19445+out_si:
4a4d8108 19446+ si_read_unlock(sb);
e49829fe 19447+out_kfree:
f0c0a007 19448+ au_delayed_kfree(a);
4f0767ce 19449+out:
4a4d8108
AM
19450+ AuTraceErr(err);
19451+ return err;
1facf9fc 19452+}
19453+
c1595e42
JR
19454+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
19455+static int au_h_path_to_set_attr(struct dentry *dentry,
19456+ struct au_icpup_args *a, struct path *h_path)
19457+{
19458+ int err;
19459+ struct super_block *sb;
19460+
19461+ sb = dentry->d_sb;
19462+ a->udba = au_opt_udba(sb);
19463+ /* no d_unlinked(), to set UDBA_NONE for root */
19464+ if (d_unhashed(dentry))
19465+ a->udba = AuOpt_UDBA_NONE;
19466+ if (a->udba != AuOpt_UDBA_NONE) {
19467+ AuDebugOn(IS_ROOT(dentry));
19468+ err = au_reval_for_attr(dentry, au_sigen(sb));
19469+ if (unlikely(err))
19470+ goto out;
19471+ }
19472+ err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
19473+ if (unlikely(err < 0))
19474+ goto out;
19475+
19476+ h_path->dentry = a->h_path.dentry;
19477+ h_path->mnt = au_sbr_mnt(sb, a->btgt);
19478+
19479+out:
19480+ return err;
19481+}
19482+
f2c43d5f
AM
19483+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
19484+ struct au_sxattr *arg)
c1595e42
JR
19485+{
19486+ int err;
19487+ struct path h_path;
19488+ struct super_block *sb;
19489+ struct au_icpup_args *a;
5afbbe0d 19490+ struct inode *h_inode;
c1595e42 19491+
c1595e42
JR
19492+ IMustLock(inode);
19493+
19494+ err = -ENOMEM;
19495+ a = kzalloc(sizeof(*a), GFP_NOFS);
19496+ if (unlikely(!a))
19497+ goto out;
19498+
19499+ sb = dentry->d_sb;
19500+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19501+ if (unlikely(err))
19502+ goto out_kfree;
19503+
19504+ h_path.dentry = NULL; /* silence gcc */
19505+ di_write_lock_child(dentry);
19506+ err = au_h_path_to_set_attr(dentry, a, &h_path);
19507+ if (unlikely(err))
19508+ goto out_di;
19509+
febd17d6 19510+ inode_unlock(a->h_inode);
c1595e42
JR
19511+ switch (arg->type) {
19512+ case AU_XATTR_SET:
5afbbe0d 19513+ AuDebugOn(d_is_negative(h_path.dentry));
c1595e42
JR
19514+ err = vfsub_setxattr(h_path.dentry,
19515+ arg->u.set.name, arg->u.set.value,
19516+ arg->u.set.size, arg->u.set.flags);
19517+ break;
c1595e42
JR
19518+ case AU_ACL_SET:
19519+ err = -EOPNOTSUPP;
5527c038 19520+ h_inode = d_inode(h_path.dentry);
c1595e42 19521+ if (h_inode->i_op->set_acl)
f2c43d5f 19522+ /* this will call posix_acl_update_mode */
c1595e42
JR
19523+ err = h_inode->i_op->set_acl(h_inode,
19524+ arg->u.acl_set.acl,
19525+ arg->u.acl_set.type);
19526+ break;
19527+ }
19528+ if (!err)
19529+ au_cpup_attr_timesizes(inode);
19530+
19531+ au_unpin(&a->pin);
19532+ if (unlikely(err))
5afbbe0d 19533+ au_update_dbtop(dentry);
c1595e42
JR
19534+
19535+out_di:
19536+ di_write_unlock(dentry);
19537+ si_read_unlock(sb);
19538+out_kfree:
f0c0a007 19539+ au_delayed_kfree(a);
c1595e42
JR
19540+out:
19541+ AuTraceErr(err);
19542+ return err;
19543+}
19544+#endif
19545+
4a4d8108
AM
19546+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
19547+ unsigned int nlink)
1facf9fc 19548+{
9dbd164d
AM
19549+ unsigned int n;
19550+
4a4d8108 19551+ inode->i_mode = st->mode;
86dc4139
AM
19552+ /* don't i_[ug]id_write() here */
19553+ inode->i_uid = st->uid;
19554+ inode->i_gid = st->gid;
4a4d8108
AM
19555+ inode->i_atime = st->atime;
19556+ inode->i_mtime = st->mtime;
19557+ inode->i_ctime = st->ctime;
1facf9fc 19558+
4a4d8108
AM
19559+ au_cpup_attr_nlink(inode, /*force*/0);
19560+ if (S_ISDIR(inode->i_mode)) {
9dbd164d
AM
19561+ n = inode->i_nlink;
19562+ n -= nlink;
19563+ n += st->nlink;
f6b6e03d 19564+ smp_mb(); /* for i_nlink */
7eafdf33 19565+ /* 0 can happen */
92d182d2 19566+ set_nlink(inode, n);
4a4d8108 19567+ }
1facf9fc 19568+
4a4d8108
AM
19569+ spin_lock(&inode->i_lock);
19570+ inode->i_blocks = st->blocks;
19571+ i_size_write(inode, st->size);
19572+ spin_unlock(&inode->i_lock);
1facf9fc 19573+}
19574+
c1595e42 19575+/*
f2c43d5f 19576+ * common routine for aufs_getattr() and au_getxattr().
c1595e42
JR
19577+ * returns zero or negative (an error).
19578+ * @dentry will be read-locked in success.
19579+ */
19580+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
1facf9fc 19581+{
4a4d8108 19582+ int err;
076b876e 19583+ unsigned int mnt_flags, sigen;
c1595e42 19584+ unsigned char udba_none;
4a4d8108 19585+ aufs_bindex_t bindex;
4a4d8108
AM
19586+ struct super_block *sb, *h_sb;
19587+ struct inode *inode;
1facf9fc 19588+
c1595e42
JR
19589+ h_path->mnt = NULL;
19590+ h_path->dentry = NULL;
19591+
19592+ err = 0;
4a4d8108 19593+ sb = dentry->d_sb;
4a4d8108
AM
19594+ mnt_flags = au_mntflags(sb);
19595+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 19596+
4a4d8108 19597+ /* support fstat(2) */
027c5e7a 19598+ if (!d_unlinked(dentry) && !udba_none) {
076b876e 19599+ sigen = au_sigen(sb);
027c5e7a
AM
19600+ err = au_digen_test(dentry, sigen);
19601+ if (!err) {
4a4d8108 19602+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a 19603+ err = au_dbrange_test(dentry);
c1595e42
JR
19604+ if (unlikely(err)) {
19605+ di_read_unlock(dentry, AuLock_IR);
19606+ goto out;
19607+ }
027c5e7a 19608+ } else {
4a4d8108
AM
19609+ AuDebugOn(IS_ROOT(dentry));
19610+ di_write_lock_child(dentry);
027c5e7a
AM
19611+ err = au_dbrange_test(dentry);
19612+ if (!err)
19613+ err = au_reval_for_attr(dentry, sigen);
c1595e42
JR
19614+ if (!err)
19615+ di_downgrade_lock(dentry, AuLock_IR);
19616+ else {
19617+ di_write_unlock(dentry);
19618+ goto out;
19619+ }
4a4d8108
AM
19620+ }
19621+ } else
19622+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 19623+
5527c038 19624+ inode = d_inode(dentry);
5afbbe0d 19625+ bindex = au_ibtop(inode);
c1595e42
JR
19626+ h_path->mnt = au_sbr_mnt(sb, bindex);
19627+ h_sb = h_path->mnt->mnt_sb;
19628+ if (!force
19629+ && !au_test_fs_bad_iattr(h_sb)
19630+ && udba_none)
19631+ goto out; /* success */
1facf9fc 19632+
5afbbe0d 19633+ if (au_dbtop(dentry) == bindex)
c1595e42 19634+ h_path->dentry = au_h_dptr(dentry, bindex);
4a4d8108 19635+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
c1595e42
JR
19636+ h_path->dentry = au_plink_lkup(inode, bindex);
19637+ if (IS_ERR(h_path->dentry))
19638+ /* pretending success */
19639+ h_path->dentry = NULL;
19640+ else
19641+ dput(h_path->dentry);
4a4d8108 19642+ }
c1595e42
JR
19643+
19644+out:
19645+ return err;
19646+}
19647+
19648+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
19649+ struct dentry *dentry, struct kstat *st)
19650+{
19651+ int err;
19652+ unsigned char positive;
19653+ struct path h_path;
19654+ struct inode *inode;
19655+ struct super_block *sb;
19656+
5527c038 19657+ inode = d_inode(dentry);
c1595e42
JR
19658+ sb = dentry->d_sb;
19659+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19660+ if (unlikely(err))
19661+ goto out;
19662+ err = au_h_path_getattr(dentry, /*force*/0, &h_path);
19663+ if (unlikely(err))
19664+ goto out_si;
c06a8ce3 19665+ if (unlikely(!h_path.dentry))
c1595e42 19666+ /* illegally overlapped or something */
4a4d8108
AM
19667+ goto out_fill; /* pretending success */
19668+
5527c038 19669+ positive = d_is_positive(h_path.dentry);
4a4d8108 19670+ if (positive)
c06a8ce3 19671+ err = vfs_getattr(&h_path, st);
4a4d8108
AM
19672+ if (!err) {
19673+ if (positive)
c06a8ce3 19674+ au_refresh_iattr(inode, st,
5527c038 19675+ d_inode(h_path.dentry)->i_nlink);
4a4d8108 19676+ goto out_fill; /* success */
1facf9fc 19677+ }
7f207e10 19678+ AuTraceErr(err);
c1595e42 19679+ goto out_di;
4a4d8108 19680+
4f0767ce 19681+out_fill:
4a4d8108 19682+ generic_fillattr(inode, st);
c1595e42 19683+out_di:
4a4d8108 19684+ di_read_unlock(dentry, AuLock_IR);
c1595e42 19685+out_si:
4a4d8108 19686+ si_read_unlock(sb);
7f207e10
AM
19687+out:
19688+ AuTraceErr(err);
4a4d8108 19689+ return err;
1facf9fc 19690+}
19691+
19692+/* ---------------------------------------------------------------------- */
19693+
febd17d6
JR
19694+static const char *aufs_get_link(struct dentry *dentry, struct inode *inode,
19695+ struct delayed_call *done)
4a4d8108 19696+{
c2c0f25c 19697+ const char *ret;
c2c0f25c 19698+ struct dentry *h_dentry;
febd17d6 19699+ struct inode *h_inode;
4a4d8108 19700+ int err;
c2c0f25c 19701+ aufs_bindex_t bindex;
1facf9fc 19702+
79b8bda9 19703+ ret = NULL; /* suppress a warning */
febd17d6
JR
19704+ err = -ECHILD;
19705+ if (!dentry)
19706+ goto out;
19707+
027c5e7a
AM
19708+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
19709+ if (unlikely(err))
c2c0f25c 19710+ goto out;
027c5e7a
AM
19711+
19712+ err = au_d_hashed_positive(dentry);
c2c0f25c
AM
19713+ if (unlikely(err))
19714+ goto out_unlock;
19715+
19716+ err = -EINVAL;
19717+ inode = d_inode(dentry);
5afbbe0d 19718+ bindex = au_ibtop(inode);
c2c0f25c 19719+ h_inode = au_h_iptr(inode, bindex);
febd17d6 19720+ if (unlikely(!h_inode->i_op->get_link))
c2c0f25c
AM
19721+ goto out_unlock;
19722+
19723+ err = -EBUSY;
19724+ h_dentry = NULL;
5afbbe0d 19725+ if (au_dbtop(dentry) <= bindex) {
c2c0f25c
AM
19726+ h_dentry = au_h_dptr(dentry, bindex);
19727+ if (h_dentry)
19728+ dget(h_dentry);
027c5e7a 19729+ }
c2c0f25c
AM
19730+ if (!h_dentry) {
19731+ h_dentry = d_find_any_alias(h_inode);
19732+ if (IS_ERR(h_dentry)) {
19733+ err = PTR_ERR(h_dentry);
febd17d6 19734+ goto out_unlock;
c2c0f25c
AM
19735+ }
19736+ }
19737+ if (unlikely(!h_dentry))
febd17d6 19738+ goto out_unlock;
1facf9fc 19739+
c2c0f25c 19740+ err = 0;
febd17d6 19741+ AuDbg("%pf\n", h_inode->i_op->get_link);
c2c0f25c 19742+ AuDbgDentry(h_dentry);
f2c43d5f 19743+ ret = vfs_get_link(h_dentry, done);
c2c0f25c 19744+ dput(h_dentry);
febd17d6
JR
19745+ if (IS_ERR(ret))
19746+ err = PTR_ERR(ret);
c2c0f25c 19747+
c2c0f25c
AM
19748+out_unlock:
19749+ aufs_read_unlock(dentry, AuLock_IR);
4f0767ce 19750+out:
c2c0f25c
AM
19751+ if (unlikely(err))
19752+ ret = ERR_PTR(err);
19753+ AuTraceErrPtr(ret);
19754+ return ret;
4a4d8108 19755+}
1facf9fc 19756+
4a4d8108 19757+/* ---------------------------------------------------------------------- */
1facf9fc 19758+
e2f27e51
AM
19759+static int au_is_special(struct inode *inode)
19760+{
19761+ return (inode->i_mode & (S_IFBLK | S_IFCHR | S_IFIFO | S_IFSOCK));
19762+}
19763+
0c3ec466 19764+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
4a4d8108 19765+{
0c3ec466 19766+ int err;
e2f27e51 19767+ aufs_bindex_t bindex;
0c3ec466
AM
19768+ struct super_block *sb;
19769+ struct inode *h_inode;
e2f27e51 19770+ struct vfsmount *h_mnt;
0c3ec466
AM
19771+
19772+ sb = inode->i_sb;
e2f27e51
AM
19773+ WARN_ONCE((flags & S_ATIME) && !IS_NOATIME(inode),
19774+ "unexpected s_flags 0x%lx", sb->s_flags);
19775+
0c3ec466
AM
19776+ /* mmap_sem might be acquired already, cf. aufs_mmap() */
19777+ lockdep_off();
19778+ si_read_lock(sb, AuLock_FLUSH);
19779+ ii_write_lock_child(inode);
19780+ lockdep_on();
e2f27e51
AM
19781+
19782+ err = 0;
19783+ bindex = au_ibtop(inode);
19784+ h_inode = au_h_iptr(inode, bindex);
19785+ if (!au_test_ro(sb, bindex, inode)) {
19786+ h_mnt = au_sbr_mnt(sb, bindex);
19787+ err = vfsub_mnt_want_write(h_mnt);
19788+ if (!err) {
19789+ err = vfsub_update_time(h_inode, ts, flags);
19790+ vfsub_mnt_drop_write(h_mnt);
19791+ }
19792+ } else if (au_is_special(h_inode)) {
19793+ /*
19794+ * Never copy-up here.
19795+ * These special files may already be opened and used for
19796+ * communicating. If we copied it up, then the communication
19797+ * would be corrupted.
19798+ */
19799+ AuWarn1("timestamps for i%lu are ignored "
19800+ "since it is on readonly branch (hi%lu).\n",
19801+ inode->i_ino, h_inode->i_ino);
19802+ } else if (flags & ~S_ATIME) {
19803+ err = -EIO;
19804+ AuIOErr1("unexpected flags 0x%x\n", flags);
19805+ AuDebugOn(1);
19806+ }
19807+
0c3ec466 19808+ lockdep_off();
38d290e6
JR
19809+ if (!err)
19810+ au_cpup_attr_timesizes(inode);
0c3ec466
AM
19811+ ii_write_unlock(inode);
19812+ si_read_unlock(sb);
19813+ lockdep_on();
38d290e6
JR
19814+
19815+ if (!err && (flags & S_VERSION))
19816+ inode_inc_iversion(inode);
19817+
0c3ec466 19818+ return err;
4a4d8108 19819+}
1facf9fc 19820+
4a4d8108 19821+/* ---------------------------------------------------------------------- */
1308ab2a 19822+
b95c5147
AM
19823+/* no getattr version will be set by module.c:aufs_init() */
19824+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
19825+ aufs_iop[] = {
19826+ [AuIop_SYMLINK] = {
19827+ .permission = aufs_permission,
c1595e42 19828+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19829+ .get_acl = aufs_get_acl,
19830+ .set_acl = aufs_set_acl, /* unsupport for symlink? */
c1595e42
JR
19831+#endif
19832+
b95c5147
AM
19833+ .setattr = aufs_setattr,
19834+ .getattr = aufs_getattr,
0c3ec466 19835+
c1595e42 19836+#ifdef CONFIG_AUFS_XATTR
b95c5147 19837+ .listxattr = aufs_listxattr,
c1595e42
JR
19838+#endif
19839+
b95c5147 19840+ .readlink = generic_readlink,
febd17d6 19841+ .get_link = aufs_get_link,
0c3ec466 19842+
b95c5147
AM
19843+ /* .update_time = aufs_update_time */
19844+ },
19845+ [AuIop_DIR] = {
19846+ .create = aufs_create,
19847+ .lookup = aufs_lookup,
19848+ .link = aufs_link,
19849+ .unlink = aufs_unlink,
19850+ .symlink = aufs_symlink,
19851+ .mkdir = aufs_mkdir,
19852+ .rmdir = aufs_rmdir,
19853+ .mknod = aufs_mknod,
19854+ .rename = aufs_rename,
19855+
19856+ .permission = aufs_permission,
c1595e42 19857+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19858+ .get_acl = aufs_get_acl,
19859+ .set_acl = aufs_set_acl,
c1595e42
JR
19860+#endif
19861+
b95c5147
AM
19862+ .setattr = aufs_setattr,
19863+ .getattr = aufs_getattr,
0c3ec466 19864+
c1595e42 19865+#ifdef CONFIG_AUFS_XATTR
b95c5147 19866+ .listxattr = aufs_listxattr,
c1595e42
JR
19867+#endif
19868+
b95c5147
AM
19869+ .update_time = aufs_update_time,
19870+ .atomic_open = aufs_atomic_open,
19871+ .tmpfile = aufs_tmpfile
19872+ },
19873+ [AuIop_OTHER] = {
19874+ .permission = aufs_permission,
c1595e42 19875+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19876+ .get_acl = aufs_get_acl,
19877+ .set_acl = aufs_set_acl,
c1595e42
JR
19878+#endif
19879+
b95c5147
AM
19880+ .setattr = aufs_setattr,
19881+ .getattr = aufs_getattr,
0c3ec466 19882+
c1595e42 19883+#ifdef CONFIG_AUFS_XATTR
b95c5147 19884+ .listxattr = aufs_listxattr,
c1595e42
JR
19885+#endif
19886+
b95c5147
AM
19887+ .update_time = aufs_update_time
19888+ }
4a4d8108 19889+};
7f207e10
AM
19890diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
19891--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 19892+++ linux/fs/aufs/i_op_del.c 2016-10-09 16:55:36.492701639 +0200
5afbbe0d 19893@@ -0,0 +1,511 @@
1facf9fc 19894+/*
8cdd5066 19895+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 19896+ *
19897+ * This program, aufs is free software; you can redistribute it and/or modify
19898+ * it under the terms of the GNU General Public License as published by
19899+ * the Free Software Foundation; either version 2 of the License, or
19900+ * (at your option) any later version.
dece6358
AM
19901+ *
19902+ * This program is distributed in the hope that it will be useful,
19903+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19904+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19905+ * GNU General Public License for more details.
19906+ *
19907+ * You should have received a copy of the GNU General Public License
523b37e3 19908+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 19909+ */
19910+
19911+/*
4a4d8108 19912+ * inode operations (del entry)
1308ab2a 19913+ */
dece6358 19914+
1308ab2a 19915+#include "aufs.h"
dece6358 19916+
4a4d8108
AM
19917+/*
19918+ * decide if a new whiteout for @dentry is necessary or not.
19919+ * when it is necessary, prepare the parent dir for the upper branch whose
19920+ * branch index is @bcpup for creation. the actual creation of the whiteout will
19921+ * be done by caller.
19922+ * return value:
19923+ * 0: wh is unnecessary
19924+ * plus: wh is necessary
19925+ * minus: error
19926+ */
19927+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 19928+{
4a4d8108 19929+ int need_wh, err;
5afbbe0d 19930+ aufs_bindex_t btop;
4a4d8108 19931+ struct super_block *sb;
dece6358 19932+
4a4d8108 19933+ sb = dentry->d_sb;
5afbbe0d 19934+ btop = au_dbtop(dentry);
4a4d8108 19935+ if (*bcpup < 0) {
5afbbe0d
AM
19936+ *bcpup = btop;
19937+ if (au_test_ro(sb, btop, d_inode(dentry))) {
4a4d8108
AM
19938+ err = AuWbrCopyup(au_sbi(sb), dentry);
19939+ *bcpup = err;
19940+ if (unlikely(err < 0))
19941+ goto out;
19942+ }
19943+ } else
5afbbe0d 19944+ AuDebugOn(btop < *bcpup
5527c038 19945+ || au_test_ro(sb, *bcpup, d_inode(dentry)));
5afbbe0d 19946+ AuDbg("bcpup %d, btop %d\n", *bcpup, btop);
1308ab2a 19947+
5afbbe0d 19948+ if (*bcpup != btop) {
4a4d8108
AM
19949+ err = au_cpup_dirs(dentry, *bcpup);
19950+ if (unlikely(err))
19951+ goto out;
19952+ need_wh = 1;
19953+ } else {
027c5e7a 19954+ struct au_dinfo *dinfo, *tmp;
4a4d8108 19955+
027c5e7a
AM
19956+ need_wh = -ENOMEM;
19957+ dinfo = au_di(dentry);
19958+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
19959+ if (tmp) {
19960+ au_di_cp(tmp, dinfo);
19961+ au_di_swap(tmp, dinfo);
19962+ /* returns the number of positive dentries */
5afbbe0d
AM
19963+ need_wh = au_lkup_dentry(dentry, btop + 1,
19964+ /* AuLkup_IGNORE_PERM */ 0);
027c5e7a
AM
19965+ au_di_swap(tmp, dinfo);
19966+ au_rw_write_unlock(&tmp->di_rwsem);
19967+ au_di_free(tmp);
4a4d8108
AM
19968+ }
19969+ }
19970+ AuDbg("need_wh %d\n", need_wh);
19971+ err = need_wh;
19972+
4f0767ce 19973+out:
4a4d8108 19974+ return err;
1facf9fc 19975+}
19976+
4a4d8108
AM
19977+/*
19978+ * simple tests for the del-entry operations.
19979+ * following the checks in vfs, plus the parent-child relationship.
19980+ */
19981+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
19982+ struct dentry *h_parent, int isdir)
1facf9fc 19983+{
4a4d8108
AM
19984+ int err;
19985+ umode_t h_mode;
19986+ struct dentry *h_dentry, *h_latest;
1308ab2a 19987+ struct inode *h_inode;
1facf9fc 19988+
4a4d8108 19989+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 19990+ if (d_really_is_positive(dentry)) {
4a4d8108 19991+ err = -ENOENT;
5527c038
JR
19992+ if (unlikely(d_is_negative(h_dentry)))
19993+ goto out;
19994+ h_inode = d_inode(h_dentry);
19995+ if (unlikely(!h_inode->i_nlink))
4a4d8108 19996+ goto out;
1facf9fc 19997+
4a4d8108
AM
19998+ h_mode = h_inode->i_mode;
19999+ if (!isdir) {
20000+ err = -EISDIR;
20001+ if (unlikely(S_ISDIR(h_mode)))
20002+ goto out;
20003+ } else if (unlikely(!S_ISDIR(h_mode))) {
20004+ err = -ENOTDIR;
20005+ goto out;
20006+ }
20007+ } else {
20008+ /* rename(2) case */
20009+ err = -EIO;
5527c038 20010+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
20011+ goto out;
20012+ }
1facf9fc 20013+
4a4d8108
AM
20014+ err = -ENOENT;
20015+ /* expected parent dir is locked */
20016+ if (unlikely(h_parent != h_dentry->d_parent))
20017+ goto out;
20018+ err = 0;
20019+
20020+ /*
20021+ * rmdir a dir may break the consistency on some filesystem.
20022+ * let's try heavy test.
20023+ */
20024+ err = -EACCES;
076b876e 20025+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
5527c038 20026+ && au_test_h_perm(d_inode(h_parent),
076b876e 20027+ MAY_EXEC | MAY_WRITE)))
4a4d8108
AM
20028+ goto out;
20029+
076b876e 20030+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
4a4d8108
AM
20031+ err = -EIO;
20032+ if (IS_ERR(h_latest))
20033+ goto out;
20034+ if (h_latest == h_dentry)
20035+ err = 0;
20036+ dput(h_latest);
20037+
4f0767ce 20038+out:
4a4d8108 20039+ return err;
1308ab2a 20040+}
1facf9fc 20041+
4a4d8108
AM
20042+/*
20043+ * decide the branch where we operate for @dentry. the branch index will be set
20044+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
20045+ * dir for reverting.
20046+ * when a new whiteout is necessary, create it.
20047+ */
20048+static struct dentry*
20049+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
20050+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 20051+{
4a4d8108
AM
20052+ struct dentry *wh_dentry;
20053+ struct super_block *sb;
20054+ struct path h_path;
20055+ int err, need_wh;
20056+ unsigned int udba;
20057+ aufs_bindex_t bcpup;
dece6358 20058+
4a4d8108
AM
20059+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
20060+ wh_dentry = ERR_PTR(need_wh);
20061+ if (unlikely(need_wh < 0))
20062+ goto out;
20063+
20064+ sb = dentry->d_sb;
20065+ udba = au_opt_udba(sb);
20066+ bcpup = *rbcpup;
20067+ err = au_pin(pin, dentry, bcpup, udba,
20068+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
20069+ wh_dentry = ERR_PTR(err);
20070+ if (unlikely(err))
20071+ goto out;
20072+
20073+ h_path.dentry = au_pinned_h_parent(pin);
20074+ if (udba != AuOpt_UDBA_NONE
5afbbe0d 20075+ && au_dbtop(dentry) == bcpup) {
4a4d8108
AM
20076+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
20077+ wh_dentry = ERR_PTR(err);
20078+ if (unlikely(err))
20079+ goto out_unpin;
20080+ }
20081+
20082+ h_path.mnt = au_sbr_mnt(sb, bcpup);
20083+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
20084+ wh_dentry = NULL;
20085+ if (!need_wh)
20086+ goto out; /* success, no need to create whiteout */
20087+
20088+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
20089+ if (IS_ERR(wh_dentry))
20090+ goto out_unpin;
20091+
20092+ /* returns with the parent is locked and wh_dentry is dget-ed */
20093+ goto out; /* success */
20094+
4f0767ce 20095+out_unpin:
4a4d8108 20096+ au_unpin(pin);
4f0767ce 20097+out:
4a4d8108 20098+ return wh_dentry;
1facf9fc 20099+}
20100+
4a4d8108
AM
20101+/*
20102+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
20103+ * in order to be revertible and save time for removing many child whiteouts
20104+ * under the dir.
20105+ * returns 1 when there are too many child whiteout and caller should remove
20106+ * them asynchronously. returns 0 when the number of children is enough small to
20107+ * remove now or the branch fs is a remote fs.
20108+ * otherwise return an error.
20109+ */
20110+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
20111+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 20112+{
4a4d8108
AM
20113+ int rmdir_later, err, dirwh;
20114+ struct dentry *h_dentry;
20115+ struct super_block *sb;
5527c038 20116+ struct inode *inode;
4a4d8108
AM
20117+
20118+ sb = dentry->d_sb;
20119+ SiMustAnyLock(sb);
20120+ h_dentry = au_h_dptr(dentry, bindex);
20121+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
20122+ if (unlikely(err))
20123+ goto out;
20124+
20125+ /* stop monitoring */
5527c038
JR
20126+ inode = d_inode(dentry);
20127+ au_hn_free(au_hi(inode, bindex));
4a4d8108
AM
20128+
20129+ if (!au_test_fs_remote(h_dentry->d_sb)) {
20130+ dirwh = au_sbi(sb)->si_dirwh;
20131+ rmdir_later = (dirwh <= 1);
20132+ if (!rmdir_later)
20133+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
20134+ dirwh);
20135+ if (rmdir_later)
20136+ return rmdir_later;
20137+ }
1facf9fc 20138+
4a4d8108
AM
20139+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
20140+ if (unlikely(err)) {
523b37e3
AM
20141+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
20142+ h_dentry, bindex, err);
4a4d8108
AM
20143+ err = 0;
20144+ }
dece6358 20145+
4f0767ce 20146+out:
4a4d8108
AM
20147+ AuTraceErr(err);
20148+ return err;
20149+}
1308ab2a 20150+
4a4d8108
AM
20151+/*
20152+ * final procedure for deleting a entry.
20153+ * maintain dentry and iattr.
20154+ */
20155+static void epilog(struct inode *dir, struct dentry *dentry,
20156+ aufs_bindex_t bindex)
20157+{
20158+ struct inode *inode;
1308ab2a 20159+
5527c038 20160+ inode = d_inode(dentry);
4a4d8108
AM
20161+ d_drop(dentry);
20162+ inode->i_ctime = dir->i_ctime;
1308ab2a 20163+
b912730e 20164+ au_dir_ts(dir, bindex);
4a4d8108 20165+ dir->i_version++;
1facf9fc 20166+}
20167+
4a4d8108
AM
20168+/*
20169+ * when an error happened, remove the created whiteout and revert everything.
20170+ */
7f207e10
AM
20171+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
20172+ aufs_bindex_t bwh, struct dentry *wh_dentry,
20173+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 20174+{
4a4d8108
AM
20175+ int rerr;
20176+ struct path h_path = {
20177+ .dentry = wh_dentry,
7f207e10 20178+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 20179+ };
dece6358 20180+
7f207e10 20181+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
20182+ if (!rerr) {
20183+ au_set_dbwh(dentry, bwh);
20184+ au_dtime_revert(dt);
20185+ return 0;
20186+ }
dece6358 20187+
523b37e3 20188+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
4a4d8108 20189+ return -EIO;
1facf9fc 20190+}
20191+
4a4d8108 20192+/* ---------------------------------------------------------------------- */
1facf9fc 20193+
4a4d8108 20194+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 20195+{
4a4d8108 20196+ int err;
5afbbe0d 20197+ aufs_bindex_t bwh, bindex, btop;
523b37e3 20198+ struct inode *inode, *h_dir, *delegated;
4a4d8108 20199+ struct dentry *parent, *wh_dentry;
c2b27bf2
AM
20200+ /* to reuduce stack size */
20201+ struct {
20202+ struct au_dtime dt;
20203+ struct au_pin pin;
20204+ struct path h_path;
20205+ } *a;
1facf9fc 20206+
4a4d8108 20207+ IMustLock(dir);
027c5e7a 20208+
c2b27bf2
AM
20209+ err = -ENOMEM;
20210+ a = kmalloc(sizeof(*a), GFP_NOFS);
20211+ if (unlikely(!a))
20212+ goto out;
20213+
027c5e7a
AM
20214+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
20215+ if (unlikely(err))
c2b27bf2 20216+ goto out_free;
027c5e7a
AM
20217+ err = au_d_hashed_positive(dentry);
20218+ if (unlikely(err))
20219+ goto out_unlock;
5527c038 20220+ inode = d_inode(dentry);
4a4d8108 20221+ IMustLock(inode);
027c5e7a 20222+ err = -EISDIR;
2000de60 20223+ if (unlikely(d_is_dir(dentry)))
027c5e7a 20224+ goto out_unlock; /* possible? */
1facf9fc 20225+
5afbbe0d 20226+ btop = au_dbtop(dentry);
4a4d8108
AM
20227+ bwh = au_dbwh(dentry);
20228+ bindex = -1;
027c5e7a
AM
20229+ parent = dentry->d_parent; /* dir inode is locked */
20230+ di_write_lock_parent(parent);
c2b27bf2
AM
20231+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
20232+ &a->pin);
4a4d8108
AM
20233+ err = PTR_ERR(wh_dentry);
20234+ if (IS_ERR(wh_dentry))
027c5e7a 20235+ goto out_parent;
1facf9fc 20236+
5afbbe0d
AM
20237+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, btop);
20238+ a->h_path.dentry = au_h_dptr(dentry, btop);
c2b27bf2 20239+ dget(a->h_path.dentry);
5afbbe0d 20240+ if (bindex == btop) {
c2b27bf2 20241+ h_dir = au_pinned_h_dir(&a->pin);
523b37e3
AM
20242+ delegated = NULL;
20243+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
20244+ if (unlikely(err == -EWOULDBLOCK)) {
20245+ pr_warn("cannot retry for NFSv4 delegation"
20246+ " for an internal unlink\n");
20247+ iput(delegated);
20248+ }
4a4d8108
AM
20249+ } else {
20250+ /* dir inode is locked */
5527c038 20251+ h_dir = d_inode(wh_dentry->d_parent);
4a4d8108
AM
20252+ IMustLock(h_dir);
20253+ err = 0;
20254+ }
dece6358 20255+
4a4d8108 20256+ if (!err) {
7f207e10 20257+ vfsub_drop_nlink(inode);
4a4d8108
AM
20258+ epilog(dir, dentry, bindex);
20259+
20260+ /* update target timestamps */
5afbbe0d 20261+ if (bindex == btop) {
c2b27bf2
AM
20262+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
20263+ /*ignore*/
5527c038 20264+ inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
4a4d8108
AM
20265+ } else
20266+ /* todo: this timestamp may be reverted later */
20267+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 20268+ goto out_unpin; /* success */
1facf9fc 20269+ }
20270+
4a4d8108
AM
20271+ /* revert */
20272+ if (wh_dentry) {
20273+ int rerr;
20274+
c2b27bf2
AM
20275+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20276+ &a->dt);
4a4d8108
AM
20277+ if (rerr)
20278+ err = rerr;
dece6358 20279+ }
1facf9fc 20280+
027c5e7a 20281+out_unpin:
c2b27bf2 20282+ au_unpin(&a->pin);
4a4d8108 20283+ dput(wh_dentry);
c2b27bf2 20284+ dput(a->h_path.dentry);
027c5e7a 20285+out_parent:
4a4d8108 20286+ di_write_unlock(parent);
027c5e7a 20287+out_unlock:
4a4d8108 20288+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2 20289+out_free:
f0c0a007 20290+ au_delayed_kfree(a);
027c5e7a 20291+out:
4a4d8108 20292+ return err;
dece6358
AM
20293+}
20294+
4a4d8108 20295+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 20296+{
4a4d8108 20297+ int err, rmdir_later;
5afbbe0d 20298+ aufs_bindex_t bwh, bindex, btop;
4a4d8108
AM
20299+ struct inode *inode;
20300+ struct dentry *parent, *wh_dentry, *h_dentry;
20301+ struct au_whtmp_rmdir *args;
c2b27bf2
AM
20302+ /* to reuduce stack size */
20303+ struct {
20304+ struct au_dtime dt;
20305+ struct au_pin pin;
20306+ } *a;
1facf9fc 20307+
4a4d8108 20308+ IMustLock(dir);
027c5e7a 20309+
c2b27bf2
AM
20310+ err = -ENOMEM;
20311+ a = kmalloc(sizeof(*a), GFP_NOFS);
20312+ if (unlikely(!a))
20313+ goto out;
20314+
027c5e7a
AM
20315+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
20316+ if (unlikely(err))
c2b27bf2 20317+ goto out_free;
53392da6
AM
20318+ err = au_alive_dir(dentry);
20319+ if (unlikely(err))
027c5e7a 20320+ goto out_unlock;
5527c038 20321+ inode = d_inode(dentry);
4a4d8108 20322+ IMustLock(inode);
027c5e7a 20323+ err = -ENOTDIR;
2000de60 20324+ if (unlikely(!d_is_dir(dentry)))
027c5e7a 20325+ goto out_unlock; /* possible? */
dece6358 20326+
4a4d8108
AM
20327+ err = -ENOMEM;
20328+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
20329+ if (unlikely(!args))
20330+ goto out_unlock;
dece6358 20331+
4a4d8108
AM
20332+ parent = dentry->d_parent; /* dir inode is locked */
20333+ di_write_lock_parent(parent);
20334+ err = au_test_empty(dentry, &args->whlist);
20335+ if (unlikely(err))
027c5e7a 20336+ goto out_parent;
1facf9fc 20337+
5afbbe0d 20338+ btop = au_dbtop(dentry);
4a4d8108
AM
20339+ bwh = au_dbwh(dentry);
20340+ bindex = -1;
c2b27bf2
AM
20341+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
20342+ &a->pin);
4a4d8108
AM
20343+ err = PTR_ERR(wh_dentry);
20344+ if (IS_ERR(wh_dentry))
027c5e7a 20345+ goto out_parent;
1facf9fc 20346+
5afbbe0d 20347+ h_dentry = au_h_dptr(dentry, btop);
4a4d8108
AM
20348+ dget(h_dentry);
20349+ rmdir_later = 0;
5afbbe0d
AM
20350+ if (bindex == btop) {
20351+ err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
4a4d8108
AM
20352+ if (err > 0) {
20353+ rmdir_later = err;
20354+ err = 0;
20355+ }
20356+ } else {
20357+ /* stop monitoring */
5afbbe0d 20358+ au_hn_free(au_hi(inode, btop));
4a4d8108
AM
20359+
20360+ /* dir inode is locked */
5527c038 20361+ IMustLock(d_inode(wh_dentry->d_parent));
1facf9fc 20362+ err = 0;
20363+ }
20364+
4a4d8108 20365+ if (!err) {
027c5e7a 20366+ vfsub_dead_dir(inode);
4a4d8108
AM
20367+ au_set_dbdiropq(dentry, -1);
20368+ epilog(dir, dentry, bindex);
1308ab2a 20369+
4a4d8108 20370+ if (rmdir_later) {
5afbbe0d 20371+ au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
4a4d8108
AM
20372+ args = NULL;
20373+ }
1308ab2a 20374+
4a4d8108 20375+ goto out_unpin; /* success */
1facf9fc 20376+ }
20377+
4a4d8108
AM
20378+ /* revert */
20379+ AuLabel(revert);
20380+ if (wh_dentry) {
20381+ int rerr;
1308ab2a 20382+
c2b27bf2
AM
20383+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20384+ &a->dt);
4a4d8108
AM
20385+ if (rerr)
20386+ err = rerr;
1facf9fc 20387+ }
20388+
4f0767ce 20389+out_unpin:
c2b27bf2 20390+ au_unpin(&a->pin);
4a4d8108
AM
20391+ dput(wh_dentry);
20392+ dput(h_dentry);
027c5e7a 20393+out_parent:
4a4d8108
AM
20394+ di_write_unlock(parent);
20395+ if (args)
20396+ au_whtmp_rmdir_free(args);
4f0767ce 20397+out_unlock:
4a4d8108 20398+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2 20399+out_free:
f0c0a007 20400+ au_delayed_kfree(a);
4f0767ce 20401+out:
4a4d8108
AM
20402+ AuTraceErr(err);
20403+ return err;
dece6358 20404+}
7f207e10
AM
20405diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
20406--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
20407+++ linux/fs/aufs/i_op_ren.c 2016-12-17 12:28:17.595211562 +0100
20408@@ -0,0 +1,1165 @@
1facf9fc 20409+/*
8cdd5066 20410+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 20411+ *
20412+ * This program, aufs is free software; you can redistribute it and/or modify
20413+ * it under the terms of the GNU General Public License as published by
20414+ * the Free Software Foundation; either version 2 of the License, or
20415+ * (at your option) any later version.
dece6358
AM
20416+ *
20417+ * This program is distributed in the hope that it will be useful,
20418+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20419+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20420+ * GNU General Public License for more details.
20421+ *
20422+ * You should have received a copy of the GNU General Public License
523b37e3 20423+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 20424+ */
20425+
20426+/*
4a4d8108
AM
20427+ * inode operation (rename entry)
20428+ * todo: this is crazy monster
1facf9fc 20429+ */
20430+
20431+#include "aufs.h"
20432+
4a4d8108
AM
20433+enum { AuSRC, AuDST, AuSrcDst };
20434+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 20435+
f2c43d5f
AM
20436+#define AuRen_ISDIR_SRC 1
20437+#define AuRen_ISDIR_DST (1 << 1)
20438+#define AuRen_ISSAMEDIR (1 << 2)
20439+#define AuRen_WHSRC (1 << 3)
20440+#define AuRen_WHDST (1 << 4)
20441+#define AuRen_MNT_WRITE (1 << 5)
20442+#define AuRen_DT_DSTDIR (1 << 6)
20443+#define AuRen_DIROPQ_SRC (1 << 7)
20444+#define AuRen_DIROPQ_DST (1 << 8)
4a4d8108 20445+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
20446+#define au_fset_ren(flags, name) \
20447+ do { (flags) |= AuRen_##name; } while (0)
20448+#define au_fclr_ren(flags, name) \
20449+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 20450+
4a4d8108
AM
20451+struct au_ren_args {
20452+ struct {
20453+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
20454+ *wh_dentry;
20455+ struct inode *dir, *inode;
f2c43d5f 20456+ struct au_hinode *hdir, *hinode;
4a4d8108 20457+ struct au_dtime dt[AuParentChild];
f2c43d5f 20458+ aufs_bindex_t btop, bdiropq;
4a4d8108 20459+ } sd[AuSrcDst];
1facf9fc 20460+
4a4d8108
AM
20461+#define src_dentry sd[AuSRC].dentry
20462+#define src_dir sd[AuSRC].dir
20463+#define src_inode sd[AuSRC].inode
20464+#define src_h_dentry sd[AuSRC].h_dentry
20465+#define src_parent sd[AuSRC].parent
20466+#define src_h_parent sd[AuSRC].h_parent
20467+#define src_wh_dentry sd[AuSRC].wh_dentry
20468+#define src_hdir sd[AuSRC].hdir
f2c43d5f 20469+#define src_hinode sd[AuSRC].hinode
4a4d8108
AM
20470+#define src_h_dir sd[AuSRC].hdir->hi_inode
20471+#define src_dt sd[AuSRC].dt
5afbbe0d 20472+#define src_btop sd[AuSRC].btop
f2c43d5f 20473+#define src_bdiropq sd[AuSRC].bdiropq
1facf9fc 20474+
4a4d8108
AM
20475+#define dst_dentry sd[AuDST].dentry
20476+#define dst_dir sd[AuDST].dir
20477+#define dst_inode sd[AuDST].inode
20478+#define dst_h_dentry sd[AuDST].h_dentry
20479+#define dst_parent sd[AuDST].parent
20480+#define dst_h_parent sd[AuDST].h_parent
20481+#define dst_wh_dentry sd[AuDST].wh_dentry
20482+#define dst_hdir sd[AuDST].hdir
f2c43d5f 20483+#define dst_hinode sd[AuDST].hinode
4a4d8108
AM
20484+#define dst_h_dir sd[AuDST].hdir->hi_inode
20485+#define dst_dt sd[AuDST].dt
5afbbe0d 20486+#define dst_btop sd[AuDST].btop
f2c43d5f 20487+#define dst_bdiropq sd[AuDST].bdiropq
4a4d8108
AM
20488+
20489+ struct dentry *h_trap;
20490+ struct au_branch *br;
4a4d8108
AM
20491+ struct path h_path;
20492+ struct au_nhash whlist;
f2c43d5f 20493+ aufs_bindex_t btgt, src_bwh;
1facf9fc 20494+
f2c43d5f
AM
20495+ struct {
20496+ unsigned short auren_flags;
20497+ unsigned char flags; /* syscall parameter */
20498+ unsigned char exchange;
20499+ } __packed;
1facf9fc 20500+
4a4d8108
AM
20501+ struct au_whtmp_rmdir *thargs;
20502+ struct dentry *h_dst;
20503+};
1308ab2a 20504+
4a4d8108 20505+/* ---------------------------------------------------------------------- */
1308ab2a 20506+
4a4d8108
AM
20507+/*
20508+ * functions for reverting.
20509+ * when an error happened in a single rename systemcall, we should revert
79b8bda9 20510+ * everything as if nothing happened.
4a4d8108
AM
20511+ * we don't need to revert the copied-up/down the parent dir since they are
20512+ * harmless.
20513+ */
1facf9fc 20514+
4a4d8108
AM
/*
 * Report a failed revert step and turn the error into -EIO.
 * The expansion reads and writes the caller's local variables named
 * 'err' (the original error) and 'rerr' (the error of the revert step).
 */
#define RevertFailure(fmt, ...) \
	do { \
		AuIOErr("revert failure: " fmt " (%d, %d)\n", \
			##__VA_ARGS__, err, rerr); \
		err = -EIO; \
	} while (0)
1facf9fc 20520+
f2c43d5f 20521+static void au_ren_do_rev_diropq(int err, struct au_ren_args *a, int idx)
1facf9fc 20522+{
4a4d8108 20523+ int rerr;
f2c43d5f
AM
20524+ struct dentry *d;
20525+#define src_or_dst(member) a->sd[idx].member
1facf9fc 20526+
f2c43d5f
AM
20527+ d = src_or_dst(dentry); /* {src,dst}_dentry */
20528+ au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
20529+ rerr = au_diropq_remove(d, a->btgt);
20530+ au_hn_inode_unlock(src_or_dst(hinode));
20531+ au_set_dbdiropq(d, src_or_dst(bdiropq));
4a4d8108 20532+ if (rerr)
f2c43d5f
AM
20533+ RevertFailure("remove diropq %pd", d);
20534+
20535+#undef src_or_dst_
20536+}
20537+
20538+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
20539+{
20540+ if (au_ftest_ren(a->auren_flags, DIROPQ_SRC))
20541+ au_ren_do_rev_diropq(err, a, AuSRC);
20542+ if (au_ftest_ren(a->auren_flags, DIROPQ_DST))
20543+ au_ren_do_rev_diropq(err, a, AuDST);
4a4d8108 20544+}
1facf9fc 20545+
4a4d8108
AM
20546+static void au_ren_rev_rename(int err, struct au_ren_args *a)
20547+{
20548+ int rerr;
523b37e3 20549+ struct inode *delegated;
1facf9fc 20550+
b4510431
AM
20551+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
20552+ a->src_h_parent);
4a4d8108
AM
20553+ rerr = PTR_ERR(a->h_path.dentry);
20554+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20555+ RevertFailure("lkup one %pd", a->src_dentry);
4a4d8108 20556+ return;
1facf9fc 20557+ }
20558+
523b37e3 20559+ delegated = NULL;
4a4d8108
AM
20560+ rerr = vfsub_rename(a->dst_h_dir,
20561+ au_h_dptr(a->src_dentry, a->btgt),
f2c43d5f 20562+ a->src_h_dir, &a->h_path, &delegated, a->flags);
523b37e3
AM
20563+ if (unlikely(rerr == -EWOULDBLOCK)) {
20564+ pr_warn("cannot retry for NFSv4 delegation"
20565+ " for an internal rename\n");
20566+ iput(delegated);
20567+ }
4a4d8108
AM
20568+ d_drop(a->h_path.dentry);
20569+ dput(a->h_path.dentry);
20570+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
20571+ if (rerr)
523b37e3 20572+ RevertFailure("rename %pd", a->src_dentry);
1facf9fc 20573+}
20574+
4a4d8108 20575+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 20576+{
4a4d8108 20577+ int rerr;
523b37e3 20578+ struct inode *delegated;
dece6358 20579+
b4510431
AM
20580+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
20581+ a->dst_h_parent);
4a4d8108
AM
20582+ rerr = PTR_ERR(a->h_path.dentry);
20583+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20584+ RevertFailure("lkup one %pd", a->dst_dentry);
4a4d8108
AM
20585+ return;
20586+ }
5527c038 20587+ if (d_is_positive(a->h_path.dentry)) {
4a4d8108
AM
20588+ d_drop(a->h_path.dentry);
20589+ dput(a->h_path.dentry);
20590+ return;
dece6358
AM
20591+ }
20592+
523b37e3
AM
20593+ delegated = NULL;
20594+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
f2c43d5f 20595+ &delegated, a->flags);
523b37e3
AM
20596+ if (unlikely(rerr == -EWOULDBLOCK)) {
20597+ pr_warn("cannot retry for NFSv4 delegation"
20598+ " for an internal rename\n");
20599+ iput(delegated);
20600+ }
4a4d8108
AM
20601+ d_drop(a->h_path.dentry);
20602+ dput(a->h_path.dentry);
20603+ if (!rerr)
20604+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
20605+ else
523b37e3 20606+ RevertFailure("rename %pd", a->h_dst);
4a4d8108 20607+}
1308ab2a 20608+
4a4d8108
AM
20609+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
20610+{
20611+ int rerr;
1308ab2a 20612+
4a4d8108
AM
20613+ a->h_path.dentry = a->src_wh_dentry;
20614+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 20615+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108 20616+ if (rerr)
523b37e3 20617+ RevertFailure("unlink %pd", a->src_wh_dentry);
4a4d8108 20618+}
4a4d8108 20619+#undef RevertFailure
1facf9fc 20620+
1308ab2a 20621+/* ---------------------------------------------------------------------- */
20622+
4a4d8108
AM
20623+/*
20624+ * when we have to copyup the renaming entry, do it with the rename-target name
20625+ * in order to minimize the cost (the later actual rename is unnecessary).
20626+ * otherwise rename it on the target branch.
20627+ */
20628+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 20629+{
dece6358 20630+ int err;
4a4d8108 20631+ struct dentry *d;
523b37e3 20632+ struct inode *delegated;
1facf9fc 20633+
4a4d8108 20634+ d = a->src_dentry;
5afbbe0d 20635+ if (au_dbtop(d) == a->btgt) {
4a4d8108 20636+ a->h_path.dentry = a->dst_h_dentry;
5afbbe0d 20637+ AuDebugOn(au_dbtop(d) != a->btgt);
523b37e3 20638+ delegated = NULL;
4a4d8108 20639+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
f2c43d5f
AM
20640+ a->dst_h_dir, &a->h_path, &delegated,
20641+ a->flags);
523b37e3
AM
20642+ if (unlikely(err == -EWOULDBLOCK)) {
20643+ pr_warn("cannot retry for NFSv4 delegation"
20644+ " for an internal rename\n");
20645+ iput(delegated);
20646+ }
c2b27bf2 20647+ } else
86dc4139 20648+ BUG();
1308ab2a 20649+
027c5e7a
AM
20650+ if (!err && a->h_dst)
20651+ /* it will be set to dinfo later */
20652+ dget(a->h_dst);
1facf9fc 20653+
dece6358
AM
20654+ return err;
20655+}
1facf9fc 20656+
4a4d8108
AM
20657+/* cf. aufs_rmdir() */
20658+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 20659+{
4a4d8108
AM
20660+ int err;
20661+ struct inode *dir;
1facf9fc 20662+
4a4d8108
AM
20663+ dir = a->dst_dir;
20664+ SiMustAnyLock(dir->i_sb);
20665+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
20666+ au_sbi(dir->i_sb)->si_dirwh)
20667+ || au_test_fs_remote(a->h_dst->d_sb)) {
20668+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
20669+ if (unlikely(err))
523b37e3
AM
20670+ pr_warn("failed removing whtmp dir %pd (%d), "
20671+ "ignored.\n", a->h_dst, err);
4a4d8108
AM
20672+ } else {
20673+ au_nhash_wh_free(&a->thargs->whlist);
20674+ a->thargs->whlist = a->whlist;
20675+ a->whlist.nh_num = 0;
20676+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
20677+ dput(a->h_dst);
20678+ a->thargs = NULL;
20679+ }
20680+
20681+ return 0;
1308ab2a 20682+}
1facf9fc 20683+
4a4d8108 20684+/* make it 'opaque' dir. */
f2c43d5f 20685+static int au_ren_do_diropq(struct au_ren_args *a, int idx)
4a4d8108
AM
20686+{
20687+ int err;
f2c43d5f
AM
20688+ struct dentry *d, *diropq;
20689+#define src_or_dst(member) a->sd[idx].member
1facf9fc 20690+
4a4d8108 20691+ err = 0;
f2c43d5f
AM
20692+ d = src_or_dst(dentry); /* {src,dst}_dentry */
20693+ src_or_dst(bdiropq) = au_dbdiropq(d);
20694+ src_or_dst(hinode) = au_hi(src_or_dst(inode), a->btgt);
20695+ au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
20696+ diropq = au_diropq_create(d, a->btgt);
20697+ au_hn_inode_unlock(src_or_dst(hinode));
4a4d8108
AM
20698+ if (IS_ERR(diropq))
20699+ err = PTR_ERR(diropq);
076b876e
AM
20700+ else
20701+ dput(diropq);
1facf9fc 20702+
f2c43d5f 20703+#undef src_or_dst_
4a4d8108
AM
20704+ return err;
20705+}
1facf9fc 20706+
f2c43d5f 20707+static int au_ren_diropq(struct au_ren_args *a)
4a4d8108
AM
20708+{
20709+ int err;
f2c43d5f
AM
20710+ unsigned char always;
20711+ struct dentry *d;
1facf9fc 20712+
f2c43d5f
AM
20713+ err = 0;
20714+ d = a->dst_dentry; /* already renamed on the branch */
20715+ always = !!au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ);
20716+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
20717+ && a->btgt != au_dbdiropq(a->src_dentry)
20718+ && (a->dst_wh_dentry
20719+ || a->btgt <= au_dbdiropq(d)
20720+ /* hide the lower to keep xino */
20721+ /* the lowers may not be a dir, but we hide them anyway */
20722+ || a->btgt < au_dbbot(d)
20723+ || always)) {
20724+ AuDbg("here\n");
20725+ err = au_ren_do_diropq(a, AuSRC);
20726+ if (unlikely(err))
4a4d8108 20727+ goto out;
f2c43d5f 20728+ au_fset_ren(a->auren_flags, DIROPQ_SRC);
4a4d8108 20729+ }
f2c43d5f
AM
20730+ if (!a->exchange)
20731+ goto out; /* success */
1facf9fc 20732+
f2c43d5f
AM
20733+ d = a->src_dentry; /* already renamed on the branch */
20734+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)
20735+ && a->btgt != au_dbdiropq(a->dst_dentry)
20736+ && (a->btgt < au_dbdiropq(d)
20737+ || a->btgt < au_dbbot(d)
20738+ || always)) {
20739+ AuDbgDentry(a->src_dentry);
20740+ AuDbgDentry(a->dst_dentry);
20741+ err = au_ren_do_diropq(a, AuDST);
4a4d8108 20742+ if (unlikely(err))
f2c43d5f
AM
20743+ goto out_rev_src;
20744+ au_fset_ren(a->auren_flags, DIROPQ_DST);
20745+ }
20746+ goto out; /* success */
dece6358 20747+
f2c43d5f
AM
20748+out_rev_src:
20749+ AuDbg("err %d, reverting src\n", err);
20750+ au_ren_rev_diropq(err, a);
20751+out:
20752+ return err;
20753+}
20754+
20755+static int do_rename(struct au_ren_args *a)
20756+{
20757+ int err;
20758+ struct dentry *d, *h_d;
20759+
20760+ if (!a->exchange) {
20761+ /* prepare workqueue args for asynchronous rmdir */
20762+ h_d = a->dst_h_dentry;
20763+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)
20764+ && d_is_positive(h_d)) {
20765+ err = -ENOMEM;
20766+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb,
20767+ GFP_NOFS);
20768+ if (unlikely(!a->thargs))
20769+ goto out;
20770+ a->h_dst = dget(h_d);
20771+ }
20772+
20773+ /* create whiteout for src_dentry */
20774+ if (au_ftest_ren(a->auren_flags, WHSRC)) {
20775+ a->src_bwh = au_dbwh(a->src_dentry);
20776+ AuDebugOn(a->src_bwh >= 0);
20777+ a->src_wh_dentry = au_wh_create(a->src_dentry, a->btgt,
20778+ a->src_h_parent);
20779+ err = PTR_ERR(a->src_wh_dentry);
20780+ if (IS_ERR(a->src_wh_dentry))
20781+ goto out_thargs;
20782+ }
20783+
20784+ /* lookup whiteout for dentry */
20785+ if (au_ftest_ren(a->auren_flags, WHDST)) {
20786+ h_d = au_wh_lkup(a->dst_h_parent,
20787+ &a->dst_dentry->d_name, a->br);
20788+ err = PTR_ERR(h_d);
20789+ if (IS_ERR(h_d))
20790+ goto out_whsrc;
20791+ if (d_is_negative(h_d))
20792+ dput(h_d);
20793+ else
20794+ a->dst_wh_dentry = h_d;
20795+ }
20796+
20797+ /* rename dentry to tmpwh */
20798+ if (a->thargs) {
20799+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
20800+ if (unlikely(err))
20801+ goto out_whdst;
20802+
20803+ d = a->dst_dentry;
20804+ au_set_h_dptr(d, a->btgt, NULL);
20805+ err = au_lkup_neg(d, a->btgt, /*wh*/0);
20806+ if (unlikely(err))
20807+ goto out_whtmp;
20808+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
20809+ }
4a4d8108 20810+ }
1facf9fc 20811+
5afbbe0d 20812+ BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_btop != a->btgt);
1facf9fc 20813+
4a4d8108 20814+ /* rename by vfs_rename or cpup */
4a4d8108
AM
20815+ err = au_ren_or_cpup(a);
20816+ if (unlikely(err))
20817+ /* leave the copied-up one */
20818+ goto out_whtmp;
1308ab2a 20819+
4a4d8108 20820+ /* make dir opaque */
f2c43d5f
AM
20821+ err = au_ren_diropq(a);
20822+ if (unlikely(err))
20823+ goto out_rename;
1308ab2a 20824+
4a4d8108 20825+ /* update target timestamps */
f2c43d5f
AM
20826+ if (a->exchange) {
20827+ AuDebugOn(au_dbtop(a->dst_dentry) != a->btgt);
20828+ a->h_path.dentry = au_h_dptr(a->dst_dentry, a->btgt);
20829+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
20830+ a->dst_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
20831+ }
5afbbe0d 20832+ AuDebugOn(au_dbtop(a->src_dentry) != a->btgt);
4a4d8108
AM
20833+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
20834+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
5527c038 20835+ a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
1facf9fc 20836+
f2c43d5f
AM
20837+ if (!a->exchange) {
20838+ /* remove whiteout for dentry */
20839+ if (a->dst_wh_dentry) {
20840+ a->h_path.dentry = a->dst_wh_dentry;
20841+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
20842+ a->dst_dentry);
20843+ if (unlikely(err))
20844+ goto out_diropq;
20845+ }
1facf9fc 20846+
f2c43d5f
AM
20847+ /* remove whtmp */
20848+ if (a->thargs)
20849+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 20850+
f2c43d5f
AM
20851+ au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
20852+ }
4a4d8108
AM
20853+ err = 0;
20854+ goto out_success;
20855+
4f0767ce 20856+out_diropq:
f2c43d5f 20857+ au_ren_rev_diropq(err, a);
4f0767ce 20858+out_rename:
7e9cd9fe 20859+ au_ren_rev_rename(err, a);
027c5e7a 20860+ dput(a->h_dst);
4f0767ce 20861+out_whtmp:
4a4d8108
AM
20862+ if (a->thargs)
20863+ au_ren_rev_whtmp(err, a);
4f0767ce 20864+out_whdst:
4a4d8108
AM
20865+ dput(a->dst_wh_dentry);
20866+ a->dst_wh_dentry = NULL;
4f0767ce 20867+out_whsrc:
4a4d8108
AM
20868+ if (a->src_wh_dentry)
20869+ au_ren_rev_whsrc(err, a);
4f0767ce 20870+out_success:
4a4d8108
AM
20871+ dput(a->src_wh_dentry);
20872+ dput(a->dst_wh_dentry);
4f0767ce 20873+out_thargs:
4a4d8108
AM
20874+ if (a->thargs) {
20875+ dput(a->h_dst);
20876+ au_whtmp_rmdir_free(a->thargs);
20877+ a->thargs = NULL;
20878+ }
4f0767ce 20879+out:
4a4d8108 20880+ return err;
dece6358 20881+}
1facf9fc 20882+
1308ab2a 20883+/* ---------------------------------------------------------------------- */
1facf9fc 20884+
4a4d8108
AM
/*
 * Test whether the dir @dentry may become a rename destination.
 * It may when it is a logically empty dir, which is what au_test_empty()
 * decides; its result is returned as is, with @whlist passed through.
 */
static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
{
	int err;

	err = au_test_empty(dentry, whlist);
	return err;
}
1facf9fc 20893+
4a4d8108
AM
20894+/*
20895+ * test if @dentry dir can be rename source or not.
20896+ * if it can, return 0 and @children is filled.
20897+ * success means,
20898+ * - it is a logically empty dir.
20899+ * - or, it exists on writable branch and has no children including whiteouts
20900+ * on the lower branch.
20901+ */
20902+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
20903+{
20904+ int err;
20905+ unsigned int rdhash;
5afbbe0d 20906+ aufs_bindex_t btop;
1facf9fc 20907+
5afbbe0d
AM
20908+ btop = au_dbtop(dentry);
20909+ if (btop != btgt) {
4a4d8108 20910+ struct au_nhash whlist;
dece6358 20911+
4a4d8108
AM
20912+ SiMustAnyLock(dentry->d_sb);
20913+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
20914+ if (!rdhash)
20915+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
20916+ dentry));
20917+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
20918+ if (unlikely(err))
20919+ goto out;
20920+ err = au_test_empty(dentry, &whlist);
20921+ au_nhash_wh_free(&whlist);
20922+ goto out;
20923+ }
dece6358 20924+
5afbbe0d 20925+ if (btop == au_dbtaildir(dentry))
4a4d8108 20926+ return 0; /* success */
dece6358 20927+
4a4d8108 20928+ err = au_test_empty_lower(dentry);
1facf9fc 20929+
4f0767ce 20930+out:
4a4d8108
AM
20931+ if (err == -ENOTEMPTY) {
20932+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
20933+ " is not supported\n");
20934+ err = -EXDEV;
20935+ }
20936+ return err;
20937+}
1308ab2a 20938+
4a4d8108
AM
20939+/* side effect: sets whlist and h_dentry */
20940+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 20941+{
4a4d8108
AM
20942+ int err;
20943+ unsigned int rdhash;
20944+ struct dentry *d;
1facf9fc 20945+
4a4d8108
AM
20946+ d = a->dst_dentry;
20947+ SiMustAnyLock(d->d_sb);
1facf9fc 20948+
4a4d8108 20949+ err = 0;
f2c43d5f 20950+ if (au_ftest_ren(a->auren_flags, ISDIR_DST) && a->dst_inode) {
4a4d8108
AM
20951+ rdhash = au_sbi(d->d_sb)->si_rdhash;
20952+ if (!rdhash)
20953+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
20954+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
20955+ if (unlikely(err))
20956+ goto out;
1308ab2a 20957+
f2c43d5f
AM
20958+ if (!a->exchange) {
20959+ au_set_dbtop(d, a->dst_btop);
20960+ err = may_rename_dstdir(d, &a->whlist);
20961+ au_set_dbtop(d, a->btgt);
20962+ } else
20963+ err = may_rename_srcdir(d, a->btgt);
4a4d8108 20964+ }
5afbbe0d 20965+ a->dst_h_dentry = au_h_dptr(d, au_dbtop(d));
4a4d8108
AM
20966+ if (unlikely(err))
20967+ goto out;
20968+
20969+ d = a->src_dentry;
5afbbe0d 20970+ a->src_h_dentry = au_h_dptr(d, au_dbtop(d));
f2c43d5f 20971+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
4a4d8108
AM
20972+ err = may_rename_srcdir(d, a->btgt);
20973+ if (unlikely(err)) {
20974+ au_nhash_wh_free(&a->whlist);
20975+ a->whlist.nh_num = 0;
20976+ }
20977+ }
4f0767ce 20978+out:
4a4d8108 20979+ return err;
1facf9fc 20980+}
20981+
4a4d8108 20982+/* ---------------------------------------------------------------------- */
1facf9fc 20983+
4a4d8108
AM
20984+/*
20985+ * simple tests for rename.
20986+ * following the checks in vfs, plus the parent-child relationship.
20987+ */
20988+static int au_may_ren(struct au_ren_args *a)
20989+{
20990+ int err, isdir;
20991+ struct inode *h_inode;
1facf9fc 20992+
5afbbe0d 20993+ if (a->src_btop == a->btgt) {
4a4d8108 20994+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
f2c43d5f 20995+ au_ftest_ren(a->auren_flags, ISDIR_SRC));
4a4d8108
AM
20996+ if (unlikely(err))
20997+ goto out;
20998+ err = -EINVAL;
20999+ if (unlikely(a->src_h_dentry == a->h_trap))
21000+ goto out;
21001+ }
1facf9fc 21002+
4a4d8108 21003+ err = 0;
5afbbe0d 21004+ if (a->dst_btop != a->btgt)
4a4d8108 21005+ goto out;
1facf9fc 21006+
027c5e7a
AM
21007+ err = -ENOTEMPTY;
21008+ if (unlikely(a->dst_h_dentry == a->h_trap))
21009+ goto out;
21010+
4a4d8108 21011+ err = -EIO;
f2c43d5f 21012+ isdir = !!au_ftest_ren(a->auren_flags, ISDIR_DST);
5527c038
JR
21013+ if (d_really_is_negative(a->dst_dentry)) {
21014+ if (d_is_negative(a->dst_h_dentry))
21015+ err = au_may_add(a->dst_dentry, a->btgt,
21016+ a->dst_h_parent, isdir);
4a4d8108 21017+ } else {
5527c038 21018+ if (unlikely(d_is_negative(a->dst_h_dentry)))
4a4d8108 21019+ goto out;
5527c038
JR
21020+ h_inode = d_inode(a->dst_h_dentry);
21021+ if (h_inode->i_nlink)
21022+ err = au_may_del(a->dst_dentry, a->btgt,
21023+ a->dst_h_parent, isdir);
4a4d8108 21024+ }
1facf9fc 21025+
4f0767ce 21026+out:
4a4d8108
AM
21027+ if (unlikely(err == -ENOENT || err == -EEXIST))
21028+ err = -EIO;
21029+ AuTraceErr(err);
21030+ return err;
21031+}
1facf9fc 21032+
1308ab2a 21033+/* ---------------------------------------------------------------------- */
1facf9fc 21034+
4a4d8108
AM
21035+/*
21036+ * locking order
21037+ * (VFS)
21038+ * - src_dir and dir by lock_rename()
21039+ * - inode if exitsts
21040+ * (aufs)
21041+ * - lock all
21042+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
21043+ * + si_read_lock
21044+ * + di_write_lock2_child()
21045+ * + di_write_lock_child()
21046+ * + ii_write_lock_child()
21047+ * + di_write_lock_child2()
21048+ * + ii_write_lock_child2()
21049+ * + src_parent and parent
21050+ * + di_write_lock_parent()
21051+ * + ii_write_lock_parent()
21052+ * + di_write_lock_parent2()
21053+ * + ii_write_lock_parent2()
21054+ * + lower src_dir and dir by vfsub_lock_rename()
21055+ * + verify the every relationships between child and parent. if any
21056+ * of them failed, unlock all and return -EBUSY.
21057+ */
21058+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 21059+{
4a4d8108
AM
21060+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
21061+ a->dst_h_parent, a->dst_hdir);
f2c43d5f 21062+ if (au_ftest_ren(a->auren_flags, MNT_WRITE))
86dc4139 21063+ vfsub_mnt_drop_write(au_br_mnt(a->br));
1308ab2a 21064+}
21065+
4a4d8108 21066+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 21067+{
4a4d8108
AM
21068+ int err;
21069+ unsigned int udba;
1308ab2a 21070+
4a4d8108
AM
21071+ err = 0;
21072+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
21073+ a->src_hdir = au_hi(a->src_dir, a->btgt);
21074+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
21075+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
86dc4139
AM
21076+
21077+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
21078+ if (unlikely(err))
21079+ goto out;
f2c43d5f 21080+ au_fset_ren(a->auren_flags, MNT_WRITE);
4a4d8108
AM
21081+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
21082+ a->dst_h_parent, a->dst_hdir);
21083+ udba = au_opt_udba(a->src_dentry->d_sb);
5527c038
JR
21084+ if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
21085+ || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
4a4d8108 21086+ err = au_busy_or_stale();
5afbbe0d 21087+ if (!err && au_dbtop(a->src_dentry) == a->btgt)
4a4d8108 21088+ err = au_h_verify(a->src_h_dentry, udba,
5527c038 21089+ d_inode(a->src_h_parent), a->src_h_parent,
4a4d8108 21090+ a->br);
5afbbe0d 21091+ if (!err && au_dbtop(a->dst_dentry) == a->btgt)
4a4d8108 21092+ err = au_h_verify(a->dst_h_dentry, udba,
5527c038 21093+ d_inode(a->dst_h_parent), a->dst_h_parent,
4a4d8108 21094+ a->br);
86dc4139 21095+ if (!err)
4a4d8108 21096+ goto out; /* success */
4a4d8108
AM
21097+
21098+ err = au_busy_or_stale();
4a4d8108 21099+ au_ren_unlock(a);
86dc4139 21100+
4f0767ce 21101+out:
4a4d8108 21102+ return err;
1facf9fc 21103+}
21104+
21105+/* ---------------------------------------------------------------------- */
21106+
4a4d8108 21107+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 21108+{
4a4d8108 21109+ struct inode *dir;
dece6358 21110+
4a4d8108
AM
21111+ dir = a->dst_dir;
21112+ dir->i_version++;
f2c43d5f 21113+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
4a4d8108
AM
21114+ /* is this updating defined in POSIX? */
21115+ au_cpup_attr_timesizes(a->src_inode);
21116+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 21117+ }
b912730e 21118+ au_dir_ts(dir, a->btgt);
dece6358 21119+
f2c43d5f
AM
21120+ if (a->exchange) {
21121+ dir = a->src_dir;
21122+ dir->i_version++;
21123+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)) {
21124+ /* is this updating defined in POSIX? */
21125+ au_cpup_attr_timesizes(a->dst_inode);
21126+ au_cpup_attr_nlink(dir, /*force*/1);
21127+ }
21128+ au_dir_ts(dir, a->btgt);
21129+ }
21130+
21131+ if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
4a4d8108 21132+ return;
dece6358 21133+
4a4d8108
AM
21134+ dir = a->src_dir;
21135+ dir->i_version++;
f2c43d5f 21136+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC))
4a4d8108 21137+ au_cpup_attr_nlink(dir, /*force*/1);
b912730e 21138+ au_dir_ts(dir, a->btgt);
1facf9fc 21139+}
21140+
4a4d8108 21141+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 21142+{
5afbbe0d 21143+ aufs_bindex_t bbot, bindex;
4a4d8108
AM
21144+ struct dentry *d, *h_d;
21145+ struct inode *i, *h_i;
21146+ struct super_block *sb;
dece6358 21147+
027c5e7a
AM
21148+ d = a->dst_dentry;
21149+ d_drop(d);
21150+ if (a->h_dst)
21151+ /* already dget-ed by au_ren_or_cpup() */
21152+ au_set_h_dptr(d, a->btgt, a->h_dst);
21153+
21154+ i = a->dst_inode;
21155+ if (i) {
f2c43d5f
AM
21156+ if (!a->exchange) {
21157+ if (!au_ftest_ren(a->auren_flags, ISDIR_DST))
21158+ vfsub_drop_nlink(i);
21159+ else {
21160+ vfsub_dead_dir(i);
21161+ au_cpup_attr_timesizes(i);
21162+ }
21163+ au_update_dbrange(d, /*do_put_zero*/1);
21164+ } else
21165+ au_cpup_attr_nlink(i, /*force*/1);
027c5e7a 21166+ } else {
5afbbe0d
AM
21167+ bbot = a->btgt;
21168+ for (bindex = au_dbtop(d); bindex < bbot; bindex++)
027c5e7a 21169+ au_set_h_dptr(d, bindex, NULL);
5afbbe0d
AM
21170+ bbot = au_dbbot(d);
21171+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++)
027c5e7a
AM
21172+ au_set_h_dptr(d, bindex, NULL);
21173+ au_update_dbrange(d, /*do_put_zero*/0);
21174+ }
21175+
4a4d8108 21176+ d = a->src_dentry;
f2c43d5f
AM
21177+ if (!a->exchange) {
21178+ au_set_dbwh(d, -1);
21179+ bbot = au_dbbot(d);
21180+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
21181+ h_d = au_h_dptr(d, bindex);
21182+ if (h_d)
21183+ au_set_h_dptr(d, bindex, NULL);
21184+ }
21185+ au_set_dbbot(d, a->btgt);
4a4d8108 21186+
f2c43d5f
AM
21187+ sb = d->d_sb;
21188+ i = a->src_inode;
21189+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
21190+ return; /* success */
4a4d8108 21191+
f2c43d5f
AM
21192+ bbot = au_ibbot(i);
21193+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
21194+ h_i = au_h_iptr(i, bindex);
21195+ if (h_i) {
21196+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
21197+ /* ignore this error */
21198+ au_set_h_iptr(i, bindex, NULL, 0);
21199+ }
4a4d8108 21200+ }
f2c43d5f 21201+ au_set_ibbot(i, a->btgt);
4a4d8108 21202+ }
f2c43d5f 21203+ d_drop(a->src_dentry);
1308ab2a 21204+}
dece6358 21205+
4a4d8108
AM
21206+/* ---------------------------------------------------------------------- */
21207+
21208+/* mainly for link(2) and rename(2) */
21209+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 21210+{
4a4d8108
AM
21211+ aufs_bindex_t bdiropq, bwh;
21212+ struct dentry *parent;
21213+ struct au_branch *br;
21214+
21215+ parent = dentry->d_parent;
5527c038 21216+ IMustLock(d_inode(parent)); /* dir is locked */
4a4d8108
AM
21217+
21218+ bdiropq = au_dbdiropq(parent);
21219+ bwh = au_dbwh(dentry);
21220+ br = au_sbr(dentry->d_sb, btgt);
21221+ if (au_br_rdonly(br)
21222+ || (0 <= bdiropq && bdiropq < btgt)
21223+ || (0 <= bwh && bwh < btgt))
21224+ btgt = -1;
21225+
21226+ AuDbg("btgt %d\n", btgt);
21227+ return btgt;
1facf9fc 21228+}
21229+
5afbbe0d 21230+/* sets src_btop, dst_btop and btgt */
4a4d8108 21231+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 21232+{
4a4d8108
AM
21233+ int err;
21234+ struct au_wr_dir_args wr_dir_args = {
21235+ /* .force_btgt = -1, */
21236+ .flags = AuWrDir_ADD_ENTRY
21237+ };
dece6358 21238+
5afbbe0d
AM
21239+ a->src_btop = au_dbtop(a->src_dentry);
21240+ a->dst_btop = au_dbtop(a->dst_dentry);
f2c43d5f
AM
21241+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
21242+ || au_ftest_ren(a->auren_flags, ISDIR_DST))
4a4d8108 21243+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
5afbbe0d
AM
21244+ wr_dir_args.force_btgt = a->src_btop;
21245+ if (a->dst_inode && a->dst_btop < a->src_btop)
21246+ wr_dir_args.force_btgt = a->dst_btop;
4a4d8108
AM
21247+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
21248+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
21249+ a->btgt = err;
f2c43d5f
AM
21250+ if (a->exchange)
21251+ au_update_dbtop(a->dst_dentry);
dece6358 21252+
4a4d8108 21253+ return err;
1facf9fc 21254+}
21255+
4a4d8108 21256+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 21257+{
4a4d8108
AM
21258+ a->h_path.dentry = a->src_h_parent;
21259+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
f2c43d5f 21260+ if (!au_ftest_ren(a->auren_flags, ISSAMEDIR)) {
4a4d8108
AM
21261+ a->h_path.dentry = a->dst_h_parent;
21262+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
21263+ }
1facf9fc 21264+
f2c43d5f
AM
21265+ au_fclr_ren(a->auren_flags, DT_DSTDIR);
21266+ if (!au_ftest_ren(a->auren_flags, ISDIR_SRC)
21267+ && !a->exchange)
4a4d8108 21268+ return;
dece6358 21269+
4a4d8108
AM
21270+ a->h_path.dentry = a->src_h_dentry;
21271+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
5527c038 21272+ if (d_is_positive(a->dst_h_dentry)) {
f2c43d5f 21273+ au_fset_ren(a->auren_flags, DT_DSTDIR);
4a4d8108
AM
21274+ a->h_path.dentry = a->dst_h_dentry;
21275+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
21276+ }
1308ab2a 21277+}
dece6358 21278+
4a4d8108 21279+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 21280+{
4a4d8108 21281+ struct dentry *h_d;
febd17d6 21282+ struct inode *h_inode;
4a4d8108
AM
21283+
21284+ au_dtime_revert(a->src_dt + AuPARENT);
f2c43d5f 21285+ if (!au_ftest_ren(a->auren_flags, ISSAMEDIR))
4a4d8108
AM
21286+ au_dtime_revert(a->dst_dt + AuPARENT);
21287+
f2c43d5f 21288+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC) && err != -EIO) {
4a4d8108 21289+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
febd17d6
JR
21290+ h_inode = d_inode(h_d);
21291+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 21292+ au_dtime_revert(a->src_dt + AuCHILD);
febd17d6 21293+ inode_unlock(h_inode);
4a4d8108 21294+
f2c43d5f 21295+ if (au_ftest_ren(a->auren_flags, DT_DSTDIR)) {
4a4d8108 21296+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
febd17d6
JR
21297+ h_inode = d_inode(h_d);
21298+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
4a4d8108 21299+ au_dtime_revert(a->dst_dt + AuCHILD);
febd17d6 21300+ inode_unlock(h_inode);
1facf9fc 21301+ }
21302+ }
21303+}
21304+
4a4d8108
AM
21305+/* ---------------------------------------------------------------------- */
21306+
21307+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
f2c43d5f
AM
21308+ struct inode *_dst_dir, struct dentry *_dst_dentry,
21309+ unsigned int _flags)
1facf9fc 21310+{
f2c43d5f 21311+ int err, lock_flags;
4a4d8108
AM
21312+ /* reduce stack space */
21313+ struct au_ren_args *a;
f2c43d5f 21314+ struct au_pin pin;
4a4d8108 21315+
f2c43d5f 21316+ AuDbg("%pd, %pd, 0x%x\n", _src_dentry, _dst_dentry, _flags);
4a4d8108
AM
21317+ IMustLock(_src_dir);
21318+ IMustLock(_dst_dir);
21319+
f2c43d5f
AM
21320+ err = -EINVAL;
21321+ if (unlikely(_flags & RENAME_WHITEOUT))
21322+ goto out;
21323+
4a4d8108
AM
21324+ err = -ENOMEM;
21325+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
21326+ a = kzalloc(sizeof(*a), GFP_NOFS);
21327+ if (unlikely(!a))
21328+ goto out;
21329+
f2c43d5f
AM
21330+ a->flags = _flags;
21331+ a->exchange = _flags & RENAME_EXCHANGE;
4a4d8108
AM
21332+ a->src_dir = _src_dir;
21333+ a->src_dentry = _src_dentry;
5527c038
JR
21334+ a->src_inode = NULL;
21335+ if (d_really_is_positive(a->src_dentry))
21336+ a->src_inode = d_inode(a->src_dentry);
4a4d8108
AM
21337+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
21338+ a->dst_dir = _dst_dir;
21339+ a->dst_dentry = _dst_dentry;
5527c038
JR
21340+ a->dst_inode = NULL;
21341+ if (d_really_is_positive(a->dst_dentry))
21342+ a->dst_inode = d_inode(a->dst_dentry);
4a4d8108
AM
21343+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
21344+ if (a->dst_inode) {
f2c43d5f
AM
21345+ /*
21346+ * if EXCHANGE && src is non-dir && dst is dir,
21347+ * dst is not locked.
21348+ */
21349+ /* IMustLock(a->dst_inode); */
4a4d8108 21350+ au_igrab(a->dst_inode);
1facf9fc 21351+ }
1facf9fc 21352+
4a4d8108 21353+ err = -ENOTDIR;
f2c43d5f 21354+ lock_flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
2000de60 21355+ if (d_is_dir(a->src_dentry)) {
f2c43d5f
AM
21356+ au_fset_ren(a->auren_flags, ISDIR_SRC);
21357+ if (unlikely(!a->exchange
21358+ && d_really_is_positive(a->dst_dentry)
2000de60 21359+ && !d_is_dir(a->dst_dentry)))
4a4d8108 21360+ goto out_free;
f2c43d5f
AM
21361+ lock_flags |= AuLock_DIRS;
21362+ }
21363+ if (a->dst_inode && d_is_dir(a->dst_dentry)) {
21364+ au_fset_ren(a->auren_flags, ISDIR_DST);
21365+ if (unlikely(!a->exchange
21366+ && d_really_is_positive(a->src_dentry)
21367+ && !d_is_dir(a->src_dentry)))
21368+ goto out_free;
21369+ lock_flags |= AuLock_DIRS;
b95c5147 21370+ }
f2c43d5f 21371+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, lock_flags);
e49829fe
JR
21372+ if (unlikely(err))
21373+ goto out_free;
1facf9fc 21374+
027c5e7a
AM
21375+ err = au_d_hashed_positive(a->src_dentry);
21376+ if (unlikely(err))
21377+ goto out_unlock;
21378+ err = -ENOENT;
21379+ if (a->dst_inode) {
21380+ /*
f2c43d5f 21381+ * If it is a dir, VFS unhashes it before this
027c5e7a
AM
21382+ * function. It means we cannot rely upon d_unhashed().
21383+ */
21384+ if (unlikely(!a->dst_inode->i_nlink))
21385+ goto out_unlock;
f2c43d5f 21386+ if (!au_ftest_ren(a->auren_flags, ISDIR_DST)) {
027c5e7a 21387+ err = au_d_hashed_positive(a->dst_dentry);
f2c43d5f 21388+ if (unlikely(err && !a->exchange))
027c5e7a
AM
21389+ goto out_unlock;
21390+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
21391+ goto out_unlock;
21392+ } else if (unlikely(d_unhashed(a->dst_dentry)))
21393+ goto out_unlock;
21394+
7eafdf33
AM
21395+ /*
21396+ * is it possible?
79b8bda9 21397+ * yes, it happened (in linux-3.3-rcN) but I don't know why.
7eafdf33
AM
21398+ * there may exist a problem somewhere else.
21399+ */
21400+ err = -EINVAL;
5527c038 21401+ if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
7eafdf33
AM
21402+ goto out_unlock;
21403+
f2c43d5f 21404+ au_fset_ren(a->auren_flags, ISSAMEDIR); /* temporary */
4a4d8108 21405+ di_write_lock_parent(a->dst_parent);
1facf9fc 21406+
4a4d8108
AM
21407+ /* which branch we process */
21408+ err = au_ren_wbr(a);
21409+ if (unlikely(err < 0))
027c5e7a 21410+ goto out_parent;
4a4d8108 21411+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
86dc4139 21412+ a->h_path.mnt = au_br_mnt(a->br);
1facf9fc 21413+
4a4d8108
AM
21414+ /* are they available to be renamed */
21415+ err = au_ren_may_dir(a);
21416+ if (unlikely(err))
21417+ goto out_children;
1facf9fc 21418+
4a4d8108 21419+ /* prepare the writable parent dir on the same branch */
5afbbe0d 21420+ if (a->dst_btop == a->btgt) {
f2c43d5f 21421+ au_fset_ren(a->auren_flags, WHDST);
4a4d8108
AM
21422+ } else {
21423+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
21424+ if (unlikely(err))
21425+ goto out_children;
21426+ }
1facf9fc 21427+
f2c43d5f
AM
21428+ err = 0;
21429+ if (!a->exchange) {
21430+ if (a->src_dir != a->dst_dir) {
21431+ /*
21432+ * this temporary unlock is safe,
21433+ * because both dir->i_mutex are locked.
21434+ */
21435+ di_write_unlock(a->dst_parent);
21436+ di_write_lock_parent(a->src_parent);
21437+ err = au_wr_dir_need_wh(a->src_dentry,
21438+ au_ftest_ren(a->auren_flags,
21439+ ISDIR_SRC),
21440+ &a->btgt);
21441+ di_write_unlock(a->src_parent);
21442+ di_write_lock2_parent(a->src_parent, a->dst_parent,
21443+ /*isdir*/1);
21444+ au_fclr_ren(a->auren_flags, ISSAMEDIR);
21445+ } else
21446+ err = au_wr_dir_need_wh(a->src_dentry,
21447+ au_ftest_ren(a->auren_flags,
21448+ ISDIR_SRC),
21449+ &a->btgt);
21450+ }
4a4d8108
AM
21451+ if (unlikely(err < 0))
21452+ goto out_children;
21453+ if (err)
f2c43d5f 21454+ au_fset_ren(a->auren_flags, WHSRC);
1facf9fc 21455+
86dc4139 21456+ /* cpup src */
5afbbe0d 21457+ if (a->src_btop != a->btgt) {
86dc4139
AM
21458+ err = au_pin(&pin, a->src_dentry, a->btgt,
21459+ au_opt_udba(a->src_dentry->d_sb),
21460+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 21461+ if (!err) {
c2b27bf2
AM
21462+ struct au_cp_generic cpg = {
21463+ .dentry = a->src_dentry,
21464+ .bdst = a->btgt,
5afbbe0d 21465+ .bsrc = a->src_btop,
c2b27bf2
AM
21466+ .len = -1,
21467+ .pin = &pin,
21468+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21469+ };
5afbbe0d 21470+ AuDebugOn(au_dbtop(a->src_dentry) != a->src_btop);
c2b27bf2 21471+ err = au_sio_cpup_simple(&cpg);
367653fa 21472+ au_unpin(&pin);
86dc4139 21473+ }
86dc4139
AM
21474+ if (unlikely(err))
21475+ goto out_children;
5afbbe0d 21476+ a->src_btop = a->btgt;
86dc4139 21477+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
f2c43d5f
AM
21478+ if (!a->exchange)
21479+ au_fset_ren(a->auren_flags, WHSRC);
21480+ }
21481+
21482+ /* cpup dst */
21483+ if (a->exchange && a->dst_inode
21484+ && a->dst_btop != a->btgt) {
21485+ err = au_pin(&pin, a->dst_dentry, a->btgt,
21486+ au_opt_udba(a->dst_dentry->d_sb),
21487+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
21488+ if (!err) {
21489+ struct au_cp_generic cpg = {
21490+ .dentry = a->dst_dentry,
21491+ .bdst = a->btgt,
21492+ .bsrc = a->dst_btop,
21493+ .len = -1,
21494+ .pin = &pin,
21495+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21496+ };
21497+ err = au_sio_cpup_simple(&cpg);
21498+ au_unpin(&pin);
21499+ }
21500+ if (unlikely(err))
21501+ goto out_children;
21502+ a->dst_btop = a->btgt;
21503+ a->dst_h_dentry = au_h_dptr(a->dst_dentry, a->btgt);
86dc4139
AM
21504+ }
21505+
4a4d8108
AM
21506+ /* lock them all */
21507+ err = au_ren_lock(a);
21508+ if (unlikely(err))
86dc4139 21509+ /* leave the copied-up one */
4a4d8108 21510+ goto out_children;
1facf9fc 21511+
f2c43d5f
AM
21512+ if (!a->exchange) {
21513+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
21514+ err = au_may_ren(a);
21515+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
21516+ err = -ENAMETOOLONG;
21517+ if (unlikely(err))
21518+ goto out_hdir;
21519+ }
1facf9fc 21520+
4a4d8108
AM
21521+ /* store timestamps to be revertible */
21522+ au_ren_dt(a);
1facf9fc 21523+
4a4d8108
AM
21524+ /* here we go */
21525+ err = do_rename(a);
21526+ if (unlikely(err))
21527+ goto out_dt;
21528+
21529+ /* update dir attributes */
21530+ au_ren_refresh_dir(a);
21531+
21532+ /* dput/iput all lower dentries */
21533+ au_ren_refresh(a);
21534+
21535+ goto out_hdir; /* success */
21536+
4f0767ce 21537+out_dt:
4a4d8108 21538+ au_ren_rev_dt(err, a);
4f0767ce 21539+out_hdir:
4a4d8108 21540+ au_ren_unlock(a);
4f0767ce 21541+out_children:
4a4d8108 21542+ au_nhash_wh_free(&a->whlist);
5afbbe0d
AM
21543+ if (err && a->dst_inode && a->dst_btop != a->btgt) {
21544+ AuDbg("btop %d, btgt %d\n", a->dst_btop, a->btgt);
027c5e7a 21545+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
5afbbe0d 21546+ au_set_dbtop(a->dst_dentry, a->dst_btop);
4a4d8108 21547+ }
027c5e7a 21548+out_parent:
f2c43d5f
AM
21549+ if (!err) {
21550+ if (!a->exchange)
21551+ d_move(a->src_dentry, a->dst_dentry);
21552+ else
21553+ d_exchange(a->src_dentry, a->dst_dentry);
21554+ } else {
5afbbe0d 21555+ au_update_dbtop(a->dst_dentry);
027c5e7a
AM
21556+ if (!a->dst_inode)
21557+ d_drop(a->dst_dentry);
21558+ }
f2c43d5f 21559+ if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
4a4d8108
AM
21560+ di_write_unlock(a->dst_parent);
21561+ else
21562+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 21563+out_unlock:
4a4d8108 21564+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 21565+out_free:
4a4d8108
AM
21566+ iput(a->dst_inode);
21567+ if (a->thargs)
21568+ au_whtmp_rmdir_free(a->thargs);
f0c0a007 21569+ au_delayed_kfree(a);
4f0767ce 21570+out:
4a4d8108
AM
21571+ AuTraceErr(err);
21572+ return err;
1308ab2a 21573+}
7f207e10
AM
21574diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
21575--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
e2f27e51 21576+++ linux/fs/aufs/Kconfig 2016-10-09 16:55:36.482701377 +0200
c1595e42 21577@@ -0,0 +1,185 @@
4a4d8108
AM
21578+config AUFS_FS
21579+ tristate "Aufs (Advanced multi layered unification filesystem) support"
4a4d8108
AM
21580+ help
21581+ Aufs is a stackable unification filesystem such as Unionfs,
21582+ which unifies several directories and provides a merged single
21583+ directory.
21584+ In the early days, aufs was entirely re-designed and
21585+ re-implemented Unionfs Version 1.x series. Introducing many
21586+ original ideas, approaches and improvements, it becomes totally
21587+ different from Unionfs while keeping the basic features.
1facf9fc 21588+
4a4d8108
AM
21589+if AUFS_FS
21590+choice
21591+ prompt "Maximum number of branches"
21592+ default AUFS_BRANCH_MAX_127
21593+ help
21594+ Specifies the maximum number of branches (or member directories)
21595+ in a single aufs. The larger value consumes more system
21596+ resources and has a minor impact to performance.
21597+config AUFS_BRANCH_MAX_127
21598+ bool "127"
21599+ help
21600+ Specifies the maximum number of branches (or member directories)
21601+ in a single aufs. The larger value consumes more system
21602+ resources and has a minor impact to performance.
21603+config AUFS_BRANCH_MAX_511
21604+ bool "511"
21605+ help
21606+ Specifies the maximum number of branches (or member directories)
21607+ in a single aufs. The larger value consumes more system
21608+ resources and has a minor impact to performance.
21609+config AUFS_BRANCH_MAX_1023
21610+ bool "1023"
21611+ help
21612+ Specifies the maximum number of branches (or member directories)
21613+ in a single aufs. The larger value consumes more system
21614+ resources and has a minor impact to performance.
21615+config AUFS_BRANCH_MAX_32767
21616+ bool "32767"
21617+ help
21618+ Specifies the maximum number of branches (or member directories)
21619+ in a single aufs. The larger value consumes more system
21620+ resources and has a minor impact to performance.
21621+endchoice
1facf9fc 21622+
e49829fe
JR
21623+config AUFS_SBILIST
21624+ bool
21625+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
21626+ default y
21627+ help
21628+ Automatic configuration for internal use.
21629+ When aufs supports Magic SysRq or /proc, enabled automatically.
21630+
4a4d8108
AM
21631+config AUFS_HNOTIFY
21632+ bool "Detect direct branch access (bypassing aufs)"
21633+ help
21634+ If you want to modify files on branches directly, eg. bypassing aufs,
21635+ and want aufs to detect the changes of them fully, then enable this
21636+ option and use 'udba=notify' mount option.
7f207e10 21637+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
21638+ It will have a negative impact to the performance.
21639+ See detail in aufs.5.
dece6358 21640+
4a4d8108
AM
21641+choice
21642+ prompt "method" if AUFS_HNOTIFY
21643+ default AUFS_HFSNOTIFY
21644+config AUFS_HFSNOTIFY
21645+ bool "fsnotify"
21646+ select FSNOTIFY
4a4d8108 21647+endchoice
1facf9fc 21648+
4a4d8108
AM
21649+config AUFS_EXPORT
21650+ bool "NFS-exportable aufs"
2cbb1c4b 21651+ depends on EXPORTFS
4a4d8108
AM
21652+ help
21653+ If you want to export your mounted aufs via NFS, then enable this
21654+ option. There are several requirements for this configuration.
21655+ See detail in aufs.5.
1facf9fc 21656+
4a4d8108
AM
21657+config AUFS_INO_T_64
21658+ bool
21659+ depends on AUFS_EXPORT
21660+ depends on 64BIT && !(ALPHA || S390)
21661+ default y
21662+ help
21663+ Automatic configuration for internal use.
21664+ /* typedef unsigned long/int __kernel_ino_t */
21665+ /* alpha and s390x are int */
1facf9fc 21666+
c1595e42
JR
21667+config AUFS_XATTR
21668+ bool "support for XATTR/EA (including Security Labels)"
21669+ help
21670+ If your branch fs supports XATTR/EA and you want to make them
21671+ available in aufs too, then enable this option and specify the
21672+ branch attributes for EA.
21673+ See detail in aufs.5.
21674+
076b876e
AM
21675+config AUFS_FHSM
21676+ bool "File-based Hierarchical Storage Management"
21677+ help
21678+ Hierarchical Storage Management (or HSM) is a well-known feature
21679+ in the storage world. Aufs provides this feature as file-based
21680+ with multiple branches.
21681+ These multiple branches are prioritized, ie. the topmost one
21682+ should be the fastest drive and be used heavily.
21683+
4a4d8108
AM
21684+config AUFS_RDU
21685+ bool "Readdir in userspace"
21686+ help
21687+ Aufs has two methods to provide a merged view for a directory,
21688+ by a user-space library and by kernel-space natively. The latter
21689+ is always enabled but sometimes large and slow.
21690+ If you enable this option, install the library in aufs2-util
21691+ package, and set some environment variables for your readdir(3),
21692+ then the work will be handled in user-space which generally
21693+ shows better performance in most cases.
21694+ See detail in aufs.5.
1facf9fc 21695+
4a4d8108
AM
21696+config AUFS_SHWH
21697+ bool "Show whiteouts"
21698+ help
21699+ If you want to make the whiteouts in aufs visible, then enable
21700+ this option and specify 'shwh' mount option. Although it may
21701+ sound like philosophy or something, technically it
21702+ simply shows the name of whiteout with keeping its behaviour.
1facf9fc 21703+
4a4d8108
AM
21704+config AUFS_BR_RAMFS
21705+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
21706+ help
21707+ If you want to use ramfs as an aufs branch fs, then enable this
21708+ option. Generally tmpfs is recommended.
21709+ Aufs prohibited them to be a branch fs by default, because
21710+ initramfs becomes unusable after switch_root or something
21711+ generally. If you set initramfs as an aufs branch and boot your
21712+ system by switch_root, you will meet a problem easily since the
21713+ files in initramfs may be inaccessible.
21714+ Unless you are going to use ramfs as an aufs branch fs without
21715+ switch_root or something, leave it N.
1facf9fc 21716+
4a4d8108
AM
21717+config AUFS_BR_FUSE
21718+ bool "Fuse fs as an aufs branch"
21719+ depends on FUSE_FS
21720+ select AUFS_POLL
21721+ help
21722+ If you want to use fuse-based userspace filesystem as an aufs
21723+ branch fs, then enable this option.
21724+ It implements the internal poll(2) operation which is
21725+ implemented by fuse only (currently).
1facf9fc 21726+
4a4d8108
AM
21727+config AUFS_POLL
21728+ bool
21729+ help
21730+ Automatic configuration for internal use.
1facf9fc 21731+
4a4d8108
AM
21732+config AUFS_BR_HFSPLUS
21733+ bool "Hfsplus as an aufs branch"
21734+ depends on HFSPLUS_FS
21735+ default y
21736+ help
21737+ If you want to use hfsplus fs as an aufs branch fs, then enable
21738+ this option. This option introduces a small overhead at
21739+ copying-up a file on hfsplus.
1facf9fc 21740+
4a4d8108
AM
21741+config AUFS_BDEV_LOOP
21742+ bool
21743+ depends on BLK_DEV_LOOP
21744+ default y
21745+ help
21746+ Automatic configuration for internal use.
21747+ Convert =[ym] into =y.
1308ab2a 21748+
4a4d8108
AM
21749+config AUFS_DEBUG
21750+ bool "Debug aufs"
21751+ help
21752+ Enable this to compile aufs internal debug code.
21753+ It will have a negative impact to the performance.
21754+
21755+config AUFS_MAGIC_SYSRQ
21756+ bool
21757+ depends on AUFS_DEBUG && MAGIC_SYSRQ
21758+ default y
21759+ help
21760+ Automatic configuration for internal use.
21761+ When aufs supports Magic SysRq, enabled automatically.
21762+endif
7f207e10
AM
21763diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
21764--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
21765+++ linux/fs/aufs/loop.c 2016-10-09 16:55:38.889431135 +0200
21766@@ -0,0 +1,147 @@
1facf9fc 21767+/*
8cdd5066 21768+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21769+ *
21770+ * This program, aufs is free software; you can redistribute it and/or modify
21771+ * it under the terms of the GNU General Public License as published by
21772+ * the Free Software Foundation; either version 2 of the License, or
21773+ * (at your option) any later version.
dece6358
AM
21774+ *
21775+ * This program is distributed in the hope that it will be useful,
21776+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21777+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21778+ * GNU General Public License for more details.
21779+ *
21780+ * You should have received a copy of the GNU General Public License
523b37e3 21781+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21782+ */
21783+
21784+/*
21785+ * support for loopback block device as a branch
21786+ */
21787+
1facf9fc 21788+#include "aufs.h"
21789+
392086de
AM
21790+/* added into drivers/block/loop.c */
21791+static struct file *(*backing_file_func)(struct super_block *sb);
21792+
1facf9fc 21793+/*
21794+ * test if two lower dentries have overlapping branches.
21795+ */
b752ccd1 21796+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 21797+{
b752ccd1 21798+ struct super_block *h_sb;
392086de
AM
21799+ struct file *backing_file;
21800+
21801+ if (unlikely(!backing_file_func)) {
21802+ /* don't load "loop" module here */
21803+ backing_file_func = symbol_get(loop_backing_file);
21804+ if (unlikely(!backing_file_func))
21805+ /* "loop" module is not loaded */
21806+ return 0;
21807+ }
1facf9fc 21808+
b752ccd1 21809+ h_sb = h_adding->d_sb;
392086de
AM
21810+ backing_file = backing_file_func(h_sb);
21811+ if (!backing_file)
1facf9fc 21812+ return 0;
21813+
2000de60 21814+ h_adding = backing_file->f_path.dentry;
b752ccd1
AM
21815+ /*
21816+ * h_adding can be local NFS.
21817+ * in this case aufs cannot detect the loop.
21818+ */
21819+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 21820+ return 1;
b752ccd1 21821+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 21822+}
21823+
21824+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
21825+int au_test_loopback_kthread(void)
21826+{
b752ccd1
AM
21827+ int ret;
21828+ struct task_struct *tsk = current;
a2a7ad62 21829+ char c, comm[sizeof(tsk->comm)];
b752ccd1
AM
21830+
21831+ ret = 0;
21832+ if (tsk->flags & PF_KTHREAD) {
a2a7ad62
AM
21833+ get_task_comm(comm, tsk);
21834+ c = comm[4];
b752ccd1 21835+ ret = ('0' <= c && c <= '9'
a2a7ad62 21836+ && !strncmp(comm, "loop", 4));
b752ccd1 21837+ }
1facf9fc 21838+
b752ccd1 21839+ return ret;
1facf9fc 21840+}
87a755f4
AM
21841+
21842+/* ---------------------------------------------------------------------- */
21843+
21844+#define au_warn_loopback_step 16
21845+static int au_warn_loopback_nelem = au_warn_loopback_step;
21846+static unsigned long *au_warn_loopback_array;
21847+
21848+void au_warn_loopback(struct super_block *h_sb)
21849+{
21850+ int i, new_nelem;
21851+ unsigned long *a, magic;
21852+ static DEFINE_SPINLOCK(spin);
21853+
21854+ magic = h_sb->s_magic;
21855+ spin_lock(&spin);
21856+ a = au_warn_loopback_array;
21857+ for (i = 0; i < au_warn_loopback_nelem && *a; i++)
21858+ if (a[i] == magic) {
21859+ spin_unlock(&spin);
21860+ return;
21861+ }
21862+
21863+ /* h_sb is new to us, print it */
21864+ if (i < au_warn_loopback_nelem) {
21865+ a[i] = magic;
21866+ goto pr;
21867+ }
21868+
21869+ /* expand the array */
21870+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
21871+ a = au_kzrealloc(au_warn_loopback_array,
21872+ au_warn_loopback_nelem * sizeof(unsigned long),
e2f27e51
AM
21873+ new_nelem * sizeof(unsigned long), GFP_ATOMIC,
21874+ /*may_shrink*/0);
87a755f4
AM
21875+ if (a) {
21876+ au_warn_loopback_nelem = new_nelem;
21877+ au_warn_loopback_array = a;
21878+ a[i] = magic;
21879+ goto pr;
21880+ }
21881+
21882+ spin_unlock(&spin);
21883+ AuWarn1("realloc failed, ignored\n");
21884+ return;
21885+
21886+pr:
21887+ spin_unlock(&spin);
0c3ec466
AM
21888+ pr_warn("you may want to try another patch for loopback file "
21889+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
87a755f4
AM
21890+}
21891+
21892+int au_loopback_init(void)
21893+{
21894+ int err;
21895+ struct super_block *sb __maybe_unused;
21896+
79b8bda9 21897+ BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
87a755f4
AM
21898+
21899+ err = 0;
21900+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
21901+ sizeof(unsigned long), GFP_NOFS);
21902+ if (unlikely(!au_warn_loopback_array))
21903+ err = -ENOMEM;
21904+
21905+ return err;
21906+}
21907+
21908+void au_loopback_fin(void)
21909+{
79b8bda9
AM
21910+ if (backing_file_func)
21911+ symbol_put(loop_backing_file);
f0c0a007 21912+ au_delayed_kfree(au_warn_loopback_array);
87a755f4 21913+}
7f207e10
AM
21914diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
21915--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 21916+++ linux/fs/aufs/loop.h 2016-10-09 16:55:36.492701639 +0200
523b37e3 21917@@ -0,0 +1,52 @@
1facf9fc 21918+/*
8cdd5066 21919+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21920+ *
21921+ * This program, aufs is free software; you can redistribute it and/or modify
21922+ * it under the terms of the GNU General Public License as published by
21923+ * the Free Software Foundation; either version 2 of the License, or
21924+ * (at your option) any later version.
dece6358
AM
21925+ *
21926+ * This program is distributed in the hope that it will be useful,
21927+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21928+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21929+ * GNU General Public License for more details.
21930+ *
21931+ * You should have received a copy of the GNU General Public License
523b37e3 21932+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21933+ */
21934+
21935+/*
21936+ * support for loopback mount as a branch
21937+ */
21938+
21939+#ifndef __AUFS_LOOP_H__
21940+#define __AUFS_LOOP_H__
21941+
21942+#ifdef __KERNEL__
21943+
dece6358
AM
21944+struct dentry;
21945+struct super_block;
1facf9fc 21946+
21947+#ifdef CONFIG_AUFS_BDEV_LOOP
392086de
AM
21948+/* drivers/block/loop.c */
21949+struct file *loop_backing_file(struct super_block *sb);
21950+
1facf9fc 21951+/* loop.c */
b752ccd1 21952+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 21953+int au_test_loopback_kthread(void);
87a755f4
AM
21954+void au_warn_loopback(struct super_block *h_sb);
21955+
21956+int au_loopback_init(void);
21957+void au_loopback_fin(void);
1facf9fc 21958+#else
4a4d8108 21959+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 21960+ struct dentry *h_adding)
4a4d8108 21961+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
21962+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
21963+
21964+AuStubInt0(au_loopback_init, void)
21965+AuStubVoid(au_loopback_fin, void)
1facf9fc 21966+#endif /* CONFIG_AUFS_BDEV_LOOP */
21967+
21968+#endif /* __KERNEL__ */
21969+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
21970diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
21971--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
e2f27e51 21972+++ linux/fs/aufs/magic.mk 2016-10-09 16:55:36.492701639 +0200
7e9cd9fe 21973@@ -0,0 +1,30 @@
1facf9fc 21974+
21975+# defined in ${srctree}/fs/fuse/inode.c
21976+# tristate
21977+ifdef CONFIG_FUSE_FS
21978+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
21979+endif
21980+
1facf9fc 21981+# defined in ${srctree}/fs/xfs/xfs_sb.h
21982+# tristate
21983+ifdef CONFIG_XFS_FS
21984+ccflags-y += -DXFS_SB_MAGIC=0x58465342
21985+endif
21986+
21987+# defined in ${srctree}/fs/configfs/mount.c
21988+# tristate
21989+ifdef CONFIG_CONFIGFS_FS
21990+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
21991+endif
21992+
1facf9fc 21993+# defined in ${srctree}/fs/ubifs/ubifs.h
21994+# tristate
21995+ifdef CONFIG_UBIFS_FS
21996+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
21997+endif
4a4d8108
AM
21998+
21999+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
22000+# tristate
22001+ifdef CONFIG_HFSPLUS_FS
22002+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
22003+endif
7f207e10
AM
22004diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
22005--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
e2f27e51 22006+++ linux/fs/aufs/Makefile 2016-10-09 16:55:36.486034798 +0200
c1595e42 22007@@ -0,0 +1,44 @@
4a4d8108
AM
22008+
22009+include ${src}/magic.mk
22010+ifeq (${CONFIG_AUFS_FS},m)
22011+include ${src}/conf.mk
22012+endif
22013+-include ${src}/priv_def.mk
22014+
22015+# cf. include/linux/kernel.h
22016+# enable pr_debug
22017+ccflags-y += -DDEBUG
f6c5ef8b
AM
22018+# sparse requires the full pathname
22019+ifdef M
523b37e3 22020+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
f6c5ef8b 22021+else
523b37e3 22022+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
f6c5ef8b 22023+endif
4a4d8108
AM
22024+
22025+obj-$(CONFIG_AUFS_FS) += aufs.o
22026+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
22027+ wkq.o vfsub.o dcsub.o \
e49829fe 22028+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
22029+ dinfo.o dentry.o \
22030+ dynop.o \
22031+ finfo.o file.o f_op.o \
22032+ dir.o vdir.o \
22033+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
c2b27bf2 22034+ mvdown.o ioctl.o
4a4d8108
AM
22035+
22036+# all are boolean
e49829fe 22037+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
22038+aufs-$(CONFIG_SYSFS) += sysfs.o
22039+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
22040+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
22041+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
22042+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108 22043+aufs-$(CONFIG_AUFS_EXPORT) += export.o
c1595e42
JR
22044+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
22045+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
076b876e 22046+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
4a4d8108
AM
22047+aufs-$(CONFIG_AUFS_POLL) += poll.o
22048+aufs-$(CONFIG_AUFS_RDU) += rdu.o
4a4d8108
AM
22049+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
22050+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
22051+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
22052diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
22053--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
22054+++ linux/fs/aufs/module.c 2016-10-09 16:55:38.889431135 +0200
22055@@ -0,0 +1,333 @@
1facf9fc 22056+/*
8cdd5066 22057+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 22058+ *
22059+ * This program, aufs is free software; you can redistribute it and/or modify
22060+ * it under the terms of the GNU General Public License as published by
22061+ * the Free Software Foundation; either version 2 of the License, or
22062+ * (at your option) any later version.
dece6358
AM
22063+ *
22064+ * This program is distributed in the hope that it will be useful,
22065+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22066+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22067+ * GNU General Public License for more details.
22068+ *
22069+ * You should have received a copy of the GNU General Public License
523b37e3 22070+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 22071+ */
22072+
22073+/*
22074+ * module global variables and operations
22075+ */
22076+
22077+#include <linux/module.h>
22078+#include <linux/seq_file.h>
22079+#include "aufs.h"
22080+
e2f27e51
AM
22081+/* shrinkable realloc */
22082+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink)
1facf9fc 22083+{
e2f27e51
AM
22084+ size_t sz;
22085+ int diff;
1facf9fc 22086+
e2f27e51
AM
22087+ sz = 0;
22088+ diff = -1;
22089+ if (p) {
22090+#if 0 /* unused */
22091+ if (!new_sz) {
22092+ au_delayed_kfree(p);
22093+ p = NULL;
22094+ goto out;
22095+ }
22096+#else
22097+ AuDebugOn(!new_sz);
22098+#endif
22099+ sz = ksize(p);
22100+ diff = au_kmidx_sub(sz, new_sz);
22101+ }
22102+ if (sz && !diff)
22103+ goto out;
22104+
22105+ if (sz < new_sz)
22106+ /* expand or SLOB */
22107+ p = krealloc(p, new_sz, gfp);
22108+ else if (new_sz < sz && may_shrink) {
22109+ /* shrink */
22110+ void *q;
22111+
22112+ q = kmalloc(new_sz, gfp);
22113+ if (q) {
22114+ if (p) {
22115+ memcpy(q, p, new_sz);
22116+ au_delayed_kfree(p);
22117+ }
22118+ p = q;
22119+ } else
22120+ p = NULL;
22121+ }
22122+
22123+out:
22124+ return p;
22125+}
22126+
22127+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
22128+ int may_shrink)
22129+{
22130+ p = au_krealloc(p, new_sz, gfp, may_shrink);
22131+ if (p && new_sz > nused)
1facf9fc 22132+ memset(p + nused, 0, new_sz - nused);
22133+ return p;
22134+}
22135+
22136+/* ---------------------------------------------------------------------- */
1facf9fc 22137+/*
22138+ * aufs caches
22139+ */
f0c0a007
AM
22140+
22141+struct au_dfree au_dfree;
22142+
22143+/* delayed free */
22144+static void au_do_dfree(struct work_struct *work __maybe_unused)
22145+{
22146+ struct llist_head *head;
22147+ struct llist_node *node, *next;
22148+
22149+#define AU_CACHE_DFREE_DO_BODY(name, idx, lnode) do { \
22150+ head = &au_dfree.cache[AuCache_##idx].llist; \
22151+ node = llist_del_all(head); \
22152+ for (; node; node = next) { \
e2f27e51
AM
22153+ struct au_##name *p \
22154+ = llist_entry(node, struct au_##name, \
22155+ lnode); \
f0c0a007
AM
22156+ next = llist_next(node); \
22157+ au_cache_free_##name(p); \
22158+ } \
22159+ } while (0)
22160+
22161+ AU_CACHE_DFREE_DO_BODY(dinfo, DINFO, di_lnode);
22162+ AU_CACHE_DFREE_DO_BODY(icntnr, ICNTNR, lnode);
22163+ AU_CACHE_DFREE_DO_BODY(finfo, FINFO, fi_lnode);
22164+ AU_CACHE_DFREE_DO_BODY(vdir, VDIR, vd_lnode);
22165+ AU_CACHE_DFREE_DO_BODY(vdir_dehstr, DEHSTR, lnode);
22166+#ifdef CONFIG_AUFS_HNOTIFY
22167+ AU_CACHE_DFREE_DO_BODY(hnotify, HNOTIFY, hn_lnode);
22168+#endif
22169+
22170+#define AU_DFREE_DO_BODY(llist, func) do { \
22171+ node = llist_del_all(llist); \
22172+ for (; node; node = next) { \
22173+ next = llist_next(node); \
22174+ func(node); \
22175+ } \
22176+ } while (0)
22177+
22178+ AU_DFREE_DO_BODY(au_dfree.llist + AU_DFREE_KFREE, kfree);
22179+ AU_DFREE_DO_BODY(au_dfree.llist + AU_DFREE_FREE_PAGE, au_free_page);
22180+
22181+#undef AU_CACHE_DFREE_DO_BODY
22182+#undef AU_DFREE_DO_BODY
22183+}
22184+
22185+AU_CACHE_DFREE_FUNC(dinfo, DINFO, di_lnode);
22186+AU_CACHE_DFREE_FUNC(icntnr, ICNTNR, lnode);
22187+AU_CACHE_DFREE_FUNC(finfo, FINFO, fi_lnode);
22188+AU_CACHE_DFREE_FUNC(vdir, VDIR, vd_lnode);
22189+AU_CACHE_DFREE_FUNC(vdir_dehstr, DEHSTR, lnode);
5afbbe0d
AM
22190+
22191+static void au_cache_fin(void)
22192+{
22193+ int i;
f0c0a007 22194+ struct au_cache *cp;
5afbbe0d
AM
22195+
22196+ /*
22197+ * Make sure all delayed rcu free inodes are flushed before we
22198+ * destroy cache.
22199+ */
22200+ rcu_barrier();
22201+
22202+ /* excluding AuCache_HNOTIFY */
22203+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
f0c0a007 22204+ flush_delayed_work(&au_dfree.dwork);
5afbbe0d 22205+ for (i = 0; i < AuCache_HNOTIFY; i++) {
f0c0a007
AM
22206+ cp = au_dfree.cache + i;
22207+ AuDebugOn(!llist_empty(&cp->llist));
22208+ kmem_cache_destroy(cp->cache);
22209+ cp->cache = NULL;
5afbbe0d
AM
22210+ }
22211+}
22212+
1facf9fc 22213+static int __init au_cache_init(void)
22214+{
f0c0a007
AM
22215+ struct au_cache *cp;
22216+
22217+ cp = au_dfree.cache;
22218+ cp[AuCache_DINFO].cache = AuCacheCtor(au_dinfo, au_di_init_once);
22219+ if (cp[AuCache_DINFO].cache)
027c5e7a 22220+ /* SLAB_DESTROY_BY_RCU */
f0c0a007
AM
22221+ cp[AuCache_ICNTNR].cache = AuCacheCtor(au_icntnr,
22222+ au_icntnr_init_once);
22223+ if (cp[AuCache_ICNTNR].cache)
22224+ cp[AuCache_FINFO].cache = AuCacheCtor(au_finfo,
22225+ au_fi_init_once);
22226+ if (cp[AuCache_FINFO].cache)
22227+ cp[AuCache_VDIR].cache = AuCache(au_vdir);
22228+ if (cp[AuCache_VDIR].cache)
22229+ cp[AuCache_DEHSTR].cache = AuCache(au_vdir_dehstr);
22230+ if (cp[AuCache_DEHSTR].cache)
1facf9fc 22231+ return 0;
22232+
5afbbe0d 22233+ au_cache_fin();
1facf9fc 22234+ return -ENOMEM;
22235+}
22236+
1facf9fc 22237+/* ---------------------------------------------------------------------- */
22238+
22239+int au_dir_roflags;
22240+
e49829fe 22241+#ifdef CONFIG_AUFS_SBILIST
1e00d052
AM
22242+/*
22243+ * iterate_supers_type() doesn't protect us from
22244+ * remounting (branch management)
22245+ */
5afbbe0d 22246+struct au_sphlhead au_sbilist;
e49829fe
JR
22247+#endif
22248+
1facf9fc 22249+/*
22250+ * functions for module interface.
22251+ */
22252+MODULE_LICENSE("GPL");
22253+/* MODULE_LICENSE("GPL v2"); */
dece6358 22254+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 22255+MODULE_DESCRIPTION(AUFS_NAME
22256+ " -- Advanced multi layered unification filesystem");
22257+MODULE_VERSION(AUFS_VERSION);
c06a8ce3 22258+MODULE_ALIAS_FS(AUFS_NAME);
1facf9fc 22259+
1facf9fc 22260+/* this module parameter has no meaning when SYSFS is disabled */
22261+int sysaufs_brs = 1;
22262+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
22263+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
22264+
076b876e 22265+/* this module parameter has no meaning when USER_NS is disabled */
8cdd5066 22266+bool au_userns;
076b876e
AM
22267+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
22268+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
22269+
1facf9fc 22270+/* ---------------------------------------------------------------------- */
22271+
22272+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
22273+
22274+int au_seq_path(struct seq_file *seq, struct path *path)
22275+{
79b8bda9
AM
22276+ int err;
22277+
22278+ err = seq_path(seq, path, au_esc_chars);
22279+ if (err > 0)
22280+ err = 0;
22281+ else if (err < 0)
22282+ err = -ENOMEM;
22283+
22284+ return err;
1facf9fc 22285+}
22286+
22287+/* ---------------------------------------------------------------------- */
22288+
22289+static int __init aufs_init(void)
22290+{
22291+ int err, i;
22292+ char *p;
f0c0a007 22293+ struct au_cache *cp;
1facf9fc 22294+
22295+ p = au_esc_chars;
22296+ for (i = 1; i <= ' '; i++)
22297+ *p++ = i;
22298+ *p++ = '\\';
22299+ *p++ = '\x7f';
22300+ *p = 0;
22301+
22302+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
22303+
b95c5147
AM
22304+ memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
22305+ for (i = 0; i < AuIop_Last; i++)
22306+ aufs_iop_nogetattr[i].getattr = NULL;
22307+
f0c0a007
AM
22308+ /* First, initialize au_dfree */
22309+ for (i = 0; i < AuCache_Last; i++) { /* including hnotify */
22310+ cp = au_dfree.cache + i;
22311+ cp->cache = NULL;
22312+ init_llist_head(&cp->llist);
22313+ }
22314+ for (i = 0; i < AU_DFREE_Last; i++)
22315+ init_llist_head(au_dfree.llist + i);
22316+ INIT_DELAYED_WORK(&au_dfree.dwork, au_do_dfree);
22317+
e49829fe 22318+ au_sbilist_init();
1facf9fc 22319+ sysaufs_brs_init();
22320+ au_debug_init();
4a4d8108 22321+ au_dy_init();
1facf9fc 22322+ err = sysaufs_init();
22323+ if (unlikely(err))
22324+ goto out;
e49829fe 22325+ err = au_procfs_init();
4f0767ce 22326+ if (unlikely(err))
953406b4 22327+ goto out_sysaufs;
e49829fe
JR
22328+ err = au_wkq_init();
22329+ if (unlikely(err))
22330+ goto out_procfs;
87a755f4 22331+ err = au_loopback_init();
1facf9fc 22332+ if (unlikely(err))
22333+ goto out_wkq;
87a755f4
AM
22334+ err = au_hnotify_init();
22335+ if (unlikely(err))
22336+ goto out_loopback;
1facf9fc 22337+ err = au_sysrq_init();
22338+ if (unlikely(err))
22339+ goto out_hin;
22340+ err = au_cache_init();
22341+ if (unlikely(err))
22342+ goto out_sysrq;
076b876e
AM
22343+
22344+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
1facf9fc 22345+ err = register_filesystem(&aufs_fs_type);
22346+ if (unlikely(err))
22347+ goto out_cache;
076b876e 22348+
4a4d8108
AM
22349+ /* since we define pr_fmt, call printk directly */
22350+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 22351+ goto out; /* success */
22352+
4f0767ce 22353+out_cache:
1facf9fc 22354+ au_cache_fin();
4f0767ce 22355+out_sysrq:
1facf9fc 22356+ au_sysrq_fin();
4f0767ce 22357+out_hin:
4a4d8108 22358+ au_hnotify_fin();
87a755f4
AM
22359+out_loopback:
22360+ au_loopback_fin();
4f0767ce 22361+out_wkq:
1facf9fc 22362+ au_wkq_fin();
e49829fe
JR
22363+out_procfs:
22364+ au_procfs_fin();
4f0767ce 22365+out_sysaufs:
1facf9fc 22366+ sysaufs_fin();
4a4d8108 22367+ au_dy_fin();
f0c0a007 22368+ flush_delayed_work(&au_dfree.dwork);
4f0767ce 22369+out:
1facf9fc 22370+ return err;
22371+}
22372+
22373+static void __exit aufs_exit(void)
22374+{
22375+ unregister_filesystem(&aufs_fs_type);
22376+ au_cache_fin();
22377+ au_sysrq_fin();
4a4d8108 22378+ au_hnotify_fin();
87a755f4 22379+ au_loopback_fin();
1facf9fc 22380+ au_wkq_fin();
e49829fe 22381+ au_procfs_fin();
1facf9fc 22382+ sysaufs_fin();
4a4d8108 22383+ au_dy_fin();
f0c0a007 22384+ flush_delayed_work(&au_dfree.dwork);
1facf9fc 22385+}
22386+
22387+module_init(aufs_init);
22388+module_exit(aufs_exit);
7f207e10
AM
22389diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
22390--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
22391+++ linux/fs/aufs/module.h 2016-10-09 16:55:38.889431135 +0200
22392@@ -0,0 +1,156 @@
1facf9fc 22393+/*
8cdd5066 22394+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 22395+ *
22396+ * This program, aufs is free software; you can redistribute it and/or modify
22397+ * it under the terms of the GNU General Public License as published by
22398+ * the Free Software Foundation; either version 2 of the License, or
22399+ * (at your option) any later version.
dece6358
AM
22400+ *
22401+ * This program is distributed in the hope that it will be useful,
22402+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22403+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22404+ * GNU General Public License for more details.
22405+ *
22406+ * You should have received a copy of the GNU General Public License
523b37e3 22407+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 22408+ */
22409+
22410+/*
22411+ * module initialization and module-global
22412+ */
22413+
22414+#ifndef __AUFS_MODULE_H__
22415+#define __AUFS_MODULE_H__
22416+
22417+#ifdef __KERNEL__
22418+
22419+#include <linux/slab.h>
f0c0a007 22420+#include "debug.h"
1facf9fc 22421+
dece6358
AM
22422+struct path;
22423+struct seq_file;
22424+
1facf9fc 22425+/* module parameters */
1facf9fc 22426+extern int sysaufs_brs;
8cdd5066 22427+extern bool au_userns;
1facf9fc 22428+
22429+/* ---------------------------------------------------------------------- */
22430+
22431+extern int au_dir_roflags;
22432+
e2f27e51
AM
22433+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink);
22434+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
22435+ int may_shrink);
22436+
22437+static inline int au_kmidx_sub(size_t sz, size_t new_sz)
22438+{
22439+#ifndef CONFIG_SLOB
22440+ return kmalloc_index(sz) - kmalloc_index(new_sz);
22441+#else
22442+ return -1; /* SLOB is untested */
22443+#endif
22444+}
22445+
1facf9fc 22446+int au_seq_path(struct seq_file *seq, struct path *path);
22447+
e49829fe
JR
22448+#ifdef CONFIG_PROC_FS
22449+/* procfs.c */
22450+int __init au_procfs_init(void);
22451+void au_procfs_fin(void);
22452+#else
22453+AuStubInt0(au_procfs_init, void);
22454+AuStubVoid(au_procfs_fin, void);
22455+#endif
22456+
4f0767ce
JR
22457+/* ---------------------------------------------------------------------- */
22458+
f0c0a007 22459+/* kmem cache and delayed free */
1facf9fc 22460+enum {
22461+ AuCache_DINFO,
22462+ AuCache_ICNTNR,
22463+ AuCache_FINFO,
22464+ AuCache_VDIR,
22465+ AuCache_DEHSTR,
7eafdf33 22466+ AuCache_HNOTIFY, /* must be last */
1facf9fc 22467+ AuCache_Last
22468+};
22469+
f0c0a007
AM
22470+enum {
22471+ AU_DFREE_KFREE,
22472+ AU_DFREE_FREE_PAGE,
22473+ AU_DFREE_Last
22474+};
22475+
22476+struct au_cache {
22477+ struct kmem_cache *cache;
22478+ struct llist_head llist; /* delayed free */
22479+};
22480+
22481+/*
22482+ * in order to reduce the cost of the internal timer, consolidate all the
22483+ * delayed free works into a single delayed_work.
22484+ */
22485+struct au_dfree {
22486+ struct au_cache cache[AuCache_Last];
22487+ struct llist_head llist[AU_DFREE_Last];
22488+ struct delayed_work dwork;
22489+};
22490+
22491+extern struct au_dfree au_dfree;
22492+
4a4d8108
AM
22493+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
22494+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
22495+#define AuCacheCtor(type, ctor) \
22496+ kmem_cache_create(#type, sizeof(struct type), \
22497+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 22498+
f0c0a007
AM
22499+#define AU_DFREE_DELAY msecs_to_jiffies(10)
22500+#define AU_DFREE_BODY(lnode, llist) do { \
22501+ if (llist_add(lnode, llist)) \
22502+ schedule_delayed_work(&au_dfree.dwork, \
22503+ AU_DFREE_DELAY); \
22504+ } while (0)
22505+#define AU_CACHE_DFREE_FUNC(name, idx, lnode) \
22506+ void au_cache_dfree_##name(struct au_##name *p) \
22507+ { \
22508+ struct au_cache *cp = au_dfree.cache + AuCache_##idx; \
22509+ AU_DFREE_BODY(&p->lnode, &cp->llist); \
22510+ }
1facf9fc 22511+
22512+#define AuCacheFuncs(name, index) \
4a4d8108 22513+static inline struct au_##name *au_cache_alloc_##name(void) \
f0c0a007 22514+{ return kmem_cache_alloc(au_dfree.cache[AuCache_##index].cache, GFP_NOFS); } \
4a4d8108 22515+static inline void au_cache_free_##name(struct au_##name *p) \
f0c0a007
AM
22516+{ kmem_cache_free(au_dfree.cache[AuCache_##index].cache, p); } \
22517+void au_cache_dfree_##name(struct au_##name *p)
1facf9fc 22518+
22519+AuCacheFuncs(dinfo, DINFO);
22520+AuCacheFuncs(icntnr, ICNTNR);
22521+AuCacheFuncs(finfo, FINFO);
22522+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
22523+AuCacheFuncs(vdir_dehstr, DEHSTR);
22524+#ifdef CONFIG_AUFS_HNOTIFY
22525+AuCacheFuncs(hnotify, HNOTIFY);
22526+#endif
1facf9fc 22527+
f0c0a007
AM
22528+static inline void au_delayed_kfree(const void *p)
22529+{
22530+ AuDebugOn(!p);
22531+ AuDebugOn(ksize(p) < sizeof(struct llist_node));
22532+
22533+ AU_DFREE_BODY((void *)p, au_dfree.llist + AU_DFREE_KFREE);
22534+}
22535+
22536+/* cast only */
22537+static inline void au_free_page(void *p)
22538+{
22539+ free_page((unsigned long)p);
22540+}
22541+
22542+static inline void au_delayed_free_page(unsigned long addr)
22543+{
22544+ AU_DFREE_BODY((void *)addr, au_dfree.llist + AU_DFREE_FREE_PAGE);
22545+}
22546+
4a4d8108
AM
22547+#endif /* __KERNEL__ */
22548+#endif /* __AUFS_MODULE_H__ */
c2b27bf2
AM
22549diff -urN /usr/share/empty/fs/aufs/mvdown.c linux/fs/aufs/mvdown.c
22550--- /usr/share/empty/fs/aufs/mvdown.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 22551+++ linux/fs/aufs/mvdown.c 2016-10-09 16:55:36.492701639 +0200
5afbbe0d 22552@@ -0,0 +1,704 @@
c2b27bf2 22553+/*
8cdd5066 22554+ * Copyright (C) 2011-2016 Junjiro R. Okajima
c2b27bf2
AM
22555+ *
22556+ * This program, aufs is free software; you can redistribute it and/or modify
22557+ * it under the terms of the GNU General Public License as published by
22558+ * the Free Software Foundation; either version 2 of the License, or
22559+ * (at your option) any later version.
22560+ *
22561+ * This program is distributed in the hope that it will be useful,
22562+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22563+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22564+ * GNU General Public License for more details.
22565+ *
22566+ * You should have received a copy of the GNU General Public License
523b37e3
AM
22567+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22568+ */
22569+
22570+/*
22571+ * move-down, opposite of copy-up
c2b27bf2
AM
22572+ */
22573+
22574+#include "aufs.h"
22575+
c2b27bf2
AM
22576+struct au_mvd_args {
22577+ struct {
c2b27bf2
AM
22578+ struct super_block *h_sb;
22579+ struct dentry *h_parent;
22580+ struct au_hinode *hdir;
392086de 22581+ struct inode *h_dir, *h_inode;
c1595e42 22582+ struct au_pin pin;
c2b27bf2
AM
22583+ } info[AUFS_MVDOWN_NARRAY];
22584+
22585+ struct aufs_mvdown mvdown;
22586+ struct dentry *dentry, *parent;
22587+ struct inode *inode, *dir;
22588+ struct super_block *sb;
22589+ aufs_bindex_t bopq, bwh, bfound;
22590+ unsigned char rename_lock;
c2b27bf2
AM
22591+};
22592+
392086de 22593+#define mvd_errno mvdown.au_errno
076b876e
AM
22594+#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
22595+#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
22596+#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
22597+#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
c2b27bf2 22598+
392086de
AM
22599+#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb
22600+#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
22601+#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
22602+#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
22603+#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
c1595e42 22604+#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
392086de
AM
22605+
22606+#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb
22607+#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
22608+#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
22609+#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
22610+#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
c1595e42 22611+#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
c2b27bf2
AM
22612+
22613+#define AU_MVD_PR(flag, ...) do { \
22614+ if (flag) \
22615+ pr_err(__VA_ARGS__); \
22616+ } while (0)
22617+
076b876e
AM
22618+static int find_lower_writable(struct au_mvd_args *a)
22619+{
22620+ struct super_block *sb;
5afbbe0d 22621+ aufs_bindex_t bindex, bbot;
076b876e
AM
22622+ struct au_branch *br;
22623+
22624+ sb = a->sb;
22625+ bindex = a->mvd_bsrc;
5afbbe0d 22626+ bbot = au_sbbot(sb);
076b876e 22627+ if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
5afbbe0d 22628+ for (bindex++; bindex <= bbot; bindex++) {
076b876e
AM
22629+ br = au_sbr(sb, bindex);
22630+ if (au_br_fhsm(br->br_perm)
22631+ && (!(au_br_sb(br)->s_flags & MS_RDONLY)))
22632+ return bindex;
22633+ }
22634+ else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
5afbbe0d 22635+ for (bindex++; bindex <= bbot; bindex++) {
076b876e
AM
22636+ br = au_sbr(sb, bindex);
22637+ if (!au_br_rdonly(br))
22638+ return bindex;
22639+ }
22640+ else
5afbbe0d 22641+ for (bindex++; bindex <= bbot; bindex++) {
076b876e
AM
22642+ br = au_sbr(sb, bindex);
22643+ if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
22644+ if (au_br_rdonly(br))
22645+ a->mvdown.flags
22646+ |= AUFS_MVDOWN_ROLOWER_R;
22647+ return bindex;
22648+ }
22649+ }
22650+
22651+ return -1;
22652+}
22653+
c2b27bf2 22654+/* make the parent dir on bdst */
392086de 22655+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22656+{
22657+ int err;
22658+
22659+ err = 0;
22660+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
22661+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
22662+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
22663+ a->mvd_h_dst_parent = NULL;
5afbbe0d 22664+ if (au_dbbot(a->parent) >= a->mvd_bdst)
c2b27bf2
AM
22665+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22666+ if (!a->mvd_h_dst_parent) {
22667+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
22668+ if (unlikely(err)) {
392086de 22669+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
c2b27bf2
AM
22670+ goto out;
22671+ }
22672+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22673+ }
22674+
22675+out:
22676+ AuTraceErr(err);
22677+ return err;
22678+}
22679+
22680+/* lock them all */
392086de 22681+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22682+{
22683+ int err;
22684+ struct dentry *h_trap;
22685+
22686+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
22687+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
c1595e42
JR
22688+ err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
22689+ au_opt_udba(a->sb),
22690+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22691+ AuTraceErr(err);
22692+ if (unlikely(err)) {
22693+ AU_MVD_PR(dmsg, "pin_dst failed\n");
22694+ goto out;
22695+ }
22696+
c2b27bf2
AM
22697+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
22698+ a->rename_lock = 0;
c1595e42
JR
22699+ au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22700+ AuLsc_DI_PARENT, AuLsc_I_PARENT3,
22701+ au_opt_udba(a->sb),
22702+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22703+ err = au_do_pin(&a->mvd_pin_src);
22704+ AuTraceErr(err);
5527c038 22705+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22706+ if (unlikely(err)) {
22707+ AU_MVD_PR(dmsg, "pin_src failed\n");
22708+ goto out_dst;
22709+ }
22710+ goto out; /* success */
c2b27bf2
AM
22711+ }
22712+
c2b27bf2 22713+ a->rename_lock = 1;
c1595e42
JR
22714+ au_pin_hdir_unlock(&a->mvd_pin_dst);
22715+ err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22716+ au_opt_udba(a->sb),
22717+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22718+ AuTraceErr(err);
5527c038 22719+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22720+ if (unlikely(err)) {
22721+ AU_MVD_PR(dmsg, "pin_src failed\n");
22722+ au_pin_hdir_lock(&a->mvd_pin_dst);
22723+ goto out_dst;
22724+ }
22725+ au_pin_hdir_unlock(&a->mvd_pin_src);
c2b27bf2
AM
22726+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22727+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22728+ if (h_trap) {
22729+ err = (h_trap != a->mvd_h_src_parent);
22730+ if (err)
22731+ err = (h_trap != a->mvd_h_dst_parent);
22732+ }
22733+ BUG_ON(err); /* it should never happen */
c1595e42
JR
22734+ if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
22735+ err = -EBUSY;
22736+ AuTraceErr(err);
22737+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22738+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22739+ au_pin_hdir_lock(&a->mvd_pin_src);
22740+ au_unpin(&a->mvd_pin_src);
22741+ au_pin_hdir_lock(&a->mvd_pin_dst);
22742+ goto out_dst;
22743+ }
22744+ goto out; /* success */
c2b27bf2 22745+
c1595e42
JR
22746+out_dst:
22747+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22748+out:
22749+ AuTraceErr(err);
22750+ return err;
22751+}
22752+
392086de 22753+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2 22754+{
c1595e42
JR
22755+ if (!a->rename_lock)
22756+ au_unpin(&a->mvd_pin_src);
22757+ else {
c2b27bf2
AM
22758+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22759+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
c1595e42
JR
22760+ au_pin_hdir_lock(&a->mvd_pin_src);
22761+ au_unpin(&a->mvd_pin_src);
22762+ au_pin_hdir_lock(&a->mvd_pin_dst);
22763+ }
22764+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22765+}
22766+
22767+/* copy-down the file */
392086de 22768+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22769+{
22770+ int err;
22771+ struct au_cp_generic cpg = {
22772+ .dentry = a->dentry,
22773+ .bdst = a->mvd_bdst,
22774+ .bsrc = a->mvd_bsrc,
22775+ .len = -1,
c1595e42 22776+ .pin = &a->mvd_pin_dst,
c2b27bf2
AM
22777+ .flags = AuCpup_DTIME | AuCpup_HOPEN
22778+ };
22779+
22780+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
392086de
AM
22781+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22782+ au_fset_cpup(cpg.flags, OVERWRITE);
22783+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
22784+ au_fset_cpup(cpg.flags, RWDST);
c2b27bf2
AM
22785+ err = au_sio_cpdown_simple(&cpg);
22786+ if (unlikely(err))
392086de 22787+ AU_MVD_PR(dmsg, "cpdown failed\n");
c2b27bf2
AM
22788+
22789+ AuTraceErr(err);
22790+ return err;
22791+}
22792+
22793+/*
22794+ * unlink the whiteout on bdst, if it exists; it may have been created by UDBA
22795+ * while we were sleeping
22796+ */
392086de 22797+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22798+{
22799+ int err;
22800+ struct path h_path;
22801+ struct au_branch *br;
523b37e3 22802+ struct inode *delegated;
c2b27bf2
AM
22803+
22804+ br = au_sbr(a->sb, a->mvd_bdst);
22805+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
22806+ err = PTR_ERR(h_path.dentry);
22807+ if (IS_ERR(h_path.dentry)) {
392086de 22808+ AU_MVD_PR(dmsg, "wh_lkup failed\n");
c2b27bf2
AM
22809+ goto out;
22810+ }
22811+
22812+ err = 0;
5527c038 22813+ if (d_is_positive(h_path.dentry)) {
c2b27bf2 22814+ h_path.mnt = au_br_mnt(br);
523b37e3 22815+ delegated = NULL;
5527c038 22816+ err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
523b37e3
AM
22817+ &delegated, /*force*/0);
22818+ if (unlikely(err == -EWOULDBLOCK)) {
22819+ pr_warn("cannot retry for NFSv4 delegation"
22820+ " for an internal unlink\n");
22821+ iput(delegated);
22822+ }
c2b27bf2 22823+ if (unlikely(err))
392086de 22824+ AU_MVD_PR(dmsg, "wh_unlink failed\n");
c2b27bf2
AM
22825+ }
22826+ dput(h_path.dentry);
22827+
22828+out:
22829+ AuTraceErr(err);
22830+ return err;
22831+}
22832+
22833+/*
22834+ * unlink the topmost h_dentry
c2b27bf2 22835+ */
392086de 22836+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22837+{
22838+ int err;
22839+ struct path h_path;
523b37e3 22840+ struct inode *delegated;
c2b27bf2
AM
22841+
22842+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
22843+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
523b37e3
AM
22844+ delegated = NULL;
22845+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
22846+ if (unlikely(err == -EWOULDBLOCK)) {
22847+ pr_warn("cannot retry for NFSv4 delegation"
22848+ " for an internal unlink\n");
22849+ iput(delegated);
22850+ }
c2b27bf2 22851+ if (unlikely(err))
392086de 22852+ AU_MVD_PR(dmsg, "unlink failed\n");
c2b27bf2
AM
22853+
22854+ AuTraceErr(err);
22855+ return err;
22856+}
22857+
076b876e
AM
22858+/* Since mvdown succeeded, we ignore an error of this function */
22859+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
22860+{
22861+ int err;
22862+ struct au_branch *br;
22863+
22864+ a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
22865+ br = au_sbr(a->sb, a->mvd_bsrc);
22866+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
22867+ if (!err) {
22868+ br = au_sbr(a->sb, a->mvd_bdst);
22869+ a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
22870+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
22871+ }
22872+ if (!err)
22873+ a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
22874+ else
22875+ AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
22876+}
22877+
c2b27bf2
AM
22878+/*
22879+ * copy-down the file and unlink the bsrc file.
22880+ * - unlink the bdst whiteout if it exists
22881+ * - copy-down the file (with whtmp name and rename)
22882+ * - unlink the bsrc file
22883+ */
392086de 22884+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22885+{
22886+ int err;
22887+
392086de 22888+ err = au_do_mkdir(dmsg, a);
c2b27bf2 22889+ if (!err)
392086de 22890+ err = au_do_lock(dmsg, a);
c2b27bf2
AM
22891+ if (unlikely(err))
22892+ goto out;
22893+
22894+ /*
22895+ * do not revert the activities we made on bdst since they should be
22896+ * harmless in aufs.
22897+ */
22898+
392086de 22899+ err = au_do_cpdown(dmsg, a);
c2b27bf2 22900+ if (!err)
392086de
AM
22901+ err = au_do_unlink_wh(dmsg, a);
22902+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
22903+ err = au_do_unlink(dmsg, a);
c2b27bf2
AM
22904+ if (unlikely(err))
22905+ goto out_unlock;
22906+
c1595e42
JR
22907+ AuDbg("%pd2, 0x%x, %d --> %d\n",
22908+ a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
076b876e
AM
22909+ if (find_lower_writable(a) < 0)
22910+ a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
22911+
22912+ if (a->mvdown.flags & AUFS_MVDOWN_STFS)
22913+ au_do_stfs(dmsg, a);
22914+
c2b27bf2 22915+ /* maintain internal array */
392086de
AM
22916+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
22917+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
5afbbe0d 22918+ au_set_dbtop(a->dentry, a->mvd_bdst);
392086de 22919+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
5afbbe0d 22920+ au_set_ibtop(a->inode, a->mvd_bdst);
79b8bda9
AM
22921+ } else {
22922+ /* hide the lower */
22923+ au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
5afbbe0d 22924+ au_set_dbbot(a->dentry, a->mvd_bsrc);
79b8bda9 22925+ au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
5afbbe0d 22926+ au_set_ibbot(a->inode, a->mvd_bsrc);
392086de 22927+ }
5afbbe0d
AM
22928+ if (au_dbbot(a->dentry) < a->mvd_bdst)
22929+ au_set_dbbot(a->dentry, a->mvd_bdst);
22930+ if (au_ibbot(a->inode) < a->mvd_bdst)
22931+ au_set_ibbot(a->inode, a->mvd_bdst);
c2b27bf2
AM
22932+
22933+out_unlock:
392086de 22934+ au_do_unlock(dmsg, a);
c2b27bf2
AM
22935+out:
22936+ AuTraceErr(err);
22937+ return err;
22938+}
22939+
22940+/* ---------------------------------------------------------------------- */
22941+
c2b27bf2 22942+/* make sure the file is idle */
392086de 22943+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22944+{
22945+ int err, plinked;
c2b27bf2
AM
22946+
22947+ err = 0;
c2b27bf2 22948+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
5afbbe0d 22949+ if (au_dbtop(a->dentry) == a->mvd_bsrc
c1595e42 22950+ && au_dcount(a->dentry) == 1
c2b27bf2 22951+ && atomic_read(&a->inode->i_count) == 1
392086de 22952+ /* && a->mvd_h_src_inode->i_nlink == 1 */
c2b27bf2
AM
22953+ && (!plinked || !au_plink_test(a->inode))
22954+ && a->inode->i_nlink == 1)
22955+ goto out;
22956+
22957+ err = -EBUSY;
392086de 22958+ AU_MVD_PR(dmsg,
c1595e42 22959+ "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
5afbbe0d 22960+ a->mvd_bsrc, au_dbtop(a->dentry), au_dcount(a->dentry),
c2b27bf2 22961+ atomic_read(&a->inode->i_count), a->inode->i_nlink,
392086de 22962+ a->mvd_h_src_inode->i_nlink,
c2b27bf2
AM
22963+ plinked, plinked ? au_plink_test(a->inode) : 0);
22964+
22965+out:
22966+ AuTraceErr(err);
22967+ return err;
22968+}
22969+
22970+/* make sure the parent dir is fine */
392086de 22971+static int au_mvd_args_parent(const unsigned char dmsg,
c2b27bf2
AM
22972+ struct au_mvd_args *a)
22973+{
22974+ int err;
22975+ aufs_bindex_t bindex;
22976+
22977+ err = 0;
22978+ if (unlikely(au_alive_dir(a->parent))) {
22979+ err = -ENOENT;
392086de 22980+ AU_MVD_PR(dmsg, "parent dir is dead\n");
c2b27bf2
AM
22981+ goto out;
22982+ }
22983+
22984+ a->bopq = au_dbdiropq(a->parent);
22985+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
22986+ AuDbg("b%d\n", bindex);
22987+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
22988+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
22989+ err = -EINVAL;
392086de
AM
22990+ a->mvd_errno = EAU_MVDOWN_OPAQUE;
22991+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
c2b27bf2
AM
22992+ a->bopq, a->mvd_bdst);
22993+ }
22994+
22995+out:
22996+ AuTraceErr(err);
22997+ return err;
22998+}
22999+
392086de 23000+static int au_mvd_args_intermediate(const unsigned char dmsg,
c2b27bf2
AM
23001+ struct au_mvd_args *a)
23002+{
23003+ int err;
23004+ struct au_dinfo *dinfo, *tmp;
23005+
23006+ /* lookup the next lower positive entry */
23007+ err = -ENOMEM;
23008+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
23009+ if (unlikely(!tmp))
23010+ goto out;
23011+
23012+ a->bfound = -1;
23013+ a->bwh = -1;
23014+ dinfo = au_di(a->dentry);
23015+ au_di_cp(tmp, dinfo);
23016+ au_di_swap(tmp, dinfo);
23017+
23018+ /* returns the number of positive dentries */
5afbbe0d
AM
23019+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1,
23020+ /* AuLkup_IGNORE_PERM */ 0);
c2b27bf2
AM
23021+ if (!err)
23022+ a->bwh = au_dbwh(a->dentry);
23023+ else if (err > 0)
5afbbe0d 23024+ a->bfound = au_dbtop(a->dentry);
c2b27bf2
AM
23025+
23026+ au_di_swap(tmp, dinfo);
23027+ au_rw_write_unlock(&tmp->di_rwsem);
23028+ au_di_free(tmp);
23029+ if (unlikely(err < 0))
392086de 23030+ AU_MVD_PR(dmsg, "failed look-up lower\n");
c2b27bf2
AM
23031+
23032+ /*
23033+ * here, we have these cases.
23034+ * bfound == -1
23035+ * no positive dentry under bsrc. there are more sub-cases.
23036+ * bwh < 0
23037+ * there no whiteout, we can safely move-down.
23038+ * bwh <= bsrc
23039+ * impossible
23040+ * bsrc < bwh && bwh < bdst
23041+ * there is a whiteout on RO branch. cannot proceed.
23042+ * bwh == bdst
23043+ * there is a whiteout on the RW target branch. it should
23044+ * be removed.
23045+ * bdst < bwh
23046+ * there is a whiteout somewhere unrelated branch.
23047+ * -1 < bfound && bfound <= bsrc
23048+ * impossible.
23049+ * bfound < bdst
23050+ * found, but it is on RO branch between bsrc and bdst. cannot
23051+ * proceed.
23052+ * bfound == bdst
23053+ * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
23054+ * error.
23055+ * bdst < bfound
23056+ * found, after we create the file on bdst, it will be hidden.
23057+ */
23058+
23059+ AuDebugOn(a->bfound == -1
23060+ && a->bwh != -1
23061+ && a->bwh <= a->mvd_bsrc);
23062+ AuDebugOn(-1 < a->bfound
23063+ && a->bfound <= a->mvd_bsrc);
23064+
23065+ err = -EINVAL;
23066+ if (a->bfound == -1
23067+ && a->mvd_bsrc < a->bwh
23068+ && a->bwh != -1
23069+ && a->bwh < a->mvd_bdst) {
392086de
AM
23070+ a->mvd_errno = EAU_MVDOWN_WHITEOUT;
23071+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
c2b27bf2
AM
23072+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
23073+ goto out;
23074+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
392086de
AM
23075+ a->mvd_errno = EAU_MVDOWN_UPPER;
23076+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
c2b27bf2
AM
23077+ a->mvd_bdst, a->bfound);
23078+ goto out;
23079+ }
23080+
23081+ err = 0; /* success */
23082+
23083+out:
23084+ AuTraceErr(err);
23085+ return err;
23086+}
23087+
392086de 23088+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
23089+{
23090+ int err;
23091+
392086de
AM
23092+ err = 0;
23093+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
23094+ && a->bfound == a->mvd_bdst)
23095+ err = -EEXIST;
c2b27bf2
AM
23096+ AuTraceErr(err);
23097+ return err;
23098+}
23099+
392086de 23100+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
23101+{
23102+ int err;
23103+ struct au_branch *br;
23104+
23105+ err = -EISDIR;
23106+ if (unlikely(S_ISDIR(a->inode->i_mode)))
23107+ goto out;
23108+
23109+ err = -EINVAL;
392086de 23110+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
5afbbe0d 23111+ a->mvd_bsrc = au_ibtop(a->inode);
392086de
AM
23112+ else {
23113+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
23114+ if (unlikely(a->mvd_bsrc < 0
5afbbe0d
AM
23115+ || (a->mvd_bsrc < au_dbtop(a->dentry)
23116+ || au_dbbot(a->dentry) < a->mvd_bsrc
392086de 23117+ || !au_h_dptr(a->dentry, a->mvd_bsrc))
5afbbe0d
AM
23118+ || (a->mvd_bsrc < au_ibtop(a->inode)
23119+ || au_ibbot(a->inode) < a->mvd_bsrc
392086de
AM
23120+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
23121+ a->mvd_errno = EAU_MVDOWN_NOUPPER;
23122+ AU_MVD_PR(dmsg, "no upper\n");
23123+ goto out;
23124+ }
23125+ }
5afbbe0d 23126+ if (unlikely(a->mvd_bsrc == au_sbbot(a->sb))) {
392086de
AM
23127+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
23128+ AU_MVD_PR(dmsg, "on the bottom\n");
c2b27bf2
AM
23129+ goto out;
23130+ }
392086de 23131+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
c2b27bf2
AM
23132+ br = au_sbr(a->sb, a->mvd_bsrc);
23133+ err = au_br_rdonly(br);
392086de
AM
23134+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
23135+ if (unlikely(err))
23136+ goto out;
23137+ } else if (!(vfsub_native_ro(a->mvd_h_src_inode)
23138+ || IS_APPEND(a->mvd_h_src_inode))) {
23139+ if (err)
23140+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
23141+ /* go on */
23142+ } else
c2b27bf2
AM
23143+ goto out;
23144+
23145+ err = -EINVAL;
392086de
AM
23146+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
23147+ a->mvd_bdst = find_lower_writable(a);
23148+ if (unlikely(a->mvd_bdst < 0)) {
23149+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
23150+ AU_MVD_PR(dmsg, "no writable lower branch\n");
23151+ goto out;
23152+ }
23153+ } else {
23154+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
23155+ if (unlikely(a->mvd_bdst < 0
5afbbe0d 23156+ || au_sbbot(a->sb) < a->mvd_bdst)) {
392086de
AM
23157+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
23158+ AU_MVD_PR(dmsg, "no lower brid\n");
23159+ goto out;
23160+ }
c2b27bf2
AM
23161+ }
23162+
392086de 23163+ err = au_mvd_args_busy(dmsg, a);
c2b27bf2 23164+ if (!err)
392086de 23165+ err = au_mvd_args_parent(dmsg, a);
c2b27bf2 23166+ if (!err)
392086de 23167+ err = au_mvd_args_intermediate(dmsg, a);
c2b27bf2 23168+ if (!err)
392086de 23169+ err = au_mvd_args_exist(dmsg, a);
c2b27bf2
AM
23170+ if (!err)
23171+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
23172+
23173+out:
23174+ AuTraceErr(err);
23175+ return err;
23176+}
23177+
23178+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
23179+{
392086de
AM
23180+ int err, e;
23181+ unsigned char dmsg;
23182+ struct au_mvd_args *args;
79b8bda9 23183+ struct inode *inode;
c2b27bf2 23184+
79b8bda9 23185+ inode = d_inode(dentry);
c2b27bf2
AM
23186+ err = -EPERM;
23187+ if (unlikely(!capable(CAP_SYS_ADMIN)))
23188+ goto out;
23189+
392086de
AM
23190+ err = -ENOMEM;
23191+ args = kmalloc(sizeof(*args), GFP_NOFS);
23192+ if (unlikely(!args))
23193+ goto out;
23194+
23195+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
23196+ if (!err)
23197+ err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
c2b27bf2
AM
23198+ if (unlikely(err)) {
23199+ err = -EFAULT;
392086de
AM
23200+ AuTraceErr(err);
23201+ goto out_free;
c2b27bf2 23202+ }
392086de
AM
23203+ AuDbg("flags 0x%x\n", args->mvdown.flags);
23204+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
23205+ args->mvdown.au_errno = 0;
23206+ args->dentry = dentry;
79b8bda9 23207+ args->inode = inode;
392086de 23208+ args->sb = dentry->d_sb;
c2b27bf2 23209+
392086de
AM
23210+ err = -ENOENT;
23211+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
23212+ args->parent = dget_parent(dentry);
5527c038 23213+ args->dir = d_inode(args->parent);
febd17d6 23214+ inode_lock_nested(args->dir, I_MUTEX_PARENT);
392086de
AM
23215+ dput(args->parent);
23216+ if (unlikely(args->parent != dentry->d_parent)) {
23217+ AU_MVD_PR(dmsg, "parent dir is moved\n");
c2b27bf2
AM
23218+ goto out_dir;
23219+ }
23220+
febd17d6 23221+ inode_lock_nested(inode, I_MUTEX_CHILD);
b95c5147 23222+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
c2b27bf2
AM
23223+ if (unlikely(err))
23224+ goto out_inode;
23225+
392086de
AM
23226+ di_write_lock_parent(args->parent);
23227+ err = au_mvd_args(dmsg, args);
c2b27bf2
AM
23228+ if (unlikely(err))
23229+ goto out_parent;
23230+
392086de 23231+ err = au_do_mvdown(dmsg, args);
c2b27bf2
AM
23232+ if (unlikely(err))
23233+ goto out_parent;
c2b27bf2 23234+
392086de 23235+ au_cpup_attr_timesizes(args->dir);
79b8bda9
AM
23236+ au_cpup_attr_timesizes(inode);
23237+ if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
23238+ au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
c2b27bf2
AM
23239+ /* au_digen_dec(dentry); */
23240+
23241+out_parent:
392086de 23242+ di_write_unlock(args->parent);
c2b27bf2
AM
23243+ aufs_read_unlock(dentry, AuLock_DW);
23244+out_inode:
febd17d6 23245+ inode_unlock(inode);
c2b27bf2 23246+out_dir:
febd17d6 23247+ inode_unlock(args->dir);
392086de
AM
23248+out_free:
23249+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
23250+ if (unlikely(e))
23251+ err = -EFAULT;
f0c0a007 23252+ au_delayed_kfree(args);
c2b27bf2
AM
23253+out:
23254+ AuTraceErr(err);
23255+ return err;
23256+}
23257diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
23258--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
23259+++ linux/fs/aufs/opts.c 2016-12-17 12:28:17.598545045 +0100
23260@@ -0,0 +1,1870 @@
1facf9fc 23261+/*
8cdd5066 23262+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 23263+ *
23264+ * This program, aufs is free software; you can redistribute it and/or modify
23265+ * it under the terms of the GNU General Public License as published by
23266+ * the Free Software Foundation; either version 2 of the License, or
23267+ * (at your option) any later version.
dece6358
AM
23268+ *
23269+ * This program is distributed in the hope that it will be useful,
23270+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23271+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23272+ * GNU General Public License for more details.
23273+ *
23274+ * You should have received a copy of the GNU General Public License
523b37e3 23275+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 23276+ */
23277+
23278+/*
23279+ * mount options/flags
23280+ */
23281+
dece6358 23282+#include <linux/namei.h>
1facf9fc 23283+#include <linux/types.h> /* a distribution requires */
23284+#include <linux/parser.h>
23285+#include "aufs.h"
23286+
23287+/* ---------------------------------------------------------------------- */
23288+
23289+enum {
23290+ Opt_br,
7e9cd9fe
AM
23291+ Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
23292+ Opt_idel, Opt_imod,
23293+ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
dece6358 23294+ Opt_rdblk_def, Opt_rdhash_def,
7e9cd9fe 23295+ Opt_xino, Opt_noxino,
1facf9fc 23296+ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
23297+ Opt_trunc_xino_path, Opt_itrunc_xino,
23298+ Opt_trunc_xib, Opt_notrunc_xib,
dece6358 23299+ Opt_shwh, Opt_noshwh,
1facf9fc 23300+ Opt_plink, Opt_noplink, Opt_list_plink,
23301+ Opt_udba,
4a4d8108 23302+ Opt_dio, Opt_nodio,
1facf9fc 23303+ Opt_diropq_a, Opt_diropq_w,
23304+ Opt_warn_perm, Opt_nowarn_perm,
23305+ Opt_wbr_copyup, Opt_wbr_create,
076b876e 23306+ Opt_fhsm_sec,
1facf9fc 23307+ Opt_verbose, Opt_noverbose,
23308+ Opt_sum, Opt_nosum, Opt_wsum,
076b876e 23309+ Opt_dirperm1, Opt_nodirperm1,
c1595e42 23310+ Opt_acl, Opt_noacl,
1facf9fc 23311+ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
23312+};
23313+
23314+static match_table_t options = {
23315+ {Opt_br, "br=%s"},
23316+ {Opt_br, "br:%s"},
23317+
23318+ {Opt_add, "add=%d:%s"},
23319+ {Opt_add, "add:%d:%s"},
23320+ {Opt_add, "ins=%d:%s"},
23321+ {Opt_add, "ins:%d:%s"},
23322+ {Opt_append, "append=%s"},
23323+ {Opt_append, "append:%s"},
23324+ {Opt_prepend, "prepend=%s"},
23325+ {Opt_prepend, "prepend:%s"},
23326+
23327+ {Opt_del, "del=%s"},
23328+ {Opt_del, "del:%s"},
23329+ /* {Opt_idel, "idel:%d"}, */
23330+ {Opt_mod, "mod=%s"},
23331+ {Opt_mod, "mod:%s"},
23332+ /* {Opt_imod, "imod:%d:%s"}, */
23333+
23334+ {Opt_dirwh, "dirwh=%d"},
23335+
23336+ {Opt_xino, "xino=%s"},
23337+ {Opt_noxino, "noxino"},
23338+ {Opt_trunc_xino, "trunc_xino"},
23339+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
23340+ {Opt_notrunc_xino, "notrunc_xino"},
23341+ {Opt_trunc_xino_path, "trunc_xino=%s"},
23342+ {Opt_itrunc_xino, "itrunc_xino=%d"},
23343+ /* {Opt_zxino, "zxino=%s"}, */
23344+ {Opt_trunc_xib, "trunc_xib"},
23345+ {Opt_notrunc_xib, "notrunc_xib"},
23346+
e49829fe 23347+#ifdef CONFIG_PROC_FS
1facf9fc 23348+ {Opt_plink, "plink"},
e49829fe
JR
23349+#else
23350+ {Opt_ignore_silent, "plink"},
23351+#endif
23352+
1facf9fc 23353+ {Opt_noplink, "noplink"},
e49829fe 23354+
1facf9fc 23355+#ifdef CONFIG_AUFS_DEBUG
23356+ {Opt_list_plink, "list_plink"},
23357+#endif
23358+
23359+ {Opt_udba, "udba=%s"},
23360+
4a4d8108
AM
23361+ {Opt_dio, "dio"},
23362+ {Opt_nodio, "nodio"},
23363+
076b876e
AM
23364+#ifdef CONFIG_AUFS_FHSM
23365+ {Opt_fhsm_sec, "fhsm_sec=%d"},
23366+#else
23367+ {Opt_ignore_silent, "fhsm_sec=%d"},
23368+#endif
23369+
1facf9fc 23370+ {Opt_diropq_a, "diropq=always"},
23371+ {Opt_diropq_a, "diropq=a"},
23372+ {Opt_diropq_w, "diropq=whiteouted"},
23373+ {Opt_diropq_w, "diropq=w"},
23374+
23375+ {Opt_warn_perm, "warn_perm"},
23376+ {Opt_nowarn_perm, "nowarn_perm"},
23377+
23378+ /* keep them temporary */
1facf9fc 23379+ {Opt_ignore_silent, "nodlgt"},
1facf9fc 23380+ {Opt_ignore_silent, "clean_plink"},
23381+
dece6358
AM
23382+#ifdef CONFIG_AUFS_SHWH
23383+ {Opt_shwh, "shwh"},
23384+#endif
23385+ {Opt_noshwh, "noshwh"},
23386+
076b876e
AM
23387+ {Opt_dirperm1, "dirperm1"},
23388+ {Opt_nodirperm1, "nodirperm1"},
23389+
1facf9fc 23390+ {Opt_verbose, "verbose"},
23391+ {Opt_verbose, "v"},
23392+ {Opt_noverbose, "noverbose"},
23393+ {Opt_noverbose, "quiet"},
23394+ {Opt_noverbose, "q"},
23395+ {Opt_noverbose, "silent"},
23396+
23397+ {Opt_sum, "sum"},
23398+ {Opt_nosum, "nosum"},
23399+ {Opt_wsum, "wsum"},
23400+
23401+ {Opt_rdcache, "rdcache=%d"},
23402+ {Opt_rdblk, "rdblk=%d"},
dece6358 23403+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 23404+ {Opt_rdhash, "rdhash=%d"},
dece6358 23405+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 23406+
23407+ {Opt_wbr_create, "create=%s"},
23408+ {Opt_wbr_create, "create_policy=%s"},
23409+ {Opt_wbr_copyup, "cpup=%s"},
23410+ {Opt_wbr_copyup, "copyup=%s"},
23411+ {Opt_wbr_copyup, "copyup_policy=%s"},
23412+
c1595e42
JR
23413+ /* generic VFS flag */
23414+#ifdef CONFIG_FS_POSIX_ACL
23415+ {Opt_acl, "acl"},
23416+ {Opt_noacl, "noacl"},
23417+#else
23418+ {Opt_ignore_silent, "acl"},
23419+ {Opt_ignore_silent, "noacl"},
23420+#endif
23421+
1facf9fc 23422+ /* internal use for the scripts */
23423+ {Opt_ignore_silent, "si=%s"},
23424+
23425+ {Opt_br, "dirs=%s"},
23426+ {Opt_ignore, "debug=%d"},
23427+ {Opt_ignore, "delete=whiteout"},
23428+ {Opt_ignore, "delete=all"},
23429+ {Opt_ignore, "imap=%s"},
23430+
1308ab2a 23431+ /* temporary workaround, due to old mount(8)? */
23432+ {Opt_ignore_silent, "relatime"},
23433+
1facf9fc 23434+ {Opt_err, NULL}
23435+};
23436+
23437+/* ---------------------------------------------------------------------- */
23438+
076b876e 23439+static const char *au_parser_pattern(int val, match_table_t tbl)
1facf9fc 23440+{
076b876e
AM
23441+ struct match_token *p;
23442+
23443+ p = tbl;
23444+ while (p->pattern) {
23445+ if (p->token == val)
23446+ return p->pattern;
23447+ p++;
1facf9fc 23448+ }
23449+ BUG();
23450+ return "??";
23451+}
23452+
076b876e
AM
23453+static const char *au_optstr(int *val, match_table_t tbl)
23454+{
23455+ struct match_token *p;
23456+ int v;
23457+
23458+ v = *val;
2000de60
JR
23459+ if (!v)
23460+ goto out;
076b876e 23461+ p = tbl;
2000de60
JR
23462+ while (p->pattern) {
23463+ if (p->token
23464+ && (v & p->token) == p->token) {
076b876e
AM
23465+ *val &= ~p->token;
23466+ return p->pattern;
23467+ }
23468+ p++;
23469+ }
2000de60
JR
23470+
23471+out:
076b876e
AM
23472+ return NULL;
23473+}
23474+
1facf9fc 23475+/* ---------------------------------------------------------------------- */
23476+
1e00d052 23477+static match_table_t brperm = {
1facf9fc 23478+ {AuBrPerm_RO, AUFS_BRPERM_RO},
23479+ {AuBrPerm_RR, AUFS_BRPERM_RR},
23480+ {AuBrPerm_RW, AUFS_BRPERM_RW},
1e00d052
AM
23481+ {0, NULL}
23482+};
1facf9fc 23483+
86dc4139 23484+static match_table_t brattr = {
076b876e
AM
23485+ /* general */
23486+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
23487+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
c1595e42 23488+ /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
86dc4139 23489+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
2000de60 23490+#ifdef CONFIG_AUFS_FHSM
076b876e 23491+ {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
2000de60
JR
23492+#endif
23493+#ifdef CONFIG_AUFS_XATTR
c1595e42
JR
23494+ {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
23495+ {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
23496+ {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
23497+ {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
23498+ {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
23499+ {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
2000de60 23500+#endif
076b876e
AM
23501+
23502+ /* ro/rr branch */
1e00d052 23503+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
076b876e
AM
23504+
23505+ /* rw branch */
23506+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
1e00d052 23507+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
076b876e 23508+
1e00d052 23509+ {0, NULL}
1facf9fc 23510+};
23511+
1e00d052
AM
23512+static int br_attr_val(char *str, match_table_t table, substring_t args[])
23513+{
23514+ int attr, v;
23515+ char *p;
23516+
23517+ attr = 0;
23518+ do {
23519+ p = strchr(str, '+');
23520+ if (p)
23521+ *p = 0;
23522+ v = match_token(str, table, args);
076b876e
AM
23523+ if (v) {
23524+ if (v & AuBrAttr_CMOO_Mask)
23525+ attr &= ~AuBrAttr_CMOO_Mask;
1e00d052 23526+ attr |= v;
076b876e 23527+ } else {
1e00d052
AM
23528+ if (p)
23529+ *p = '+';
0c3ec466 23530+ pr_warn("ignored branch attribute %s\n", str);
1e00d052
AM
23531+ break;
23532+ }
23533+ if (p)
23534+ str = p + 1;
23535+ } while (p);
23536+
23537+ return attr;
23538+}
23539+
076b876e
AM
23540+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
23541+{
23542+ int sz;
23543+ const char *p;
23544+ char *q;
23545+
076b876e
AM
23546+ q = str->a;
23547+ *q = 0;
23548+ p = au_optstr(&perm, brattr);
23549+ if (p) {
23550+ sz = strlen(p);
23551+ memcpy(q, p, sz + 1);
23552+ q += sz;
23553+ } else
23554+ goto out;
23555+
23556+ do {
23557+ p = au_optstr(&perm, brattr);
23558+ if (p) {
23559+ *q++ = '+';
23560+ sz = strlen(p);
23561+ memcpy(q, p, sz + 1);
23562+ q += sz;
23563+ }
23564+ } while (p);
23565+
23566+out:
c1595e42 23567+ return q - str->a;
076b876e
AM
23568+}
23569+
4a4d8108 23570+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 23571+{
076b876e
AM
23572+ int val, bad, sz;
23573+ char *p;
1facf9fc 23574+ substring_t args[MAX_OPT_ARGS];
076b876e 23575+ au_br_perm_str_t attr;
1facf9fc 23576+
1e00d052
AM
23577+ p = strchr(perm, '+');
23578+ if (p)
23579+ *p = 0;
23580+ val = match_token(perm, brperm, args);
23581+ if (!val) {
23582+ if (p)
23583+ *p = '+';
0c3ec466 23584+ pr_warn("ignored branch permission %s\n", perm);
1e00d052
AM
23585+ val = AuBrPerm_RO;
23586+ goto out;
23587+ }
23588+ if (!p)
23589+ goto out;
23590+
076b876e
AM
23591+ val |= br_attr_val(p + 1, brattr, args);
23592+
23593+ bad = 0;
86dc4139 23594+ switch (val & AuBrPerm_Mask) {
1e00d052
AM
23595+ case AuBrPerm_RO:
23596+ case AuBrPerm_RR:
076b876e
AM
23597+ bad = val & AuBrWAttr_Mask;
23598+ val &= ~AuBrWAttr_Mask;
1e00d052
AM
23599+ break;
23600+ case AuBrPerm_RW:
076b876e
AM
23601+ bad = val & AuBrRAttr_Mask;
23602+ val &= ~AuBrRAttr_Mask;
1e00d052
AM
23603+ break;
23604+ }
c1595e42
JR
23605+
23606+ /*
23607+ * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
23608+ * does not treat it as an error, just warning.
23609+ * this is a tiny guard for the user operation.
23610+ */
23611+ if (val & AuBrAttr_UNPIN) {
23612+ bad |= AuBrAttr_UNPIN;
23613+ val &= ~AuBrAttr_UNPIN;
23614+ }
23615+
076b876e
AM
23616+ if (unlikely(bad)) {
23617+ sz = au_do_optstr_br_attr(&attr, bad);
23618+ AuDebugOn(!sz);
23619+ pr_warn("ignored branch attribute %s\n", attr.a);
23620+ }
1e00d052
AM
23621+
23622+out:
1facf9fc 23623+ return val;
23624+}
23625+
076b876e 23626+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
1facf9fc 23627+{
076b876e
AM
23628+ au_br_perm_str_t attr;
23629+ const char *p;
23630+ char *q;
1e00d052
AM
23631+ int sz;
23632+
076b876e
AM
23633+ q = str->a;
23634+ p = au_optstr(&perm, brperm);
23635+ AuDebugOn(!p || !*p);
23636+ sz = strlen(p);
23637+ memcpy(q, p, sz + 1);
23638+ q += sz;
1e00d052 23639+
076b876e
AM
23640+ sz = au_do_optstr_br_attr(&attr, perm);
23641+ if (sz) {
23642+ *q++ = '+';
23643+ memcpy(q, attr.a, sz + 1);
1e00d052
AM
23644+ }
23645+
076b876e 23646+ AuDebugOn(strlen(str->a) >= sizeof(str->a));
1facf9fc 23647+}
23648+
23649+/* ---------------------------------------------------------------------- */
23650+
23651+static match_table_t udbalevel = {
23652+ {AuOpt_UDBA_REVAL, "reval"},
23653+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
23654+#ifdef CONFIG_AUFS_HNOTIFY
23655+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
23656+#ifdef CONFIG_AUFS_HFSNOTIFY
23657+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 23658+#endif
1facf9fc 23659+#endif
23660+ {-1, NULL}
23661+};
23662+
4a4d8108 23663+static int noinline_for_stack udba_val(char *str)
1facf9fc 23664+{
23665+ substring_t args[MAX_OPT_ARGS];
23666+
7f207e10 23667+ return match_token(str, udbalevel, args);
1facf9fc 23668+}
23669+
23670+const char *au_optstr_udba(int udba)
23671+{
076b876e 23672+ return au_parser_pattern(udba, udbalevel);
1facf9fc 23673+}
23674+
23675+/* ---------------------------------------------------------------------- */
23676+
23677+static match_table_t au_wbr_create_policy = {
23678+ {AuWbrCreate_TDP, "tdp"},
23679+ {AuWbrCreate_TDP, "top-down-parent"},
23680+ {AuWbrCreate_RR, "rr"},
23681+ {AuWbrCreate_RR, "round-robin"},
23682+ {AuWbrCreate_MFS, "mfs"},
23683+ {AuWbrCreate_MFS, "most-free-space"},
23684+ {AuWbrCreate_MFSV, "mfs:%d"},
23685+ {AuWbrCreate_MFSV, "most-free-space:%d"},
23686+
f2c43d5f
AM
23687+ /* top-down regardless the parent, and then mfs */
23688+ {AuWbrCreate_TDMFS, "tdmfs:%d"},
23689+ {AuWbrCreate_TDMFSV, "tdmfs:%d:%d"},
23690+
1facf9fc 23691+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
23692+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
23693+ {AuWbrCreate_PMFS, "pmfs"},
23694+ {AuWbrCreate_PMFSV, "pmfs:%d"},
392086de
AM
23695+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
23696+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
1facf9fc 23697+
23698+ {-1, NULL}
23699+};
23700+
dece6358
AM
23701+/*
23702+ * cf. linux/lib/parser.c and cmdline.c
23703+ * gave up calling memparse() since it uses simple_strtoull() instead of
9dbd164d 23704+ * kstrto...().
dece6358 23705+ */
4a4d8108
AM
23706+static int noinline_for_stack
23707+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 23708+{
23709+ int err;
23710+ unsigned int len;
23711+ char a[32];
23712+
23713+ err = -ERANGE;
23714+ len = s->to - s->from;
23715+ if (len + 1 <= sizeof(a)) {
23716+ memcpy(a, s->from, len);
23717+ a[len] = '\0';
9dbd164d 23718+ err = kstrtoull(a, 0, result);
1facf9fc 23719+ }
23720+ return err;
23721+}
23722+
23723+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
23724+ struct au_opt_wbr_create *create)
23725+{
23726+ int err;
23727+ unsigned long long ull;
23728+
23729+ err = 0;
23730+ if (!au_match_ull(arg, &ull))
23731+ create->mfsrr_watermark = ull;
23732+ else {
4a4d8108 23733+ pr_err("bad integer in %s\n", str);
1facf9fc 23734+ err = -EINVAL;
23735+ }
23736+
23737+ return err;
23738+}
23739+
23740+static int au_wbr_mfs_sec(substring_t *arg, char *str,
23741+ struct au_opt_wbr_create *create)
23742+{
23743+ int n, err;
23744+
23745+ err = 0;
027c5e7a 23746+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 23747+ create->mfs_second = n;
23748+ else {
4a4d8108 23749+ pr_err("bad integer in %s\n", str);
1facf9fc 23750+ err = -EINVAL;
23751+ }
23752+
23753+ return err;
23754+}
23755+
4a4d8108
AM
23756+static int noinline_for_stack
23757+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 23758+{
23759+ int err, e;
23760+ substring_t args[MAX_OPT_ARGS];
23761+
23762+ err = match_token(str, au_wbr_create_policy, args);
23763+ create->wbr_create = err;
23764+ switch (err) {
23765+ case AuWbrCreate_MFSRRV:
f2c43d5f 23766+ case AuWbrCreate_TDMFSV:
392086de 23767+ case AuWbrCreate_PMFSRRV:
1facf9fc 23768+ e = au_wbr_mfs_wmark(&args[0], str, create);
23769+ if (!e)
23770+ e = au_wbr_mfs_sec(&args[1], str, create);
23771+ if (unlikely(e))
23772+ err = e;
23773+ break;
23774+ case AuWbrCreate_MFSRR:
f2c43d5f 23775+ case AuWbrCreate_TDMFS:
392086de 23776+ case AuWbrCreate_PMFSRR:
1facf9fc 23777+ e = au_wbr_mfs_wmark(&args[0], str, create);
23778+ if (unlikely(e)) {
23779+ err = e;
23780+ break;
23781+ }
23782+ /*FALLTHROUGH*/
23783+ case AuWbrCreate_MFS:
23784+ case AuWbrCreate_PMFS:
027c5e7a 23785+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 23786+ break;
23787+ case AuWbrCreate_MFSV:
23788+ case AuWbrCreate_PMFSV:
23789+ e = au_wbr_mfs_sec(&args[0], str, create);
23790+ if (unlikely(e))
23791+ err = e;
23792+ break;
23793+ }
23794+
23795+ return err;
23796+}
23797+
23798+const char *au_optstr_wbr_create(int wbr_create)
23799+{
076b876e 23800+ return au_parser_pattern(wbr_create, au_wbr_create_policy);
1facf9fc 23801+}
23802+
23803+static match_table_t au_wbr_copyup_policy = {
23804+ {AuWbrCopyup_TDP, "tdp"},
23805+ {AuWbrCopyup_TDP, "top-down-parent"},
23806+ {AuWbrCopyup_BUP, "bup"},
23807+ {AuWbrCopyup_BUP, "bottom-up-parent"},
23808+ {AuWbrCopyup_BU, "bu"},
23809+ {AuWbrCopyup_BU, "bottom-up"},
23810+ {-1, NULL}
23811+};
23812+
4a4d8108 23813+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 23814+{
23815+ substring_t args[MAX_OPT_ARGS];
23816+
23817+ return match_token(str, au_wbr_copyup_policy, args);
23818+}
23819+
23820+const char *au_optstr_wbr_copyup(int wbr_copyup)
23821+{
076b876e 23822+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
1facf9fc 23823+}
23824+
23825+/* ---------------------------------------------------------------------- */
23826+
23827+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
23828+
23829+static void dump_opts(struct au_opts *opts)
23830+{
23831+#ifdef CONFIG_AUFS_DEBUG
23832+ /* reduce stack space */
23833+ union {
23834+ struct au_opt_add *add;
23835+ struct au_opt_del *del;
23836+ struct au_opt_mod *mod;
23837+ struct au_opt_xino *xino;
23838+ struct au_opt_xino_itrunc *xino_itrunc;
23839+ struct au_opt_wbr_create *create;
23840+ } u;
23841+ struct au_opt *opt;
23842+
23843+ opt = opts->opt;
23844+ while (opt->type != Opt_tail) {
23845+ switch (opt->type) {
23846+ case Opt_add:
23847+ u.add = &opt->add;
23848+ AuDbg("add {b%d, %s, 0x%x, %p}\n",
23849+ u.add->bindex, u.add->pathname, u.add->perm,
23850+ u.add->path.dentry);
23851+ break;
23852+ case Opt_del:
23853+ case Opt_idel:
23854+ u.del = &opt->del;
23855+ AuDbg("del {%s, %p}\n",
23856+ u.del->pathname, u.del->h_path.dentry);
23857+ break;
23858+ case Opt_mod:
23859+ case Opt_imod:
23860+ u.mod = &opt->mod;
23861+ AuDbg("mod {%s, 0x%x, %p}\n",
23862+ u.mod->path, u.mod->perm, u.mod->h_root);
23863+ break;
23864+ case Opt_append:
23865+ u.add = &opt->add;
23866+ AuDbg("append {b%d, %s, 0x%x, %p}\n",
23867+ u.add->bindex, u.add->pathname, u.add->perm,
23868+ u.add->path.dentry);
23869+ break;
23870+ case Opt_prepend:
23871+ u.add = &opt->add;
23872+ AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
23873+ u.add->bindex, u.add->pathname, u.add->perm,
23874+ u.add->path.dentry);
23875+ break;
23876+ case Opt_dirwh:
23877+ AuDbg("dirwh %d\n", opt->dirwh);
23878+ break;
23879+ case Opt_rdcache:
23880+ AuDbg("rdcache %d\n", opt->rdcache);
23881+ break;
23882+ case Opt_rdblk:
23883+ AuDbg("rdblk %u\n", opt->rdblk);
23884+ break;
dece6358
AM
23885+ case Opt_rdblk_def:
23886+ AuDbg("rdblk_def\n");
23887+ break;
1facf9fc 23888+ case Opt_rdhash:
23889+ AuDbg("rdhash %u\n", opt->rdhash);
23890+ break;
dece6358
AM
23891+ case Opt_rdhash_def:
23892+ AuDbg("rdhash_def\n");
23893+ break;
1facf9fc 23894+ case Opt_xino:
23895+ u.xino = &opt->xino;
523b37e3 23896+ AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
1facf9fc 23897+ break;
23898+ case Opt_trunc_xino:
23899+ AuLabel(trunc_xino);
23900+ break;
23901+ case Opt_notrunc_xino:
23902+ AuLabel(notrunc_xino);
23903+ break;
23904+ case Opt_trunc_xino_path:
23905+ case Opt_itrunc_xino:
23906+ u.xino_itrunc = &opt->xino_itrunc;
23907+ AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
23908+ break;
1facf9fc 23909+ case Opt_noxino:
23910+ AuLabel(noxino);
23911+ break;
23912+ case Opt_trunc_xib:
23913+ AuLabel(trunc_xib);
23914+ break;
23915+ case Opt_notrunc_xib:
23916+ AuLabel(notrunc_xib);
23917+ break;
dece6358
AM
23918+ case Opt_shwh:
23919+ AuLabel(shwh);
23920+ break;
23921+ case Opt_noshwh:
23922+ AuLabel(noshwh);
23923+ break;
076b876e
AM
23924+ case Opt_dirperm1:
23925+ AuLabel(dirperm1);
23926+ break;
23927+ case Opt_nodirperm1:
23928+ AuLabel(nodirperm1);
23929+ break;
1facf9fc 23930+ case Opt_plink:
23931+ AuLabel(plink);
23932+ break;
23933+ case Opt_noplink:
23934+ AuLabel(noplink);
23935+ break;
23936+ case Opt_list_plink:
23937+ AuLabel(list_plink);
23938+ break;
23939+ case Opt_udba:
23940+ AuDbg("udba %d, %s\n",
23941+ opt->udba, au_optstr_udba(opt->udba));
23942+ break;
4a4d8108
AM
23943+ case Opt_dio:
23944+ AuLabel(dio);
23945+ break;
23946+ case Opt_nodio:
23947+ AuLabel(nodio);
23948+ break;
1facf9fc 23949+ case Opt_diropq_a:
23950+ AuLabel(diropq_a);
23951+ break;
23952+ case Opt_diropq_w:
23953+ AuLabel(diropq_w);
23954+ break;
23955+ case Opt_warn_perm:
23956+ AuLabel(warn_perm);
23957+ break;
23958+ case Opt_nowarn_perm:
23959+ AuLabel(nowarn_perm);
23960+ break;
1facf9fc 23961+ case Opt_verbose:
23962+ AuLabel(verbose);
23963+ break;
23964+ case Opt_noverbose:
23965+ AuLabel(noverbose);
23966+ break;
23967+ case Opt_sum:
23968+ AuLabel(sum);
23969+ break;
23970+ case Opt_nosum:
23971+ AuLabel(nosum);
23972+ break;
23973+ case Opt_wsum:
23974+ AuLabel(wsum);
23975+ break;
23976+ case Opt_wbr_create:
23977+ u.create = &opt->wbr_create;
23978+ AuDbg("create %d, %s\n", u.create->wbr_create,
23979+ au_optstr_wbr_create(u.create->wbr_create));
23980+ switch (u.create->wbr_create) {
23981+ case AuWbrCreate_MFSV:
23982+ case AuWbrCreate_PMFSV:
23983+ AuDbg("%d sec\n", u.create->mfs_second);
23984+ break;
23985+ case AuWbrCreate_MFSRR:
f2c43d5f 23986+ case AuWbrCreate_TDMFS:
1facf9fc 23987+ AuDbg("%llu watermark\n",
23988+ u.create->mfsrr_watermark);
23989+ break;
23990+ case AuWbrCreate_MFSRRV:
f2c43d5f 23991+ case AuWbrCreate_TDMFSV:
392086de 23992+ case AuWbrCreate_PMFSRRV:
1facf9fc 23993+ AuDbg("%llu watermark, %d sec\n",
23994+ u.create->mfsrr_watermark,
23995+ u.create->mfs_second);
23996+ break;
23997+ }
23998+ break;
23999+ case Opt_wbr_copyup:
24000+ AuDbg("copyup %d, %s\n", opt->wbr_copyup,
24001+ au_optstr_wbr_copyup(opt->wbr_copyup));
24002+ break;
076b876e
AM
24003+ case Opt_fhsm_sec:
24004+ AuDbg("fhsm_sec %u\n", opt->fhsm_second);
24005+ break;
c1595e42
JR
24006+ case Opt_acl:
24007+ AuLabel(acl);
24008+ break;
24009+ case Opt_noacl:
24010+ AuLabel(noacl);
24011+ break;
1facf9fc 24012+ default:
24013+ BUG();
24014+ }
24015+ opt++;
24016+ }
24017+#endif
24018+}
24019+
24020+void au_opts_free(struct au_opts *opts)
24021+{
24022+ struct au_opt *opt;
24023+
24024+ opt = opts->opt;
24025+ while (opt->type != Opt_tail) {
24026+ switch (opt->type) {
24027+ case Opt_add:
24028+ case Opt_append:
24029+ case Opt_prepend:
24030+ path_put(&opt->add.path);
24031+ break;
24032+ case Opt_del:
24033+ case Opt_idel:
24034+ path_put(&opt->del.h_path);
24035+ break;
24036+ case Opt_mod:
24037+ case Opt_imod:
24038+ dput(opt->mod.h_root);
24039+ break;
24040+ case Opt_xino:
24041+ fput(opt->xino.file);
24042+ break;
24043+ }
24044+ opt++;
24045+ }
24046+}
24047+
24048+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
24049+ aufs_bindex_t bindex)
24050+{
24051+ int err;
24052+ struct au_opt_add *add = &opt->add;
24053+ char *p;
24054+
24055+ add->bindex = bindex;
1e00d052 24056+ add->perm = AuBrPerm_RO;
1facf9fc 24057+ add->pathname = opt_str;
24058+ p = strchr(opt_str, '=');
24059+ if (p) {
24060+ *p++ = 0;
24061+ if (*p)
24062+ add->perm = br_perm_val(p);
24063+ }
24064+
24065+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
24066+ if (!err) {
24067+ if (!p) {
24068+ add->perm = AuBrPerm_RO;
24069+ if (au_test_fs_rr(add->path.dentry->d_sb))
24070+ add->perm = AuBrPerm_RR;
24071+ else if (!bindex && !(sb_flags & MS_RDONLY))
24072+ add->perm = AuBrPerm_RW;
24073+ }
24074+ opt->type = Opt_add;
24075+ goto out;
24076+ }
4a4d8108 24077+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 24078+ err = -EINVAL;
24079+
4f0767ce 24080+out:
1facf9fc 24081+ return err;
24082+}
24083+
24084+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
24085+{
24086+ int err;
24087+
24088+ del->pathname = args[0].from;
24089+ AuDbg("del path %s\n", del->pathname);
24090+
24091+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
24092+ if (unlikely(err))
4a4d8108 24093+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 24094+
24095+ return err;
24096+}
24097+
#if 0 /* reserved for future use */
/*
 * Indexed variant of "del": take the branch root and its mount by branch
 * index instead of by pathname.  Returns -EINVAL for an index outside
 * [0, au_sbbot(sb)].
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbbot(sb)) {
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
		err = 0;
	} else {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
24122+
4a4d8108
AM
24123+static int noinline_for_stack
24124+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 24125+{
24126+ int err;
24127+ struct path path;
24128+ char *p;
24129+
24130+ err = -EINVAL;
24131+ mod->path = args[0].from;
24132+ p = strchr(mod->path, '=');
24133+ if (unlikely(!p)) {
4a4d8108 24134+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 24135+ goto out;
24136+ }
24137+
24138+ *p++ = 0;
24139+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
24140+ if (unlikely(err)) {
4a4d8108 24141+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 24142+ goto out;
24143+ }
24144+
24145+ mod->perm = br_perm_val(p);
24146+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
24147+ mod->h_root = dget(path.dentry);
24148+ path_put(&path);
24149+
4f0767ce 24150+out:
1facf9fc 24151+ return err;
24152+}
24153+
#if 0 /* reserved for future use */
/*
 * Indexed variant of "mod": the branch is selected by its index and the
 * permission comes from args[1].  Returns -EINVAL for an index outside
 * [0, au_sbbot(sb)].
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbbot(sb)) {
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
		err = 0;
	} else {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
24180+
24181+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
24182+ substring_t args[])
24183+{
24184+ int err;
24185+ struct file *file;
24186+
24187+ file = au_xino_create(sb, args[0].from, /*silent*/0);
24188+ err = PTR_ERR(file);
24189+ if (IS_ERR(file))
24190+ goto out;
24191+
24192+ err = -EINVAL;
2000de60 24193+ if (unlikely(file->f_path.dentry->d_sb == sb)) {
1facf9fc 24194+ fput(file);
4a4d8108 24195+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 24196+ goto out;
24197+ }
24198+
24199+ err = 0;
24200+ xino->file = file;
24201+ xino->path = args[0].from;
24202+
4f0767ce 24203+out:
1facf9fc 24204+ return err;
24205+}
24206+
4a4d8108
AM
24207+static int noinline_for_stack
24208+au_opts_parse_xino_itrunc_path(struct super_block *sb,
24209+ struct au_opt_xino_itrunc *xino_itrunc,
24210+ substring_t args[])
1facf9fc 24211+{
24212+ int err;
5afbbe0d 24213+ aufs_bindex_t bbot, bindex;
1facf9fc 24214+ struct path path;
24215+ struct dentry *root;
24216+
24217+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
24218+ if (unlikely(err)) {
4a4d8108 24219+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 24220+ goto out;
24221+ }
24222+
24223+ xino_itrunc->bindex = -1;
24224+ root = sb->s_root;
24225+ aufs_read_lock(root, AuLock_FLUSH);
5afbbe0d
AM
24226+ bbot = au_sbbot(sb);
24227+ for (bindex = 0; bindex <= bbot; bindex++) {
1facf9fc 24228+ if (au_h_dptr(root, bindex) == path.dentry) {
24229+ xino_itrunc->bindex = bindex;
24230+ break;
24231+ }
24232+ }
24233+ aufs_read_unlock(root, !AuLock_IR);
24234+ path_put(&path);
24235+
24236+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 24237+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 24238+ err = -EINVAL;
24239+ }
24240+
4f0767ce 24241+out:
1facf9fc 24242+ return err;
24243+}
24244+
24245+/* called without aufs lock */
24246+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
24247+{
24248+ int err, n, token;
24249+ aufs_bindex_t bindex;
24250+ unsigned char skipped;
24251+ struct dentry *root;
24252+ struct au_opt *opt, *opt_tail;
24253+ char *opt_str;
24254+ /* reduce the stack space */
24255+ union {
24256+ struct au_opt_xino_itrunc *xino_itrunc;
24257+ struct au_opt_wbr_create *create;
24258+ } u;
24259+ struct {
24260+ substring_t args[MAX_OPT_ARGS];
24261+ } *a;
24262+
24263+ err = -ENOMEM;
24264+ a = kmalloc(sizeof(*a), GFP_NOFS);
24265+ if (unlikely(!a))
24266+ goto out;
24267+
24268+ root = sb->s_root;
24269+ err = 0;
24270+ bindex = 0;
24271+ opt = opts->opt;
24272+ opt_tail = opt + opts->max_opt - 1;
24273+ opt->type = Opt_tail;
24274+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
24275+ err = -EINVAL;
24276+ skipped = 0;
24277+ token = match_token(opt_str, options, a->args);
24278+ switch (token) {
24279+ case Opt_br:
24280+ err = 0;
24281+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
24282+ && *opt_str) {
24283+ err = opt_add(opt, opt_str, opts->sb_flags,
24284+ bindex++);
24285+ if (unlikely(!err && ++opt > opt_tail)) {
24286+ err = -E2BIG;
24287+ break;
24288+ }
24289+ opt->type = Opt_tail;
24290+ skipped = 1;
24291+ }
24292+ break;
24293+ case Opt_add:
24294+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 24295+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24296+ break;
24297+ }
24298+ bindex = n;
24299+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
24300+ bindex);
24301+ if (!err)
24302+ opt->type = token;
24303+ break;
24304+ case Opt_append:
24305+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
24306+ /*dummy bindex*/1);
24307+ if (!err)
24308+ opt->type = token;
24309+ break;
24310+ case Opt_prepend:
24311+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
24312+ /*bindex*/0);
24313+ if (!err)
24314+ opt->type = token;
24315+ break;
24316+ case Opt_del:
24317+ err = au_opts_parse_del(&opt->del, a->args);
24318+ if (!err)
24319+ opt->type = token;
24320+ break;
24321+#if 0 /* reserved for future use */
24322+ case Opt_idel:
24323+ del->pathname = "(indexed)";
24324+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 24325+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24326+ break;
24327+ }
24328+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
24329+ if (!err)
24330+ opt->type = token;
24331+ break;
24332+#endif
24333+ case Opt_mod:
24334+ err = au_opts_parse_mod(&opt->mod, a->args);
24335+ if (!err)
24336+ opt->type = token;
24337+ break;
24338+#ifdef IMOD /* reserved for future use */
24339+ case Opt_imod:
24340+ u.mod->path = "(indexed)";
24341+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 24342+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24343+ break;
24344+ }
24345+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
24346+ if (!err)
24347+ opt->type = token;
24348+ break;
24349+#endif
24350+ case Opt_xino:
24351+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
24352+ if (!err)
24353+ opt->type = token;
24354+ break;
24355+
24356+ case Opt_trunc_xino_path:
24357+ err = au_opts_parse_xino_itrunc_path
24358+ (sb, &opt->xino_itrunc, a->args);
24359+ if (!err)
24360+ opt->type = token;
24361+ break;
24362+
24363+ case Opt_itrunc_xino:
24364+ u.xino_itrunc = &opt->xino_itrunc;
24365+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 24366+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24367+ break;
24368+ }
24369+ u.xino_itrunc->bindex = n;
24370+ aufs_read_lock(root, AuLock_FLUSH);
5afbbe0d 24371+ if (n < 0 || au_sbbot(sb) < n) {
4a4d8108 24372+ pr_err("out of bounds, %d\n", n);
1facf9fc 24373+ aufs_read_unlock(root, !AuLock_IR);
24374+ break;
24375+ }
24376+ aufs_read_unlock(root, !AuLock_IR);
24377+ err = 0;
24378+ opt->type = token;
24379+ break;
24380+
24381+ case Opt_dirwh:
24382+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
24383+ break;
24384+ err = 0;
24385+ opt->type = token;
24386+ break;
24387+
24388+ case Opt_rdcache:
027c5e7a
AM
24389+ if (unlikely(match_int(&a->args[0], &n))) {
24390+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24391+ break;
027c5e7a
AM
24392+ }
24393+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
24394+ pr_err("rdcache must be smaller than %d\n",
24395+ AUFS_RDCACHE_MAX);
24396+ break;
24397+ }
24398+ opt->rdcache = n;
1facf9fc 24399+ err = 0;
24400+ opt->type = token;
24401+ break;
24402+ case Opt_rdblk:
24403+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 24404+ || n < 0
1facf9fc 24405+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 24406+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24407+ break;
24408+ }
1308ab2a 24409+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
24410+ pr_err("rdblk must be larger than %d\n",
24411+ NAME_MAX);
1facf9fc 24412+ break;
24413+ }
24414+ opt->rdblk = n;
24415+ err = 0;
24416+ opt->type = token;
24417+ break;
24418+ case Opt_rdhash:
24419+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 24420+ || n < 0
1facf9fc 24421+ || n * sizeof(struct hlist_head)
24422+ > KMALLOC_MAX_SIZE)) {
4a4d8108 24423+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 24424+ break;
24425+ }
24426+ opt->rdhash = n;
24427+ err = 0;
24428+ opt->type = token;
24429+ break;
24430+
24431+ case Opt_trunc_xino:
24432+ case Opt_notrunc_xino:
24433+ case Opt_noxino:
24434+ case Opt_trunc_xib:
24435+ case Opt_notrunc_xib:
dece6358
AM
24436+ case Opt_shwh:
24437+ case Opt_noshwh:
076b876e
AM
24438+ case Opt_dirperm1:
24439+ case Opt_nodirperm1:
1facf9fc 24440+ case Opt_plink:
24441+ case Opt_noplink:
24442+ case Opt_list_plink:
4a4d8108
AM
24443+ case Opt_dio:
24444+ case Opt_nodio:
1facf9fc 24445+ case Opt_diropq_a:
24446+ case Opt_diropq_w:
24447+ case Opt_warn_perm:
24448+ case Opt_nowarn_perm:
1facf9fc 24449+ case Opt_verbose:
24450+ case Opt_noverbose:
24451+ case Opt_sum:
24452+ case Opt_nosum:
24453+ case Opt_wsum:
dece6358
AM
24454+ case Opt_rdblk_def:
24455+ case Opt_rdhash_def:
c1595e42
JR
24456+ case Opt_acl:
24457+ case Opt_noacl:
1facf9fc 24458+ err = 0;
24459+ opt->type = token;
24460+ break;
24461+
24462+ case Opt_udba:
24463+ opt->udba = udba_val(a->args[0].from);
24464+ if (opt->udba >= 0) {
24465+ err = 0;
24466+ opt->type = token;
24467+ } else
4a4d8108 24468+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24469+ break;
24470+
24471+ case Opt_wbr_create:
24472+ u.create = &opt->wbr_create;
24473+ u.create->wbr_create
24474+ = au_wbr_create_val(a->args[0].from, u.create);
24475+ if (u.create->wbr_create >= 0) {
24476+ err = 0;
24477+ opt->type = token;
24478+ } else
4a4d8108 24479+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24480+ break;
24481+ case Opt_wbr_copyup:
24482+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
24483+ if (opt->wbr_copyup >= 0) {
24484+ err = 0;
24485+ opt->type = token;
24486+ } else
4a4d8108 24487+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24488+ break;
24489+
076b876e
AM
24490+ case Opt_fhsm_sec:
24491+ if (unlikely(match_int(&a->args[0], &n)
24492+ || n < 0)) {
24493+ pr_err("bad integer in %s\n", opt_str);
24494+ break;
24495+ }
24496+ if (sysaufs_brs) {
24497+ opt->fhsm_second = n;
24498+ opt->type = token;
24499+ } else
24500+ pr_warn("ignored %s\n", opt_str);
24501+ err = 0;
24502+ break;
24503+
1facf9fc 24504+ case Opt_ignore:
0c3ec466 24505+ pr_warn("ignored %s\n", opt_str);
1facf9fc 24506+ /*FALLTHROUGH*/
24507+ case Opt_ignore_silent:
24508+ skipped = 1;
24509+ err = 0;
24510+ break;
24511+ case Opt_err:
4a4d8108 24512+ pr_err("unknown option %s\n", opt_str);
1facf9fc 24513+ break;
24514+ }
24515+
24516+ if (!err && !skipped) {
24517+ if (unlikely(++opt > opt_tail)) {
24518+ err = -E2BIG;
24519+ opt--;
24520+ opt->type = Opt_tail;
24521+ break;
24522+ }
24523+ opt->type = Opt_tail;
24524+ }
24525+ }
24526+
f0c0a007 24527+ au_delayed_kfree(a);
1facf9fc 24528+ dump_opts(opts);
24529+ if (unlikely(err))
24530+ au_opts_free(opts);
24531+
4f0767ce 24532+out:
1facf9fc 24533+ return err;
24534+}
24535+
24536+static int au_opt_wbr_create(struct super_block *sb,
24537+ struct au_opt_wbr_create *create)
24538+{
24539+ int err;
24540+ struct au_sbinfo *sbinfo;
24541+
dece6358
AM
24542+ SiMustWriteLock(sb);
24543+
1facf9fc 24544+ err = 1; /* handled */
24545+ sbinfo = au_sbi(sb);
24546+ if (sbinfo->si_wbr_create_ops->fin) {
24547+ err = sbinfo->si_wbr_create_ops->fin(sb);
24548+ if (!err)
24549+ err = 1;
24550+ }
24551+
24552+ sbinfo->si_wbr_create = create->wbr_create;
24553+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
24554+ switch (create->wbr_create) {
24555+ case AuWbrCreate_MFSRRV:
24556+ case AuWbrCreate_MFSRR:
f2c43d5f
AM
24557+ case AuWbrCreate_TDMFS:
24558+ case AuWbrCreate_TDMFSV:
392086de
AM
24559+ case AuWbrCreate_PMFSRR:
24560+ case AuWbrCreate_PMFSRRV:
1facf9fc 24561+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
24562+ /*FALLTHROUGH*/
24563+ case AuWbrCreate_MFS:
24564+ case AuWbrCreate_MFSV:
24565+ case AuWbrCreate_PMFS:
24566+ case AuWbrCreate_PMFSV:
e49829fe
JR
24567+ sbinfo->si_wbr_mfs.mfs_expire
24568+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 24569+ break;
24570+ }
24571+
24572+ if (sbinfo->si_wbr_create_ops->init)
24573+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
24574+
24575+ return err;
24576+}
24577+
24578+/*
24579+ * returns,
24580+ * plus: processed without an error
24581+ * zero: unprocessed
24582+ */
24583+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
24584+ struct au_opts *opts)
24585+{
24586+ int err;
24587+ struct au_sbinfo *sbinfo;
24588+
dece6358
AM
24589+ SiMustWriteLock(sb);
24590+
1facf9fc 24591+ err = 1; /* handled */
24592+ sbinfo = au_sbi(sb);
24593+ switch (opt->type) {
24594+ case Opt_udba:
24595+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
24596+ sbinfo->si_mntflags |= opt->udba;
24597+ opts->given_udba |= opt->udba;
24598+ break;
24599+
24600+ case Opt_plink:
24601+ au_opt_set(sbinfo->si_mntflags, PLINK);
24602+ break;
24603+ case Opt_noplink:
24604+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 24605+ au_plink_put(sb, /*verbose*/1);
1facf9fc 24606+ au_opt_clr(sbinfo->si_mntflags, PLINK);
24607+ break;
24608+ case Opt_list_plink:
24609+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
24610+ au_plink_list(sb);
24611+ break;
24612+
4a4d8108
AM
24613+ case Opt_dio:
24614+ au_opt_set(sbinfo->si_mntflags, DIO);
24615+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24616+ break;
24617+ case Opt_nodio:
24618+ au_opt_clr(sbinfo->si_mntflags, DIO);
24619+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24620+ break;
24621+
076b876e
AM
24622+ case Opt_fhsm_sec:
24623+ au_fhsm_set(sbinfo, opt->fhsm_second);
24624+ break;
24625+
1facf9fc 24626+ case Opt_diropq_a:
24627+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24628+ break;
24629+ case Opt_diropq_w:
24630+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24631+ break;
24632+
24633+ case Opt_warn_perm:
24634+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
24635+ break;
24636+ case Opt_nowarn_perm:
24637+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
24638+ break;
24639+
1facf9fc 24640+ case Opt_verbose:
24641+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
24642+ break;
24643+ case Opt_noverbose:
24644+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
24645+ break;
24646+
24647+ case Opt_sum:
24648+ au_opt_set(sbinfo->si_mntflags, SUM);
24649+ break;
24650+ case Opt_wsum:
24651+ au_opt_clr(sbinfo->si_mntflags, SUM);
24652+ au_opt_set(sbinfo->si_mntflags, SUM_W);
24653+ case Opt_nosum:
24654+ au_opt_clr(sbinfo->si_mntflags, SUM);
24655+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
24656+ break;
24657+
24658+ case Opt_wbr_create:
24659+ err = au_opt_wbr_create(sb, &opt->wbr_create);
24660+ break;
24661+ case Opt_wbr_copyup:
24662+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
24663+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
24664+ break;
24665+
24666+ case Opt_dirwh:
24667+ sbinfo->si_dirwh = opt->dirwh;
24668+ break;
24669+
24670+ case Opt_rdcache:
e49829fe
JR
24671+ sbinfo->si_rdcache
24672+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 24673+ break;
24674+ case Opt_rdblk:
24675+ sbinfo->si_rdblk = opt->rdblk;
24676+ break;
dece6358
AM
24677+ case Opt_rdblk_def:
24678+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
24679+ break;
1facf9fc 24680+ case Opt_rdhash:
24681+ sbinfo->si_rdhash = opt->rdhash;
24682+ break;
dece6358
AM
24683+ case Opt_rdhash_def:
24684+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
24685+ break;
24686+
24687+ case Opt_shwh:
24688+ au_opt_set(sbinfo->si_mntflags, SHWH);
24689+ break;
24690+ case Opt_noshwh:
24691+ au_opt_clr(sbinfo->si_mntflags, SHWH);
24692+ break;
1facf9fc 24693+
076b876e
AM
24694+ case Opt_dirperm1:
24695+ au_opt_set(sbinfo->si_mntflags, DIRPERM1);
24696+ break;
24697+ case Opt_nodirperm1:
24698+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
24699+ break;
24700+
1facf9fc 24701+ case Opt_trunc_xino:
24702+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
24703+ break;
24704+ case Opt_notrunc_xino:
24705+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
24706+ break;
24707+
24708+ case Opt_trunc_xino_path:
24709+ case Opt_itrunc_xino:
24710+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
24711+ if (!err)
24712+ err = 1;
24713+ break;
24714+
24715+ case Opt_trunc_xib:
24716+ au_fset_opts(opts->flags, TRUNC_XIB);
24717+ break;
24718+ case Opt_notrunc_xib:
24719+ au_fclr_opts(opts->flags, TRUNC_XIB);
24720+ break;
24721+
c1595e42
JR
24722+ case Opt_acl:
24723+ sb->s_flags |= MS_POSIXACL;
24724+ break;
24725+ case Opt_noacl:
24726+ sb->s_flags &= ~MS_POSIXACL;
24727+ break;
24728+
1facf9fc 24729+ default:
24730+ err = 0;
24731+ break;
24732+ }
24733+
24734+ return err;
24735+}
24736+
24737+/*
24738+ * returns tri-state.
24739+ * plus: processed without an error
24740+ * zero: unprocessed
24741+ * minus: error
24742+ */
24743+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
24744+ struct au_opts *opts)
24745+{
24746+ int err, do_refresh;
24747+
24748+ err = 0;
24749+ switch (opt->type) {
24750+ case Opt_append:
5afbbe0d 24751+ opt->add.bindex = au_sbbot(sb) + 1;
1facf9fc 24752+ if (opt->add.bindex < 0)
24753+ opt->add.bindex = 0;
24754+ goto add;
24755+ case Opt_prepend:
24756+ opt->add.bindex = 0;
f6b6e03d 24757+ add: /* indented label */
1facf9fc 24758+ case Opt_add:
24759+ err = au_br_add(sb, &opt->add,
24760+ au_ftest_opts(opts->flags, REMOUNT));
24761+ if (!err) {
24762+ err = 1;
027c5e7a 24763+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24764+ }
24765+ break;
24766+
24767+ case Opt_del:
24768+ case Opt_idel:
24769+ err = au_br_del(sb, &opt->del,
24770+ au_ftest_opts(opts->flags, REMOUNT));
24771+ if (!err) {
24772+ err = 1;
24773+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 24774+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24775+ }
24776+ break;
24777+
24778+ case Opt_mod:
24779+ case Opt_imod:
24780+ err = au_br_mod(sb, &opt->mod,
24781+ au_ftest_opts(opts->flags, REMOUNT),
24782+ &do_refresh);
24783+ if (!err) {
24784+ err = 1;
027c5e7a
AM
24785+ if (do_refresh)
24786+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24787+ }
24788+ break;
24789+ }
24790+
24791+ return err;
24792+}
24793+
24794+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
24795+ struct au_opt_xino **opt_xino,
24796+ struct au_opts *opts)
24797+{
24798+ int err;
5afbbe0d 24799+ aufs_bindex_t bbot, bindex;
1facf9fc 24800+ struct dentry *root, *parent, *h_root;
24801+
24802+ err = 0;
24803+ switch (opt->type) {
24804+ case Opt_xino:
24805+ err = au_xino_set(sb, &opt->xino,
24806+ !!au_ftest_opts(opts->flags, REMOUNT));
24807+ if (unlikely(err))
24808+ break;
24809+
24810+ *opt_xino = &opt->xino;
24811+ au_xino_brid_set(sb, -1);
24812+
24813+ /* safe d_parent access */
2000de60 24814+ parent = opt->xino.file->f_path.dentry->d_parent;
1facf9fc 24815+ root = sb->s_root;
5afbbe0d
AM
24816+ bbot = au_sbbot(sb);
24817+ for (bindex = 0; bindex <= bbot; bindex++) {
1facf9fc 24818+ h_root = au_h_dptr(root, bindex);
24819+ if (h_root == parent) {
24820+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
24821+ break;
24822+ }
24823+ }
24824+ break;
24825+
24826+ case Opt_noxino:
24827+ au_xino_clr(sb);
24828+ au_xino_brid_set(sb, -1);
24829+ *opt_xino = (void *)-1;
24830+ break;
24831+ }
24832+
24833+ return err;
24834+}
24835+
24836+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24837+ unsigned int pending)
24838+{
076b876e 24839+ int err, fhsm;
5afbbe0d 24840+ aufs_bindex_t bindex, bbot;
79b8bda9 24841+ unsigned char do_plink, skip, do_free, can_no_dreval;
1facf9fc 24842+ struct au_branch *br;
24843+ struct au_wbr *wbr;
79b8bda9 24844+ struct dentry *root, *dentry;
1facf9fc 24845+ struct inode *dir, *h_dir;
24846+ struct au_sbinfo *sbinfo;
24847+ struct au_hinode *hdir;
24848+
dece6358
AM
24849+ SiMustAnyLock(sb);
24850+
1facf9fc 24851+ sbinfo = au_sbi(sb);
24852+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
24853+
dece6358
AM
24854+ if (!(sb_flags & MS_RDONLY)) {
24855+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
0c3ec466 24856+ pr_warn("first branch should be rw\n");
dece6358 24857+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
febd17d6 24858+ pr_warn_once("shwh should be used with ro\n");
dece6358 24859+ }
1facf9fc 24860+
4a4d8108 24861+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 24862+ && !au_opt_test(sbinfo->si_mntflags, XINO))
febd17d6 24863+ pr_warn_once("udba=*notify requires xino\n");
1facf9fc 24864+
076b876e 24865+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
febd17d6
JR
24866+ pr_warn_once("dirperm1 breaks the protection"
24867+ " by the permission bits on the lower branch\n");
076b876e 24868+
1facf9fc 24869+ err = 0;
076b876e 24870+ fhsm = 0;
1facf9fc 24871+ root = sb->s_root;
5527c038 24872+ dir = d_inode(root);
1facf9fc 24873+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
79b8bda9
AM
24874+ can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
24875+ UDBA_NONE);
5afbbe0d
AM
24876+ bbot = au_sbbot(sb);
24877+ for (bindex = 0; !err && bindex <= bbot; bindex++) {
1facf9fc 24878+ skip = 0;
24879+ h_dir = au_h_iptr(dir, bindex);
24880+ br = au_sbr(sb, bindex);
1facf9fc 24881+
c1595e42
JR
24882+ if ((br->br_perm & AuBrAttr_ICEX)
24883+ && !h_dir->i_op->listxattr)
24884+ br->br_perm &= ~AuBrAttr_ICEX;
24885+#if 0
24886+ if ((br->br_perm & AuBrAttr_ICEX_SEC)
24887+ && (au_br_sb(br)->s_flags & MS_NOSEC))
24888+ br->br_perm &= ~AuBrAttr_ICEX_SEC;
24889+#endif
24890+
24891+ do_free = 0;
1facf9fc 24892+ wbr = br->br_wbr;
24893+ if (wbr)
24894+ wbr_wh_read_lock(wbr);
24895+
1e00d052 24896+ if (!au_br_writable(br->br_perm)) {
1facf9fc 24897+ do_free = !!wbr;
24898+ skip = (!wbr
24899+ || (!wbr->wbr_whbase
24900+ && !wbr->wbr_plink
24901+ && !wbr->wbr_orph));
1e00d052 24902+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 24903+ /* skip = (!br->br_whbase && !br->br_orph); */
24904+ skip = (!wbr || !wbr->wbr_whbase);
24905+ if (skip && wbr) {
24906+ if (do_plink)
24907+ skip = !!wbr->wbr_plink;
24908+ else
24909+ skip = !wbr->wbr_plink;
24910+ }
1e00d052 24911+ } else {
1facf9fc 24912+ /* skip = (br->br_whbase && br->br_ohph); */
24913+ skip = (wbr && wbr->wbr_whbase);
24914+ if (skip) {
24915+ if (do_plink)
24916+ skip = !!wbr->wbr_plink;
24917+ else
24918+ skip = !wbr->wbr_plink;
24919+ }
1facf9fc 24920+ }
24921+ if (wbr)
24922+ wbr_wh_read_unlock(wbr);
24923+
79b8bda9
AM
24924+ if (can_no_dreval) {
24925+ dentry = br->br_path.dentry;
24926+ spin_lock(&dentry->d_lock);
24927+ if (dentry->d_flags &
24928+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
24929+ can_no_dreval = 0;
24930+ spin_unlock(&dentry->d_lock);
24931+ }
24932+
076b876e
AM
24933+ if (au_br_fhsm(br->br_perm)) {
24934+ fhsm++;
24935+ AuDebugOn(!br->br_fhsm);
24936+ }
24937+
1facf9fc 24938+ if (skip)
24939+ continue;
24940+
24941+ hdir = au_hi(dir, bindex);
5afbbe0d 24942+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 24943+ if (wbr)
24944+ wbr_wh_write_lock(wbr);
86dc4139 24945+ err = au_wh_init(br, sb);
1facf9fc 24946+ if (wbr)
24947+ wbr_wh_write_unlock(wbr);
5afbbe0d 24948+ au_hn_inode_unlock(hdir);
1facf9fc 24949+
24950+ if (!err && do_free) {
f0c0a007
AM
24951+ if (wbr)
24952+ au_delayed_kfree(wbr);
1facf9fc 24953+ br->br_wbr = NULL;
24954+ }
24955+ }
24956+
79b8bda9
AM
24957+ if (can_no_dreval)
24958+ au_fset_si(sbinfo, NO_DREVAL);
24959+ else
24960+ au_fclr_si(sbinfo, NO_DREVAL);
24961+
c1595e42 24962+ if (fhsm >= 2) {
076b876e 24963+ au_fset_si(sbinfo, FHSM);
5afbbe0d 24964+ for (bindex = bbot; bindex >= 0; bindex--) {
c1595e42
JR
24965+ br = au_sbr(sb, bindex);
24966+ if (au_br_fhsm(br->br_perm)) {
24967+ au_fhsm_set_bottom(sb, bindex);
24968+ break;
24969+ }
24970+ }
24971+ } else {
076b876e 24972+ au_fclr_si(sbinfo, FHSM);
c1595e42
JR
24973+ au_fhsm_set_bottom(sb, -1);
24974+ }
076b876e 24975+
1facf9fc 24976+ return err;
24977+}
24978+
24979+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
24980+{
24981+ int err;
24982+ unsigned int tmp;
5afbbe0d 24983+ aufs_bindex_t bindex, bbot;
1facf9fc 24984+ struct au_opt *opt;
24985+ struct au_opt_xino *opt_xino, xino;
24986+ struct au_sbinfo *sbinfo;
027c5e7a 24987+ struct au_branch *br;
076b876e 24988+ struct inode *dir;
1facf9fc 24989+
dece6358
AM
24990+ SiMustWriteLock(sb);
24991+
1facf9fc 24992+ err = 0;
24993+ opt_xino = NULL;
24994+ opt = opts->opt;
24995+ while (err >= 0 && opt->type != Opt_tail)
24996+ err = au_opt_simple(sb, opt++, opts);
24997+ if (err > 0)
24998+ err = 0;
24999+ else if (unlikely(err < 0))
25000+ goto out;
25001+
25002+ /* disable xino and udba temporary */
25003+ sbinfo = au_sbi(sb);
25004+ tmp = sbinfo->si_mntflags;
25005+ au_opt_clr(sbinfo->si_mntflags, XINO);
25006+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
25007+
25008+ opt = opts->opt;
25009+ while (err >= 0 && opt->type != Opt_tail)
25010+ err = au_opt_br(sb, opt++, opts);
25011+ if (err > 0)
25012+ err = 0;
25013+ else if (unlikely(err < 0))
25014+ goto out;
25015+
5afbbe0d
AM
25016+ bbot = au_sbbot(sb);
25017+ if (unlikely(bbot < 0)) {
1facf9fc 25018+ err = -EINVAL;
4a4d8108 25019+ pr_err("no branches\n");
1facf9fc 25020+ goto out;
25021+ }
25022+
25023+ if (au_opt_test(tmp, XINO))
25024+ au_opt_set(sbinfo->si_mntflags, XINO);
25025+ opt = opts->opt;
25026+ while (!err && opt->type != Opt_tail)
25027+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
25028+ if (unlikely(err))
25029+ goto out;
25030+
25031+ err = au_opts_verify(sb, sb->s_flags, tmp);
25032+ if (unlikely(err))
25033+ goto out;
25034+
25035+ /* restore xino */
25036+ if (au_opt_test(tmp, XINO) && !opt_xino) {
25037+ xino.file = au_xino_def(sb);
25038+ err = PTR_ERR(xino.file);
25039+ if (IS_ERR(xino.file))
25040+ goto out;
25041+
25042+ err = au_xino_set(sb, &xino, /*remount*/0);
25043+ fput(xino.file);
25044+ if (unlikely(err))
25045+ goto out;
25046+ }
25047+
25048+ /* restore udba */
027c5e7a 25049+ tmp &= AuOptMask_UDBA;
1facf9fc 25050+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a 25051+ sbinfo->si_mntflags |= tmp;
5afbbe0d
AM
25052+ bbot = au_sbbot(sb);
25053+ for (bindex = 0; bindex <= bbot; bindex++) {
027c5e7a
AM
25054+ br = au_sbr(sb, bindex);
25055+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
25056+ if (unlikely(err))
25057+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
25058+ bindex, err);
25059+ /* go on even if err */
25060+ }
4a4d8108 25061+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
5527c038 25062+ dir = d_inode(sb->s_root);
4a4d8108 25063+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 25064+ }
25065+
4f0767ce 25066+out:
1facf9fc 25067+ return err;
25068+}
25069+
25070+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
25071+{
25072+ int err, rerr;
79b8bda9 25073+ unsigned char no_dreval;
1facf9fc 25074+ struct inode *dir;
25075+ struct au_opt_xino *opt_xino;
25076+ struct au_opt *opt;
25077+ struct au_sbinfo *sbinfo;
25078+
dece6358
AM
25079+ SiMustWriteLock(sb);
25080+
79b8bda9 25081+ err = 0;
5527c038 25082+ dir = d_inode(sb->s_root);
1facf9fc 25083+ sbinfo = au_sbi(sb);
1facf9fc 25084+ opt_xino = NULL;
25085+ opt = opts->opt;
25086+ while (err >= 0 && opt->type != Opt_tail) {
25087+ err = au_opt_simple(sb, opt, opts);
25088+ if (!err)
25089+ err = au_opt_br(sb, opt, opts);
25090+ if (!err)
25091+ err = au_opt_xino(sb, opt, &opt_xino, opts);
25092+ opt++;
25093+ }
25094+ if (err > 0)
25095+ err = 0;
25096+ AuTraceErr(err);
25097+ /* go on even err */
25098+
79b8bda9 25099+ no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
1facf9fc 25100+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
25101+ if (unlikely(rerr && !err))
25102+ err = rerr;
25103+
79b8bda9 25104+ if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
b95c5147 25105+ au_fset_opts(opts->flags, REFRESH_IDOP);
79b8bda9 25106+
1facf9fc 25107+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
25108+ rerr = au_xib_trunc(sb);
25109+ if (unlikely(rerr && !err))
25110+ err = rerr;
25111+ }
25112+
25113+ /* will be handled by the caller */
027c5e7a 25114+ if (!au_ftest_opts(opts->flags, REFRESH)
79b8bda9
AM
25115+ && (opts->given_udba
25116+ || au_opt_test(sbinfo->si_mntflags, XINO)
b95c5147 25117+ || au_ftest_opts(opts->flags, REFRESH_IDOP)
79b8bda9 25118+ ))
027c5e7a 25119+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 25120+
25121+ AuDbg("status 0x%x\n", opts->flags);
25122+ return err;
25123+}
25124+
25125+/* ---------------------------------------------------------------------- */
25126+
25127+unsigned int au_opt_udba(struct super_block *sb)
25128+{
25129+ return au_mntflags(sb) & AuOptMask_UDBA;
25130+}
7f207e10
AM
25131diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
25132--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
25133+++ linux/fs/aufs/opts.h 2016-12-17 12:28:17.598545045 +0100
25134@@ -0,0 +1,213 @@
1facf9fc 25135+/*
8cdd5066 25136+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 25137+ *
25138+ * This program, aufs is free software; you can redistribute it and/or modify
25139+ * it under the terms of the GNU General Public License as published by
25140+ * the Free Software Foundation; either version 2 of the License, or
25141+ * (at your option) any later version.
dece6358
AM
25142+ *
25143+ * This program is distributed in the hope that it will be useful,
25144+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25145+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25146+ * GNU General Public License for more details.
25147+ *
25148+ * You should have received a copy of the GNU General Public License
523b37e3 25149+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 25150+ */
25151+
25152+/*
25153+ * mount options/flags
25154+ */
25155+
25156+#ifndef __AUFS_OPTS_H__
25157+#define __AUFS_OPTS_H__
25158+
25159+#ifdef __KERNEL__
25160+
dece6358 25161+#include <linux/path.h>
1facf9fc 25162+
dece6358
AM
25163+struct file;
25164+struct super_block;
25165+
1facf9fc 25166+/* ---------------------------------------------------------------------- */
25167+
25168+/* mount flags */
25169+#define AuOpt_XINO 1 /* external inode number bitmap
25170+ and translation table */
25171+#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
25172+#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
25173+#define AuOpt_UDBA_REVAL (1 << 3)
4a4d8108 25174+#define AuOpt_UDBA_HNOTIFY (1 << 4)
dece6358
AM
25175+#define AuOpt_SHWH (1 << 5) /* show whiteout */
25176+#define AuOpt_PLINK (1 << 6) /* pseudo-link */
076b876e
AM
25177+#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm
25178+ bits */
dece6358
AM
25179+#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */
25180+#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */
25181+#define AuOpt_SUM_W (1 << 11) /* unimplemented */
25182+#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */
25183+#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */
4a4d8108 25184+#define AuOpt_DIO (1 << 14) /* direct io */
1facf9fc 25185+
4a4d8108
AM
25186+#ifndef CONFIG_AUFS_HNOTIFY
25187+#undef AuOpt_UDBA_HNOTIFY
25188+#define AuOpt_UDBA_HNOTIFY 0
1facf9fc 25189+#endif
dece6358
AM
25190+#ifndef CONFIG_AUFS_SHWH
25191+#undef AuOpt_SHWH
25192+#define AuOpt_SHWH 0
25193+#endif
1facf9fc 25194+
25195+#define AuOpt_Def (AuOpt_XINO \
25196+ | AuOpt_UDBA_REVAL \
25197+ | AuOpt_PLINK \
25198+ /* | AuOpt_DIRPERM1 */ \
25199+ | AuOpt_WARN_PERM)
25200+#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
25201+ | AuOpt_UDBA_REVAL \
4a4d8108 25202+ | AuOpt_UDBA_HNOTIFY)
1facf9fc 25203+
25204+#define au_opt_test(flags, name) (flags & AuOpt_##name)
25205+#define au_opt_set(flags, name) do { \
25206+ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
25207+ ((flags) |= AuOpt_##name); \
25208+} while (0)
25209+#define au_opt_set_udba(flags, name) do { \
25210+ (flags) &= ~AuOptMask_UDBA; \
25211+ ((flags) |= AuOpt_##name); \
25212+} while (0)
7f207e10
AM
25213+#define au_opt_clr(flags, name) do { \
25214+ ((flags) &= ~AuOpt_##name); \
25215+} while (0)
1facf9fc 25216+
e49829fe
JR
25217+static inline unsigned int au_opts_plink(unsigned int mntflags)
25218+{
25219+#ifdef CONFIG_PROC_FS
25220+ return mntflags;
25221+#else
25222+ return mntflags & ~AuOpt_PLINK;
25223+#endif
25224+}
25225+
1facf9fc 25226+/* ---------------------------------------------------------------------- */
25227+
25228+/* policies to select one among multiple writable branches */
25229+enum {
25230+ AuWbrCreate_TDP, /* top down parent */
25231+ AuWbrCreate_RR, /* round robin */
25232+ AuWbrCreate_MFS, /* most free space */
25233+ AuWbrCreate_MFSV, /* mfs with seconds */
25234+ AuWbrCreate_MFSRR, /* mfs then rr */
25235+ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */
f2c43d5f
AM
25236+ AuWbrCreate_TDMFS, /* top down regardless parent and mfs */
25237+ AuWbrCreate_TDMFSV, /* top down regardless parent and mfs */
1facf9fc 25238+ AuWbrCreate_PMFS, /* parent and mfs */
25239+ AuWbrCreate_PMFSV, /* parent and mfs with seconds */
392086de
AM
25240+ AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */
25241+ AuWbrCreate_PMFSRRV, /* plus seconds */
1facf9fc 25242+
25243+ AuWbrCreate_Def = AuWbrCreate_TDP
25244+};
25245+
25246+enum {
25247+ AuWbrCopyup_TDP, /* top down parent */
25248+ AuWbrCopyup_BUP, /* bottom up parent */
25249+ AuWbrCopyup_BU, /* bottom up */
25250+
25251+ AuWbrCopyup_Def = AuWbrCopyup_TDP
25252+};
25253+
25254+/* ---------------------------------------------------------------------- */
25255+
25256+struct au_opt_add {
25257+ aufs_bindex_t bindex;
25258+ char *pathname;
25259+ int perm;
25260+ struct path path;
25261+};
25262+
25263+struct au_opt_del {
25264+ char *pathname;
25265+ struct path h_path;
25266+};
25267+
25268+struct au_opt_mod {
25269+ char *path;
25270+ int perm;
25271+ struct dentry *h_root;
25272+};
25273+
25274+struct au_opt_xino {
25275+ char *path;
25276+ struct file *file;
25277+};
25278+
25279+struct au_opt_xino_itrunc {
25280+ aufs_bindex_t bindex;
25281+};
25282+
25283+struct au_opt_wbr_create {
25284+ int wbr_create;
25285+ int mfs_second;
25286+ unsigned long long mfsrr_watermark;
25287+};
25288+
25289+struct au_opt {
25290+ int type;
25291+ union {
25292+ struct au_opt_xino xino;
25293+ struct au_opt_xino_itrunc xino_itrunc;
25294+ struct au_opt_add add;
25295+ struct au_opt_del del;
25296+ struct au_opt_mod mod;
25297+ int dirwh;
25298+ int rdcache;
25299+ unsigned int rdblk;
25300+ unsigned int rdhash;
25301+ int udba;
25302+ struct au_opt_wbr_create wbr_create;
25303+ int wbr_copyup;
076b876e 25304+ unsigned int fhsm_second;
1facf9fc 25305+ };
25306+};
25307+
25308+/* opts flags */
25309+#define AuOpts_REMOUNT 1
027c5e7a
AM
25310+#define AuOpts_REFRESH (1 << 1)
25311+#define AuOpts_TRUNC_XIB (1 << 2)
25312+#define AuOpts_REFRESH_DYAOP (1 << 3)
b95c5147 25313+#define AuOpts_REFRESH_IDOP (1 << 4)
1facf9fc 25314+#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
7f207e10
AM
25315+#define au_fset_opts(flags, name) \
25316+ do { (flags) |= AuOpts_##name; } while (0)
25317+#define au_fclr_opts(flags, name) \
25318+ do { (flags) &= ~AuOpts_##name; } while (0)
1facf9fc 25319+
25320+struct au_opts {
25321+ struct au_opt *opt;
25322+ int max_opt;
25323+
25324+ unsigned int given_udba;
25325+ unsigned int flags;
25326+ unsigned long sb_flags;
25327+};
25328+
25329+/* ---------------------------------------------------------------------- */
25330+
7e9cd9fe 25331+/* opts.c */
076b876e 25332+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
1facf9fc 25333+const char *au_optstr_udba(int udba);
25334+const char *au_optstr_wbr_copyup(int wbr_copyup);
25335+const char *au_optstr_wbr_create(int wbr_create);
25336+
25337+void au_opts_free(struct au_opts *opts);
25338+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
25339+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
25340+ unsigned int pending);
25341+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
25342+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
25343+
25344+unsigned int au_opt_udba(struct super_block *sb);
25345+
1facf9fc 25346+#endif /* __KERNEL__ */
25347+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
25348diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
25349--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 25350+++ linux/fs/aufs/plink.c 2016-10-09 16:55:36.496035060 +0200
f0c0a007 25351@@ -0,0 +1,514 @@
1facf9fc 25352+/*
8cdd5066 25353+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 25354+ *
25355+ * This program, aufs is free software; you can redistribute it and/or modify
25356+ * it under the terms of the GNU General Public License as published by
25357+ * the Free Software Foundation; either version 2 of the License, or
25358+ * (at your option) any later version.
dece6358
AM
25359+ *
25360+ * This program is distributed in the hope that it will be useful,
25361+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25362+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25363+ * GNU General Public License for more details.
25364+ *
25365+ * You should have received a copy of the GNU General Public License
523b37e3 25366+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 25367+ */
25368+
25369+/*
25370+ * pseudo-link
25371+ */
25372+
25373+#include "aufs.h"
25374+
25375+/*
e49829fe 25376+ * the pseudo-link maintenance mode.
1facf9fc 25377+ * during a user process maintains the pseudo-links,
25378+ * prohibit adding a new plink and branch manipulation.
e49829fe
JR
25379+ *
25380+ * Flags
25381+ * NOPLM:
25382+ * For entry functions which will handle plink, and i_mutex is already held
25383+ * in VFS.
25384+ * They cannot wait and should return an error at once.
25385+ * Callers has to check the error.
25386+ * NOPLMW:
25387+ * For entry functions which will handle plink, but i_mutex is not held
25388+ * in VFS.
25389+ * They can wait the plink maintenance mode to finish.
25390+ *
25391+ * They behave like F_SETLK and F_SETLKW.
25392+ * If the caller never handle plink, then both flags are unnecessary.
1facf9fc 25393+ */
e49829fe
JR
25394+
25395+int au_plink_maint(struct super_block *sb, int flags)
1facf9fc 25396+{
e49829fe
JR
25397+ int err;
25398+ pid_t pid, ppid;
f0c0a007 25399+ struct task_struct *parent, *prev;
e49829fe 25400+ struct au_sbinfo *sbi;
dece6358
AM
25401+
25402+ SiMustAnyLock(sb);
25403+
e49829fe
JR
25404+ err = 0;
25405+ if (!au_opt_test(au_mntflags(sb), PLINK))
25406+ goto out;
25407+
25408+ sbi = au_sbi(sb);
25409+ pid = sbi->si_plink_maint_pid;
25410+ if (!pid || pid == current->pid)
25411+ goto out;
25412+
25413+ /* todo: it highly depends upon /sbin/mount.aufs */
f0c0a007
AM
25414+ prev = NULL;
25415+ parent = current;
25416+ ppid = 0;
e49829fe 25417+ rcu_read_lock();
f0c0a007
AM
25418+ while (1) {
25419+ parent = rcu_dereference(parent->real_parent);
25420+ if (parent == prev)
25421+ break;
25422+ ppid = task_pid_vnr(parent);
25423+ if (pid == ppid) {
25424+ rcu_read_unlock();
25425+ goto out;
25426+ }
25427+ prev = parent;
25428+ }
e49829fe 25429+ rcu_read_unlock();
e49829fe
JR
25430+
25431+ if (au_ftest_lock(flags, NOPLMW)) {
027c5e7a
AM
25432+ /* if there is no i_mutex lock in VFS, we don't need to wait */
25433+ /* AuDebugOn(!lockdep_depth(current)); */
e49829fe
JR
25434+ while (sbi->si_plink_maint_pid) {
25435+ si_read_unlock(sb);
25436+ /* gave up wake_up_bit() */
25437+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
25438+
25439+ if (au_ftest_lock(flags, FLUSH))
25440+ au_nwt_flush(&sbi->si_nowait);
25441+ si_noflush_read_lock(sb);
25442+ }
25443+ } else if (au_ftest_lock(flags, NOPLM)) {
25444+ AuDbg("ppid %d, pid %d\n", ppid, pid);
25445+ err = -EAGAIN;
25446+ }
25447+
25448+out:
25449+ return err;
4a4d8108
AM
25450+}
25451+
e49829fe 25452+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
4a4d8108 25453+{
4a4d8108 25454+ spin_lock(&sbinfo->si_plink_maint_lock);
027c5e7a 25455+ sbinfo->si_plink_maint_pid = 0;
4a4d8108 25456+ spin_unlock(&sbinfo->si_plink_maint_lock);
027c5e7a 25457+ wake_up_all(&sbinfo->si_plink_wq);
4a4d8108
AM
25458+}
25459+
e49829fe 25460+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
25461+{
25462+ int err;
4a4d8108
AM
25463+ struct au_sbinfo *sbinfo;
25464+
25465+ err = 0;
4a4d8108
AM
25466+ sbinfo = au_sbi(sb);
25467+ /* make sure i am the only one in this fs */
e49829fe
JR
25468+ si_write_lock(sb, AuLock_FLUSH);
25469+ if (au_opt_test(au_mntflags(sb), PLINK)) {
25470+ spin_lock(&sbinfo->si_plink_maint_lock);
25471+ if (!sbinfo->si_plink_maint_pid)
25472+ sbinfo->si_plink_maint_pid = current->pid;
25473+ else
25474+ err = -EBUSY;
25475+ spin_unlock(&sbinfo->si_plink_maint_lock);
25476+ }
4a4d8108
AM
25477+ si_write_unlock(sb);
25478+
25479+ return err;
1facf9fc 25480+}
25481+
25482+/* ---------------------------------------------------------------------- */
25483+
1facf9fc 25484+#ifdef CONFIG_AUFS_DEBUG
25485+void au_plink_list(struct super_block *sb)
25486+{
86dc4139 25487+ int i;
1facf9fc 25488+ struct au_sbinfo *sbinfo;
86dc4139 25489+ struct hlist_head *plink_hlist;
5afbbe0d 25490+ struct au_icntnr *icntnr;
1facf9fc 25491+
dece6358
AM
25492+ SiMustAnyLock(sb);
25493+
1facf9fc 25494+ sbinfo = au_sbi(sb);
25495+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25496+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25497+
86dc4139
AM
25498+ for (i = 0; i < AuPlink_NHASH; i++) {
25499+ plink_hlist = &sbinfo->si_plink[i].head;
25500+ rcu_read_lock();
5afbbe0d
AM
25501+ hlist_for_each_entry_rcu(icntnr, plink_hlist, plink)
25502+ AuDbg("%lu\n", icntnr->vfs_inode.i_ino);
86dc4139
AM
25503+ rcu_read_unlock();
25504+ }
1facf9fc 25505+}
25506+#endif
25507+
25508+/* is the inode pseudo-linked? */
25509+int au_plink_test(struct inode *inode)
25510+{
86dc4139 25511+ int found, i;
1facf9fc 25512+ struct au_sbinfo *sbinfo;
86dc4139 25513+ struct hlist_head *plink_hlist;
5afbbe0d 25514+ struct au_icntnr *icntnr;
1facf9fc 25515+
25516+ sbinfo = au_sbi(inode->i_sb);
dece6358 25517+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 25518+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 25519+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 25520+
25521+ found = 0;
86dc4139
AM
25522+ i = au_plink_hash(inode->i_ino);
25523+ plink_hlist = &sbinfo->si_plink[i].head;
4a4d8108 25524+ rcu_read_lock();
5afbbe0d
AM
25525+ hlist_for_each_entry_rcu(icntnr, plink_hlist, plink)
25526+ if (&icntnr->vfs_inode == inode) {
1facf9fc 25527+ found = 1;
25528+ break;
25529+ }
4a4d8108 25530+ rcu_read_unlock();
1facf9fc 25531+ return found;
25532+}
25533+
25534+/* ---------------------------------------------------------------------- */
25535+
25536+/*
25537+ * generate a name for plink.
25538+ * the file will be stored under AUFS_WH_PLINKDIR.
25539+ */
25540+/* 20 is max digits length of ulong 64 */
25541+#define PLINK_NAME_LEN ((20 + 1) * 2)
25542+
25543+static int plink_name(char *name, int len, struct inode *inode,
25544+ aufs_bindex_t bindex)
25545+{
25546+ int rlen;
25547+ struct inode *h_inode;
25548+
25549+ h_inode = au_h_iptr(inode, bindex);
25550+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
25551+ return rlen;
25552+}
25553+
7f207e10
AM
25554+struct au_do_plink_lkup_args {
25555+ struct dentry **errp;
25556+ struct qstr *tgtname;
25557+ struct dentry *h_parent;
25558+ struct au_branch *br;
25559+};
25560+
25561+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
25562+ struct dentry *h_parent,
25563+ struct au_branch *br)
25564+{
25565+ struct dentry *h_dentry;
febd17d6 25566+ struct inode *h_inode;
7f207e10 25567+
febd17d6
JR
25568+ h_inode = d_inode(h_parent);
25569+ inode_lock_nested(h_inode, AuLsc_I_CHILD2);
b4510431 25570+ h_dentry = vfsub_lkup_one(tgtname, h_parent);
febd17d6 25571+ inode_unlock(h_inode);
7f207e10
AM
25572+ return h_dentry;
25573+}
25574+
25575+static void au_call_do_plink_lkup(void *args)
25576+{
25577+ struct au_do_plink_lkup_args *a = args;
25578+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
25579+}
25580+
1facf9fc 25581+/* lookup the plink-ed @inode under the branch at @bindex */
25582+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
25583+{
25584+ struct dentry *h_dentry, *h_parent;
25585+ struct au_branch *br;
7f207e10 25586+ int wkq_err;
1facf9fc 25587+ char a[PLINK_NAME_LEN];
0c3ec466 25588+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25589+
e49829fe
JR
25590+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
25591+
1facf9fc 25592+ br = au_sbr(inode->i_sb, bindex);
25593+ h_parent = br->br_wbr->wbr_plink;
1facf9fc 25594+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25595+
2dfbb274 25596+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
7f207e10
AM
25597+ struct au_do_plink_lkup_args args = {
25598+ .errp = &h_dentry,
25599+ .tgtname = &tgtname,
25600+ .h_parent = h_parent,
25601+ .br = br
25602+ };
25603+
25604+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
25605+ if (unlikely(wkq_err))
25606+ h_dentry = ERR_PTR(wkq_err);
25607+ } else
25608+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
25609+
1facf9fc 25610+ return h_dentry;
25611+}
25612+
25613+/* create a pseudo-link */
25614+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
25615+ struct dentry *h_dentry, struct au_branch *br)
25616+{
25617+ int err;
25618+ struct path h_path = {
86dc4139 25619+ .mnt = au_br_mnt(br)
1facf9fc 25620+ };
523b37e3 25621+ struct inode *h_dir, *delegated;
1facf9fc 25622+
5527c038 25623+ h_dir = d_inode(h_parent);
febd17d6 25624+ inode_lock_nested(h_dir, AuLsc_I_CHILD2);
4f0767ce 25625+again:
b4510431 25626+ h_path.dentry = vfsub_lkup_one(tgt, h_parent);
1facf9fc 25627+ err = PTR_ERR(h_path.dentry);
25628+ if (IS_ERR(h_path.dentry))
25629+ goto out;
25630+
25631+ err = 0;
25632+ /* wh.plink dir is not monitored */
7f207e10 25633+ /* todo: is it really safe? */
5527c038
JR
25634+ if (d_is_positive(h_path.dentry)
25635+ && d_inode(h_path.dentry) != d_inode(h_dentry)) {
523b37e3
AM
25636+ delegated = NULL;
25637+ err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
25638+ if (unlikely(err == -EWOULDBLOCK)) {
25639+ pr_warn("cannot retry for NFSv4 delegation"
25640+ " for an internal unlink\n");
25641+ iput(delegated);
25642+ }
1facf9fc 25643+ dput(h_path.dentry);
25644+ h_path.dentry = NULL;
25645+ if (!err)
25646+ goto again;
25647+ }
5527c038 25648+ if (!err && d_is_negative(h_path.dentry)) {
523b37e3
AM
25649+ delegated = NULL;
25650+ err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
25651+ if (unlikely(err == -EWOULDBLOCK)) {
25652+ pr_warn("cannot retry for NFSv4 delegation"
25653+ " for an internal link\n");
25654+ iput(delegated);
25655+ }
25656+ }
1facf9fc 25657+ dput(h_path.dentry);
25658+
4f0767ce 25659+out:
febd17d6 25660+ inode_unlock(h_dir);
1facf9fc 25661+ return err;
25662+}
25663+
25664+struct do_whplink_args {
25665+ int *errp;
25666+ struct qstr *tgt;
25667+ struct dentry *h_parent;
25668+ struct dentry *h_dentry;
25669+ struct au_branch *br;
25670+};
25671+
25672+static void call_do_whplink(void *args)
25673+{
25674+ struct do_whplink_args *a = args;
25675+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
25676+}
25677+
25678+static int whplink(struct dentry *h_dentry, struct inode *inode,
25679+ aufs_bindex_t bindex, struct au_branch *br)
25680+{
25681+ int err, wkq_err;
25682+ struct au_wbr *wbr;
25683+ struct dentry *h_parent;
1facf9fc 25684+ char a[PLINK_NAME_LEN];
0c3ec466 25685+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25686+
25687+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
25688+ h_parent = wbr->wbr_plink;
1facf9fc 25689+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25690+
25691+ /* always superio. */
2dfbb274 25692+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
1facf9fc 25693+ struct do_whplink_args args = {
25694+ .errp = &err,
25695+ .tgt = &tgtname,
25696+ .h_parent = h_parent,
25697+ .h_dentry = h_dentry,
25698+ .br = br
25699+ };
25700+ wkq_err = au_wkq_wait(call_do_whplink, &args);
25701+ if (unlikely(wkq_err))
25702+ err = wkq_err;
25703+ } else
25704+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 25705+
25706+ return err;
25707+}
25708+
1facf9fc 25709+/*
25710+ * create a new pseudo-link for @h_dentry on @bindex.
25711+ * the linked inode is held in aufs @inode.
25712+ */
25713+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
25714+ struct dentry *h_dentry)
25715+{
25716+ struct super_block *sb;
25717+ struct au_sbinfo *sbinfo;
86dc4139 25718+ struct hlist_head *plink_hlist;
5afbbe0d 25719+ struct au_icntnr *icntnr;
86dc4139
AM
25720+ struct au_sphlhead *sphl;
25721+ int found, err, cnt, i;
1facf9fc 25722+
25723+ sb = inode->i_sb;
25724+ sbinfo = au_sbi(sb);
25725+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25726+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25727+
86dc4139 25728+ found = au_plink_test(inode);
4a4d8108 25729+ if (found)
1facf9fc 25730+ return;
4a4d8108 25731+
86dc4139
AM
25732+ i = au_plink_hash(inode->i_ino);
25733+ sphl = sbinfo->si_plink + i;
25734+ plink_hlist = &sphl->head;
5afbbe0d 25735+ au_igrab(inode);
1facf9fc 25736+
86dc4139 25737+ spin_lock(&sphl->spin);
5afbbe0d
AM
25738+ hlist_for_each_entry(icntnr, plink_hlist, plink) {
25739+ if (&icntnr->vfs_inode == inode) {
4a4d8108
AM
25740+ found = 1;
25741+ break;
25742+ }
1facf9fc 25743+ }
5afbbe0d
AM
25744+ if (!found) {
25745+ icntnr = container_of(inode, struct au_icntnr, vfs_inode);
25746+ hlist_add_head_rcu(&icntnr->plink, plink_hlist);
25747+ }
86dc4139 25748+ spin_unlock(&sphl->spin);
4a4d8108 25749+ if (!found) {
86dc4139
AM
25750+ cnt = au_sphl_count(sphl);
25751+#define msg "unexpectedly unblanced or too many pseudo-links"
25752+ if (cnt > AUFS_PLINK_WARN)
25753+ AuWarn1(msg ", %d\n", cnt);
25754+#undef msg
1facf9fc 25755+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
5afbbe0d
AM
25756+ if (unlikely(err)) {
25757+ pr_warn("err %d, damaged pseudo link.\n", err);
25758+ au_sphl_del_rcu(&icntnr->plink, sphl);
25759+ iput(&icntnr->vfs_inode);
4a4d8108 25760+ }
5afbbe0d
AM
25761+ } else
25762+ iput(&icntnr->vfs_inode);
1facf9fc 25763+}
25764+
25765+/* free all plinks */
e49829fe 25766+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 25767+{
86dc4139 25768+ int i, warned;
1facf9fc 25769+ struct au_sbinfo *sbinfo;
86dc4139
AM
25770+ struct hlist_head *plink_hlist;
25771+ struct hlist_node *tmp;
5afbbe0d 25772+ struct au_icntnr *icntnr;
1facf9fc 25773+
dece6358
AM
25774+ SiMustWriteLock(sb);
25775+
1facf9fc 25776+ sbinfo = au_sbi(sb);
25777+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25778+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25779+
1facf9fc 25780+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25781+ warned = 0;
25782+ for (i = 0; i < AuPlink_NHASH; i++) {
25783+ plink_hlist = &sbinfo->si_plink[i].head;
25784+ if (!warned && verbose && !hlist_empty(plink_hlist)) {
25785+ pr_warn("pseudo-link is not flushed");
25786+ warned = 1;
25787+ }
5afbbe0d
AM
25788+ hlist_for_each_entry_safe(icntnr, tmp, plink_hlist, plink)
25789+ iput(&icntnr->vfs_inode);
86dc4139
AM
25790+ INIT_HLIST_HEAD(plink_hlist);
25791+ }
1facf9fc 25792+}
25793+
e49829fe
JR
25794+void au_plink_clean(struct super_block *sb, int verbose)
25795+{
25796+ struct dentry *root;
25797+
25798+ root = sb->s_root;
25799+ aufs_write_lock(root);
25800+ if (au_opt_test(au_mntflags(sb), PLINK))
25801+ au_plink_put(sb, verbose);
25802+ aufs_write_unlock(root);
25803+}
25804+
86dc4139
AM
25805+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
25806+{
25807+ int do_put;
5afbbe0d 25808+ aufs_bindex_t btop, bbot, bindex;
86dc4139
AM
25809+
25810+ do_put = 0;
5afbbe0d
AM
25811+ btop = au_ibtop(inode);
25812+ bbot = au_ibbot(inode);
25813+ if (btop >= 0) {
25814+ for (bindex = btop; bindex <= bbot; bindex++) {
86dc4139
AM
25815+ if (!au_h_iptr(inode, bindex)
25816+ || au_ii_br_id(inode, bindex) != br_id)
25817+ continue;
25818+ au_set_h_iptr(inode, bindex, NULL, 0);
25819+ do_put = 1;
25820+ break;
25821+ }
25822+ if (do_put)
5afbbe0d 25823+ for (bindex = btop; bindex <= bbot; bindex++)
86dc4139
AM
25824+ if (au_h_iptr(inode, bindex)) {
25825+ do_put = 0;
25826+ break;
25827+ }
25828+ } else
25829+ do_put = 1;
25830+
25831+ return do_put;
25832+}
25833+
1facf9fc 25834+/* free the plinks on a branch specified by @br_id */
25835+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
25836+{
25837+ struct au_sbinfo *sbinfo;
86dc4139
AM
25838+ struct hlist_head *plink_hlist;
25839+ struct hlist_node *tmp;
5afbbe0d 25840+ struct au_icntnr *icntnr;
1facf9fc 25841+ struct inode *inode;
86dc4139 25842+ int i, do_put;
1facf9fc 25843+
dece6358
AM
25844+ SiMustWriteLock(sb);
25845+
1facf9fc 25846+ sbinfo = au_sbi(sb);
25847+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25848+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25849+
1facf9fc 25850+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25851+ for (i = 0; i < AuPlink_NHASH; i++) {
25852+ plink_hlist = &sbinfo->si_plink[i].head;
5afbbe0d
AM
25853+ hlist_for_each_entry_safe(icntnr, tmp, plink_hlist, plink) {
25854+ inode = au_igrab(&icntnr->vfs_inode);
86dc4139
AM
25855+ ii_write_lock_child(inode);
25856+ do_put = au_plink_do_half_refresh(inode, br_id);
5afbbe0d
AM
25857+ if (do_put) {
25858+ hlist_del(&icntnr->plink);
25859+ iput(inode);
25860+ }
86dc4139
AM
25861+ ii_write_unlock(inode);
25862+ iput(inode);
dece6358 25863+ }
dece6358
AM
25864+ }
25865+}
7f207e10
AM
25866diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
25867--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 25868+++ linux/fs/aufs/poll.c 2016-10-09 16:55:36.496035060 +0200
b912730e 25869@@ -0,0 +1,52 @@
dece6358 25870+/*
8cdd5066 25871+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
25872+ *
25873+ * This program, aufs is free software; you can redistribute it and/or modify
25874+ * it under the terms of the GNU General Public License as published by
25875+ * the Free Software Foundation; either version 2 of the License, or
25876+ * (at your option) any later version.
25877+ *
25878+ * This program is distributed in the hope that it will be useful,
25879+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25880+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25881+ * GNU General Public License for more details.
25882+ *
25883+ * You should have received a copy of the GNU General Public License
523b37e3 25884+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358
AM
25885+ */
25886+
1308ab2a 25887+/*
25888+ * poll operation
25889+ * There is only one filesystem which implements ->poll operation, currently.
25890+ */
25891+
25892+#include "aufs.h"
25893+
25894+unsigned int aufs_poll(struct file *file, poll_table *wait)
25895+{
25896+ unsigned int mask;
25897+ int err;
25898+ struct file *h_file;
1308ab2a 25899+ struct super_block *sb;
25900+
25901+ /* We should pretend an error happened. */
25902+ mask = POLLERR /* | POLLIN | POLLOUT */;
b912730e 25903+ sb = file->f_path.dentry->d_sb;
e49829fe 25904+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
25905+
25906+ h_file = au_read_pre(file, /*keep_fi*/0);
25907+ err = PTR_ERR(h_file);
25908+ if (IS_ERR(h_file))
1308ab2a 25909+ goto out;
25910+
25911+ /* it is not an error if h_file has no operation */
25912+ mask = DEFAULT_POLLMASK;
523b37e3 25913+ if (h_file->f_op->poll)
1308ab2a 25914+ mask = h_file->f_op->poll(h_file, wait);
b912730e 25915+ fput(h_file); /* instead of au_read_post() */
1308ab2a 25916+
4f0767ce 25917+out:
1308ab2a 25918+ si_read_unlock(sb);
25919+ AuTraceErr((int)mask);
25920+ return mask;
25921+}
c1595e42
JR
25922diff -urN /usr/share/empty/fs/aufs/posix_acl.c linux/fs/aufs/posix_acl.c
25923--- /usr/share/empty/fs/aufs/posix_acl.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f 25924+++ linux/fs/aufs/posix_acl.c 2016-12-17 12:28:17.598545045 +0100
8cdd5066 25925@@ -0,0 +1,98 @@
c1595e42 25926+/*
8cdd5066 25927+ * Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
25928+ *
25929+ * This program, aufs is free software; you can redistribute it and/or modify
25930+ * it under the terms of the GNU General Public License as published by
25931+ * the Free Software Foundation; either version 2 of the License, or
25932+ * (at your option) any later version.
25933+ *
25934+ * This program is distributed in the hope that it will be useful,
25935+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25936+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25937+ * GNU General Public License for more details.
25938+ *
25939+ * You should have received a copy of the GNU General Public License
25940+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25941+ */
25942+
25943+/*
25944+ * posix acl operations
25945+ */
25946+
25947+#include <linux/fs.h>
c1595e42
JR
25948+#include "aufs.h"
25949+
25950+struct posix_acl *aufs_get_acl(struct inode *inode, int type)
25951+{
25952+ struct posix_acl *acl;
25953+ int err;
25954+ aufs_bindex_t bindex;
25955+ struct inode *h_inode;
25956+ struct super_block *sb;
25957+
25958+ acl = NULL;
25959+ sb = inode->i_sb;
25960+ si_read_lock(sb, AuLock_FLUSH);
25961+ ii_read_lock_child(inode);
25962+ if (!(sb->s_flags & MS_POSIXACL))
25963+ goto out;
25964+
5afbbe0d 25965+ bindex = au_ibtop(inode);
c1595e42
JR
25966+ h_inode = au_h_iptr(inode, bindex);
25967+ if (unlikely(!h_inode
25968+ || ((h_inode->i_mode & S_IFMT)
25969+ != (inode->i_mode & S_IFMT)))) {
25970+ err = au_busy_or_stale();
25971+ acl = ERR_PTR(err);
25972+ goto out;
25973+ }
25974+
25975+ /* always topmost only */
25976+ acl = get_acl(h_inode, type);
25977+
25978+out:
25979+ ii_read_unlock(inode);
25980+ si_read_unlock(sb);
25981+
25982+ AuTraceErrPtr(acl);
25983+ return acl;
25984+}
25985+
25986+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
25987+{
25988+ int err;
25989+ ssize_t ssz;
25990+ struct dentry *dentry;
f2c43d5f 25991+ struct au_sxattr arg = {
c1595e42
JR
25992+ .type = AU_ACL_SET,
25993+ .u.acl_set = {
25994+ .acl = acl,
25995+ .type = type
25996+ },
25997+ };
25998+
5afbbe0d
AM
25999+ IMustLock(inode);
26000+
c1595e42
JR
26001+ if (inode->i_ino == AUFS_ROOT_INO)
26002+ dentry = dget(inode->i_sb->s_root);
26003+ else {
26004+ dentry = d_find_alias(inode);
26005+ if (!dentry)
26006+ dentry = d_find_any_alias(inode);
26007+ if (!dentry) {
26008+ pr_warn("cannot handle this inode, "
26009+ "please report to aufs-users ML\n");
26010+ err = -ENOENT;
26011+ goto out;
26012+ }
26013+ }
26014+
f2c43d5f 26015+ ssz = au_sxattr(dentry, inode, &arg);
c1595e42
JR
26016+ dput(dentry);
26017+ err = ssz;
26018+ if (ssz >= 0)
26019+ err = 0;
26020+
26021+out:
c1595e42
JR
26022+ return err;
26023+}
7f207e10
AM
26024diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
26025--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 26026+++ linux/fs/aufs/procfs.c 2016-10-09 16:55:36.496035060 +0200
523b37e3 26027@@ -0,0 +1,169 @@
e49829fe 26028+/*
8cdd5066 26029+ * Copyright (C) 2010-2016 Junjiro R. Okajima
e49829fe
JR
26030+ *
26031+ * This program, aufs is free software; you can redistribute it and/or modify
26032+ * it under the terms of the GNU General Public License as published by
26033+ * the Free Software Foundation; either version 2 of the License, or
26034+ * (at your option) any later version.
26035+ *
26036+ * This program is distributed in the hope that it will be useful,
26037+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26038+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26039+ * GNU General Public License for more details.
26040+ *
26041+ * You should have received a copy of the GNU General Public License
523b37e3 26042+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
26043+ */
26044+
26045+/*
26046+ * procfs interfaces
26047+ */
26048+
26049+#include <linux/proc_fs.h>
26050+#include "aufs.h"
26051+
26052+static int au_procfs_plm_release(struct inode *inode, struct file *file)
26053+{
26054+ struct au_sbinfo *sbinfo;
26055+
26056+ sbinfo = file->private_data;
26057+ if (sbinfo) {
26058+ au_plink_maint_leave(sbinfo);
26059+ kobject_put(&sbinfo->si_kobj);
26060+ }
26061+
26062+ return 0;
26063+}
26064+
26065+static void au_procfs_plm_write_clean(struct file *file)
26066+{
26067+ struct au_sbinfo *sbinfo;
26068+
26069+ sbinfo = file->private_data;
26070+ if (sbinfo)
26071+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
26072+}
26073+
26074+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
26075+{
26076+ int err;
26077+ struct super_block *sb;
26078+ struct au_sbinfo *sbinfo;
26079+
26080+ err = -EBUSY;
26081+ if (unlikely(file->private_data))
26082+ goto out;
26083+
26084+ sb = NULL;
53392da6 26085+ /* don't use au_sbilist_lock() here */
e49829fe 26086+ spin_lock(&au_sbilist.spin);
5afbbe0d 26087+ hlist_for_each_entry(sbinfo, &au_sbilist.head, si_list)
e49829fe
JR
26088+ if (id == sysaufs_si_id(sbinfo)) {
26089+ kobject_get(&sbinfo->si_kobj);
26090+ sb = sbinfo->si_sb;
26091+ break;
26092+ }
26093+ spin_unlock(&au_sbilist.spin);
26094+
26095+ err = -EINVAL;
26096+ if (unlikely(!sb))
26097+ goto out;
26098+
26099+ err = au_plink_maint_enter(sb);
26100+ if (!err)
26101+ /* keep kobject_get() */
26102+ file->private_data = sbinfo;
26103+ else
26104+ kobject_put(&sbinfo->si_kobj);
26105+out:
26106+ return err;
26107+}
26108+
26109+/*
26110+ * Accept a valid "si=xxxx" only.
26111+ * Once it is accepted successfully, accept "clean" too.
26112+ */
26113+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
26114+ size_t count, loff_t *ppos)
26115+{
26116+ ssize_t err;
26117+ unsigned long id;
26118+ /* last newline is allowed */
26119+ char buf[3 + sizeof(unsigned long) * 2 + 1];
26120+
26121+ err = -EACCES;
26122+ if (unlikely(!capable(CAP_SYS_ADMIN)))
26123+ goto out;
26124+
26125+ err = -EINVAL;
26126+ if (unlikely(count > sizeof(buf)))
26127+ goto out;
26128+
26129+ err = copy_from_user(buf, ubuf, count);
26130+ if (unlikely(err)) {
26131+ err = -EFAULT;
26132+ goto out;
26133+ }
26134+ buf[count] = 0;
26135+
26136+ err = -EINVAL;
26137+ if (!strcmp("clean", buf)) {
26138+ au_procfs_plm_write_clean(file);
26139+ goto out_success;
26140+ } else if (unlikely(strncmp("si=", buf, 3)))
26141+ goto out;
26142+
9dbd164d 26143+ err = kstrtoul(buf + 3, 16, &id);
e49829fe
JR
26144+ if (unlikely(err))
26145+ goto out;
26146+
26147+ err = au_procfs_plm_write_si(file, id);
26148+ if (unlikely(err))
26149+ goto out;
26150+
26151+out_success:
26152+ err = count; /* success */
26153+out:
26154+ return err;
26155+}
26156+
26157+static const struct file_operations au_procfs_plm_fop = {
26158+ .write = au_procfs_plm_write,
26159+ .release = au_procfs_plm_release,
26160+ .owner = THIS_MODULE
26161+};
26162+
26163+/* ---------------------------------------------------------------------- */
26164+
26165+static struct proc_dir_entry *au_procfs_dir;
26166+
26167+void au_procfs_fin(void)
26168+{
26169+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
26170+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
26171+}
26172+
26173+int __init au_procfs_init(void)
26174+{
26175+ int err;
26176+ struct proc_dir_entry *entry;
26177+
26178+ err = -ENOMEM;
26179+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
26180+ if (unlikely(!au_procfs_dir))
26181+ goto out;
26182+
26183+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
26184+ au_procfs_dir, &au_procfs_plm_fop);
26185+ if (unlikely(!entry))
26186+ goto out_dir;
26187+
26188+ err = 0;
26189+ goto out; /* success */
26190+
26191+
26192+out_dir:
26193+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
26194+out:
26195+ return err;
26196+}
7f207e10
AM
26197diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
26198--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 26199+++ linux/fs/aufs/rdu.c 2016-10-09 16:55:36.496035060 +0200
5afbbe0d 26200@@ -0,0 +1,381 @@
1308ab2a 26201+/*
8cdd5066 26202+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1308ab2a 26203+ *
26204+ * This program, aufs is free software; you can redistribute it and/or modify
26205+ * it under the terms of the GNU General Public License as published by
26206+ * the Free Software Foundation; either version 2 of the License, or
26207+ * (at your option) any later version.
26208+ *
26209+ * This program is distributed in the hope that it will be useful,
26210+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26211+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26212+ * GNU General Public License for more details.
26213+ *
26214+ * You should have received a copy of the GNU General Public License
523b37e3 26215+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1308ab2a 26216+ */
26217+
26218+/*
26219+ * readdir in userspace.
26220+ */
26221+
b752ccd1 26222+#include <linux/compat.h>
4a4d8108 26223+#include <linux/fs_stack.h>
1308ab2a 26224+#include <linux/security.h>
1308ab2a 26225+#include "aufs.h"
26226+
/*
 * Flag bits stored in the rdu cookie (rdu->cookie.flags), and helpers to
 * test, set and clear them by short name.
 */
#define AuRdu_CALLED	(1 << 0)
#define AuRdu_CONT	(1 << 1)
#define AuRdu_FULL	(1 << 2)

/* non-zero when flag @name is set in @flags */
#define au_ftest_rdu(flags, name)	((flags) & AuRdu_##name)

/* set flag @name in @flags */
#define au_fset_rdu(flags, name) \
	do { \
		(flags) |= AuRdu_##name; \
	} while (0)

/* clear flag @name in @flags */
#define au_fclr_rdu(flags, name) \
	do { \
		(flags) &= ~AuRdu_##name; \
	} while (0)
1308ab2a 26236+
26237+struct au_rdu_arg {
392086de 26238+ struct dir_context ctx;
1308ab2a 26239+ struct aufs_rdu *rdu;
26240+ union au_rdu_ent_ul ent;
26241+ unsigned long end;
26242+
26243+ struct super_block *sb;
26244+ int err;
26245+};
26246+
392086de 26247+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
1308ab2a 26248+ loff_t offset, u64 h_ino, unsigned int d_type)
26249+{
26250+ int err, len;
392086de 26251+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
1308ab2a 26252+ struct aufs_rdu *rdu = arg->rdu;
26253+ struct au_rdu_ent ent;
26254+
26255+ err = 0;
26256+ arg->err = 0;
26257+ au_fset_rdu(rdu->cookie.flags, CALLED);
26258+ len = au_rdu_len(nlen);
26259+ if (arg->ent.ul + len < arg->end) {
26260+ ent.ino = h_ino;
26261+ ent.bindex = rdu->cookie.bindex;
26262+ ent.type = d_type;
26263+ ent.nlen = nlen;
4a4d8108
AM
26264+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
26265+ ent.type = DT_UNKNOWN;
1308ab2a 26266+
9dbd164d 26267+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 26268+ err = -EFAULT;
26269+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
26270+ goto out;
26271+ if (copy_to_user(arg->ent.e->name, name, nlen))
26272+ goto out;
26273+ /* the terminating NULL */
26274+ if (__put_user(0, arg->ent.e->name + nlen))
26275+ goto out;
26276+ err = 0;
26277+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
26278+ arg->ent.ul += len;
26279+ rdu->rent++;
26280+ } else {
26281+ err = -EFAULT;
26282+ au_fset_rdu(rdu->cookie.flags, FULL);
26283+ rdu->full = 1;
26284+ rdu->tail = arg->ent;
26285+ }
26286+
4f0767ce 26287+out:
1308ab2a 26288+ /* AuTraceErr(err); */
26289+ return err;
26290+}
26291+
26292+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
26293+{
26294+ int err;
26295+ loff_t offset;
26296+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
26297+
92d182d2 26298+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
1308ab2a 26299+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
26300+ err = offset;
26301+ if (unlikely(offset != cookie->h_pos))
26302+ goto out;
26303+
26304+ err = 0;
26305+ do {
26306+ arg->err = 0;
26307+ au_fclr_rdu(cookie->flags, CALLED);
26308+ /* smp_mb(); */
392086de 26309+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1308ab2a 26310+ if (err >= 0)
26311+ err = arg->err;
26312+ } while (!err
26313+ && au_ftest_rdu(cookie->flags, CALLED)
26314+ && !au_ftest_rdu(cookie->flags, FULL));
26315+ cookie->h_pos = h_file->f_pos;
26316+
4f0767ce 26317+out:
1308ab2a 26318+ AuTraceErr(err);
26319+ return err;
26320+}
26321+
26322+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
26323+{
26324+ int err;
5afbbe0d 26325+ aufs_bindex_t bbot;
392086de
AM
26326+ struct au_rdu_arg arg = {
26327+ .ctx = {
2000de60 26328+ .actor = au_rdu_fill
392086de
AM
26329+ }
26330+ };
1308ab2a 26331+ struct dentry *dentry;
26332+ struct inode *inode;
26333+ struct file *h_file;
26334+ struct au_rdu_cookie *cookie = &rdu->cookie;
26335+
26336+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
26337+ if (unlikely(err)) {
26338+ err = -EFAULT;
26339+ AuTraceErr(err);
26340+ goto out;
26341+ }
26342+ rdu->rent = 0;
26343+ rdu->tail = rdu->ent;
26344+ rdu->full = 0;
26345+ arg.rdu = rdu;
26346+ arg.ent = rdu->ent;
26347+ arg.end = arg.ent.ul;
26348+ arg.end += rdu->sz;
26349+
26350+ err = -ENOTDIR;
5afbbe0d 26351+ if (unlikely(!file->f_op->iterate && !file->f_op->iterate_shared))
1308ab2a 26352+ goto out;
26353+
26354+ err = security_file_permission(file, MAY_READ);
26355+ AuTraceErr(err);
26356+ if (unlikely(err))
26357+ goto out;
26358+
2000de60 26359+ dentry = file->f_path.dentry;
5527c038 26360+ inode = d_inode(dentry);
5afbbe0d 26361+ inode_lock_shared(inode);
1308ab2a 26362+
26363+ arg.sb = inode->i_sb;
e49829fe
JR
26364+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
26365+ if (unlikely(err))
26366+ goto out_mtx;
027c5e7a
AM
26367+ err = au_alive_dir(dentry);
26368+ if (unlikely(err))
26369+ goto out_si;
e49829fe 26370+ /* todo: reval? */
1308ab2a 26371+ fi_read_lock(file);
26372+
26373+ err = -EAGAIN;
26374+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
26375+ && cookie->generation != au_figen(file)))
26376+ goto out_unlock;
26377+
26378+ err = 0;
26379+ if (!rdu->blk) {
26380+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
26381+ if (!rdu->blk)
26382+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
26383+ }
5afbbe0d
AM
26384+ bbot = au_fbtop(file);
26385+ if (cookie->bindex < bbot)
26386+ cookie->bindex = bbot;
26387+ bbot = au_fbbot_dir(file);
26388+ /* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
26389+ for (; !err && cookie->bindex <= bbot;
1308ab2a 26390+ cookie->bindex++, cookie->h_pos = 0) {
4a4d8108 26391+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 26392+ if (!h_file)
26393+ continue;
26394+
26395+ au_fclr_rdu(cookie->flags, FULL);
26396+ err = au_rdu_do(h_file, &arg);
26397+ AuTraceErr(err);
26398+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
26399+ break;
26400+ }
26401+ AuDbg("rent %llu\n", rdu->rent);
26402+
26403+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
26404+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
26405+ au_fset_rdu(cookie->flags, CONT);
26406+ cookie->generation = au_figen(file);
26407+ }
26408+
26409+ ii_read_lock_child(inode);
5afbbe0d 26410+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
1308ab2a 26411+ ii_read_unlock(inode);
26412+
4f0767ce 26413+out_unlock:
1308ab2a 26414+ fi_read_unlock(file);
027c5e7a 26415+out_si:
1308ab2a 26416+ si_read_unlock(arg.sb);
4f0767ce 26417+out_mtx:
5afbbe0d 26418+ inode_unlock_shared(inode);
4f0767ce 26419+out:
1308ab2a 26420+ AuTraceErr(err);
26421+ return err;
26422+}
26423+
26424+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
26425+{
26426+ int err;
26427+ ino_t ino;
26428+ unsigned long long nent;
26429+ union au_rdu_ent_ul *u;
26430+ struct au_rdu_ent ent;
26431+ struct super_block *sb;
26432+
26433+ err = 0;
26434+ nent = rdu->nent;
26435+ u = &rdu->ent;
2000de60 26436+ sb = file->f_path.dentry->d_sb;
1308ab2a 26437+ si_read_lock(sb, AuLock_FLUSH);
26438+ while (nent-- > 0) {
9dbd164d 26439+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 26440+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
26441+ if (!err)
26442+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 26443+ if (unlikely(err)) {
26444+ err = -EFAULT;
26445+ AuTraceErr(err);
26446+ break;
26447+ }
26448+
26449+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
26450+ if (!ent.wh)
26451+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
26452+ else
26453+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
26454+ &ino);
26455+ if (unlikely(err)) {
26456+ AuTraceErr(err);
26457+ break;
26458+ }
26459+
26460+ err = __put_user(ino, &u->e->ino);
26461+ if (unlikely(err)) {
26462+ err = -EFAULT;
26463+ AuTraceErr(err);
26464+ break;
26465+ }
26466+ u->ul += au_rdu_len(ent.nlen);
26467+ }
26468+ si_read_unlock(sb);
26469+
26470+ return err;
26471+}
26472+
26473+/* ---------------------------------------------------------------------- */
26474+
26475+static int au_rdu_verify(struct aufs_rdu *rdu)
26476+{
b752ccd1 26477+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 26478+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 26479+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 26480+ rdu->blk,
26481+ rdu->rent, rdu->shwh, rdu->full,
26482+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
26483+ rdu->cookie.generation);
dece6358 26484+
b752ccd1 26485+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 26486+ return 0;
dece6358 26487+
b752ccd1
AM
26488+ AuDbg("%u:%u\n",
26489+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 26490+ return -EINVAL;
26491+}
26492+
26493+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 26494+{
1308ab2a 26495+ long err, e;
26496+ struct aufs_rdu rdu;
26497+ void __user *p = (void __user *)arg;
dece6358 26498+
1308ab2a 26499+ err = copy_from_user(&rdu, p, sizeof(rdu));
26500+ if (unlikely(err)) {
26501+ err = -EFAULT;
26502+ AuTraceErr(err);
26503+ goto out;
26504+ }
26505+ err = au_rdu_verify(&rdu);
dece6358
AM
26506+ if (unlikely(err))
26507+ goto out;
26508+
1308ab2a 26509+ switch (cmd) {
26510+ case AUFS_CTL_RDU:
26511+ err = au_rdu(file, &rdu);
26512+ if (unlikely(err))
26513+ break;
dece6358 26514+
1308ab2a 26515+ e = copy_to_user(p, &rdu, sizeof(rdu));
26516+ if (unlikely(e)) {
26517+ err = -EFAULT;
26518+ AuTraceErr(err);
26519+ }
26520+ break;
26521+ case AUFS_CTL_RDU_INO:
26522+ err = au_rdu_ino(file, &rdu);
26523+ break;
26524+
26525+ default:
4a4d8108 26526+ /* err = -ENOTTY; */
1308ab2a 26527+ err = -EINVAL;
26528+ }
dece6358 26529+
4f0767ce 26530+out:
1308ab2a 26531+ AuTraceErr(err);
26532+ return err;
1facf9fc 26533+}
b752ccd1
AM
26534+
26535+#ifdef CONFIG_COMPAT
26536+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
26537+{
26538+ long err, e;
26539+ struct aufs_rdu rdu;
26540+ void __user *p = compat_ptr(arg);
26541+
26542+ /* todo: get_user()? */
26543+ err = copy_from_user(&rdu, p, sizeof(rdu));
26544+ if (unlikely(err)) {
26545+ err = -EFAULT;
26546+ AuTraceErr(err);
26547+ goto out;
26548+ }
26549+ rdu.ent.e = compat_ptr(rdu.ent.ul);
26550+ err = au_rdu_verify(&rdu);
26551+ if (unlikely(err))
26552+ goto out;
26553+
26554+ switch (cmd) {
26555+ case AUFS_CTL_RDU:
26556+ err = au_rdu(file, &rdu);
26557+ if (unlikely(err))
26558+ break;
26559+
26560+ rdu.ent.ul = ptr_to_compat(rdu.ent.e);
26561+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
26562+ e = copy_to_user(p, &rdu, sizeof(rdu));
26563+ if (unlikely(e)) {
26564+ err = -EFAULT;
26565+ AuTraceErr(err);
26566+ }
26567+ break;
26568+ case AUFS_CTL_RDU_INO:
26569+ err = au_rdu_ino(file, &rdu);
26570+ break;
26571+
26572+ default:
26573+ /* err = -ENOTTY; */
26574+ err = -EINVAL;
26575+ }
26576+
4f0767ce 26577+out:
b752ccd1
AM
26578+ AuTraceErr(err);
26579+ return err;
26580+}
26581+#endif
7f207e10
AM
26582diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
26583--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 26584+++ linux/fs/aufs/rwsem.h 2016-10-09 16:55:36.496035060 +0200
5afbbe0d 26585@@ -0,0 +1,198 @@
1facf9fc 26586+/*
8cdd5066 26587+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26588+ *
26589+ * This program, aufs is free software; you can redistribute it and/or modify
26590+ * it under the terms of the GNU General Public License as published by
26591+ * the Free Software Foundation; either version 2 of the License, or
26592+ * (at your option) any later version.
dece6358
AM
26593+ *
26594+ * This program is distributed in the hope that it will be useful,
26595+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26596+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26597+ * GNU General Public License for more details.
26598+ *
26599+ * You should have received a copy of the GNU General Public License
523b37e3 26600+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26601+ */
26602+
26603+/*
26604+ * simple read-write semaphore wrappers
26605+ */
26606+
26607+#ifndef __AUFS_RWSEM_H__
26608+#define __AUFS_RWSEM_H__
26609+
26610+#ifdef __KERNEL__
26611+
4a4d8108 26612+#include "debug.h"
dece6358
AM
26613+
26614+struct au_rwsem {
26615+ struct rw_semaphore rwsem;
26616+#ifdef CONFIG_AUFS_DEBUG
26617+ /* just for debugging, not almighty counter */
26618+ atomic_t rcnt, wcnt;
26619+#endif
26620+};
26621+
5afbbe0d
AM
/*
 * With lockdep, re-register the semaphore under its existing class key but
 * with the stringified macro argument as its name; a no-op otherwise.
 */
#ifdef CONFIG_LOCKDEP
#define au_lockdep_set_name(rw) \
	lockdep_set_class_and_name(&(rw)->rwsem, \
				   /*original key*/(rw)->rwsem.dep_map.key, \
				   /*name*/#rw)
#else
#define au_lockdep_set_name(rw)	do {} while (0)
#endif
26630+
dece6358
AM
/*
 * Debug-only bookkeeping of readers (rcnt) and writers (wcnt).
 * Without CONFIG_AUFS_DEBUG, AuDbgCnt() evaluates to 0 and the
 * init/inc/dec helpers compile to nothing.
 */
#ifdef CONFIG_AUFS_DEBUG
#define AuDbgCntInit(rw) do { \
	atomic_set(&(rw)->rcnt, 0); \
	atomic_set(&(rw)->wcnt, 0); \
	smp_mb(); /* atomic set */ \
} while (0)

#define AuDbgCnt(rw, cnt)	atomic_read(&(rw)->cnt)
#define AuDbgCntInc(rw, cnt)	atomic_inc(&(rw)->cnt)
/* warn when a counter drops below zero */
#define AuDbgCntDec(rw, cnt)	WARN_ON(atomic_dec_return(&(rw)->cnt) < 0)
#define AuDbgRcntInc(rw)	AuDbgCntInc(rw, rcnt)
#define AuDbgRcntDec(rw)	AuDbgCntDec(rw, rcnt)
#define AuDbgWcntInc(rw)	AuDbgCntInc(rw, wcnt)
#define AuDbgWcntDec(rw)	AuDbgCntDec(rw, wcnt)
#else
#define AuDbgCnt(rw, cnt)	0
#define AuDbgCntInit(rw)	do {} while (0)
#define AuDbgRcntInc(rw)	do {} while (0)
#define AuDbgRcntDec(rw)	do {} while (0)
#define AuDbgWcntInc(rw)	do {} while (0)
#define AuDbgWcntDec(rw)	do {} while (0)
#endif /* CONFIG_AUFS_DEBUG */
26653+
/*
 * Lock-state assertions built on the debug counters.
 * To debug easier, do not make them inlined functions.
 */
#define AuRwMustNoWaiters(rw)	AuDebugOn(rwsem_is_contended(&(rw)->rwsem))
/* rwsem_is_locked() is unusable */
#define AuRwMustReadLock(rw)	AuDebugOn(AuDbgCnt(rw, rcnt) <= 0)
#define AuRwMustWriteLock(rw)	AuDebugOn(AuDbgCnt(rw, wcnt) <= 0)
#define AuRwMustAnyLock(rw)	AuDebugOn(AuDbgCnt(rw, rcnt) <= 0 \
					  && AuDbgCnt(rw, wcnt) <= 0)
/* complain if either counter is still non-zero */
#define AuRwDestroy(rw)		AuDebugOn(AuDbgCnt(rw, rcnt) \
					  || AuDbgCnt(rw, wcnt))
26663+
/* initialize the counters, the semaphore and its lockdep name */
#define au_rw_init(rw) do { \
	AuDbgCntInit(rw); \
	init_rwsem(&(rw)->rwsem); \
	au_lockdep_set_name(rw); \
} while (0)

/* initialize and immediately take the write lock */
#define au_rw_init_wlock(rw) do { \
	au_rw_init(rw); \
	down_write(&(rw)->rwsem); \
	AuDbgWcntInc(rw); \
} while (0)

/* same as au_rw_init_wlock(), with lockdep subclass @lsc */
#define au_rw_init_wlock_nested(rw, lsc) do { \
	au_rw_init(rw); \
	down_write_nested(&(rw)->rwsem, lsc); \
	AuDbgWcntInc(rw); \
} while (0)
dece6358
AM
26681+
26682+static inline void au_rw_read_lock(struct au_rwsem *rw)
26683+{
26684+ down_read(&rw->rwsem);
26685+ AuDbgRcntInc(rw);
26686+}
26687+
26688+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
26689+{
26690+ down_read_nested(&rw->rwsem, lsc);
26691+ AuDbgRcntInc(rw);
26692+}
26693+
26694+static inline void au_rw_read_unlock(struct au_rwsem *rw)
26695+{
26696+ AuRwMustReadLock(rw);
26697+ AuDbgRcntDec(rw);
26698+ up_read(&rw->rwsem);
26699+}
26700+
26701+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
26702+{
26703+ AuRwMustWriteLock(rw);
26704+ AuDbgRcntInc(rw);
26705+ AuDbgWcntDec(rw);
26706+ downgrade_write(&rw->rwsem);
26707+}
26708+
26709+static inline void au_rw_write_lock(struct au_rwsem *rw)
26710+{
26711+ down_write(&rw->rwsem);
26712+ AuDbgWcntInc(rw);
26713+}
26714+
26715+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
26716+ unsigned int lsc)
26717+{
26718+ down_write_nested(&rw->rwsem, lsc);
26719+ AuDbgWcntInc(rw);
26720+}
1facf9fc 26721+
dece6358
AM
26722+static inline void au_rw_write_unlock(struct au_rwsem *rw)
26723+{
26724+ AuRwMustWriteLock(rw);
26725+ AuDbgWcntDec(rw);
26726+ up_write(&rw->rwsem);
26727+}
26728+
26729+/* why is not _nested version defined */
26730+static inline int au_rw_read_trylock(struct au_rwsem *rw)
26731+{
076b876e
AM
26732+ int ret;
26733+
26734+ ret = down_read_trylock(&rw->rwsem);
dece6358
AM
26735+ if (ret)
26736+ AuDbgRcntInc(rw);
26737+ return ret;
26738+}
26739+
26740+static inline int au_rw_write_trylock(struct au_rwsem *rw)
26741+{
076b876e
AM
26742+ int ret;
26743+
26744+ ret = down_write_trylock(&rw->rwsem);
dece6358
AM
26745+ if (ret)
26746+ AuDbgWcntInc(rw);
26747+ return ret;
26748+}
26749+
5afbbe0d 26750+#undef AuDbgCntDec
dece6358
AM
26751+#undef AuDbgRcntInc
26752+#undef AuDbgRcntDec
dece6358 26753+#undef AuDbgWcntDec
1facf9fc 26754+
/*
 * Generators for thin, named wrappers around the au_rw_*() helpers.
 * @prefix is prepended to the generated function names, @param is the
 * parameter declaration of each wrapper and @rwsem is the expression that
 * yields the struct au_rwsem pointer from that parameter.
 */

/* <prefix>_read_lock, _write_lock, _read_trylock, _write_trylock */
#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
static inline void prefix##_read_lock(param) \
{ \
	au_rw_read_lock(rwsem); \
} \
static inline void prefix##_write_lock(param) \
{ \
	au_rw_write_lock(rwsem); \
} \
static inline int prefix##_read_trylock(param) \
{ \
	return au_rw_read_trylock(rwsem); \
} \
static inline int prefix##_write_trylock(param) \
{ \
	return au_rw_write_trylock(rwsem); \
}
/* note: no _nested variants of the trylock wrappers are generated */

/* <prefix>_read_unlock, _write_unlock, _downgrade_lock */
#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
static inline void prefix##_read_unlock(param) \
{ \
	au_rw_read_unlock(rwsem); \
} \
static inline void prefix##_write_unlock(param) \
{ \
	au_rw_write_unlock(rwsem); \
} \
static inline void prefix##_downgrade_lock(param) \
{ \
	au_rw_dgrade_lock(rwsem); \
}

/* both sets at once */
#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
	AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
	AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
26781+
26782+#endif /* __KERNEL__ */
26783+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
26784diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
26785--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
26786+++ linux/fs/aufs/sbinfo.c 2016-10-09 16:55:38.889431135 +0200
26787@@ -0,0 +1,355 @@
1facf9fc 26788+/*
8cdd5066 26789+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26790+ *
26791+ * This program, aufs is free software; you can redistribute it and/or modify
26792+ * it under the terms of the GNU General Public License as published by
26793+ * the Free Software Foundation; either version 2 of the License, or
26794+ * (at your option) any later version.
dece6358
AM
26795+ *
26796+ * This program is distributed in the hope that it will be useful,
26797+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26798+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26799+ * GNU General Public License for more details.
26800+ *
26801+ * You should have received a copy of the GNU General Public License
523b37e3 26802+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26803+ */
26804+
26805+/*
26806+ * superblock private data
26807+ */
26808+
26809+#include "aufs.h"
26810+
26811+/*
26812+ * They are necessary regardless of whether sysfs is enabled.
26813+ */
26814+void au_si_free(struct kobject *kobj)
26815+{
86dc4139 26816+ int i;
1facf9fc 26817+ struct au_sbinfo *sbinfo;
b752ccd1 26818+ char *locked __maybe_unused; /* debug only */
1facf9fc 26819+
26820+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
86dc4139
AM
26821+ for (i = 0; i < AuPlink_NHASH; i++)
26822+ AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head));
f0c0a007 26823+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
5afbbe0d
AM
26824+
26825+ AuDebugOn(percpu_counter_sum(&sbinfo->si_ninodes));
26826+ percpu_counter_destroy(&sbinfo->si_ninodes);
26827+ AuDebugOn(percpu_counter_sum(&sbinfo->si_nfiles));
26828+ percpu_counter_destroy(&sbinfo->si_nfiles);
1facf9fc 26829+
e49829fe 26830+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 26831+ au_br_free(sbinfo);
e49829fe 26832+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1 26833+
f0c0a007 26834+ au_delayed_kfree(sbinfo->si_branch);
febd17d6 26835+ for (i = 0; i < AU_NPIDMAP; i++)
f0c0a007
AM
26836+ if (sbinfo->au_si_pid.pid_bitmap[i])
26837+ au_delayed_kfree(sbinfo->au_si_pid.pid_bitmap[i]);
febd17d6 26838+ mutex_destroy(&sbinfo->au_si_pid.pid_mtx);
1facf9fc 26839+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 26840+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 26841+
f0c0a007 26842+ au_delayed_kfree(sbinfo);
1facf9fc 26843+}
26844+
26845+int au_si_alloc(struct super_block *sb)
26846+{
86dc4139 26847+ int err, i;
1facf9fc 26848+ struct au_sbinfo *sbinfo;
26849+
26850+ err = -ENOMEM;
4a4d8108 26851+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 26852+ if (unlikely(!sbinfo))
26853+ goto out;
26854+
26855+ /* will be reallocated separately */
26856+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
26857+ if (unlikely(!sbinfo->si_branch))
febd17d6 26858+ goto out_sbinfo;
1facf9fc 26859+
1facf9fc 26860+ err = sysaufs_si_init(sbinfo);
26861+ if (unlikely(err))
26862+ goto out_br;
26863+
26864+ au_nwt_init(&sbinfo->si_nowait);
dece6358 26865+ au_rw_init_wlock(&sbinfo->si_rwsem);
febd17d6 26866+ mutex_init(&sbinfo->au_si_pid.pid_mtx);
b752ccd1 26867+
5afbbe0d
AM
26868+ percpu_counter_init(&sbinfo->si_ninodes, 0, GFP_NOFS);
26869+ percpu_counter_init(&sbinfo->si_nfiles, 0, GFP_NOFS);
7f207e10 26870+
5afbbe0d 26871+ sbinfo->si_bbot = -1;
392086de 26872+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
1facf9fc 26873+
26874+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
26875+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
26876+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
26877+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 26878+
076b876e
AM
26879+ au_fhsm_init(sbinfo);
26880+
e49829fe 26881+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 26882+
392086de
AM
26883+ sbinfo->si_xino_jiffy = jiffies;
26884+ sbinfo->si_xino_expire
26885+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
1facf9fc 26886+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 26887+ sbinfo->si_xino_brid = -1;
26888+ /* leave si_xib_last_pindex and si_xib_next_bit */
26889+
b912730e
AM
26890+ au_sphl_init(&sbinfo->si_aopen);
26891+
e49829fe 26892+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 26893+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
26894+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
26895+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
26896+
86dc4139
AM
26897+ for (i = 0; i < AuPlink_NHASH; i++)
26898+ au_sphl_init(sbinfo->si_plink + i);
1facf9fc 26899+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 26900+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 26901+
523b37e3
AM
26902+ au_sphl_init(&sbinfo->si_files);
26903+
b95c5147
AM
26904+ /* with getattr by default */
26905+ sbinfo->si_iop_array = aufs_iop;
26906+
1facf9fc 26907+ /* leave other members for sysaufs and si_mnt. */
26908+ sbinfo->si_sb = sb;
26909+ sb->s_fs_info = sbinfo;
b752ccd1 26910+ si_pid_set(sb);
1facf9fc 26911+ return 0; /* success */
26912+
4f0767ce 26913+out_br:
f0c0a007 26914+ au_delayed_kfree(sbinfo->si_branch);
4f0767ce 26915+out_sbinfo:
f0c0a007 26916+ au_delayed_kfree(sbinfo);
4f0767ce 26917+out:
1facf9fc 26918+ return err;
26919+}
26920+
e2f27e51 26921+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink)
1facf9fc 26922+{
26923+ int err, sz;
26924+ struct au_branch **brp;
26925+
dece6358
AM
26926+ AuRwMustWriteLock(&sbinfo->si_rwsem);
26927+
1facf9fc 26928+ err = -ENOMEM;
5afbbe0d 26929+ sz = sizeof(*brp) * (sbinfo->si_bbot + 1);
1facf9fc 26930+ if (unlikely(!sz))
26931+ sz = sizeof(*brp);
e2f27e51
AM
26932+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS,
26933+ may_shrink);
1facf9fc 26934+ if (brp) {
26935+ sbinfo->si_branch = brp;
26936+ err = 0;
26937+ }
26938+
26939+ return err;
26940+}
26941+
26942+/* ---------------------------------------------------------------------- */
26943+
26944+unsigned int au_sigen_inc(struct super_block *sb)
26945+{
26946+ unsigned int gen;
5527c038 26947+ struct inode *inode;
1facf9fc 26948+
dece6358
AM
26949+ SiMustWriteLock(sb);
26950+
1facf9fc 26951+ gen = ++au_sbi(sb)->si_generation;
26952+ au_update_digen(sb->s_root);
5527c038
JR
26953+ inode = d_inode(sb->s_root);
26954+ au_update_iigen(inode, /*half*/0);
26955+ inode->i_version++;
1facf9fc 26956+ return gen;
26957+}
26958+
26959+aufs_bindex_t au_new_br_id(struct super_block *sb)
26960+{
26961+ aufs_bindex_t br_id;
26962+ int i;
26963+ struct au_sbinfo *sbinfo;
26964+
dece6358
AM
26965+ SiMustWriteLock(sb);
26966+
1facf9fc 26967+ sbinfo = au_sbi(sb);
26968+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
26969+ br_id = ++sbinfo->si_last_br_id;
7f207e10 26970+ AuDebugOn(br_id < 0);
1facf9fc 26971+ if (br_id && au_br_index(sb, br_id) < 0)
26972+ return br_id;
26973+ }
26974+
26975+ return -1;
26976+}
26977+
26978+/* ---------------------------------------------------------------------- */
26979+
e49829fe
JR
26980+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
26981+int si_read_lock(struct super_block *sb, int flags)
26982+{
26983+ int err;
26984+
26985+ err = 0;
26986+ if (au_ftest_lock(flags, FLUSH))
26987+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26988+
26989+ si_noflush_read_lock(sb);
26990+ err = au_plink_maint(sb, flags);
26991+ if (unlikely(err))
26992+ si_read_unlock(sb);
26993+
26994+ return err;
26995+}
26996+
26997+int si_write_lock(struct super_block *sb, int flags)
26998+{
26999+ int err;
27000+
27001+ if (au_ftest_lock(flags, FLUSH))
27002+ au_nwt_flush(&au_sbi(sb)->si_nowait);
27003+
27004+ si_noflush_write_lock(sb);
27005+ err = au_plink_maint(sb, flags);
27006+ if (unlikely(err))
27007+ si_write_unlock(sb);
27008+
27009+ return err;
27010+}
27011+
1facf9fc 27012+/* dentry and super_block lock. call at entry point */
e49829fe 27013+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 27014+{
e49829fe 27015+ int err;
027c5e7a 27016+ struct super_block *sb;
e49829fe 27017+
027c5e7a
AM
27018+ sb = dentry->d_sb;
27019+ err = si_read_lock(sb, flags);
27020+ if (unlikely(err))
27021+ goto out;
27022+
27023+ if (au_ftest_lock(flags, DW))
27024+ di_write_lock_child(dentry);
27025+ else
27026+ di_read_lock_child(dentry, flags);
27027+
27028+ if (au_ftest_lock(flags, GEN)) {
27029+ err = au_digen_test(dentry, au_sigen(sb));
79b8bda9
AM
27030+ if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
27031+ AuDebugOn(!err && au_dbrange_test(dentry));
27032+ else if (!err)
27033+ err = au_dbrange_test(dentry);
027c5e7a
AM
27034+ if (unlikely(err))
27035+ aufs_read_unlock(dentry, flags);
e49829fe
JR
27036+ }
27037+
027c5e7a 27038+out:
e49829fe 27039+ return err;
1facf9fc 27040+}
27041+
27042+void aufs_read_unlock(struct dentry *dentry, int flags)
27043+{
27044+ if (au_ftest_lock(flags, DW))
27045+ di_write_unlock(dentry);
27046+ else
27047+ di_read_unlock(dentry, flags);
27048+ si_read_unlock(dentry->d_sb);
27049+}
27050+
27051+void aufs_write_lock(struct dentry *dentry)
27052+{
e49829fe 27053+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 27054+ di_write_lock_child(dentry);
27055+}
27056+
27057+void aufs_write_unlock(struct dentry *dentry)
27058+{
27059+ di_write_unlock(dentry);
27060+ si_write_unlock(dentry->d_sb);
27061+}
27062+
e49829fe 27063+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 27064+{
e49829fe 27065+ int err;
027c5e7a
AM
27066+ unsigned int sigen;
27067+ struct super_block *sb;
e49829fe 27068+
027c5e7a
AM
27069+ sb = d1->d_sb;
27070+ err = si_read_lock(sb, flags);
27071+ if (unlikely(err))
27072+ goto out;
27073+
b95c5147 27074+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
027c5e7a
AM
27075+
27076+ if (au_ftest_lock(flags, GEN)) {
27077+ sigen = au_sigen(sb);
27078+ err = au_digen_test(d1, sigen);
27079+ AuDebugOn(!err && au_dbrange_test(d1));
27080+ if (!err) {
27081+ err = au_digen_test(d2, sigen);
27082+ AuDebugOn(!err && au_dbrange_test(d2));
27083+ }
27084+ if (unlikely(err))
27085+ aufs_read_and_write_unlock2(d1, d2);
27086+ }
27087+
27088+out:
e49829fe 27089+ return err;
1facf9fc 27090+}
27091+
27092+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
27093+{
27094+ di_write_unlock2(d1, d2);
27095+ si_read_unlock(d1->d_sb);
27096+}
b752ccd1
AM
27097+
27098+/* ---------------------------------------------------------------------- */
27099+
febd17d6 27100+static void si_pid_alloc(struct au_si_pid *au_si_pid, int idx)
b752ccd1 27101+{
febd17d6 27102+ unsigned long *p;
b752ccd1 27103+
febd17d6
JR
27104+ BUILD_BUG_ON(sizeof(unsigned long) !=
27105+ sizeof(*au_si_pid->pid_bitmap));
b752ccd1 27106+
febd17d6
JR
27107+ mutex_lock(&au_si_pid->pid_mtx);
27108+ p = au_si_pid->pid_bitmap[idx];
27109+ while (!p) {
27110+ /*
27111+ * bad approach.
27112+ * but keeping 'si_pid_set()' void is more important.
27113+ */
27114+ p = kcalloc(BITS_TO_LONGS(AU_PIDSTEP),
27115+ sizeof(*au_si_pid->pid_bitmap),
27116+ GFP_NOFS);
27117+ if (p)
27118+ break;
27119+ cond_resched();
27120+ }
27121+ au_si_pid->pid_bitmap[idx] = p;
27122+ mutex_unlock(&au_si_pid->pid_mtx);
b752ccd1
AM
27123+}
27124+
febd17d6 27125+void si_pid_set(struct super_block *sb)
b752ccd1 27126+{
febd17d6
JR
27127+ pid_t bit;
27128+ int idx;
27129+ unsigned long *bitmap;
27130+ struct au_si_pid *au_si_pid;
27131+
27132+ si_pid_idx_bit(&idx, &bit);
27133+ au_si_pid = &au_sbi(sb)->au_si_pid;
27134+ bitmap = au_si_pid->pid_bitmap[idx];
27135+ if (!bitmap) {
27136+ si_pid_alloc(au_si_pid, idx);
27137+ bitmap = au_si_pid->pid_bitmap[idx];
27138+ }
27139+ AuDebugOn(test_bit(bit, bitmap));
27140+ set_bit(bit, bitmap);
27141+ /* smp_mb(); */
b752ccd1 27142+}
7f207e10
AM
27143diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
27144--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 27145+++ linux/fs/aufs/spl.h 2016-10-09 16:55:36.496035060 +0200
f0c0a007 27146@@ -0,0 +1,113 @@
1facf9fc 27147+/*
8cdd5066 27148+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 27149+ *
27150+ * This program, aufs is free software; you can redistribute it and/or modify
27151+ * it under the terms of the GNU General Public License as published by
27152+ * the Free Software Foundation; either version 2 of the License, or
27153+ * (at your option) any later version.
dece6358
AM
27154+ *
27155+ * This program is distributed in the hope that it will be useful,
27156+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27157+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27158+ * GNU General Public License for more details.
27159+ *
27160+ * You should have received a copy of the GNU General Public License
523b37e3 27161+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27162+ */
27163+
27164+/*
27165+ * simple list protected by a spinlock
27166+ */
27167+
27168+#ifndef __AUFS_SPL_H__
27169+#define __AUFS_SPL_H__
27170+
27171+#ifdef __KERNEL__
27172+
#if 0
/*
 * compiled out: the list_head flavour of the spinlock protected list.
 */
struct au_splhead {
	spinlock_t		spin;
	struct list_head	head;
};

/* set up an empty list and its lock */
static inline void au_spl_init(struct au_splhead *spl)
{
	spin_lock_init(&spl->spin);
	INIT_LIST_HEAD(&spl->head);
}

/* insert @list at the head, under the lock */
static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
{
	spinlock_t *lock = &spl->spin;

	spin_lock(lock);
	list_add(list, &spl->head);
	spin_unlock(lock);
}

/* unlink @list, under the lock */
static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
{
	spinlock_t *lock = &spl->spin;

	spin_lock(lock);
	list_del(list);
	spin_unlock(lock);
}

/* unlink @list with list_del_rcu(), under the lock */
static inline void au_spl_del_rcu(struct list_head *list,
				  struct au_splhead *spl)
{
	spinlock_t *lock = &spl->spin;

	spin_lock(lock);
	list_del_rcu(list);
	spin_unlock(lock);
}
#endif
4a4d8108 27207+
86dc4139
AM
27208+/* ---------------------------------------------------------------------- */
27209+
27210+struct au_sphlhead {
27211+ spinlock_t spin;
27212+ struct hlist_head head;
27213+};
27214+
27215+static inline void au_sphl_init(struct au_sphlhead *sphl)
27216+{
27217+ spin_lock_init(&sphl->spin);
27218+ INIT_HLIST_HEAD(&sphl->head);
27219+}
27220+
27221+static inline void au_sphl_add(struct hlist_node *hlist,
27222+ struct au_sphlhead *sphl)
27223+{
27224+ spin_lock(&sphl->spin);
27225+ hlist_add_head(hlist, &sphl->head);
27226+ spin_unlock(&sphl->spin);
27227+}
27228+
27229+static inline void au_sphl_del(struct hlist_node *hlist,
27230+ struct au_sphlhead *sphl)
27231+{
27232+ spin_lock(&sphl->spin);
27233+ hlist_del(hlist);
27234+ spin_unlock(&sphl->spin);
27235+}
27236+
27237+static inline void au_sphl_del_rcu(struct hlist_node *hlist,
27238+ struct au_sphlhead *sphl)
27239+{
27240+ spin_lock(&sphl->spin);
27241+ hlist_del_rcu(hlist);
27242+ spin_unlock(&sphl->spin);
27243+}
27244+
27245+static inline unsigned long au_sphl_count(struct au_sphlhead *sphl)
27246+{
27247+ unsigned long cnt;
27248+ struct hlist_node *pos;
27249+
27250+ cnt = 0;
27251+ spin_lock(&sphl->spin);
27252+ hlist_for_each(pos, &sphl->head)
27253+ cnt++;
27254+ spin_unlock(&sphl->spin);
27255+ return cnt;
27256+}
27257+
1facf9fc 27258+#endif /* __KERNEL__ */
27259+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
27260diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
27261--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
27262+++ linux/fs/aufs/super.c 2016-12-17 12:28:17.598545045 +0100
27263@@ -0,0 +1,1046 @@
1facf9fc 27264+/*
8cdd5066 27265+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 27266+ *
27267+ * This program, aufs is free software; you can redistribute it and/or modify
27268+ * it under the terms of the GNU General Public License as published by
27269+ * the Free Software Foundation; either version 2 of the License, or
27270+ * (at your option) any later version.
dece6358
AM
27271+ *
27272+ * This program is distributed in the hope that it will be useful,
27273+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27274+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27275+ * GNU General Public License for more details.
27276+ *
27277+ * You should have received a copy of the GNU General Public License
523b37e3 27278+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27279+ */
27280+
27281+/*
27282+ * mount and super_block operations
27283+ */
27284+
#include <linux/math64.h>
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/statfs.h>
#include <linux/vmalloc.h>
#include "aufs.h"
27290+
27291+/*
27292+ * super_operations
27293+ */
27294+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
27295+{
27296+ struct au_icntnr *c;
27297+
27298+ c = au_cache_alloc_icntnr();
27299+ if (c) {
027c5e7a 27300+ au_icntnr_init(c);
1facf9fc 27301+ c->vfs_inode.i_version = 1; /* sigen(sb); */
27302+ c->iinfo.ii_hinode = NULL;
27303+ return &c->vfs_inode;
27304+ }
27305+ return NULL;
27306+}
27307+
027c5e7a
AM
27308+static void aufs_destroy_inode_cb(struct rcu_head *head)
27309+{
27310+ struct inode *inode = container_of(head, struct inode, i_rcu);
27311+
f0c0a007 27312+ au_cache_dfree_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
027c5e7a
AM
27313+}
27314+
1facf9fc 27315+static void aufs_destroy_inode(struct inode *inode)
27316+{
5afbbe0d
AM
27317+ if (!au_is_bad_inode(inode))
27318+ au_iinfo_fin(inode);
027c5e7a 27319+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 27320+}
27321+
27322+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
27323+{
27324+ struct inode *inode;
27325+ int err;
27326+
27327+ inode = iget_locked(sb, ino);
27328+ if (unlikely(!inode)) {
27329+ inode = ERR_PTR(-ENOMEM);
27330+ goto out;
27331+ }
27332+ if (!(inode->i_state & I_NEW))
27333+ goto out;
27334+
27335+ err = au_xigen_new(inode);
27336+ if (!err)
27337+ err = au_iinfo_init(inode);
27338+ if (!err)
27339+ inode->i_version++;
27340+ else {
27341+ iget_failed(inode);
27342+ inode = ERR_PTR(err);
27343+ }
27344+
4f0767ce 27345+out:
1facf9fc 27346+ /* never return NULL */
27347+ AuDebugOn(!inode);
27348+ AuTraceErrPtr(inode);
27349+ return inode;
27350+}
27351+
27352+/* lock free root dinfo */
27353+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
27354+{
27355+ int err;
5afbbe0d 27356+ aufs_bindex_t bindex, bbot;
1facf9fc 27357+ struct path path;
4a4d8108 27358+ struct au_hdentry *hdp;
1facf9fc 27359+ struct au_branch *br;
076b876e 27360+ au_br_perm_str_t perm;
1facf9fc 27361+
27362+ err = 0;
5afbbe0d
AM
27363+ bbot = au_sbbot(sb);
27364+ bindex = 0;
27365+ hdp = au_hdentry(au_di(sb->s_root), bindex);
27366+ for (; !err && bindex <= bbot; bindex++, hdp++) {
1facf9fc 27367+ br = au_sbr(sb, bindex);
86dc4139 27368+ path.mnt = au_br_mnt(br);
5afbbe0d 27369+ path.dentry = hdp->hd_dentry;
1facf9fc 27370+ err = au_seq_path(seq, &path);
79b8bda9 27371+ if (!err) {
076b876e 27372+ au_optstr_br_perm(&perm, br->br_perm);
79b8bda9 27373+ seq_printf(seq, "=%s", perm.a);
5afbbe0d 27374+ if (bindex != bbot)
79b8bda9 27375+ seq_putc(seq, ':');
1e00d052 27376+ }
1facf9fc 27377+ }
79b8bda9
AM
27378+ if (unlikely(err || seq_has_overflowed(seq)))
27379+ err = -E2BIG;
1facf9fc 27380+
27381+ return err;
27382+}
27383+
f2c43d5f
AM
27384+static void au_gen_fmt(char *fmt, int len __maybe_unused, const char *pat,
27385+ const char *append)
27386+{
27387+ char *p;
27388+
27389+ p = fmt;
27390+ while (*pat != ':')
27391+ *p++ = *pat++;
27392+ *p++ = *pat++;
27393+ strcpy(p, append);
27394+ AuDebugOn(strlen(fmt) >= len);
27395+}
27396+
1facf9fc 27397+static void au_show_wbr_create(struct seq_file *m, int v,
27398+ struct au_sbinfo *sbinfo)
27399+{
27400+ const char *pat;
f2c43d5f
AM
27401+ char fmt[32];
27402+ struct au_wbr_mfs *mfs;
1facf9fc 27403+
dece6358
AM
27404+ AuRwMustAnyLock(&sbinfo->si_rwsem);
27405+
c2b27bf2 27406+ seq_puts(m, ",create=");
1facf9fc 27407+ pat = au_optstr_wbr_create(v);
f2c43d5f 27408+ mfs = &sbinfo->si_wbr_mfs;
1facf9fc 27409+ switch (v) {
27410+ case AuWbrCreate_TDP:
27411+ case AuWbrCreate_RR:
27412+ case AuWbrCreate_MFS:
27413+ case AuWbrCreate_PMFS:
c2b27bf2 27414+ seq_puts(m, pat);
1facf9fc 27415+ break;
f2c43d5f
AM
27416+ case AuWbrCreate_MFSRR:
27417+ case AuWbrCreate_TDMFS:
27418+ case AuWbrCreate_PMFSRR:
27419+ au_gen_fmt(fmt, sizeof(fmt), pat, "%llu");
27420+ seq_printf(m, fmt, mfs->mfsrr_watermark);
1facf9fc 27421+ break;
f2c43d5f 27422+ case AuWbrCreate_MFSV:
1facf9fc 27423+ case AuWbrCreate_PMFSV:
f2c43d5f
AM
27424+ au_gen_fmt(fmt, sizeof(fmt), pat, "%lu");
27425+ seq_printf(m, fmt,
27426+ jiffies_to_msecs(mfs->mfs_expire)
e49829fe 27427+ / MSEC_PER_SEC);
1facf9fc 27428+ break;
1facf9fc 27429+ case AuWbrCreate_MFSRRV:
f2c43d5f 27430+ case AuWbrCreate_TDMFSV:
392086de 27431+ case AuWbrCreate_PMFSRRV:
f2c43d5f
AM
27432+ au_gen_fmt(fmt, sizeof(fmt), pat, "%llu:%lu");
27433+ seq_printf(m, fmt, mfs->mfsrr_watermark,
27434+ jiffies_to_msecs(mfs->mfs_expire) / MSEC_PER_SEC);
392086de 27435+ break;
f2c43d5f
AM
27436+ default:
27437+ BUG();
1facf9fc 27438+ }
27439+}
27440+
7eafdf33 27441+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
1facf9fc 27442+{
27443+#ifdef CONFIG_SYSFS
27444+ return 0;
27445+#else
27446+ int err;
27447+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
27448+ aufs_bindex_t bindex, brid;
1facf9fc 27449+ struct qstr *name;
27450+ struct file *f;
27451+ struct dentry *d, *h_root;
27452+
dece6358
AM
27453+ AuRwMustAnyLock(&sbinfo->si_rwsem);
27454+
1facf9fc 27455+ err = 0;
1facf9fc 27456+ f = au_sbi(sb)->si_xib;
27457+ if (!f)
27458+ goto out;
27459+
27460+ /* stop printing the default xino path on the first writable branch */
27461+ h_root = NULL;
27462+ brid = au_xino_brid(sb);
27463+ if (brid >= 0) {
27464+ bindex = au_br_index(sb, brid);
5afbbe0d 27465+ h_root = au_hdentry(au_di(sb->s_root), bindex)->hd_dentry;
1facf9fc 27466+ }
2000de60 27467+ d = f->f_path.dentry;
1facf9fc 27468+ name = &d->d_name;
27469+ /* safe ->d_parent because the file is unlinked */
27470+ if (d->d_parent == h_root
27471+ && name->len == len
27472+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
27473+ goto out;
27474+
27475+ seq_puts(seq, ",xino=");
27476+ err = au_xino_path(seq, f);
27477+
4f0767ce 27478+out:
1facf9fc 27479+ return err;
27480+#endif
27481+}
27482+
27483+/* seq_file will re-call me in case of too long string */
7eafdf33 27484+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
1facf9fc 27485+{
027c5e7a 27486+ int err;
1facf9fc 27487+ unsigned int mnt_flags, v;
27488+ struct super_block *sb;
27489+ struct au_sbinfo *sbinfo;
27490+
27491+#define AuBool(name, str) do { \
27492+ v = au_opt_test(mnt_flags, name); \
27493+ if (v != au_opt_test(AuOpt_Def, name)) \
27494+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
27495+} while (0)
27496+
27497+#define AuStr(name, str) do { \
27498+ v = mnt_flags & AuOptMask_##name; \
27499+ if (v != (AuOpt_Def & AuOptMask_##name)) \
27500+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
27501+} while (0)
27502+
27503+#define AuUInt(name, str, val) do { \
27504+ if (val != AUFS_##name##_DEF) \
27505+ seq_printf(m, "," #str "=%u", val); \
27506+} while (0)
27507+
7eafdf33 27508+ sb = dentry->d_sb;
c1595e42
JR
27509+ if (sb->s_flags & MS_POSIXACL)
27510+ seq_puts(m, ",acl");
27511+
27512+ /* lock free root dinfo */
1facf9fc 27513+ si_noflush_read_lock(sb);
27514+ sbinfo = au_sbi(sb);
27515+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
27516+
27517+ mnt_flags = au_mntflags(sb);
27518+ if (au_opt_test(mnt_flags, XINO)) {
7eafdf33 27519+ err = au_show_xino(m, sb);
1facf9fc 27520+ if (unlikely(err))
27521+ goto out;
27522+ } else
27523+ seq_puts(m, ",noxino");
27524+
27525+ AuBool(TRUNC_XINO, trunc_xino);
27526+ AuStr(UDBA, udba);
dece6358 27527+ AuBool(SHWH, shwh);
1facf9fc 27528+ AuBool(PLINK, plink);
4a4d8108 27529+ AuBool(DIO, dio);
076b876e 27530+ AuBool(DIRPERM1, dirperm1);
1facf9fc 27531+
27532+ v = sbinfo->si_wbr_create;
27533+ if (v != AuWbrCreate_Def)
27534+ au_show_wbr_create(m, v, sbinfo);
27535+
27536+ v = sbinfo->si_wbr_copyup;
27537+ if (v != AuWbrCopyup_Def)
27538+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
27539+
27540+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
27541+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
27542+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
27543+
27544+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
27545+
027c5e7a
AM
27546+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
27547+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 27548+
27549+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
27550+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
27551+
076b876e
AM
27552+ au_fhsm_show(m, sbinfo);
27553+
1facf9fc 27554+ AuBool(SUM, sum);
27555+ /* AuBool(SUM_W, wsum); */
27556+ AuBool(WARN_PERM, warn_perm);
27557+ AuBool(VERBOSE, verbose);
27558+
4f0767ce 27559+out:
1facf9fc 27560+ /* be sure to print "br:" last */
27561+ if (!sysaufs_brs) {
27562+ seq_puts(m, ",br:");
27563+ au_show_brs(m, sb);
27564+ }
27565+ si_read_unlock(sb);
27566+ return 0;
27567+
1facf9fc 27568+#undef AuBool
27569+#undef AuStr
4a4d8108 27570+#undef AuUInt
1facf9fc 27571+}
27572+
27573+/* ---------------------------------------------------------------------- */
27574+
27575+/* sum mode which returns the summation for statfs(2) */
27576+
27577+static u64 au_add_till_max(u64 a, u64 b)
27578+{
27579+ u64 old;
27580+
27581+ old = a;
27582+ a += b;
92d182d2
AM
27583+ if (old <= a)
27584+ return a;
27585+ return ULLONG_MAX;
27586+}
27587+
27588+static u64 au_mul_till_max(u64 a, long mul)
27589+{
27590+ u64 old;
27591+
27592+ old = a;
27593+ a *= mul;
27594+ if (old <= a)
1facf9fc 27595+ return a;
27596+ return ULLONG_MAX;
27597+}
27598+
27599+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
27600+{
27601+ int err;
92d182d2 27602+ long bsize, factor;
1facf9fc 27603+ u64 blocks, bfree, bavail, files, ffree;
5afbbe0d 27604+ aufs_bindex_t bbot, bindex, i;
1facf9fc 27605+ unsigned char shared;
7f207e10 27606+ struct path h_path;
1facf9fc 27607+ struct super_block *h_sb;
27608+
92d182d2
AM
27609+ err = 0;
27610+ bsize = LONG_MAX;
27611+ files = 0;
27612+ ffree = 0;
1facf9fc 27613+ blocks = 0;
27614+ bfree = 0;
27615+ bavail = 0;
5afbbe0d
AM
27616+ bbot = au_sbbot(sb);
27617+ for (bindex = 0; bindex <= bbot; bindex++) {
7f207e10
AM
27618+ h_path.mnt = au_sbr_mnt(sb, bindex);
27619+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 27620+ shared = 0;
92d182d2 27621+ for (i = 0; !shared && i < bindex; i++)
1facf9fc 27622+ shared = (au_sbr_sb(sb, i) == h_sb);
27623+ if (shared)
27624+ continue;
27625+
27626+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27627+ h_path.dentry = h_path.mnt->mnt_root;
27628+ err = vfs_statfs(&h_path, buf);
1facf9fc 27629+ if (unlikely(err))
27630+ goto out;
27631+
92d182d2
AM
27632+ if (bsize > buf->f_bsize) {
27633+ /*
27634+ * we will reduce bsize, so we have to expand blocks
27635+ * etc. to match them again
27636+ */
27637+ factor = (bsize / buf->f_bsize);
27638+ blocks = au_mul_till_max(blocks, factor);
27639+ bfree = au_mul_till_max(bfree, factor);
27640+ bavail = au_mul_till_max(bavail, factor);
27641+ bsize = buf->f_bsize;
27642+ }
27643+
27644+ factor = (buf->f_bsize / bsize);
27645+ blocks = au_add_till_max(blocks,
27646+ au_mul_till_max(buf->f_blocks, factor));
27647+ bfree = au_add_till_max(bfree,
27648+ au_mul_till_max(buf->f_bfree, factor));
27649+ bavail = au_add_till_max(bavail,
27650+ au_mul_till_max(buf->f_bavail, factor));
1facf9fc 27651+ files = au_add_till_max(files, buf->f_files);
27652+ ffree = au_add_till_max(ffree, buf->f_ffree);
27653+ }
27654+
92d182d2 27655+ buf->f_bsize = bsize;
1facf9fc 27656+ buf->f_blocks = blocks;
27657+ buf->f_bfree = bfree;
27658+ buf->f_bavail = bavail;
27659+ buf->f_files = files;
27660+ buf->f_ffree = ffree;
92d182d2 27661+ buf->f_frsize = 0;
1facf9fc 27662+
4f0767ce 27663+out:
1facf9fc 27664+ return err;
27665+}
27666+
27667+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
27668+{
27669+ int err;
7f207e10 27670+ struct path h_path;
1facf9fc 27671+ struct super_block *sb;
27672+
27673+ /* lock free root dinfo */
27674+ sb = dentry->d_sb;
27675+ si_noflush_read_lock(sb);
7f207e10 27676+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 27677+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27678+ h_path.mnt = au_sbr_mnt(sb, 0);
27679+ h_path.dentry = h_path.mnt->mnt_root;
27680+ err = vfs_statfs(&h_path, buf);
27681+ } else
1facf9fc 27682+ err = au_statfs_sum(sb, buf);
27683+ si_read_unlock(sb);
27684+
27685+ if (!err) {
27686+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 27687+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 27688+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
27689+ }
27690+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
27691+
27692+ return err;
27693+}
27694+
27695+/* ---------------------------------------------------------------------- */
27696+
537831f9
AM
27697+static int aufs_sync_fs(struct super_block *sb, int wait)
27698+{
27699+ int err, e;
5afbbe0d 27700+ aufs_bindex_t bbot, bindex;
537831f9
AM
27701+ struct au_branch *br;
27702+ struct super_block *h_sb;
27703+
27704+ err = 0;
27705+ si_noflush_read_lock(sb);
5afbbe0d
AM
27706+ bbot = au_sbbot(sb);
27707+ for (bindex = 0; bindex <= bbot; bindex++) {
537831f9
AM
27708+ br = au_sbr(sb, bindex);
27709+ if (!au_br_writable(br->br_perm))
27710+ continue;
27711+
27712+ h_sb = au_sbr_sb(sb, bindex);
27713+ if (h_sb->s_op->sync_fs) {
27714+ e = h_sb->s_op->sync_fs(h_sb, wait);
27715+ if (unlikely(e && !err))
27716+ err = e;
27717+ /* go on even if an error happens */
27718+ }
27719+ }
27720+ si_read_unlock(sb);
27721+
27722+ return err;
27723+}
27724+
27725+/* ---------------------------------------------------------------------- */
27726+
1facf9fc 27727+/* final actions when unmounting a file system */
27728+static void aufs_put_super(struct super_block *sb)
27729+{
27730+ struct au_sbinfo *sbinfo;
27731+
27732+ sbinfo = au_sbi(sb);
27733+ if (!sbinfo)
27734+ return;
27735+
1facf9fc 27736+ dbgaufs_si_fin(sbinfo);
27737+ kobject_put(&sbinfo->si_kobj);
27738+}
27739+
27740+/* ---------------------------------------------------------------------- */
27741+
79b8bda9
AM
27742+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
27743+ struct super_block *sb, void *arg)
7f207e10
AM
27744+{
27745+ void *array;
076b876e 27746+ unsigned long long n, sz;
7f207e10
AM
27747+
27748+ array = NULL;
27749+ n = 0;
27750+ if (!*hint)
27751+ goto out;
27752+
27753+ if (*hint > ULLONG_MAX / sizeof(array)) {
27754+ array = ERR_PTR(-EMFILE);
27755+ pr_err("hint %llu\n", *hint);
27756+ goto out;
27757+ }
27758+
076b876e
AM
27759+ sz = sizeof(array) * *hint;
27760+ array = kzalloc(sz, GFP_NOFS);
7f207e10 27761+ if (unlikely(!array))
076b876e 27762+ array = vzalloc(sz);
7f207e10
AM
27763+ if (unlikely(!array)) {
27764+ array = ERR_PTR(-ENOMEM);
27765+ goto out;
27766+ }
27767+
79b8bda9 27768+ n = cb(sb, array, *hint, arg);
7f207e10
AM
27769+ AuDebugOn(n > *hint);
27770+
27771+out:
27772+ *hint = n;
27773+ return array;
27774+}
27775+
79b8bda9 27776+static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
7f207e10
AM
27777+ unsigned long long max __maybe_unused,
27778+ void *arg)
27779+{
27780+ unsigned long long n;
27781+ struct inode **p, *inode;
27782+ struct list_head *head;
27783+
27784+ n = 0;
27785+ p = a;
27786+ head = arg;
79b8bda9 27787+ spin_lock(&sb->s_inode_list_lock);
7f207e10 27788+ list_for_each_entry(inode, head, i_sb_list) {
5afbbe0d
AM
27789+ if (!au_is_bad_inode(inode)
27790+ && au_ii(inode)->ii_btop >= 0) {
2cbb1c4b
JR
27791+ spin_lock(&inode->i_lock);
27792+ if (atomic_read(&inode->i_count)) {
27793+ au_igrab(inode);
27794+ *p++ = inode;
27795+ n++;
27796+ AuDebugOn(n > max);
27797+ }
27798+ spin_unlock(&inode->i_lock);
7f207e10
AM
27799+ }
27800+ }
79b8bda9 27801+ spin_unlock(&sb->s_inode_list_lock);
7f207e10
AM
27802+
27803+ return n;
27804+}
27805+
27806+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
27807+{
5afbbe0d 27808+ *max = au_ninodes(sb);
79b8bda9 27809+ return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
7f207e10
AM
27810+}
27811+
/* iput() the first @max inodes in @a, then free the array itself */
void au_iarray_free(struct inode **a, unsigned long long max)
{
	struct inode **p = a;
	unsigned long long left;

	for (left = max; left; left--)
		iput(*p++);
	kvfree(a);
}
27820+
27821+/* ---------------------------------------------------------------------- */
27822+
1facf9fc 27823+/*
27824+ * refresh dentry and inode at remount time.
27825+ */
027c5e7a
AM
27826+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
27827+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
27828+ struct dentry *parent)
1facf9fc 27829+{
27830+ int err;
1facf9fc 27831+
27832+ di_write_lock_child(dentry);
1facf9fc 27833+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
27834+ err = au_refresh_dentry(dentry, parent);
27835+ if (!err && dir_flags)
5527c038 27836+ au_hn_reset(d_inode(dentry), dir_flags);
1facf9fc 27837+ di_read_unlock(parent, AuLock_IR);
1facf9fc 27838+ di_write_unlock(dentry);
27839+
27840+ return err;
27841+}
27842+
027c5e7a
AM
27843+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
27844+ struct au_sbinfo *sbinfo,
b95c5147 27845+ const unsigned int dir_flags, unsigned int do_idop)
1facf9fc 27846+{
027c5e7a
AM
27847+ int err;
27848+ struct dentry *parent;
027c5e7a
AM
27849+
27850+ err = 0;
27851+ parent = dget_parent(dentry);
27852+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
5527c038
JR
27853+ if (d_really_is_positive(dentry)) {
27854+ if (!d_is_dir(dentry))
027c5e7a
AM
27855+ err = au_do_refresh(dentry, /*dir_flags*/0,
27856+ parent);
27857+ else {
27858+ err = au_do_refresh(dentry, dir_flags, parent);
27859+ if (unlikely(err))
27860+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
27861+ }
27862+ } else
27863+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
27864+ AuDbgDentry(dentry);
27865+ }
27866+ dput(parent);
27867+
79b8bda9 27868+ if (!err) {
b95c5147 27869+ if (do_idop)
79b8bda9
AM
27870+ au_refresh_dop(dentry, /*force_reval*/0);
27871+ } else
27872+ au_refresh_dop(dentry, /*force_reval*/1);
27873+
027c5e7a
AM
27874+ AuTraceErr(err);
27875+ return err;
1facf9fc 27876+}
27877+
b95c5147 27878+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
1facf9fc 27879+{
27880+ int err, i, j, ndentry, e;
027c5e7a 27881+ unsigned int sigen;
1facf9fc 27882+ struct au_dcsub_pages dpages;
27883+ struct au_dpage *dpage;
027c5e7a
AM
27884+ struct dentry **dentries, *d;
27885+ struct au_sbinfo *sbinfo;
27886+ struct dentry *root = sb->s_root;
5527c038 27887+ const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
1facf9fc 27888+
b95c5147 27889+ if (do_idop)
79b8bda9
AM
27890+ au_refresh_dop(root, /*force_reval*/0);
27891+
027c5e7a
AM
27892+ err = au_dpages_init(&dpages, GFP_NOFS);
27893+ if (unlikely(err))
1facf9fc 27894+ goto out;
027c5e7a
AM
27895+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
27896+ if (unlikely(err))
1facf9fc 27897+ goto out_dpages;
1facf9fc 27898+
027c5e7a
AM
27899+ sigen = au_sigen(sb);
27900+ sbinfo = au_sbi(sb);
27901+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 27902+ dpage = dpages.dpages + i;
27903+ dentries = dpage->dentries;
27904+ ndentry = dpage->ndentry;
027c5e7a 27905+ for (j = 0; j < ndentry; j++) {
1facf9fc 27906+ d = dentries[j];
79b8bda9 27907+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
b95c5147 27908+ do_idop);
027c5e7a
AM
27909+ if (unlikely(e && !err))
27910+ err = e;
27911+ /* go on even err */
1facf9fc 27912+ }
27913+ }
27914+
4f0767ce 27915+out_dpages:
1facf9fc 27916+ au_dpages_free(&dpages);
4f0767ce 27917+out:
1facf9fc 27918+ return err;
27919+}
27920+
b95c5147 27921+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
1facf9fc 27922+{
027c5e7a
AM
27923+ int err, e;
27924+ unsigned int sigen;
27925+ unsigned long long max, ull;
27926+ struct inode *inode, **array;
1facf9fc 27927+
027c5e7a
AM
27928+ array = au_iarray_alloc(sb, &max);
27929+ err = PTR_ERR(array);
27930+ if (IS_ERR(array))
27931+ goto out;
1facf9fc 27932+
27933+ err = 0;
027c5e7a
AM
27934+ sigen = au_sigen(sb);
27935+ for (ull = 0; ull < max; ull++) {
27936+ inode = array[ull];
076b876e
AM
27937+ if (unlikely(!inode))
27938+ break;
b95c5147
AM
27939+
27940+ e = 0;
27941+ ii_write_lock_child(inode);
537831f9 27942+ if (au_iigen(inode, NULL) != sigen) {
027c5e7a 27943+ e = au_refresh_hinode_self(inode);
1facf9fc 27944+ if (unlikely(e)) {
b95c5147 27945+ au_refresh_iop(inode, /*force_getattr*/1);
027c5e7a 27946+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 27947+ if (!err)
27948+ err = e;
27949+ /* go on even if err */
27950+ }
27951+ }
b95c5147
AM
27952+ if (!e && do_idop)
27953+ au_refresh_iop(inode, /*force_getattr*/0);
27954+ ii_write_unlock(inode);
1facf9fc 27955+ }
27956+
027c5e7a 27957+ au_iarray_free(array, max);
1facf9fc 27958+
4f0767ce 27959+out:
1facf9fc 27960+ return err;
27961+}
27962+
b95c5147 27963+static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
1facf9fc 27964+{
027c5e7a
AM
27965+ int err, e;
27966+ unsigned int udba;
5afbbe0d 27967+ aufs_bindex_t bindex, bbot;
1facf9fc 27968+ struct dentry *root;
27969+ struct inode *inode;
027c5e7a 27970+ struct au_branch *br;
79b8bda9 27971+ struct au_sbinfo *sbi;
1facf9fc 27972+
27973+ au_sigen_inc(sb);
79b8bda9
AM
27974+ sbi = au_sbi(sb);
27975+ au_fclr_si(sbi, FAILED_REFRESH_DIR);
1facf9fc 27976+
27977+ root = sb->s_root;
27978+ DiMustNoWaiters(root);
5527c038 27979+ inode = d_inode(root);
1facf9fc 27980+ IiMustNoWaiters(inode);
1facf9fc 27981+
027c5e7a 27982+ udba = au_opt_udba(sb);
5afbbe0d
AM
27983+ bbot = au_sbbot(sb);
27984+ for (bindex = 0; bindex <= bbot; bindex++) {
027c5e7a
AM
27985+ br = au_sbr(sb, bindex);
27986+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 27987+ if (unlikely(err))
027c5e7a
AM
27988+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
27989+ bindex, err);
27990+ /* go on even if err */
1facf9fc 27991+ }
027c5e7a 27992+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 27993+
b95c5147 27994+ if (do_idop) {
79b8bda9
AM
27995+ if (au_ftest_si(sbi, NO_DREVAL)) {
27996+ AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
27997+ sb->s_d_op = &aufs_dop_noreval;
b95c5147
AM
27998+ AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
27999+ sbi->si_iop_array = aufs_iop_nogetattr;
79b8bda9
AM
28000+ } else {
28001+ AuDebugOn(sb->s_d_op == &aufs_dop);
28002+ sb->s_d_op = &aufs_dop;
b95c5147
AM
28003+ AuDebugOn(sbi->si_iop_array == aufs_iop);
28004+ sbi->si_iop_array = aufs_iop;
79b8bda9 28005+ }
b95c5147
AM
28006+ pr_info("reset to %pf and %pf\n",
28007+ sb->s_d_op, sbi->si_iop_array);
79b8bda9
AM
28008+ }
28009+
027c5e7a 28010+ di_write_unlock(root);
b95c5147
AM
28011+ err = au_refresh_d(sb, do_idop);
28012+ e = au_refresh_i(sb, do_idop);
027c5e7a
AM
28013+ if (unlikely(e && !err))
28014+ err = e;
1facf9fc 28015+ /* aufs_write_lock() calls ..._child() */
28016+ di_write_lock_child(root);
027c5e7a
AM
28017+
28018+ au_cpup_attr_all(inode, /*force*/1);
28019+
28020+ if (unlikely(err))
28021+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 28022+}
28023+
28024+/* stop extra interpretation of errno in mount(8), and strange error messages */
28025+static int cvt_err(int err)
28026+{
28027+ AuTraceErr(err);
28028+
28029+ switch (err) {
28030+ case -ENOENT:
28031+ case -ENOTDIR:
28032+ case -EEXIST:
28033+ case -EIO:
28034+ err = -EINVAL;
28035+ }
28036+ return err;
28037+}
28038+
28039+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
28040+{
4a4d8108
AM
28041+ int err, do_dx;
28042+ unsigned int mntflags;
be52b249
AM
28043+ struct au_opts opts = {
28044+ .opt = NULL
28045+ };
1facf9fc 28046+ struct dentry *root;
28047+ struct inode *inode;
28048+ struct au_sbinfo *sbinfo;
28049+
28050+ err = 0;
28051+ root = sb->s_root;
28052+ if (!data || !*data) {
e49829fe
JR
28053+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
28054+ if (!err) {
28055+ di_write_lock_child(root);
28056+ err = au_opts_verify(sb, *flags, /*pending*/0);
28057+ aufs_write_unlock(root);
28058+ }
1facf9fc 28059+ goto out;
28060+ }
28061+
28062+ err = -ENOMEM;
1facf9fc 28063+ opts.opt = (void *)__get_free_page(GFP_NOFS);
28064+ if (unlikely(!opts.opt))
28065+ goto out;
28066+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
28067+ opts.flags = AuOpts_REMOUNT;
28068+ opts.sb_flags = *flags;
28069+
28070+ /* parse it before aufs lock */
28071+ err = au_opts_parse(sb, data, &opts);
28072+ if (unlikely(err))
28073+ goto out_opts;
28074+
28075+ sbinfo = au_sbi(sb);
5527c038 28076+ inode = d_inode(root);
febd17d6 28077+ inode_lock(inode);
e49829fe
JR
28078+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
28079+ if (unlikely(err))
28080+ goto out_mtx;
28081+ di_write_lock_child(root);
1facf9fc 28082+
28083+ /* au_opts_remount() may return an error */
28084+ err = au_opts_remount(sb, &opts);
28085+ au_opts_free(&opts);
28086+
027c5e7a 28087+ if (au_ftest_opts(opts.flags, REFRESH))
b95c5147 28088+ au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
1facf9fc 28089+
4a4d8108
AM
28090+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
28091+ mntflags = au_mntflags(sb);
28092+ do_dx = !!au_opt_test(mntflags, DIO);
28093+ au_dy_arefresh(do_dx);
28094+ }
28095+
076b876e 28096+ au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
1facf9fc 28097+ aufs_write_unlock(root);
953406b4 28098+
e49829fe 28099+out_mtx:
febd17d6 28100+ inode_unlock(inode);
4f0767ce 28101+out_opts:
f0c0a007 28102+ au_delayed_free_page((unsigned long)opts.opt);
4f0767ce 28103+out:
1facf9fc 28104+ err = cvt_err(err);
28105+ AuTraceErr(err);
28106+ return err;
28107+}
28108+
4a4d8108 28109+static const struct super_operations aufs_sop = {
1facf9fc 28110+ .alloc_inode = aufs_alloc_inode,
28111+ .destroy_inode = aufs_destroy_inode,
b752ccd1 28112+ /* always deleting, no clearing */
1facf9fc 28113+ .drop_inode = generic_delete_inode,
28114+ .show_options = aufs_show_options,
28115+ .statfs = aufs_statfs,
28116+ .put_super = aufs_put_super,
537831f9 28117+ .sync_fs = aufs_sync_fs,
1facf9fc 28118+ .remount_fs = aufs_remount_fs
28119+};
28120+
28121+/* ---------------------------------------------------------------------- */
28122+
28123+static int alloc_root(struct super_block *sb)
28124+{
28125+ int err;
28126+ struct inode *inode;
28127+ struct dentry *root;
28128+
28129+ err = -ENOMEM;
28130+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
28131+ err = PTR_ERR(inode);
28132+ if (IS_ERR(inode))
28133+ goto out;
28134+
b95c5147 28135+ inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
1facf9fc 28136+ inode->i_fop = &aufs_dir_fop;
28137+ inode->i_mode = S_IFDIR;
9dbd164d 28138+ set_nlink(inode, 2);
1facf9fc 28139+ unlock_new_inode(inode);
28140+
92d182d2 28141+ root = d_make_root(inode);
1facf9fc 28142+ if (unlikely(!root))
92d182d2 28143+ goto out;
1facf9fc 28144+ err = PTR_ERR(root);
28145+ if (IS_ERR(root))
92d182d2 28146+ goto out;
1facf9fc 28147+
4a4d8108 28148+ err = au_di_init(root);
1facf9fc 28149+ if (!err) {
28150+ sb->s_root = root;
28151+ return 0; /* success */
28152+ }
28153+ dput(root);
1facf9fc 28154+
4f0767ce 28155+out:
1facf9fc 28156+ return err;
1facf9fc 28157+}
28158+
28159+static int aufs_fill_super(struct super_block *sb, void *raw_data,
28160+ int silent __maybe_unused)
28161+{
28162+ int err;
be52b249
AM
28163+ struct au_opts opts = {
28164+ .opt = NULL
28165+ };
79b8bda9 28166+ struct au_sbinfo *sbinfo;
1facf9fc 28167+ struct dentry *root;
28168+ struct inode *inode;
28169+ char *arg = raw_data;
28170+
28171+ if (unlikely(!arg || !*arg)) {
28172+ err = -EINVAL;
4a4d8108 28173+ pr_err("no arg\n");
1facf9fc 28174+ goto out;
28175+ }
28176+
28177+ err = -ENOMEM;
1facf9fc 28178+ opts.opt = (void *)__get_free_page(GFP_NOFS);
28179+ if (unlikely(!opts.opt))
28180+ goto out;
28181+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
28182+ opts.sb_flags = sb->s_flags;
28183+
28184+ err = au_si_alloc(sb);
28185+ if (unlikely(err))
28186+ goto out_opts;
79b8bda9 28187+ sbinfo = au_sbi(sb);
1facf9fc 28188+
28189+ /* all timestamps always follow the ones on the branch */
28190+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
28191+ sb->s_op = &aufs_sop;
027c5e7a 28192+ sb->s_d_op = &aufs_dop;
1facf9fc 28193+ sb->s_magic = AUFS_SUPER_MAGIC;
28194+ sb->s_maxbytes = 0;
c1595e42 28195+ sb->s_stack_depth = 1;
1facf9fc 28196+ au_export_init(sb);
f2c43d5f 28197+ au_xattr_init(sb);
1facf9fc 28198+
28199+ err = alloc_root(sb);
28200+ if (unlikely(err)) {
28201+ si_write_unlock(sb);
28202+ goto out_info;
28203+ }
28204+ root = sb->s_root;
5527c038 28205+ inode = d_inode(root);
1facf9fc 28206+
28207+ /*
28208+ * actually we can parse options regardless of the aufs lock here.
28209+ * but at remount time, parsing must be done before aufs lock.
28210+ * so we follow the same rule.
28211+ */
28212+ ii_write_lock_parent(inode);
28213+ aufs_write_unlock(root);
28214+ err = au_opts_parse(sb, arg, &opts);
28215+ if (unlikely(err))
28216+ goto out_root;
28217+
28218+ /* lock vfs_inode first, then aufs. */
febd17d6 28219+ inode_lock(inode);
1facf9fc 28220+ aufs_write_lock(root);
28221+ err = au_opts_mount(sb, &opts);
28222+ au_opts_free(&opts);
79b8bda9
AM
28223+ if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
28224+ sb->s_d_op = &aufs_dop_noreval;
28225+ pr_info("%pf\n", sb->s_d_op);
28226+ au_refresh_dop(root, /*force_reval*/0);
b95c5147
AM
28227+ sbinfo->si_iop_array = aufs_iop_nogetattr;
28228+ au_refresh_iop(inode, /*force_getattr*/0);
79b8bda9 28229+ }
1facf9fc 28230+ aufs_write_unlock(root);
febd17d6 28231+ inode_unlock(inode);
4a4d8108
AM
28232+ if (!err)
28233+ goto out_opts; /* success */
1facf9fc 28234+
4f0767ce 28235+out_root:
1facf9fc 28236+ dput(root);
28237+ sb->s_root = NULL;
4f0767ce 28238+out_info:
79b8bda9
AM
28239+ dbgaufs_si_fin(sbinfo);
28240+ kobject_put(&sbinfo->si_kobj);
1facf9fc 28241+ sb->s_fs_info = NULL;
4f0767ce 28242+out_opts:
f0c0a007 28243+ au_delayed_free_page((unsigned long)opts.opt);
4f0767ce 28244+out:
1facf9fc 28245+ AuTraceErr(err);
28246+ err = cvt_err(err);
28247+ AuTraceErr(err);
28248+ return err;
28249+}
28250+
28251+/* ---------------------------------------------------------------------- */
28252+
027c5e7a
AM
28253+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
28254+ const char *dev_name __maybe_unused,
28255+ void *raw_data)
1facf9fc 28256+{
027c5e7a 28257+ struct dentry *root;
1facf9fc 28258+ struct super_block *sb;
28259+
28260+ /* all timestamps always follow the ones on the branch */
28261+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
28262+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
28263+ if (IS_ERR(root))
28264+ goto out;
28265+
28266+ sb = root->d_sb;
28267+ si_write_lock(sb, !AuLock_FLUSH);
28268+ sysaufs_brs_add(sb, 0);
28269+ si_write_unlock(sb);
28270+ au_sbilist_add(sb);
28271+
28272+out:
28273+ return root;
1facf9fc 28274+}
28275+
e49829fe
JR
28276+static void aufs_kill_sb(struct super_block *sb)
28277+{
28278+ struct au_sbinfo *sbinfo;
28279+
28280+ sbinfo = au_sbi(sb);
28281+ if (sbinfo) {
28282+ au_sbilist_del(sb);
28283+ aufs_write_lock(sb->s_root);
076b876e 28284+ au_fhsm_fin(sb);
e49829fe
JR
28285+ if (sbinfo->si_wbr_create_ops->fin)
28286+ sbinfo->si_wbr_create_ops->fin(sb);
28287+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
28288+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
b95c5147 28289+ au_remount_refresh(sb, /*do_idop*/0);
e49829fe
JR
28290+ }
28291+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
28292+ au_plink_put(sb, /*verbose*/1);
28293+ au_xino_clr(sb);
1e00d052 28294+ sbinfo->si_sb = NULL;
e49829fe 28295+ aufs_write_unlock(sb->s_root);
e49829fe
JR
28296+ au_nwt_flush(&sbinfo->si_nowait);
28297+ }
98d9a5b1 28298+ kill_anon_super(sb);
e49829fe
JR
28299+}
28300+
1facf9fc 28301+struct file_system_type aufs_fs_type = {
28302+ .name = AUFS_FSTYPE,
c06a8ce3
AM
28303+ /* a race between rename and others */
28304+ .fs_flags = FS_RENAME_DOES_D_MOVE,
027c5e7a 28305+ .mount = aufs_mount,
e49829fe 28306+ .kill_sb = aufs_kill_sb,
1facf9fc 28307+ /* no need to __module_get() and module_put(). */
28308+ .owner = THIS_MODULE,
28309+};
7f207e10
AM
28310diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
28311--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
28312+++ linux/fs/aufs/super.h 2016-12-17 12:28:17.598545045 +0100
28313@@ -0,0 +1,639 @@
1facf9fc 28314+/*
8cdd5066 28315+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28316+ *
28317+ * This program, aufs is free software; you can redistribute it and/or modify
28318+ * it under the terms of the GNU General Public License as published by
28319+ * the Free Software Foundation; either version 2 of the License, or
28320+ * (at your option) any later version.
dece6358
AM
28321+ *
28322+ * This program is distributed in the hope that it will be useful,
28323+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28324+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28325+ * GNU General Public License for more details.
28326+ *
28327+ * You should have received a copy of the GNU General Public License
523b37e3 28328+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28329+ */
28330+
28331+/*
28332+ * super_block operations
28333+ */
28334+
28335+#ifndef __AUFS_SUPER_H__
28336+#define __AUFS_SUPER_H__
28337+
28338+#ifdef __KERNEL__
28339+
28340+#include <linux/fs.h>
5527c038 28341+#include <linux/kobject.h>
1facf9fc 28342+#include "rwsem.h"
28343+#include "spl.h"
28344+#include "wkq.h"
28345+
1facf9fc 28346+/* policies to select one among multiple writable branches */
28347+struct au_wbr_copyup_operations {
28348+ int (*copyup)(struct dentry *dentry);
28349+};
28350+
392086de
AM
28351+#define AuWbr_DIR 1 /* target is a dir */
28352+#define AuWbr_PARENT (1 << 1) /* always require a parent */
28353+
28354+#define au_ftest_wbr(flags, name) ((flags) & AuWbr_##name)
28355+#define au_fset_wbr(flags, name) { (flags) |= AuWbr_##name; }
28356+#define au_fclr_wbr(flags, name) { (flags) &= ~AuWbr_##name; }
28357+
1facf9fc 28358+struct au_wbr_create_operations {
392086de 28359+ int (*create)(struct dentry *dentry, unsigned int flags);
1facf9fc 28360+ int (*init)(struct super_block *sb);
28361+ int (*fin)(struct super_block *sb);
28362+};
28363+
28364+struct au_wbr_mfs {
28365+ struct mutex mfs_lock; /* protect this structure */
28366+ unsigned long mfs_jiffy;
28367+ unsigned long mfs_expire;
28368+ aufs_bindex_t mfs_bindex;
28369+
28370+ unsigned long long mfsrr_bytes;
28371+ unsigned long long mfsrr_watermark;
28372+};
28373+
86dc4139
AM
28374+#define AuPlink_NHASH 100
28375+static inline int au_plink_hash(ino_t ino)
28376+{
28377+ return ino % AuPlink_NHASH;
28378+}
28379+
076b876e
AM
28380+/* File-based Hierarchical Storage Management */
28381+struct au_fhsm {
28382+#ifdef CONFIG_AUFS_FHSM
28383+ /* allow only one process who can receive the notification */
28384+ spinlock_t fhsm_spin;
28385+ pid_t fhsm_pid;
28386+ wait_queue_head_t fhsm_wqh;
28387+ atomic_t fhsm_readable;
28388+
c1595e42 28389+ /* these are protected by si_rwsem */
076b876e 28390+ unsigned long fhsm_expire;
c1595e42 28391+ aufs_bindex_t fhsm_bottom;
076b876e
AM
28392+#endif
28393+};
28394+
febd17d6
JR
28395+#define AU_PIDSTEP (int)(BITS_TO_LONGS(PID_MAX_DEFAULT) * BITS_PER_LONG)
28396+#define AU_NPIDMAP (int)DIV_ROUND_UP(PID_MAX_LIMIT, AU_PIDSTEP)
28397+struct au_si_pid {
28398+ unsigned long *pid_bitmap[AU_NPIDMAP];
28399+ struct mutex pid_mtx;
28400+};
28401+
1facf9fc 28402+struct au_branch;
28403+struct au_sbinfo {
28404+ /* nowait tasks in the system-wide workqueue */
28405+ struct au_nowait_tasks si_nowait;
28406+
b752ccd1
AM
28407+ /*
28408+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
28409+ * rwsem for au_sbinfo is necessary.
28410+ */
dece6358 28411+ struct au_rwsem si_rwsem;
1facf9fc 28412+
b752ccd1 28413+ /* prevent recursive locking in deleting inode */
febd17d6 28414+ struct au_si_pid au_si_pid;
b752ccd1 28415+
7f207e10 28416+ /*
523b37e3
AM
28417+ * dirty approach to protect sb->s_inodes and ->s_files (gone) from
28418+ * remount.
7f207e10 28419+ */
5afbbe0d 28420+ struct percpu_counter si_ninodes, si_nfiles;
7f207e10 28421+
1facf9fc 28422+ /* branch management */
28423+ unsigned int si_generation;
28424+
2000de60 28425+ /* see AuSi_ flags */
1facf9fc 28426+ unsigned char au_si_status;
28427+
5afbbe0d 28428+ aufs_bindex_t si_bbot;
7f207e10
AM
28429+
28430+ /* dirty trick to keep br_id plus */
28431+ unsigned int si_last_br_id :
28432+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 28433+ struct au_branch **si_branch;
28434+
28435+ /* policy to select a writable branch */
28436+ unsigned char si_wbr_copyup;
28437+ unsigned char si_wbr_create;
28438+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
28439+ struct au_wbr_create_operations *si_wbr_create_ops;
28440+
28441+ /* round robin */
28442+ atomic_t si_wbr_rr_next;
28443+
28444+ /* most free space */
28445+ struct au_wbr_mfs si_wbr_mfs;
28446+
076b876e
AM
28447+ /* File-based Hierarchical Storage Management */
28448+ struct au_fhsm si_fhsm;
28449+
1facf9fc 28450+ /* mount flags */
28451+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
28452+ unsigned int si_mntflags;
28453+
28454+ /* external inode number (bitmap and translation table) */
5527c038
JR
28455+ vfs_readf_t si_xread;
28456+ vfs_writef_t si_xwrite;
1facf9fc 28457+ struct file *si_xib;
28458+ struct mutex si_xib_mtx; /* protect xib members */
28459+ unsigned long *si_xib_buf;
28460+ unsigned long si_xib_last_pindex;
28461+ int si_xib_next_bit;
28462+ aufs_bindex_t si_xino_brid;
392086de
AM
28463+ unsigned long si_xino_jiffy;
28464+ unsigned long si_xino_expire;
1facf9fc 28465+ /* reserved for future use */
28466+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
28467+
28468+#ifdef CONFIG_AUFS_EXPORT
28469+ /* i_generation */
28470+ struct file *si_xigen;
28471+ atomic_t si_xigen_next;
28472+#endif
28473+
b912730e
AM
28474+ /* dirty trick to support atomic_open */
28475+ struct au_sphlhead si_aopen;
28476+
1facf9fc 28477+ /* vdir parameters */
e49829fe 28478+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 28479+ unsigned int si_rdblk; /* deblk size */
28480+ unsigned int si_rdhash; /* hash size */
28481+
28482+ /*
28483+ * If the number of whiteouts is larger than si_dirwh, leave all of
28484+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
28485+ * future fsck.aufs or kernel thread will remove them later.
28486+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
28487+ */
28488+ unsigned int si_dirwh;
28489+
1facf9fc 28490+ /* pseudo_link list */
86dc4139 28491+ struct au_sphlhead si_plink[AuPlink_NHASH];
1facf9fc 28492+ wait_queue_head_t si_plink_wq;
4a4d8108 28493+ spinlock_t si_plink_maint_lock;
e49829fe 28494+ pid_t si_plink_maint_pid;
1facf9fc 28495+
523b37e3
AM
28496+ /* file list */
28497+ struct au_sphlhead si_files;
28498+
b95c5147
AM
28499+ /* with/without getattr, brother of sb->s_d_op */
28500+ struct inode_operations *si_iop_array;
28501+
1facf9fc 28502+ /*
28503+ * sysfs and lifetime management.
28504+ * this is not a small structure and it may be a waste of memory in case
28505+ * sysfs is disabled, particularly when many aufs-es are mounted.
28506+ * but using sysfs is the majority case.
28507+ */
28508+ struct kobject si_kobj;
28509+#ifdef CONFIG_DEBUG_FS
86dc4139
AM
28510+ struct dentry *si_dbgaufs;
28511+ struct dentry *si_dbgaufs_plink;
28512+ struct dentry *si_dbgaufs_xib;
1facf9fc 28513+#ifdef CONFIG_AUFS_EXPORT
28514+ struct dentry *si_dbgaufs_xigen;
28515+#endif
28516+#endif
28517+
e49829fe 28518+#ifdef CONFIG_AUFS_SBILIST
5afbbe0d 28519+ struct hlist_node si_list;
e49829fe
JR
28520+#endif
28521+
1facf9fc 28522+ /* dirty, necessary for unmounting, sysfs and sysrq */
28523+ struct super_block *si_sb;
28524+};
28525+
dece6358
AM
28526+/* sbinfo status flags */
28527+/*
28528+ * set true when refresh_dirs() failed at remount time.
28529+ * then try refreshing dirs at access time again.
28530+ * if it is false, refreshing dirs at access time is unnecessary
28531+ */
027c5e7a 28532+#define AuSi_FAILED_REFRESH_DIR 1
076b876e 28533+#define AuSi_FHSM (1 << 1) /* fhsm is active now */
79b8bda9 28534+#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
076b876e
AM
28535+
28536+#ifndef CONFIG_AUFS_FHSM
28537+#undef AuSi_FHSM
28538+#define AuSi_FHSM 0
28539+#endif
28540+
dece6358
AM
28541+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
28542+ unsigned int flag)
28543+{
28544+ AuRwMustAnyLock(&sbi->si_rwsem);
28545+ return sbi->au_si_status & flag;
28546+}
28547+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
28548+#define au_fset_si(sbinfo, name) do { \
28549+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28550+ (sbinfo)->au_si_status |= AuSi_##name; \
28551+} while (0)
28552+#define au_fclr_si(sbinfo, name) do { \
28553+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28554+ (sbinfo)->au_si_status &= ~AuSi_##name; \
28555+} while (0)
28556+
1facf9fc 28557+/* ---------------------------------------------------------------------- */
28558+
28559+/* policy to select one among writable branches */
4a4d8108
AM
28560+#define AuWbrCopyup(sbinfo, ...) \
28561+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
28562+#define AuWbrCreate(sbinfo, ...) \
28563+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 28564+
28565+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
28566+#define AuLock_DW 1 /* write-lock dentry */
28567+#define AuLock_IR (1 << 1) /* read-lock inode */
28568+#define AuLock_IW (1 << 2) /* write-lock inode */
28569+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
b95c5147 28570+#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
f2c43d5f 28571+ /* except RENAME_EXCHANGE */
e49829fe
JR
28572+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
28573+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
027c5e7a 28574+#define AuLock_GEN (1 << 7) /* test digen/iigen */
1facf9fc 28575+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
7f207e10
AM
28576+#define au_fset_lock(flags, name) \
28577+ do { (flags) |= AuLock_##name; } while (0)
28578+#define au_fclr_lock(flags, name) \
28579+ do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 28580+
28581+/* ---------------------------------------------------------------------- */
28582+
28583+/* super.c */
28584+extern struct file_system_type aufs_fs_type;
28585+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
79b8bda9
AM
28586+typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
28587+ unsigned long long max, void *arg);
79b8bda9
AM
28588+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
28589+ struct super_block *sb, void *arg);
7f207e10
AM
28590+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
28591+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 28592+
28593+/* sbinfo.c */
28594+void au_si_free(struct kobject *kobj);
28595+int au_si_alloc(struct super_block *sb);
e2f27e51 28596+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink);
1facf9fc 28597+
28598+unsigned int au_sigen_inc(struct super_block *sb);
28599+aufs_bindex_t au_new_br_id(struct super_block *sb);
28600+
e49829fe
JR
28601+int si_read_lock(struct super_block *sb, int flags);
28602+int si_write_lock(struct super_block *sb, int flags);
28603+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 28604+void aufs_read_unlock(struct dentry *dentry, int flags);
28605+void aufs_write_lock(struct dentry *dentry);
28606+void aufs_write_unlock(struct dentry *dentry);
e49829fe 28607+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 28608+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
28609+
28610+/* wbr_policy.c */
28611+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
28612+extern struct au_wbr_create_operations au_wbr_create_ops[];
28613+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
c2b27bf2 28614+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
5afbbe0d 28615+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop);
c2b27bf2
AM
28616+
28617+/* mvdown.c */
28618+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
1facf9fc 28619+
076b876e
AM
28620+#ifdef CONFIG_AUFS_FHSM
28621+/* fhsm.c */
28622+
28623+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
28624+{
28625+ pid_t pid;
28626+
28627+ spin_lock(&fhsm->fhsm_spin);
28628+ pid = fhsm->fhsm_pid;
28629+ spin_unlock(&fhsm->fhsm_spin);
28630+
28631+ return pid;
28632+}
28633+
28634+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
28635+void au_fhsm_wrote_all(struct super_block *sb, int force);
28636+int au_fhsm_fd(struct super_block *sb, int oflags);
28637+int au_fhsm_br_alloc(struct au_branch *br);
c1595e42 28638+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
076b876e
AM
28639+void au_fhsm_fin(struct super_block *sb);
28640+void au_fhsm_init(struct au_sbinfo *sbinfo);
28641+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
28642+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
28643+#else
28644+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
28645+ int force)
28646+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
28647+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
c1595e42
JR
28648+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
28649+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
28650+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
076b876e
AM
28651+AuStubVoid(au_fhsm_fin, struct super_block *sb)
28652+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
28653+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
28654+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
28655+#endif
28656+
1facf9fc 28657+/* ---------------------------------------------------------------------- */
28658+
28659+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
28660+{
28661+ return sb->s_fs_info;
28662+}
28663+
28664+/* ---------------------------------------------------------------------- */
28665+
28666+#ifdef CONFIG_AUFS_EXPORT
a2a7ad62 28667+int au_test_nfsd(void);
1facf9fc 28668+void au_export_init(struct super_block *sb);
b752ccd1 28669+void au_xigen_inc(struct inode *inode);
1facf9fc 28670+int au_xigen_new(struct inode *inode);
28671+int au_xigen_set(struct super_block *sb, struct file *base);
28672+void au_xigen_clr(struct super_block *sb);
28673+
28674+static inline int au_busy_or_stale(void)
28675+{
b752ccd1 28676+ if (!au_test_nfsd())
1facf9fc 28677+ return -EBUSY;
28678+ return -ESTALE;
28679+}
28680+#else
b752ccd1 28681+AuStubInt0(au_test_nfsd, void)
a2a7ad62 28682+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1 28683+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
28684+AuStubInt0(au_xigen_new, struct inode *inode)
28685+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
28686+AuStubVoid(au_xigen_clr, struct super_block *sb)
c1595e42 28687+AuStub(int, au_busy_or_stale, return -EBUSY, void)
1facf9fc 28688+#endif /* CONFIG_AUFS_EXPORT */
28689+
28690+/* ---------------------------------------------------------------------- */
28691+
e49829fe
JR
28692+#ifdef CONFIG_AUFS_SBILIST
28693+/* module.c */
5afbbe0d 28694+extern struct au_sphlhead au_sbilist;
e49829fe
JR
28695+
28696+static inline void au_sbilist_init(void)
28697+{
5afbbe0d 28698+ au_sphl_init(&au_sbilist);
e49829fe
JR
28699+}
28700+
28701+static inline void au_sbilist_add(struct super_block *sb)
28702+{
5afbbe0d 28703+ au_sphl_add(&au_sbi(sb)->si_list, &au_sbilist);
e49829fe
JR
28704+}
28705+
28706+static inline void au_sbilist_del(struct super_block *sb)
28707+{
5afbbe0d 28708+ au_sphl_del(&au_sbi(sb)->si_list, &au_sbilist);
e49829fe 28709+}
53392da6
AM
28710+
28711+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
28712+static inline void au_sbilist_lock(void)
28713+{
28714+ spin_lock(&au_sbilist.spin);
28715+}
28716+
28717+static inline void au_sbilist_unlock(void)
28718+{
28719+ spin_unlock(&au_sbilist.spin);
28720+}
28721+#define AuGFP_SBILIST GFP_ATOMIC
28722+#else
28723+AuStubVoid(au_sbilist_lock, void)
28724+AuStubVoid(au_sbilist_unlock, void)
28725+#define AuGFP_SBILIST GFP_NOFS
28726+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
28727+#else
28728+AuStubVoid(au_sbilist_init, void)
c1595e42
JR
28729+AuStubVoid(au_sbilist_add, struct super_block *sb)
28730+AuStubVoid(au_sbilist_del, struct super_block *sb)
53392da6
AM
28731+AuStubVoid(au_sbilist_lock, void)
28732+AuStubVoid(au_sbilist_unlock, void)
28733+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
28734+#endif
28735+
28736+/* ---------------------------------------------------------------------- */
28737+
1facf9fc 28738+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
28739+{
dece6358 28740+ /*
c1595e42 28741+ * This function is a dynamic '__init' function actually,
dece6358
AM
28742+ * so the tiny check for si_rwsem is unnecessary.
28743+ */
28744+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 28745+#ifdef CONFIG_DEBUG_FS
28746+ sbinfo->si_dbgaufs = NULL;
86dc4139 28747+ sbinfo->si_dbgaufs_plink = NULL;
1facf9fc 28748+ sbinfo->si_dbgaufs_xib = NULL;
28749+#ifdef CONFIG_AUFS_EXPORT
28750+ sbinfo->si_dbgaufs_xigen = NULL;
28751+#endif
28752+#endif
28753+}
28754+
28755+/* ---------------------------------------------------------------------- */
28756+
febd17d6 28757+static inline void si_pid_idx_bit(int *idx, pid_t *bit)
b752ccd1
AM
28758+{
28759+ /* the origin of pid is 1, but the bitmap's is 0 */
febd17d6
JR
28760+ *bit = current->pid - 1;
28761+ *idx = *bit / AU_PIDSTEP;
28762+ *bit %= AU_PIDSTEP;
b752ccd1
AM
28763+}
28764+
28765+static inline int si_pid_test(struct super_block *sb)
28766+{
076b876e 28767+ pid_t bit;
febd17d6
JR
28768+ int idx;
28769+ unsigned long *bitmap;
076b876e 28770+
febd17d6
JR
28771+ si_pid_idx_bit(&idx, &bit);
28772+ bitmap = au_sbi(sb)->au_si_pid.pid_bitmap[idx];
28773+ if (bitmap)
28774+ return test_bit(bit, bitmap);
28775+ return 0;
b752ccd1
AM
28776+}
28777+
28778+static inline void si_pid_clr(struct super_block *sb)
28779+{
076b876e 28780+ pid_t bit;
febd17d6
JR
28781+ int idx;
28782+ unsigned long *bitmap;
076b876e 28783+
febd17d6
JR
28784+ si_pid_idx_bit(&idx, &bit);
28785+ bitmap = au_sbi(sb)->au_si_pid.pid_bitmap[idx];
28786+ BUG_ON(!bitmap);
28787+ AuDebugOn(!test_bit(bit, bitmap));
28788+ clear_bit(bit, bitmap);
28789+ /* smp_mb(); */
b752ccd1
AM
28790+}
28791+
febd17d6
JR
28792+void si_pid_set(struct super_block *sb);
28793+
b752ccd1
AM
28794+/* ---------------------------------------------------------------------- */
28795+
1facf9fc 28796+/* lock superblock. mainly for entry point functions */
28797+/*
b752ccd1
AM
28798+ * __si_read_lock, __si_write_lock,
28799+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 28800+ */
b752ccd1 28801+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 28802+
dece6358
AM
28803+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
28804+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
28805+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
28806+
b752ccd1
AM
28807+static inline void si_noflush_read_lock(struct super_block *sb)
28808+{
28809+ __si_read_lock(sb);
28810+ si_pid_set(sb);
28811+}
28812+
28813+static inline int si_noflush_read_trylock(struct super_block *sb)
28814+{
076b876e
AM
28815+ int locked;
28816+
28817+ locked = __si_read_trylock(sb);
b752ccd1
AM
28818+ if (locked)
28819+ si_pid_set(sb);
28820+ return locked;
28821+}
28822+
28823+static inline void si_noflush_write_lock(struct super_block *sb)
28824+{
28825+ __si_write_lock(sb);
28826+ si_pid_set(sb);
28827+}
28828+
28829+static inline int si_noflush_write_trylock(struct super_block *sb)
28830+{
076b876e
AM
28831+ int locked;
28832+
28833+ locked = __si_write_trylock(sb);
b752ccd1
AM
28834+ if (locked)
28835+ si_pid_set(sb);
28836+ return locked;
28837+}
28838+
7e9cd9fe 28839+#if 0 /* reserved */
1facf9fc 28840+static inline int si_read_trylock(struct super_block *sb, int flags)
28841+{
28842+ if (au_ftest_lock(flags, FLUSH))
28843+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28844+ return si_noflush_read_trylock(sb);
28845+}
e49829fe 28846+#endif
1facf9fc 28847+
b752ccd1
AM
28848+static inline void si_read_unlock(struct super_block *sb)
28849+{
28850+ si_pid_clr(sb);
28851+ __si_read_unlock(sb);
28852+}
28853+
7e9cd9fe 28854+#if 0 /* reserved */
1facf9fc 28855+static inline int si_write_trylock(struct super_block *sb, int flags)
28856+{
28857+ if (au_ftest_lock(flags, FLUSH))
28858+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28859+ return si_noflush_write_trylock(sb);
28860+}
b752ccd1
AM
28861+#endif
28862+
28863+static inline void si_write_unlock(struct super_block *sb)
28864+{
28865+ si_pid_clr(sb);
28866+ __si_write_unlock(sb);
28867+}
28868+
7e9cd9fe 28869+#if 0 /* reserved */
b752ccd1
AM
28870+static inline void si_downgrade_lock(struct super_block *sb)
28871+{
28872+ __si_downgrade_lock(sb);
28873+}
28874+#endif
1facf9fc 28875+
28876+/* ---------------------------------------------------------------------- */
28877+
5afbbe0d 28878+static inline aufs_bindex_t au_sbbot(struct super_block *sb)
1facf9fc 28879+{
dece6358 28880+ SiMustAnyLock(sb);
5afbbe0d 28881+ return au_sbi(sb)->si_bbot;
1facf9fc 28882+}
28883+
28884+static inline unsigned int au_mntflags(struct super_block *sb)
28885+{
dece6358 28886+ SiMustAnyLock(sb);
1facf9fc 28887+ return au_sbi(sb)->si_mntflags;
28888+}
28889+
28890+static inline unsigned int au_sigen(struct super_block *sb)
28891+{
dece6358 28892+ SiMustAnyLock(sb);
1facf9fc 28893+ return au_sbi(sb)->si_generation;
28894+}
28895+
5afbbe0d
AM
28896+static inline unsigned long long au_ninodes(struct super_block *sb)
28897+{
28898+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_ninodes);
28899+
28900+ BUG_ON(n < 0);
28901+ return n;
28902+}
28903+
7f207e10
AM
28904+static inline void au_ninodes_inc(struct super_block *sb)
28905+{
5afbbe0d 28906+ percpu_counter_inc(&au_sbi(sb)->si_ninodes);
7f207e10
AM
28907+}
28908+
28909+static inline void au_ninodes_dec(struct super_block *sb)
28910+{
5afbbe0d
AM
28911+ percpu_counter_dec(&au_sbi(sb)->si_ninodes);
28912+}
28913+
28914+static inline unsigned long long au_nfiles(struct super_block *sb)
28915+{
28916+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_nfiles);
28917+
28918+ BUG_ON(n < 0);
28919+ return n;
7f207e10
AM
28920+}
28921+
28922+static inline void au_nfiles_inc(struct super_block *sb)
28923+{
5afbbe0d 28924+ percpu_counter_inc(&au_sbi(sb)->si_nfiles);
7f207e10
AM
28925+}
28926+
28927+static inline void au_nfiles_dec(struct super_block *sb)
28928+{
5afbbe0d 28929+ percpu_counter_dec(&au_sbi(sb)->si_nfiles);
7f207e10
AM
28930+}
28931+
1facf9fc 28932+static inline struct au_branch *au_sbr(struct super_block *sb,
28933+ aufs_bindex_t bindex)
28934+{
dece6358 28935+ SiMustAnyLock(sb);
1facf9fc 28936+ return au_sbi(sb)->si_branch[0 + bindex];
28937+}
28938+
28939+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
28940+{
dece6358 28941+ SiMustWriteLock(sb);
1facf9fc 28942+ au_sbi(sb)->si_xino_brid = brid;
28943+}
28944+
28945+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
28946+{
dece6358 28947+ SiMustAnyLock(sb);
1facf9fc 28948+ return au_sbi(sb)->si_xino_brid;
28949+}
28950+
28951+#endif /* __KERNEL__ */
28952+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
28953diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
28954--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 28955+++ linux/fs/aufs/sysaufs.c 2016-10-09 16:55:36.496035060 +0200
523b37e3 28956@@ -0,0 +1,104 @@
1facf9fc 28957+/*
8cdd5066 28958+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28959+ *
28960+ * This program, aufs is free software; you can redistribute it and/or modify
28961+ * it under the terms of the GNU General Public License as published by
28962+ * the Free Software Foundation; either version 2 of the License, or
28963+ * (at your option) any later version.
dece6358
AM
28964+ *
28965+ * This program is distributed in the hope that it will be useful,
28966+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28967+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28968+ * GNU General Public License for more details.
28969+ *
28970+ * You should have received a copy of the GNU General Public License
523b37e3 28971+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28972+ */
28973+
28974+/*
28975+ * sysfs interface and lifetime management
28976+ * they are necessary regardless of whether sysfs is disabled.
28977+ */
28978+
1facf9fc 28979+#include <linux/random.h>
1facf9fc 28980+#include "aufs.h"
28981+
28982+unsigned long sysaufs_si_mask;
e49829fe 28983+struct kset *sysaufs_kset;
1facf9fc 28984+
28985+#define AuSiAttr(_name) { \
28986+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
28987+ .show = sysaufs_si_##_name, \
28988+}
28989+
28990+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
28991+struct attribute *sysaufs_si_attrs[] = {
28992+ &sysaufs_si_attr_xi_path.attr,
28993+ NULL,
28994+};
28995+
4a4d8108 28996+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 28997+ .show = sysaufs_si_show
28998+};
28999+
29000+static struct kobj_type au_sbi_ktype = {
29001+ .release = au_si_free,
29002+ .sysfs_ops = &au_sbi_ops,
29003+ .default_attrs = sysaufs_si_attrs
29004+};
29005+
29006+/* ---------------------------------------------------------------------- */
29007+
29008+int sysaufs_si_init(struct au_sbinfo *sbinfo)
29009+{
29010+ int err;
29011+
e49829fe 29012+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 29013+ /* cf. sysaufs_name() */
29014+ err = kobject_init_and_add
e49829fe 29015+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 29016+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
29017+
29018+ dbgaufs_si_null(sbinfo);
29019+ if (!err) {
29020+ err = dbgaufs_si_init(sbinfo);
29021+ if (unlikely(err))
29022+ kobject_put(&sbinfo->si_kobj);
29023+ }
29024+ return err;
29025+}
29026+
29027+void sysaufs_fin(void)
29028+{
29029+ dbgaufs_fin();
e49829fe
JR
29030+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
29031+ kset_unregister(sysaufs_kset);
1facf9fc 29032+}
29033+
29034+int __init sysaufs_init(void)
29035+{
29036+ int err;
29037+
29038+ do {
29039+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
29040+ } while (!sysaufs_si_mask);
29041+
4a4d8108 29042+ err = -EINVAL;
e49829fe
JR
29043+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
29044+ if (unlikely(!sysaufs_kset))
4a4d8108 29045+ goto out;
e49829fe
JR
29046+ err = PTR_ERR(sysaufs_kset);
29047+ if (IS_ERR(sysaufs_kset))
1facf9fc 29048+ goto out;
e49829fe 29049+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 29050+ if (unlikely(err)) {
e49829fe 29051+ kset_unregister(sysaufs_kset);
1facf9fc 29052+ goto out;
29053+ }
29054+
29055+ err = dbgaufs_init();
29056+ if (unlikely(err))
29057+ sysaufs_fin();
4f0767ce 29058+out:
1facf9fc 29059+ return err;
29060+}
7f207e10
AM
29061diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
29062--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 29063+++ linux/fs/aufs/sysaufs.h 2016-10-09 16:55:36.496035060 +0200
c1595e42 29064@@ -0,0 +1,101 @@
1facf9fc 29065+/*
8cdd5066 29066+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29067+ *
29068+ * This program, aufs is free software; you can redistribute it and/or modify
29069+ * it under the terms of the GNU General Public License as published by
29070+ * the Free Software Foundation; either version 2 of the License, or
29071+ * (at your option) any later version.
dece6358
AM
29072+ *
29073+ * This program is distributed in the hope that it will be useful,
29074+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29075+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29076+ * GNU General Public License for more details.
29077+ *
29078+ * You should have received a copy of the GNU General Public License
523b37e3 29079+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29080+ */
29081+
29082+/*
29083+ * sysfs interface and mount lifetime management
29084+ */
29085+
29086+#ifndef __SYSAUFS_H__
29087+#define __SYSAUFS_H__
29088+
29089+#ifdef __KERNEL__
29090+
1facf9fc 29091+#include <linux/sysfs.h>
1facf9fc 29092+#include "module.h"
29093+
dece6358
AM
29094+struct super_block;
29095+struct au_sbinfo;
29096+
1facf9fc 29097+struct sysaufs_si_attr {
29098+ struct attribute attr;
29099+ int (*show)(struct seq_file *seq, struct super_block *sb);
29100+};
29101+
29102+/* ---------------------------------------------------------------------- */
29103+
29104+/* sysaufs.c */
29105+extern unsigned long sysaufs_si_mask;
e49829fe 29106+extern struct kset *sysaufs_kset;
1facf9fc 29107+extern struct attribute *sysaufs_si_attrs[];
29108+int sysaufs_si_init(struct au_sbinfo *sbinfo);
29109+int __init sysaufs_init(void);
29110+void sysaufs_fin(void);
29111+
29112+/* ---------------------------------------------------------------------- */
29113+
29114+/* some people don't like to show a pointer in kernel */
29115+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
29116+{
29117+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
29118+}
29119+
29120+#define SysaufsSiNamePrefix "si_"
29121+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
29122+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
29123+{
29124+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
29125+ sysaufs_si_id(sbinfo));
29126+}
29127+
29128+struct au_branch;
29129+#ifdef CONFIG_SYSFS
29130+/* sysfs.c */
29131+extern struct attribute_group *sysaufs_attr_group;
29132+
29133+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
29134+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
29135+ char *buf);
076b876e
AM
29136+long au_brinfo_ioctl(struct file *file, unsigned long arg);
29137+#ifdef CONFIG_COMPAT
29138+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
29139+#endif
1facf9fc 29140+
29141+void sysaufs_br_init(struct au_branch *br);
29142+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
29143+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
29144+
29145+#define sysaufs_brs_init() do {} while (0)
29146+
29147+#else
29148+#define sysaufs_attr_group NULL
29149+
4a4d8108 29150+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
c1595e42
JR
29151+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
29152+ struct attribute *attr, char *buf)
4a4d8108
AM
29153+AuStubVoid(sysaufs_br_init, struct au_branch *br)
29154+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
29155+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 29156+
29157+static inline void sysaufs_brs_init(void)
29158+{
29159+ sysaufs_brs = 0;
29160+}
29161+
29162+#endif /* CONFIG_SYSFS */
29163+
29164+#endif /* __KERNEL__ */
29165+#endif /* __SYSAUFS_H__ */
7f207e10
AM
29166diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
29167--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 29168+++ linux/fs/aufs/sysfs.c 2016-10-09 16:55:36.496035060 +0200
79b8bda9 29169@@ -0,0 +1,376 @@
1facf9fc 29170+/*
8cdd5066 29171+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29172+ *
29173+ * This program, aufs is free software; you can redistribute it and/or modify
29174+ * it under the terms of the GNU General Public License as published by
29175+ * the Free Software Foundation; either version 2 of the License, or
29176+ * (at your option) any later version.
dece6358
AM
29177+ *
29178+ * This program is distributed in the hope that it will be useful,
29179+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29180+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29181+ * GNU General Public License for more details.
29182+ *
29183+ * You should have received a copy of the GNU General Public License
523b37e3 29184+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29185+ */
29186+
29187+/*
29188+ * sysfs interface
29189+ */
29190+
076b876e 29191+#include <linux/compat.h>
1facf9fc 29192+#include <linux/seq_file.h>
1facf9fc 29193+#include "aufs.h"
29194+
4a4d8108
AM
29195+#ifdef CONFIG_AUFS_FS_MODULE
29196+/* this entry violates the "one line per file" policy of sysfs */
29197+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
29198+ char *buf)
29199+{
29200+ ssize_t err;
29201+ static char *conf =
29202+/* this file is generated at compiling */
29203+#include "conf.str"
29204+ ;
29205+
29206+ err = snprintf(buf, PAGE_SIZE, conf);
29207+ if (unlikely(err >= PAGE_SIZE))
29208+ err = -EFBIG;
29209+ return err;
29210+}
29211+
29212+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
29213+#endif
29214+
1facf9fc 29215+static struct attribute *au_attr[] = {
4a4d8108
AM
29216+#ifdef CONFIG_AUFS_FS_MODULE
29217+ &au_config_attr.attr,
29218+#endif
1facf9fc 29219+ NULL, /* need to NULL terminate the list of attributes */
29220+};
29221+
29222+static struct attribute_group sysaufs_attr_group_body = {
29223+ .attrs = au_attr
29224+};
29225+
29226+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
29227+
29228+/* ---------------------------------------------------------------------- */
29229+
29230+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
29231+{
29232+ int err;
29233+
dece6358
AM
29234+ SiMustAnyLock(sb);
29235+
1facf9fc 29236+ err = 0;
29237+ if (au_opt_test(au_mntflags(sb), XINO)) {
29238+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
29239+ seq_putc(seq, '\n');
29240+ }
29241+ return err;
29242+}
29243+
29244+/*
29245+ * the lifetime of branch is independent from the entry under sysfs.
29246+ * sysfs handles the lifetime of the entry, and never calls ->show() after it is
29247+ * unlinked.
29248+ */
29249+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
392086de 29250+ aufs_bindex_t bindex, int idx)
1facf9fc 29251+{
1e00d052 29252+ int err;
1facf9fc 29253+ struct path path;
29254+ struct dentry *root;
29255+ struct au_branch *br;
076b876e 29256+ au_br_perm_str_t perm;
1facf9fc 29257+
29258+ AuDbg("b%d\n", bindex);
29259+
1e00d052 29260+ err = 0;
1facf9fc 29261+ root = sb->s_root;
29262+ di_read_lock_parent(root, !AuLock_IR);
29263+ br = au_sbr(sb, bindex);
392086de
AM
29264+
29265+ switch (idx) {
29266+ case AuBrSysfs_BR:
29267+ path.mnt = au_br_mnt(br);
29268+ path.dentry = au_h_dptr(root, bindex);
79b8bda9
AM
29269+ err = au_seq_path(seq, &path);
29270+ if (!err) {
29271+ au_optstr_br_perm(&perm, br->br_perm);
29272+ seq_printf(seq, "=%s\n", perm.a);
29273+ }
392086de
AM
29274+ break;
29275+ case AuBrSysfs_BRID:
79b8bda9 29276+ seq_printf(seq, "%d\n", br->br_id);
392086de
AM
29277+ break;
29278+ }
076b876e 29279+ di_read_unlock(root, !AuLock_IR);
79b8bda9 29280+ if (unlikely(err || seq_has_overflowed(seq)))
076b876e 29281+ err = -E2BIG;
392086de 29282+
1e00d052 29283+ return err;
1facf9fc 29284+}
29285+
29286+/* ---------------------------------------------------------------------- */
29287+
29288+static struct seq_file *au_seq(char *p, ssize_t len)
29289+{
29290+ struct seq_file *seq;
29291+
29292+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
29293+ if (seq) {
29294+ /* mutex_init(&seq.lock); */
29295+ seq->buf = p;
29296+ seq->size = len;
29297+ return seq; /* success */
29298+ }
29299+
29300+ seq = ERR_PTR(-ENOMEM);
29301+ return seq;
29302+}
29303+
392086de
AM
29304+#define SysaufsBr_PREFIX "br"
29305+#define SysaufsBrid_PREFIX "brid"
1facf9fc 29306+
29307+/* todo: file size may exceed PAGE_SIZE */
29308+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 29309+ char *buf)
1facf9fc 29310+{
29311+ ssize_t err;
392086de 29312+ int idx;
1facf9fc 29313+ long l;
5afbbe0d 29314+ aufs_bindex_t bbot;
1facf9fc 29315+ struct au_sbinfo *sbinfo;
29316+ struct super_block *sb;
29317+ struct seq_file *seq;
29318+ char *name;
29319+ struct attribute **cattr;
29320+
29321+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
29322+ sb = sbinfo->si_sb;
1308ab2a 29323+
29324+ /*
29325+ * prevent a race condition between sysfs and aufs.
29326+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
29327+ * prohibits maintaining the sysfs entries.
29328+ * hew we acquire read lock after sysfs_get_active_two().
29329+ * on the other hand, the remount process may maintain the sysfs/aufs
29330+ * entries after acquiring write lock.
29331+ * it can cause a deadlock.
29332+ * simply we gave up processing read here.
29333+ */
29334+ err = -EBUSY;
29335+ if (unlikely(!si_noflush_read_trylock(sb)))
29336+ goto out;
1facf9fc 29337+
29338+ seq = au_seq(buf, PAGE_SIZE);
29339+ err = PTR_ERR(seq);
29340+ if (IS_ERR(seq))
1308ab2a 29341+ goto out_unlock;
1facf9fc 29342+
29343+ name = (void *)attr->name;
29344+ cattr = sysaufs_si_attrs;
29345+ while (*cattr) {
29346+ if (!strcmp(name, (*cattr)->name)) {
29347+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
29348+ ->show(seq, sb);
29349+ goto out_seq;
29350+ }
29351+ cattr++;
29352+ }
29353+
392086de
AM
29354+ if (!strncmp(name, SysaufsBrid_PREFIX,
29355+ sizeof(SysaufsBrid_PREFIX) - 1)) {
29356+ idx = AuBrSysfs_BRID;
29357+ name += sizeof(SysaufsBrid_PREFIX) - 1;
29358+ } else if (!strncmp(name, SysaufsBr_PREFIX,
29359+ sizeof(SysaufsBr_PREFIX) - 1)) {
29360+ idx = AuBrSysfs_BR;
1facf9fc 29361+ name += sizeof(SysaufsBr_PREFIX) - 1;
392086de
AM
29362+ } else
29363+ BUG();
29364+
29365+ err = kstrtol(name, 10, &l);
29366+ if (!err) {
5afbbe0d
AM
29367+ bbot = au_sbbot(sb);
29368+ if (l <= bbot)
392086de
AM
29369+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
29370+ else
29371+ err = -ENOENT;
1facf9fc 29372+ }
1facf9fc 29373+
4f0767ce 29374+out_seq:
1facf9fc 29375+ if (!err) {
29376+ err = seq->count;
29377+ /* sysfs limit */
29378+ if (unlikely(err == PAGE_SIZE))
29379+ err = -EFBIG;
29380+ }
f0c0a007 29381+ au_delayed_kfree(seq);
4f0767ce 29382+out_unlock:
1facf9fc 29383+ si_read_unlock(sb);
4f0767ce 29384+out:
1facf9fc 29385+ return err;
29386+}
29387+
29388+/* ---------------------------------------------------------------------- */
29389+
076b876e
AM
29390+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
29391+{
29392+ int err;
29393+ int16_t brid;
5afbbe0d 29394+ aufs_bindex_t bindex, bbot;
076b876e
AM
29395+ size_t sz;
29396+ char *buf;
29397+ struct seq_file *seq;
29398+ struct au_branch *br;
29399+
29400+ si_read_lock(sb, AuLock_FLUSH);
5afbbe0d
AM
29401+ bbot = au_sbbot(sb);
29402+ err = bbot + 1;
076b876e
AM
29403+ if (!arg)
29404+ goto out;
29405+
29406+ err = -ENOMEM;
29407+ buf = (void *)__get_free_page(GFP_NOFS);
29408+ if (unlikely(!buf))
29409+ goto out;
29410+
29411+ seq = au_seq(buf, PAGE_SIZE);
29412+ err = PTR_ERR(seq);
29413+ if (IS_ERR(seq))
29414+ goto out_buf;
29415+
29416+ sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
5afbbe0d 29417+ for (bindex = 0; bindex <= bbot; bindex++, arg++) {
076b876e
AM
29418+ err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
29419+ if (unlikely(err))
29420+ break;
29421+
29422+ br = au_sbr(sb, bindex);
29423+ brid = br->br_id;
29424+ BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
29425+ err = __put_user(brid, &arg->id);
29426+ if (unlikely(err))
29427+ break;
29428+
29429+ BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
29430+ err = __put_user(br->br_perm, &arg->perm);
29431+ if (unlikely(err))
29432+ break;
29433+
79b8bda9
AM
29434+ err = au_seq_path(seq, &br->br_path);
29435+ if (unlikely(err))
29436+ break;
29437+ seq_putc(seq, '\0');
29438+ if (!seq_has_overflowed(seq)) {
076b876e
AM
29439+ err = copy_to_user(arg->path, seq->buf, seq->count);
29440+ seq->count = 0;
29441+ if (unlikely(err))
29442+ break;
29443+ } else {
29444+ err = -E2BIG;
29445+ goto out_seq;
29446+ }
29447+ }
29448+ if (unlikely(err))
29449+ err = -EFAULT;
29450+
29451+out_seq:
f0c0a007 29452+ au_delayed_kfree(seq);
076b876e 29453+out_buf:
f0c0a007 29454+ au_delayed_free_page((unsigned long)buf);
076b876e
AM
29455+out:
29456+ si_read_unlock(sb);
29457+ return err;
29458+}
29459+
29460+long au_brinfo_ioctl(struct file *file, unsigned long arg)
29461+{
2000de60 29462+ return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
076b876e
AM
29463+}
29464+
29465+#ifdef CONFIG_COMPAT
29466+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
29467+{
2000de60 29468+ return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
076b876e
AM
29469+}
29470+#endif
29471+
29472+/* ---------------------------------------------------------------------- */
29473+
1facf9fc 29474+void sysaufs_br_init(struct au_branch *br)
29475+{
392086de
AM
29476+ int i;
29477+ struct au_brsysfs *br_sysfs;
29478+ struct attribute *attr;
4a4d8108 29479+
392086de
AM
29480+ br_sysfs = br->br_sysfs;
29481+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29482+ attr = &br_sysfs->attr;
29483+ sysfs_attr_init(attr);
29484+ attr->name = br_sysfs->name;
29485+ attr->mode = S_IRUGO;
29486+ br_sysfs++;
29487+ }
1facf9fc 29488+}
29489+
29490+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
29491+{
29492+ struct au_branch *br;
29493+ struct kobject *kobj;
392086de
AM
29494+ struct au_brsysfs *br_sysfs;
29495+ int i;
5afbbe0d 29496+ aufs_bindex_t bbot;
1facf9fc 29497+
29498+ dbgaufs_brs_del(sb, bindex);
29499+
29500+ if (!sysaufs_brs)
29501+ return;
29502+
29503+ kobj = &au_sbi(sb)->si_kobj;
5afbbe0d
AM
29504+ bbot = au_sbbot(sb);
29505+ for (; bindex <= bbot; bindex++) {
1facf9fc 29506+ br = au_sbr(sb, bindex);
392086de
AM
29507+ br_sysfs = br->br_sysfs;
29508+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29509+ sysfs_remove_file(kobj, &br_sysfs->attr);
29510+ br_sysfs++;
29511+ }
1facf9fc 29512+ }
29513+}
29514+
29515+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
29516+{
392086de 29517+ int err, i;
5afbbe0d 29518+ aufs_bindex_t bbot;
1facf9fc 29519+ struct kobject *kobj;
29520+ struct au_branch *br;
392086de 29521+ struct au_brsysfs *br_sysfs;
1facf9fc 29522+
29523+ dbgaufs_brs_add(sb, bindex);
29524+
29525+ if (!sysaufs_brs)
29526+ return;
29527+
29528+ kobj = &au_sbi(sb)->si_kobj;
5afbbe0d
AM
29529+ bbot = au_sbbot(sb);
29530+ for (; bindex <= bbot; bindex++) {
1facf9fc 29531+ br = au_sbr(sb, bindex);
392086de
AM
29532+ br_sysfs = br->br_sysfs;
29533+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
29534+ SysaufsBr_PREFIX "%d", bindex);
29535+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
29536+ SysaufsBrid_PREFIX "%d", bindex);
29537+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29538+ err = sysfs_create_file(kobj, &br_sysfs->attr);
29539+ if (unlikely(err))
29540+ pr_warn("failed %s under sysfs(%d)\n",
29541+ br_sysfs->name, err);
29542+ br_sysfs++;
29543+ }
1facf9fc 29544+ }
29545+}
7f207e10
AM
29546diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
29547--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 29548+++ linux/fs/aufs/sysrq.c 2016-10-09 16:55:36.496035060 +0200
076b876e 29549@@ -0,0 +1,157 @@
1facf9fc 29550+/*
8cdd5066 29551+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29552+ *
29553+ * This program, aufs is free software; you can redistribute it and/or modify
29554+ * it under the terms of the GNU General Public License as published by
29555+ * the Free Software Foundation; either version 2 of the License, or
29556+ * (at your option) any later version.
dece6358
AM
29557+ *
29558+ * This program is distributed in the hope that it will be useful,
29559+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29560+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29561+ * GNU General Public License for more details.
29562+ *
29563+ * You should have received a copy of the GNU General Public License
523b37e3 29564+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29565+ */
29566+
29567+/*
29568+ * magic sysrq handler
29569+ */
29570+
1facf9fc 29571+/* #include <linux/sysrq.h> */
027c5e7a 29572+#include <linux/writeback.h>
1facf9fc 29573+#include "aufs.h"
29574+
29575+/* ---------------------------------------------------------------------- */
29576+
29577+static void sysrq_sb(struct super_block *sb)
29578+{
29579+ char *plevel;
29580+ struct au_sbinfo *sbinfo;
29581+ struct file *file;
523b37e3
AM
29582+ struct au_sphlhead *files;
29583+ struct au_finfo *finfo;
1facf9fc 29584+
29585+ plevel = au_plevel;
29586+ au_plevel = KERN_WARNING;
1facf9fc 29587+
4a4d8108 29588+ /* since we define pr_fmt, call printk directly */
c06a8ce3
AM
29589+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
29590+
29591+ sbinfo = au_sbi(sb);
4a4d8108 29592+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
c06a8ce3 29593+ pr("superblock\n");
1facf9fc 29594+ au_dpri_sb(sb);
027c5e7a
AM
29595+
29596+#if 0
c06a8ce3 29597+ pr("root dentry\n");
1facf9fc 29598+ au_dpri_dentry(sb->s_root);
c06a8ce3 29599+ pr("root inode\n");
5527c038 29600+ au_dpri_inode(d_inode(sb->s_root));
027c5e7a
AM
29601+#endif
29602+
1facf9fc 29603+#if 0
027c5e7a
AM
29604+ do {
29605+ int err, i, j, ndentry;
29606+ struct au_dcsub_pages dpages;
29607+ struct au_dpage *dpage;
29608+
29609+ err = au_dpages_init(&dpages, GFP_ATOMIC);
29610+ if (unlikely(err))
29611+ break;
29612+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
29613+ if (!err)
29614+ for (i = 0; i < dpages.ndpage; i++) {
29615+ dpage = dpages.dpages + i;
29616+ ndentry = dpage->ndentry;
29617+ for (j = 0; j < ndentry; j++)
29618+ au_dpri_dentry(dpage->dentries[j]);
29619+ }
29620+ au_dpages_free(&dpages);
29621+ } while (0);
29622+#endif
29623+
29624+#if 1
29625+ {
29626+ struct inode *i;
076b876e 29627+
c06a8ce3 29628+ pr("isolated inode\n");
79b8bda9 29629+ spin_lock(&sb->s_inode_list_lock);
2cbb1c4b
JR
29630+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
29631+ spin_lock(&i->i_lock);
b4510431 29632+ if (1 || hlist_empty(&i->i_dentry))
027c5e7a 29633+ au_dpri_inode(i);
2cbb1c4b
JR
29634+ spin_unlock(&i->i_lock);
29635+ }
79b8bda9 29636+ spin_unlock(&sb->s_inode_list_lock);
027c5e7a 29637+ }
1facf9fc 29638+#endif
c06a8ce3 29639+ pr("files\n");
523b37e3
AM
29640+ files = &au_sbi(sb)->si_files;
29641+ spin_lock(&files->spin);
29642+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
4a4d8108 29643+ umode_t mode;
076b876e 29644+
523b37e3 29645+ file = finfo->fi_file;
c06a8ce3 29646+ mode = file_inode(file)->i_mode;
38d290e6 29647+ if (!special_file(mode))
1facf9fc 29648+ au_dpri_file(file);
523b37e3
AM
29649+ }
29650+ spin_unlock(&files->spin);
c06a8ce3 29651+ pr("done\n");
1facf9fc 29652+
c06a8ce3 29653+#undef pr
1facf9fc 29654+ au_plevel = plevel;
1facf9fc 29655+}
29656+
29657+/* ---------------------------------------------------------------------- */
29658+
29659+/* module parameter */
29660+static char *aufs_sysrq_key = "a";
29661+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
29662+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
29663+
0c5527e5 29664+static void au_sysrq(int key __maybe_unused)
1facf9fc 29665+{
1facf9fc 29666+ struct au_sbinfo *sbinfo;
29667+
027c5e7a 29668+ lockdep_off();
53392da6 29669+ au_sbilist_lock();
5afbbe0d 29670+ hlist_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 29671+ sysrq_sb(sbinfo->si_sb);
53392da6 29672+ au_sbilist_unlock();
027c5e7a 29673+ lockdep_on();
1facf9fc 29674+}
29675+
29676+static struct sysrq_key_op au_sysrq_op = {
29677+ .handler = au_sysrq,
29678+ .help_msg = "Aufs",
29679+ .action_msg = "Aufs",
29680+ .enable_mask = SYSRQ_ENABLE_DUMP
29681+};
29682+
29683+/* ---------------------------------------------------------------------- */
29684+
29685+int __init au_sysrq_init(void)
29686+{
29687+ int err;
29688+ char key;
29689+
29690+ err = -1;
29691+ key = *aufs_sysrq_key;
29692+ if ('a' <= key && key <= 'z')
29693+ err = register_sysrq_key(key, &au_sysrq_op);
29694+ if (unlikely(err))
4a4d8108 29695+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 29696+ return err;
29697+}
29698+
29699+void au_sysrq_fin(void)
29700+{
29701+ int err;
076b876e 29702+
1facf9fc 29703+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
29704+ if (unlikely(err))
4a4d8108 29705+ pr_err("err %d (ignored)\n", err);
1facf9fc 29706+}
7f207e10
AM
29707diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
29708--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51
AM
29709+++ linux/fs/aufs/vdir.c 2016-10-09 16:55:38.889431135 +0200
29710@@ -0,0 +1,900 @@
1facf9fc 29711+/*
8cdd5066 29712+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29713+ *
29714+ * This program, aufs is free software; you can redistribute it and/or modify
29715+ * it under the terms of the GNU General Public License as published by
29716+ * the Free Software Foundation; either version 2 of the License, or
29717+ * (at your option) any later version.
dece6358
AM
29718+ *
29719+ * This program is distributed in the hope that it will be useful,
29720+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29721+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29722+ * GNU General Public License for more details.
29723+ *
29724+ * You should have received a copy of the GNU General Public License
523b37e3 29725+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29726+ */
29727+
29728+/*
29729+ * virtual or vertical directory
29730+ */
29731+
29732+#include "aufs.h"
29733+
dece6358 29734+static unsigned int calc_size(int nlen)
1facf9fc 29735+{
dece6358 29736+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 29737+}
29738+
29739+static int set_deblk_end(union au_vdir_deblk_p *p,
29740+ union au_vdir_deblk_p *deblk_end)
29741+{
29742+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
29743+ p->de->de_str.len = 0;
29744+ /* smp_mb(); */
29745+ return 0;
29746+ }
29747+ return -1; /* error */
29748+}
29749+
29750+/* returns true or false */
29751+static int is_deblk_end(union au_vdir_deblk_p *p,
29752+ union au_vdir_deblk_p *deblk_end)
29753+{
29754+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
29755+ return !p->de->de_str.len;
29756+ return 1;
29757+}
29758+
29759+static unsigned char *last_deblk(struct au_vdir *vdir)
29760+{
29761+ return vdir->vd_deblk[vdir->vd_nblk - 1];
29762+}
29763+
29764+/* ---------------------------------------------------------------------- */
29765+
79b8bda9 29766+/* estimate the appropriate size for name hash table */
1308ab2a 29767+unsigned int au_rdhash_est(loff_t sz)
29768+{
29769+ unsigned int n;
29770+
29771+ n = UINT_MAX;
29772+ sz >>= 10;
29773+ if (sz < n)
29774+ n = sz;
29775+ if (sz < AUFS_RDHASH_DEF)
29776+ n = AUFS_RDHASH_DEF;
4a4d8108 29777+ /* pr_info("n %u\n", n); */
1308ab2a 29778+ return n;
29779+}
29780+
1facf9fc 29781+/*
29782+ * the allocated memory has to be freed by
dece6358 29783+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 29784+ */
dece6358 29785+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 29786+{
1facf9fc 29787+ struct hlist_head *head;
dece6358 29788+ unsigned int u;
076b876e 29789+ size_t sz;
1facf9fc 29790+
076b876e
AM
29791+ sz = sizeof(*nhash->nh_head) * num_hash;
29792+ head = kmalloc(sz, gfp);
dece6358
AM
29793+ if (head) {
29794+ nhash->nh_num = num_hash;
29795+ nhash->nh_head = head;
29796+ for (u = 0; u < num_hash; u++)
1facf9fc 29797+ INIT_HLIST_HEAD(head++);
dece6358 29798+ return 0; /* success */
1facf9fc 29799+ }
1facf9fc 29800+
dece6358 29801+ return -ENOMEM;
1facf9fc 29802+}
29803+
dece6358
AM
29804+static void nhash_count(struct hlist_head *head)
29805+{
29806+#if 0
29807+ unsigned long n;
29808+ struct hlist_node *pos;
29809+
29810+ n = 0;
29811+ hlist_for_each(pos, head)
29812+ n++;
4a4d8108 29813+ pr_info("%lu\n", n);
dece6358
AM
29814+#endif
29815+}
29816+
29817+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 29818+{
c06a8ce3
AM
29819+ struct au_vdir_wh *pos;
29820+ struct hlist_node *node;
1facf9fc 29821+
c06a8ce3 29822+ hlist_for_each_entry_safe(pos, node, head, wh_hash)
f0c0a007 29823+ au_delayed_kfree(pos);
1facf9fc 29824+}
29825+
dece6358 29826+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 29827+{
c06a8ce3
AM
29828+ struct au_vdir_dehstr *pos;
29829+ struct hlist_node *node;
1facf9fc 29830+
c06a8ce3 29831+ hlist_for_each_entry_safe(pos, node, head, hash)
f0c0a007 29832+ au_cache_dfree_vdir_dehstr(pos);
1facf9fc 29833+}
29834+
dece6358
AM
29835+static void au_nhash_do_free(struct au_nhash *nhash,
29836+ void (*free)(struct hlist_head *head))
1facf9fc 29837+{
1308ab2a 29838+ unsigned int n;
1facf9fc 29839+ struct hlist_head *head;
1facf9fc 29840+
dece6358 29841+ n = nhash->nh_num;
1308ab2a 29842+ if (!n)
29843+ return;
29844+
dece6358 29845+ head = nhash->nh_head;
1308ab2a 29846+ while (n-- > 0) {
dece6358
AM
29847+ nhash_count(head);
29848+ free(head++);
1facf9fc 29849+ }
f0c0a007 29850+ au_delayed_kfree(nhash->nh_head);
1facf9fc 29851+}
29852+
dece6358 29853+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 29854+{
dece6358
AM
29855+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
29856+}
1facf9fc 29857+
dece6358
AM
29858+static void au_nhash_de_free(struct au_nhash *delist)
29859+{
29860+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 29861+}
29862+
29863+/* ---------------------------------------------------------------------- */
29864+
29865+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
29866+ int limit)
29867+{
29868+ int num;
29869+ unsigned int u, n;
29870+ struct hlist_head *head;
c06a8ce3 29871+ struct au_vdir_wh *pos;
1facf9fc 29872+
29873+ num = 0;
29874+ n = whlist->nh_num;
29875+ head = whlist->nh_head;
1308ab2a 29876+ for (u = 0; u < n; u++, head++)
c06a8ce3
AM
29877+ hlist_for_each_entry(pos, head, wh_hash)
29878+ if (pos->wh_bindex == btgt && ++num > limit)
1facf9fc 29879+ return 1;
1facf9fc 29880+ return 0;
29881+}
29882+
29883+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 29884+ unsigned char *name,
1facf9fc 29885+ unsigned int len)
29886+{
dece6358
AM
29887+ unsigned int v;
29888+ /* const unsigned int magic_bit = 12; */
29889+
1308ab2a 29890+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
29891+
dece6358 29892+ v = 0;
f0c0a007
AM
29893+ if (len > 8)
29894+ len = 8;
dece6358
AM
29895+ while (len--)
29896+ v += *name++;
29897+ /* v = hash_long(v, magic_bit); */
29898+ v %= nhash->nh_num;
29899+ return nhash->nh_head + v;
29900+}
29901+
29902+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
29903+ int nlen)
29904+{
29905+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 29906+}
29907+
29908+/* returns found or not */
dece6358 29909+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 29910+{
29911+ struct hlist_head *head;
c06a8ce3 29912+ struct au_vdir_wh *pos;
1facf9fc 29913+ struct au_vdir_destr *str;
29914+
dece6358 29915+ head = au_name_hash(whlist, name, nlen);
c06a8ce3
AM
29916+ hlist_for_each_entry(pos, head, wh_hash) {
29917+ str = &pos->wh_str;
1facf9fc 29918+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
29919+ if (au_nhash_test_name(str, name, nlen))
29920+ return 1;
29921+ }
29922+ return 0;
29923+}
29924+
29925+/* returns found(true) or not */
29926+static int test_known(struct au_nhash *delist, char *name, int nlen)
29927+{
29928+ struct hlist_head *head;
c06a8ce3 29929+ struct au_vdir_dehstr *pos;
dece6358
AM
29930+ struct au_vdir_destr *str;
29931+
29932+ head = au_name_hash(delist, name, nlen);
c06a8ce3
AM
29933+ hlist_for_each_entry(pos, head, hash) {
29934+ str = pos->str;
dece6358
AM
29935+ AuDbg("%.*s\n", str->len, str->name);
29936+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 29937+ return 1;
29938+ }
29939+ return 0;
29940+}
29941+
dece6358
AM
29942+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
29943+ unsigned char d_type)
29944+{
29945+#ifdef CONFIG_AUFS_SHWH
29946+ wh->wh_ino = ino;
29947+ wh->wh_type = d_type;
29948+#endif
29949+}
29950+
29951+/* ---------------------------------------------------------------------- */
29952+
29953+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
29954+ unsigned int d_type, aufs_bindex_t bindex,
29955+ unsigned char shwh)
1facf9fc 29956+{
29957+ int err;
29958+ struct au_vdir_destr *str;
29959+ struct au_vdir_wh *wh;
29960+
dece6358 29961+ AuDbg("%.*s\n", nlen, name);
1308ab2a 29962+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
29963+
1facf9fc 29964+ err = -ENOMEM;
dece6358 29965+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 29966+ if (unlikely(!wh))
29967+ goto out;
29968+
29969+ err = 0;
29970+ wh->wh_bindex = bindex;
dece6358
AM
29971+ if (shwh)
29972+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 29973+ str = &wh->wh_str;
dece6358
AM
29974+ str->len = nlen;
29975+ memcpy(str->name, name, nlen);
29976+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 29977+ /* smp_mb(); */
29978+
4f0767ce 29979+out:
1facf9fc 29980+ return err;
29981+}
29982+
1facf9fc 29983+static int append_deblk(struct au_vdir *vdir)
29984+{
29985+ int err;
dece6358 29986+ unsigned long ul;
1facf9fc 29987+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29988+ union au_vdir_deblk_p p, deblk_end;
29989+ unsigned char **o;
29990+
29991+ err = -ENOMEM;
e2f27e51
AM
29992+ o = au_krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
29993+ GFP_NOFS, /*may_shrink*/0);
1facf9fc 29994+ if (unlikely(!o))
29995+ goto out;
29996+
29997+ vdir->vd_deblk = o;
29998+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
29999+ if (p.deblk) {
30000+ ul = vdir->vd_nblk++;
30001+ vdir->vd_deblk[ul] = p.deblk;
30002+ vdir->vd_last.ul = ul;
30003+ vdir->vd_last.p.deblk = p.deblk;
30004+ deblk_end.deblk = p.deblk + deblk_sz;
30005+ err = set_deblk_end(&p, &deblk_end);
30006+ }
30007+
4f0767ce 30008+out:
1facf9fc 30009+ return err;
30010+}
30011+
dece6358
AM
30012+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
30013+ unsigned int d_type, struct au_nhash *delist)
30014+{
30015+ int err;
30016+ unsigned int sz;
30017+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
30018+ union au_vdir_deblk_p p, *room, deblk_end;
30019+ struct au_vdir_dehstr *dehstr;
30020+
30021+ p.deblk = last_deblk(vdir);
30022+ deblk_end.deblk = p.deblk + deblk_sz;
30023+ room = &vdir->vd_last.p;
30024+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
30025+ || !is_deblk_end(room, &deblk_end));
30026+
30027+ sz = calc_size(nlen);
30028+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
30029+ err = append_deblk(vdir);
30030+ if (unlikely(err))
30031+ goto out;
30032+
30033+ p.deblk = last_deblk(vdir);
30034+ deblk_end.deblk = p.deblk + deblk_sz;
30035+ /* smp_mb(); */
30036+ AuDebugOn(room->deblk != p.deblk);
30037+ }
30038+
30039+ err = -ENOMEM;
4a4d8108 30040+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
30041+ if (unlikely(!dehstr))
30042+ goto out;
30043+
30044+ dehstr->str = &room->de->de_str;
30045+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
30046+ room->de->de_ino = ino;
30047+ room->de->de_type = d_type;
30048+ room->de->de_str.len = nlen;
30049+ memcpy(room->de->de_str.name, name, nlen);
30050+
30051+ err = 0;
30052+ room->deblk += sz;
30053+ if (unlikely(set_deblk_end(room, &deblk_end)))
30054+ err = append_deblk(vdir);
30055+ /* smp_mb(); */
30056+
4f0767ce 30057+out:
dece6358
AM
30058+ return err;
30059+}
30060+
30061+/* ---------------------------------------------------------------------- */
30062+
f0c0a007 30063+void au_vdir_free(struct au_vdir *vdir, int atonce)
dece6358
AM
30064+{
30065+ unsigned char **deblk;
30066+
30067+ deblk = vdir->vd_deblk;
f0c0a007
AM
30068+ if (!atonce) {
30069+ while (vdir->vd_nblk--)
30070+ au_delayed_kfree(*deblk++);
30071+ au_delayed_kfree(vdir->vd_deblk);
30072+ au_cache_dfree_vdir(vdir);
30073+ } else {
30074+ /* not delayed */
30075+ while (vdir->vd_nblk--)
30076+ kfree(*deblk++);
30077+ kfree(vdir->vd_deblk);
30078+ au_cache_free_vdir(vdir);
30079+ }
dece6358
AM
30080+}
30081+
1308ab2a 30082+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 30083+{
30084+ struct au_vdir *vdir;
1308ab2a 30085+ struct super_block *sb;
1facf9fc 30086+ int err;
30087+
2000de60 30088+ sb = file->f_path.dentry->d_sb;
dece6358
AM
30089+ SiMustAnyLock(sb);
30090+
1facf9fc 30091+ err = -ENOMEM;
30092+ vdir = au_cache_alloc_vdir();
30093+ if (unlikely(!vdir))
30094+ goto out;
30095+
30096+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
30097+ if (unlikely(!vdir->vd_deblk))
30098+ goto out_free;
30099+
30100+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 30101+ if (!vdir->vd_deblk_sz) {
79b8bda9 30102+ /* estimate the appropriate size for deblk */
1308ab2a 30103+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 30104+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 30105+ }
1facf9fc 30106+ vdir->vd_nblk = 0;
30107+ vdir->vd_version = 0;
30108+ vdir->vd_jiffy = 0;
30109+ err = append_deblk(vdir);
30110+ if (!err)
30111+ return vdir; /* success */
30112+
f0c0a007 30113+ au_delayed_kfree(vdir->vd_deblk);
1facf9fc 30114+
4f0767ce 30115+out_free:
f0c0a007 30116+ au_cache_dfree_vdir(vdir);
4f0767ce 30117+out:
1facf9fc 30118+ vdir = ERR_PTR(err);
30119+ return vdir;
30120+}
30121+
30122+static int reinit_vdir(struct au_vdir *vdir)
30123+{
30124+ int err;
30125+ union au_vdir_deblk_p p, deblk_end;
30126+
30127+ while (vdir->vd_nblk > 1) {
f0c0a007 30128+ au_delayed_kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
1facf9fc 30129+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
30130+ vdir->vd_nblk--;
30131+ }
30132+ p.deblk = vdir->vd_deblk[0];
30133+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
30134+ err = set_deblk_end(&p, &deblk_end);
30135+ /* keep vd_dblk_sz */
30136+ vdir->vd_last.ul = 0;
30137+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
30138+ vdir->vd_version = 0;
30139+ vdir->vd_jiffy = 0;
30140+ /* smp_mb(); */
30141+ return err;
30142+}
30143+
30144+/* ---------------------------------------------------------------------- */
30145+
1facf9fc 30146+#define AuFillVdir_CALLED 1
30147+#define AuFillVdir_WHABLE (1 << 1)
dece6358 30148+#define AuFillVdir_SHWH (1 << 2)
1facf9fc 30149+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
7f207e10
AM
30150+#define au_fset_fillvdir(flags, name) \
30151+ do { (flags) |= AuFillVdir_##name; } while (0)
30152+#define au_fclr_fillvdir(flags, name) \
30153+ do { (flags) &= ~AuFillVdir_##name; } while (0)
1facf9fc 30154+
dece6358
AM
30155+#ifndef CONFIG_AUFS_SHWH
30156+#undef AuFillVdir_SHWH
30157+#define AuFillVdir_SHWH 0
30158+#endif
30159+
1facf9fc 30160+struct fillvdir_arg {
392086de 30161+ struct dir_context ctx;
1facf9fc 30162+ struct file *file;
30163+ struct au_vdir *vdir;
dece6358
AM
30164+ struct au_nhash delist;
30165+ struct au_nhash whlist;
1facf9fc 30166+ aufs_bindex_t bindex;
30167+ unsigned int flags;
30168+ int err;
30169+};
30170+
392086de 30171+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
1facf9fc 30172+ loff_t offset __maybe_unused, u64 h_ino,
30173+ unsigned int d_type)
30174+{
392086de 30175+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
1facf9fc 30176+ char *name = (void *)__name;
30177+ struct super_block *sb;
1facf9fc 30178+ ino_t ino;
dece6358 30179+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 30180+
1facf9fc 30181+ arg->err = 0;
2000de60 30182+ sb = arg->file->f_path.dentry->d_sb;
1facf9fc 30183+ au_fset_fillvdir(arg->flags, CALLED);
30184+ /* smp_mb(); */
dece6358 30185+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 30186+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
30187+ if (test_known(&arg->delist, name, nlen)
30188+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
30189+ goto out; /* already exists or whiteouted */
1facf9fc 30190+
dece6358 30191+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
30192+ if (!arg->err) {
30193+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
30194+ d_type = DT_UNKNOWN;
dece6358
AM
30195+ arg->err = append_de(arg->vdir, name, nlen, ino,
30196+ d_type, &arg->delist);
4a4d8108 30197+ }
1facf9fc 30198+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
30199+ name += AUFS_WH_PFX_LEN;
dece6358
AM
30200+ nlen -= AUFS_WH_PFX_LEN;
30201+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
30202+ goto out; /* already whiteouted */
1facf9fc 30203+
dece6358
AM
30204+ if (shwh)
30205+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
30206+ &ino);
4a4d8108
AM
30207+ if (!arg->err) {
30208+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
30209+ d_type = DT_UNKNOWN;
1facf9fc 30210+ arg->err = au_nhash_append_wh
dece6358
AM
30211+ (&arg->whlist, name, nlen, ino, d_type,
30212+ arg->bindex, shwh);
4a4d8108 30213+ }
1facf9fc 30214+ }
30215+
4f0767ce 30216+out:
1facf9fc 30217+ if (!arg->err)
30218+ arg->vdir->vd_jiffy = jiffies;
30219+ /* smp_mb(); */
30220+ AuTraceErr(arg->err);
30221+ return arg->err;
30222+}
30223+
dece6358
AM
30224+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
30225+ struct au_nhash *whlist, struct au_nhash *delist)
30226+{
30227+#ifdef CONFIG_AUFS_SHWH
30228+ int err;
30229+ unsigned int nh, u;
30230+ struct hlist_head *head;
c06a8ce3
AM
30231+ struct au_vdir_wh *pos;
30232+ struct hlist_node *n;
dece6358
AM
30233+ char *p, *o;
30234+ struct au_vdir_destr *destr;
30235+
30236+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
30237+
30238+ err = -ENOMEM;
537831f9 30239+ o = p = (void *)__get_free_page(GFP_NOFS);
dece6358
AM
30240+ if (unlikely(!p))
30241+ goto out;
30242+
30243+ err = 0;
30244+ nh = whlist->nh_num;
30245+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
30246+ p += AUFS_WH_PFX_LEN;
30247+ for (u = 0; u < nh; u++) {
30248+ head = whlist->nh_head + u;
c06a8ce3
AM
30249+ hlist_for_each_entry_safe(pos, n, head, wh_hash) {
30250+ destr = &pos->wh_str;
dece6358
AM
30251+ memcpy(p, destr->name, destr->len);
30252+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
c06a8ce3 30253+ pos->wh_ino, pos->wh_type, delist);
dece6358
AM
30254+ if (unlikely(err))
30255+ break;
30256+ }
30257+ }
30258+
f0c0a007 30259+ au_delayed_free_page((unsigned long)o);
dece6358 30260+
4f0767ce 30261+out:
dece6358
AM
30262+ AuTraceErr(err);
30263+ return err;
30264+#else
30265+ return 0;
30266+#endif
30267+}
30268+
1facf9fc 30269+static int au_do_read_vdir(struct fillvdir_arg *arg)
30270+{
30271+ int err;
dece6358 30272+ unsigned int rdhash;
1facf9fc 30273+ loff_t offset;
5afbbe0d 30274+ aufs_bindex_t bbot, bindex, btop;
dece6358 30275+ unsigned char shwh;
1facf9fc 30276+ struct file *hf, *file;
30277+ struct super_block *sb;
30278+
1facf9fc 30279+ file = arg->file;
2000de60 30280+ sb = file->f_path.dentry->d_sb;
dece6358
AM
30281+ SiMustAnyLock(sb);
30282+
30283+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 30284+ if (!rdhash)
30285+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
30286+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
30287+ if (unlikely(err))
1facf9fc 30288+ goto out;
dece6358
AM
30289+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
30290+ if (unlikely(err))
1facf9fc 30291+ goto out_delist;
30292+
30293+ err = 0;
30294+ arg->flags = 0;
dece6358
AM
30295+ shwh = 0;
30296+ if (au_opt_test(au_mntflags(sb), SHWH)) {
30297+ shwh = 1;
30298+ au_fset_fillvdir(arg->flags, SHWH);
30299+ }
5afbbe0d
AM
30300+ btop = au_fbtop(file);
30301+ bbot = au_fbbot_dir(file);
30302+ for (bindex = btop; !err && bindex <= bbot; bindex++) {
4a4d8108 30303+ hf = au_hf_dir(file, bindex);
1facf9fc 30304+ if (!hf)
30305+ continue;
30306+
30307+ offset = vfsub_llseek(hf, 0, SEEK_SET);
30308+ err = offset;
30309+ if (unlikely(offset))
30310+ break;
30311+
30312+ arg->bindex = bindex;
30313+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358 30314+ if (shwh
5afbbe0d 30315+ || (bindex != bbot
dece6358 30316+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 30317+ au_fset_fillvdir(arg->flags, WHABLE);
30318+ do {
30319+ arg->err = 0;
30320+ au_fclr_fillvdir(arg->flags, CALLED);
30321+ /* smp_mb(); */
392086de 30322+ err = vfsub_iterate_dir(hf, &arg->ctx);
1facf9fc 30323+ if (err >= 0)
30324+ err = arg->err;
30325+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
392086de
AM
30326+
30327+ /*
30328+ * dir_relax() may be good for concurrency, but aufs should not
30329+ * use it since it will cause a lockdep problem.
30330+ */
1facf9fc 30331+ }
dece6358
AM
30332+
30333+ if (!err && shwh)
30334+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
30335+
30336+ au_nhash_wh_free(&arg->whlist);
1facf9fc 30337+
4f0767ce 30338+out_delist:
dece6358 30339+ au_nhash_de_free(&arg->delist);
4f0767ce 30340+out:
1facf9fc 30341+ return err;
30342+}
30343+
30344+static int read_vdir(struct file *file, int may_read)
30345+{
30346+ int err;
30347+ unsigned long expire;
30348+ unsigned char do_read;
392086de
AM
30349+ struct fillvdir_arg arg = {
30350+ .ctx = {
2000de60 30351+ .actor = fillvdir
392086de
AM
30352+ }
30353+ };
1facf9fc 30354+ struct inode *inode;
30355+ struct au_vdir *vdir, *allocated;
30356+
30357+ err = 0;
c06a8ce3 30358+ inode = file_inode(file);
1facf9fc 30359+ IMustLock(inode);
5afbbe0d 30360+ IiMustWriteLock(inode);
dece6358
AM
30361+ SiMustAnyLock(inode->i_sb);
30362+
1facf9fc 30363+ allocated = NULL;
30364+ do_read = 0;
30365+ expire = au_sbi(inode->i_sb)->si_rdcache;
30366+ vdir = au_ivdir(inode);
30367+ if (!vdir) {
30368+ do_read = 1;
1308ab2a 30369+ vdir = alloc_vdir(file);
1facf9fc 30370+ err = PTR_ERR(vdir);
30371+ if (IS_ERR(vdir))
30372+ goto out;
30373+ err = 0;
30374+ allocated = vdir;
30375+ } else if (may_read
30376+ && (inode->i_version != vdir->vd_version
30377+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
30378+ do_read = 1;
30379+ err = reinit_vdir(vdir);
30380+ if (unlikely(err))
30381+ goto out;
30382+ }
30383+
30384+ if (!do_read)
30385+ return 0; /* success */
30386+
30387+ arg.file = file;
30388+ arg.vdir = vdir;
30389+ err = au_do_read_vdir(&arg);
30390+ if (!err) {
392086de 30391+ /* file->f_pos = 0; */ /* todo: ctx->pos? */
1facf9fc 30392+ vdir->vd_version = inode->i_version;
30393+ vdir->vd_last.ul = 0;
30394+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
30395+ if (allocated)
30396+ au_set_ivdir(inode, allocated);
30397+ } else if (allocated)
f0c0a007 30398+ au_vdir_free(allocated, /*atonce*/0);
1facf9fc 30399+
4f0767ce 30400+out:
1facf9fc 30401+ return err;
30402+}
30403+
30404+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
30405+{
30406+ int err, rerr;
30407+ unsigned long ul, n;
30408+ const unsigned int deblk_sz = src->vd_deblk_sz;
30409+
30410+ AuDebugOn(tgt->vd_nblk != 1);
30411+
30412+ err = -ENOMEM;
30413+ if (tgt->vd_nblk < src->vd_nblk) {
30414+ unsigned char **p;
30415+
e2f27e51
AM
30416+ p = au_krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
30417+ GFP_NOFS, /*may_shrink*/0);
1facf9fc 30418+ if (unlikely(!p))
30419+ goto out;
30420+ tgt->vd_deblk = p;
30421+ }
30422+
1308ab2a 30423+ if (tgt->vd_deblk_sz != deblk_sz) {
30424+ unsigned char *p;
30425+
30426+ tgt->vd_deblk_sz = deblk_sz;
e2f27e51
AM
30427+ p = au_krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS,
30428+ /*may_shrink*/1);
1308ab2a 30429+ if (unlikely(!p))
30430+ goto out;
30431+ tgt->vd_deblk[0] = p;
30432+ }
1facf9fc 30433+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 30434+ tgt->vd_version = src->vd_version;
30435+ tgt->vd_jiffy = src->vd_jiffy;
30436+
30437+ n = src->vd_nblk;
30438+ for (ul = 1; ul < n; ul++) {
dece6358
AM
30439+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
30440+ GFP_NOFS);
30441+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 30442+ goto out;
1308ab2a 30443+ tgt->vd_nblk++;
1facf9fc 30444+ }
1308ab2a 30445+ tgt->vd_nblk = n;
30446+ tgt->vd_last.ul = tgt->vd_last.ul;
30447+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
30448+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
30449+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 30450+ /* smp_mb(); */
30451+ return 0; /* success */
30452+
4f0767ce 30453+out:
1facf9fc 30454+ rerr = reinit_vdir(tgt);
30455+ BUG_ON(rerr);
30456+ return err;
30457+}
30458+
30459+int au_vdir_init(struct file *file)
30460+{
30461+ int err;
30462+ struct inode *inode;
30463+ struct au_vdir *vdir_cache, *allocated;
30464+
392086de 30465+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 30466+ err = read_vdir(file, !file->f_pos);
30467+ if (unlikely(err))
30468+ goto out;
30469+
30470+ allocated = NULL;
30471+ vdir_cache = au_fvdir_cache(file);
30472+ if (!vdir_cache) {
1308ab2a 30473+ vdir_cache = alloc_vdir(file);
1facf9fc 30474+ err = PTR_ERR(vdir_cache);
30475+ if (IS_ERR(vdir_cache))
30476+ goto out;
30477+ allocated = vdir_cache;
30478+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
392086de 30479+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 30480+ err = reinit_vdir(vdir_cache);
30481+ if (unlikely(err))
30482+ goto out;
30483+ } else
30484+ return 0; /* success */
30485+
c06a8ce3 30486+ inode = file_inode(file);
1facf9fc 30487+ err = copy_vdir(vdir_cache, au_ivdir(inode));
30488+ if (!err) {
30489+ file->f_version = inode->i_version;
30490+ if (allocated)
30491+ au_set_fvdir_cache(file, allocated);
30492+ } else if (allocated)
f0c0a007 30493+ au_vdir_free(allocated, /*atonce*/0);
1facf9fc 30494+
4f0767ce 30495+out:
1facf9fc 30496+ return err;
30497+}
30498+
30499+static loff_t calc_offset(struct au_vdir *vdir)
30500+{
30501+ loff_t offset;
30502+ union au_vdir_deblk_p p;
30503+
30504+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
30505+ offset = vdir->vd_last.p.deblk - p.deblk;
30506+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
30507+ return offset;
30508+}
30509+
30510+/* returns true or false */
392086de 30511+static int seek_vdir(struct file *file, struct dir_context *ctx)
1facf9fc 30512+{
30513+ int valid;
30514+ unsigned int deblk_sz;
30515+ unsigned long ul, n;
30516+ loff_t offset;
30517+ union au_vdir_deblk_p p, deblk_end;
30518+ struct au_vdir *vdir_cache;
30519+
30520+ valid = 1;
30521+ vdir_cache = au_fvdir_cache(file);
30522+ offset = calc_offset(vdir_cache);
30523+ AuDbg("offset %lld\n", offset);
392086de 30524+ if (ctx->pos == offset)
1facf9fc 30525+ goto out;
30526+
30527+ vdir_cache->vd_last.ul = 0;
30528+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
392086de 30529+ if (!ctx->pos)
1facf9fc 30530+ goto out;
30531+
30532+ valid = 0;
30533+ deblk_sz = vdir_cache->vd_deblk_sz;
392086de 30534+ ul = div64_u64(ctx->pos, deblk_sz);
1facf9fc 30535+ AuDbg("ul %lu\n", ul);
30536+ if (ul >= vdir_cache->vd_nblk)
30537+ goto out;
30538+
30539+ n = vdir_cache->vd_nblk;
30540+ for (; ul < n; ul++) {
30541+ p.deblk = vdir_cache->vd_deblk[ul];
30542+ deblk_end.deblk = p.deblk + deblk_sz;
30543+ offset = ul;
30544+ offset *= deblk_sz;
392086de 30545+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
1facf9fc 30546+ unsigned int l;
30547+
30548+ l = calc_size(p.de->de_str.len);
30549+ offset += l;
30550+ p.deblk += l;
30551+ }
30552+ if (!is_deblk_end(&p, &deblk_end)) {
30553+ valid = 1;
30554+ vdir_cache->vd_last.ul = ul;
30555+ vdir_cache->vd_last.p = p;
30556+ break;
30557+ }
30558+ }
30559+
4f0767ce 30560+out:
1facf9fc 30561+ /* smp_mb(); */
30562+ AuTraceErr(!valid);
30563+ return valid;
30564+}
30565+
392086de 30566+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
1facf9fc 30567+{
1facf9fc 30568+ unsigned int l, deblk_sz;
30569+ union au_vdir_deblk_p deblk_end;
30570+ struct au_vdir *vdir_cache;
30571+ struct au_vdir_de *de;
30572+
30573+ vdir_cache = au_fvdir_cache(file);
392086de 30574+ if (!seek_vdir(file, ctx))
1facf9fc 30575+ return 0;
30576+
30577+ deblk_sz = vdir_cache->vd_deblk_sz;
30578+ while (1) {
30579+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
30580+ deblk_end.deblk += deblk_sz;
30581+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
30582+ de = vdir_cache->vd_last.p.de;
30583+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
392086de 30584+ de->de_str.len, de->de_str.name, ctx->pos,
1facf9fc 30585+ (unsigned long)de->de_ino, de->de_type);
392086de
AM
30586+ if (unlikely(!dir_emit(ctx, de->de_str.name,
30587+ de->de_str.len, de->de_ino,
30588+ de->de_type))) {
1facf9fc 30589+ /* todo: ignore the error caused by udba? */
30590+ /* return err; */
30591+ return 0;
30592+ }
30593+
30594+ l = calc_size(de->de_str.len);
30595+ vdir_cache->vd_last.p.deblk += l;
392086de 30596+ ctx->pos += l;
1facf9fc 30597+ }
30598+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
30599+ vdir_cache->vd_last.ul++;
30600+ vdir_cache->vd_last.p.deblk
30601+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
392086de 30602+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
1facf9fc 30603+ continue;
30604+ }
30605+ break;
30606+ }
30607+
30608+ /* smp_mb(); */
30609+ return 0;
30610+}
7f207e10
AM
30611diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
30612--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
30613+++ linux/fs/aufs/vfsub.c 2016-12-17 12:28:17.598545045 +0100
30614@@ -0,0 +1,886 @@
1facf9fc 30615+/*
8cdd5066 30616+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 30617+ *
30618+ * This program, aufs is free software; you can redistribute it and/or modify
30619+ * it under the terms of the GNU General Public License as published by
30620+ * the Free Software Foundation; either version 2 of the License, or
30621+ * (at your option) any later version.
dece6358
AM
30622+ *
30623+ * This program is distributed in the hope that it will be useful,
30624+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30625+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30626+ * GNU General Public License for more details.
30627+ *
30628+ * You should have received a copy of the GNU General Public License
523b37e3 30629+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30630+ */
30631+
30632+/*
30633+ * sub-routines for VFS
30634+ */
30635+
dece6358 30636+#include <linux/namei.h>
8cdd5066 30637+#include <linux/nsproxy.h>
dece6358
AM
30638+#include <linux/security.h>
30639+#include <linux/splice.h>
f2c43d5f 30640+#ifdef CONFIG_AUFS_BR_FUSE
8cdd5066 30641+#include "../fs/mount.h"
f2c43d5f 30642+#endif
1facf9fc 30643+#include "aufs.h"
30644+
8cdd5066
JR
30645+#ifdef CONFIG_AUFS_BR_FUSE
30646+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
30647+{
30648+ struct nsproxy *ns;
30649+
30650+ if (!au_test_fuse(h_sb) || !au_userns)
30651+ return 0;
30652+
30653+ ns = current->nsproxy;
30654+ /* no {get,put}_nsproxy(ns) */
30655+ return real_mount(mnt)->mnt_ns == ns->mnt_ns ? 0 : -EACCES;
30656+}
30657+#endif
30658+
30659+/* ---------------------------------------------------------------------- */
30660+
1facf9fc 30661+int vfsub_update_h_iattr(struct path *h_path, int *did)
30662+{
30663+ int err;
30664+ struct kstat st;
30665+ struct super_block *h_sb;
30666+
30667+ /* for remote fs, leave work for its getattr or d_revalidate */
30668+ /* for bad i_attr fs, handle them in aufs_getattr() */
30669+ /* still some fs may acquire i_mutex. we need to skip them */
30670+ err = 0;
30671+ if (!did)
30672+ did = &err;
30673+ h_sb = h_path->dentry->d_sb;
30674+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
30675+ if (*did)
c06a8ce3 30676+ err = vfs_getattr(h_path, &st);
1facf9fc 30677+
30678+ return err;
30679+}
30680+
30681+/* ---------------------------------------------------------------------- */
30682+
4a4d8108 30683+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 30684+{
30685+ struct file *file;
30686+
b4510431 30687+ file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
7f207e10 30688+ current_cred());
2cbb1c4b
JR
30689+ if (!IS_ERR_OR_NULL(file)
30690+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
5527c038 30691+ i_readcount_inc(d_inode(path->dentry));
4a4d8108 30692+
1308ab2a 30693+ return file;
30694+}
30695+
1facf9fc 30696+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
30697+{
30698+ struct file *file;
30699+
2cbb1c4b 30700+ lockdep_off();
7f207e10 30701+ file = filp_open(path,
2cbb1c4b 30702+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 30703+ mode);
2cbb1c4b 30704+ lockdep_on();
1facf9fc 30705+ if (IS_ERR(file))
30706+ goto out;
30707+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30708+
4f0767ce 30709+out:
1facf9fc 30710+ return file;
30711+}
30712+
b912730e
AM
30713+/*
30714+ * Ideally this function should call VFS:do_last() in order to keep all its
30715+ * checkings. But it is very hard for aufs to regenerate several VFS internal
30716+ * structure such as nameidata. This is a second (or third) best approach.
30717+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
30718+ */
30719+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
30720+ struct vfsub_aopen_args *args, struct au_branch *br)
30721+{
30722+ int err;
30723+ struct file *file = args->file;
30724+ /* copied from linux/fs/namei.c:atomic_open() */
30725+ struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
30726+
30727+ IMustLock(dir);
30728+ AuDebugOn(!dir->i_op->atomic_open);
30729+
30730+ err = au_br_test_oflag(args->open_flag, br);
30731+ if (unlikely(err))
30732+ goto out;
30733+
30734+ args->file->f_path.dentry = DENTRY_NOT_SET;
30735+ args->file->f_path.mnt = au_br_mnt(br);
30736+ err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
30737+ args->create_mode, args->opened);
30738+ if (err >= 0) {
30739+ /* some filesystems don't set FILE_CREATED while succeeded? */
30740+ if (*args->opened & FILE_CREATED)
30741+ fsnotify_create(dir, dentry);
30742+ } else
30743+ goto out;
30744+
30745+
30746+ if (!err) {
30747+ /* todo: call VFS:may_open() here */
30748+ err = open_check_o_direct(file);
30749+ /* todo: ima_file_check() too? */
30750+ if (!err && (args->open_flag & __FMODE_EXEC))
30751+ err = deny_write_access(file);
30752+ if (unlikely(err))
30753+ /* note that the file is created and still opened */
30754+ goto out;
30755+ }
30756+
5afbbe0d 30757+ au_br_get(br);
b912730e
AM
30758+ fsnotify_open(file);
30759+
30760+out:
30761+ return err;
30762+}
30763+
1facf9fc 30764+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
30765+{
30766+ int err;
30767+
1facf9fc 30768+ err = kern_path(name, flags, path);
5527c038 30769+ if (!err && d_is_positive(path->dentry))
1facf9fc 30770+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30771+ return err;
30772+}
30773+
febd17d6
JR
30774+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
30775+ struct dentry *parent, int len)
30776+{
30777+ struct path path = {
30778+ .mnt = NULL
30779+ };
30780+
30781+ path.dentry = lookup_one_len_unlocked(name, parent, len);
30782+ if (IS_ERR(path.dentry))
30783+ goto out;
30784+ if (d_is_positive(path.dentry))
30785+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30786+
30787+out:
30788+ AuTraceErrPtr(path.dentry);
30789+ return path.dentry;
30790+}
30791+
1facf9fc 30792+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
30793+ int len)
30794+{
30795+ struct path path = {
30796+ .mnt = NULL
30797+ };
30798+
1308ab2a 30799+ /* VFS checks it too, but by WARN_ON_ONCE() */
5527c038 30800+ IMustLock(d_inode(parent));
1facf9fc 30801+
30802+ path.dentry = lookup_one_len(name, parent, len);
30803+ if (IS_ERR(path.dentry))
30804+ goto out;
5527c038 30805+ if (d_is_positive(path.dentry))
1facf9fc 30806+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30807+
4f0767ce 30808+out:
4a4d8108 30809+ AuTraceErrPtr(path.dentry);
1facf9fc 30810+ return path.dentry;
30811+}
30812+
b4510431 30813+void vfsub_call_lkup_one(void *args)
2cbb1c4b 30814+{
b4510431
AM
30815+ struct vfsub_lkup_one_args *a = args;
30816+ *a->errp = vfsub_lkup_one(a->name, a->parent);
2cbb1c4b
JR
30817+}
30818+
1facf9fc 30819+/* ---------------------------------------------------------------------- */
30820+
30821+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
30822+ struct dentry *d2, struct au_hinode *hdir2)
30823+{
30824+ struct dentry *d;
30825+
2cbb1c4b 30826+ lockdep_off();
1facf9fc 30827+ d = lock_rename(d1, d2);
2cbb1c4b 30828+ lockdep_on();
4a4d8108 30829+ au_hn_suspend(hdir1);
1facf9fc 30830+ if (hdir1 != hdir2)
4a4d8108 30831+ au_hn_suspend(hdir2);
1facf9fc 30832+
30833+ return d;
30834+}
30835+
30836+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
30837+ struct dentry *d2, struct au_hinode *hdir2)
30838+{
4a4d8108 30839+ au_hn_resume(hdir1);
1facf9fc 30840+ if (hdir1 != hdir2)
4a4d8108 30841+ au_hn_resume(hdir2);
2cbb1c4b 30842+ lockdep_off();
1facf9fc 30843+ unlock_rename(d1, d2);
2cbb1c4b 30844+ lockdep_on();
1facf9fc 30845+}
30846+
30847+/* ---------------------------------------------------------------------- */
30848+
b4510431 30849+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
1facf9fc 30850+{
30851+ int err;
30852+ struct dentry *d;
30853+
30854+ IMustLock(dir);
30855+
30856+ d = path->dentry;
30857+ path->dentry = d->d_parent;
b752ccd1 30858+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 30859+ path->dentry = d;
30860+ if (unlikely(err))
30861+ goto out;
30862+
c1595e42 30863+ lockdep_off();
b4510431 30864+ err = vfs_create(dir, path->dentry, mode, want_excl);
c1595e42 30865+ lockdep_on();
1facf9fc 30866+ if (!err) {
30867+ struct path tmp = *path;
30868+ int did;
30869+
30870+ vfsub_update_h_iattr(&tmp, &did);
30871+ if (did) {
30872+ tmp.dentry = path->dentry->d_parent;
30873+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30874+ }
30875+ /*ignore*/
30876+ }
30877+
4f0767ce 30878+out:
1facf9fc 30879+ return err;
30880+}
30881+
30882+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
30883+{
30884+ int err;
30885+ struct dentry *d;
30886+
30887+ IMustLock(dir);
30888+
30889+ d = path->dentry;
30890+ path->dentry = d->d_parent;
b752ccd1 30891+ err = security_path_symlink(path, d, symname);
1facf9fc 30892+ path->dentry = d;
30893+ if (unlikely(err))
30894+ goto out;
30895+
c1595e42 30896+ lockdep_off();
1facf9fc 30897+ err = vfs_symlink(dir, path->dentry, symname);
c1595e42 30898+ lockdep_on();
1facf9fc 30899+ if (!err) {
30900+ struct path tmp = *path;
30901+ int did;
30902+
30903+ vfsub_update_h_iattr(&tmp, &did);
30904+ if (did) {
30905+ tmp.dentry = path->dentry->d_parent;
30906+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30907+ }
30908+ /*ignore*/
30909+ }
30910+
4f0767ce 30911+out:
1facf9fc 30912+ return err;
30913+}
30914+
30915+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
30916+{
30917+ int err;
30918+ struct dentry *d;
30919+
30920+ IMustLock(dir);
30921+
30922+ d = path->dentry;
30923+ path->dentry = d->d_parent;
027c5e7a 30924+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 30925+ path->dentry = d;
30926+ if (unlikely(err))
30927+ goto out;
30928+
c1595e42 30929+ lockdep_off();
1facf9fc 30930+ err = vfs_mknod(dir, path->dentry, mode, dev);
c1595e42 30931+ lockdep_on();
1facf9fc 30932+ if (!err) {
30933+ struct path tmp = *path;
30934+ int did;
30935+
30936+ vfsub_update_h_iattr(&tmp, &did);
30937+ if (did) {
30938+ tmp.dentry = path->dentry->d_parent;
30939+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30940+ }
30941+ /*ignore*/
30942+ }
30943+
4f0767ce 30944+out:
1facf9fc 30945+ return err;
30946+}
30947+
30948+static int au_test_nlink(struct inode *inode)
30949+{
30950+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
30951+
30952+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
30953+ || inode->i_nlink < link_max)
30954+ return 0;
30955+ return -EMLINK;
30956+}
30957+
523b37e3
AM
30958+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
30959+ struct inode **delegated_inode)
1facf9fc 30960+{
30961+ int err;
30962+ struct dentry *d;
30963+
30964+ IMustLock(dir);
30965+
5527c038 30966+ err = au_test_nlink(d_inode(src_dentry));
1facf9fc 30967+ if (unlikely(err))
30968+ return err;
30969+
b4510431 30970+ /* we don't call may_linkat() */
1facf9fc 30971+ d = path->dentry;
30972+ path->dentry = d->d_parent;
b752ccd1 30973+ err = security_path_link(src_dentry, path, d);
1facf9fc 30974+ path->dentry = d;
30975+ if (unlikely(err))
30976+ goto out;
30977+
2cbb1c4b 30978+ lockdep_off();
523b37e3 30979+ err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
2cbb1c4b 30980+ lockdep_on();
1facf9fc 30981+ if (!err) {
30982+ struct path tmp = *path;
30983+ int did;
30984+
30985+ /* fuse has different memory inode for the same inumber */
30986+ vfsub_update_h_iattr(&tmp, &did);
30987+ if (did) {
30988+ tmp.dentry = path->dentry->d_parent;
30989+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30990+ tmp.dentry = src_dentry;
30991+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30992+ }
30993+ /*ignore*/
30994+ }
30995+
4f0767ce 30996+out:
1facf9fc 30997+ return err;
30998+}
30999+
31000+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
523b37e3 31001+ struct inode *dir, struct path *path,
f2c43d5f 31002+ struct inode **delegated_inode, unsigned int flags)
1facf9fc 31003+{
31004+ int err;
31005+ struct path tmp = {
31006+ .mnt = path->mnt
31007+ };
31008+ struct dentry *d;
31009+
31010+ IMustLock(dir);
31011+ IMustLock(src_dir);
31012+
31013+ d = path->dentry;
31014+ path->dentry = d->d_parent;
31015+ tmp.dentry = src_dentry->d_parent;
38d290e6 31016+ err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
1facf9fc 31017+ path->dentry = d;
31018+ if (unlikely(err))
31019+ goto out;
31020+
2cbb1c4b 31021+ lockdep_off();
523b37e3 31022+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
f2c43d5f 31023+ delegated_inode, flags);
2cbb1c4b 31024+ lockdep_on();
1facf9fc 31025+ if (!err) {
31026+ int did;
31027+
31028+ tmp.dentry = d->d_parent;
31029+ vfsub_update_h_iattr(&tmp, &did);
31030+ if (did) {
31031+ tmp.dentry = src_dentry;
31032+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31033+ tmp.dentry = src_dentry->d_parent;
31034+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31035+ }
31036+ /*ignore*/
31037+ }
31038+
4f0767ce 31039+out:
1facf9fc 31040+ return err;
31041+}
31042+
31043+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
31044+{
31045+ int err;
31046+ struct dentry *d;
31047+
31048+ IMustLock(dir);
31049+
31050+ d = path->dentry;
31051+ path->dentry = d->d_parent;
b752ccd1 31052+ err = security_path_mkdir(path, d, mode);
1facf9fc 31053+ path->dentry = d;
31054+ if (unlikely(err))
31055+ goto out;
31056+
c1595e42 31057+ lockdep_off();
1facf9fc 31058+ err = vfs_mkdir(dir, path->dentry, mode);
c1595e42 31059+ lockdep_on();
1facf9fc 31060+ if (!err) {
31061+ struct path tmp = *path;
31062+ int did;
31063+
31064+ vfsub_update_h_iattr(&tmp, &did);
31065+ if (did) {
31066+ tmp.dentry = path->dentry->d_parent;
31067+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
31068+ }
31069+ /*ignore*/
31070+ }
31071+
4f0767ce 31072+out:
1facf9fc 31073+ return err;
31074+}
31075+
31076+int vfsub_rmdir(struct inode *dir, struct path *path)
31077+{
31078+ int err;
31079+ struct dentry *d;
31080+
31081+ IMustLock(dir);
31082+
31083+ d = path->dentry;
31084+ path->dentry = d->d_parent;
b752ccd1 31085+ err = security_path_rmdir(path, d);
1facf9fc 31086+ path->dentry = d;
31087+ if (unlikely(err))
31088+ goto out;
31089+
2cbb1c4b 31090+ lockdep_off();
1facf9fc 31091+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 31092+ lockdep_on();
1facf9fc 31093+ if (!err) {
31094+ struct path tmp = {
31095+ .dentry = path->dentry->d_parent,
31096+ .mnt = path->mnt
31097+ };
31098+
31099+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
31100+ }
31101+
4f0767ce 31102+out:
1facf9fc 31103+ return err;
31104+}
31105+
31106+/* ---------------------------------------------------------------------- */
31107+
9dbd164d 31108+/* todo: support mmap_sem? */
1facf9fc 31109+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
31110+ loff_t *ppos)
31111+{
31112+ ssize_t err;
31113+
2cbb1c4b 31114+ lockdep_off();
1facf9fc 31115+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 31116+ lockdep_on();
1facf9fc 31117+ if (err >= 0)
31118+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
31119+ return err;
31120+}
31121+
31122+/* todo: kernel_read()? */
31123+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31124+ loff_t *ppos)
31125+{
31126+ ssize_t err;
31127+ mm_segment_t oldfs;
b752ccd1
AM
31128+ union {
31129+ void *k;
31130+ char __user *u;
31131+ } buf;
1facf9fc 31132+
b752ccd1 31133+ buf.k = kbuf;
1facf9fc 31134+ oldfs = get_fs();
31135+ set_fs(KERNEL_DS);
b752ccd1 31136+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 31137+ set_fs(oldfs);
31138+ return err;
31139+}
31140+
31141+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31142+ loff_t *ppos)
31143+{
31144+ ssize_t err;
31145+
2cbb1c4b 31146+ lockdep_off();
1facf9fc 31147+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 31148+ lockdep_on();
1facf9fc 31149+ if (err >= 0)
31150+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
31151+ return err;
31152+}
31153+
31154+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
31155+{
31156+ ssize_t err;
31157+ mm_segment_t oldfs;
b752ccd1
AM
31158+ union {
31159+ void *k;
31160+ const char __user *u;
31161+ } buf;
1facf9fc 31162+
b752ccd1 31163+ buf.k = kbuf;
1facf9fc 31164+ oldfs = get_fs();
31165+ set_fs(KERNEL_DS);
b752ccd1 31166+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 31167+ set_fs(oldfs);
31168+ return err;
31169+}
31170+
4a4d8108
AM
31171+int vfsub_flush(struct file *file, fl_owner_t id)
31172+{
31173+ int err;
31174+
31175+ err = 0;
523b37e3 31176+ if (file->f_op->flush) {
2000de60 31177+ if (!au_test_nfs(file->f_path.dentry->d_sb))
2cbb1c4b
JR
31178+ err = file->f_op->flush(file, id);
31179+ else {
31180+ lockdep_off();
31181+ err = file->f_op->flush(file, id);
31182+ lockdep_on();
31183+ }
4a4d8108
AM
31184+ if (!err)
31185+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
31186+ /*ignore*/
31187+ }
31188+ return err;
31189+}
31190+
392086de 31191+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
1facf9fc 31192+{
31193+ int err;
31194+
523b37e3 31195+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 31196+
2cbb1c4b 31197+ lockdep_off();
392086de 31198+ err = iterate_dir(file, ctx);
2cbb1c4b 31199+ lockdep_on();
1facf9fc 31200+ if (err >= 0)
31201+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
31202+ return err;
31203+}
31204+
31205+long vfsub_splice_to(struct file *in, loff_t *ppos,
31206+ struct pipe_inode_info *pipe, size_t len,
31207+ unsigned int flags)
31208+{
31209+ long err;
31210+
2cbb1c4b 31211+ lockdep_off();
0fc653ad 31212+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 31213+ lockdep_on();
4a4d8108 31214+ file_accessed(in);
1facf9fc 31215+ if (err >= 0)
31216+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
31217+ return err;
31218+}
31219+
31220+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31221+ loff_t *ppos, size_t len, unsigned int flags)
31222+{
31223+ long err;
31224+
2cbb1c4b 31225+ lockdep_off();
0fc653ad 31226+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 31227+ lockdep_on();
1facf9fc 31228+ if (err >= 0)
31229+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
31230+ return err;
31231+}
31232+
53392da6
AM
31233+int vfsub_fsync(struct file *file, struct path *path, int datasync)
31234+{
31235+ int err;
31236+
31237+ /* file can be NULL */
31238+ lockdep_off();
31239+ err = vfs_fsync(file, datasync);
31240+ lockdep_on();
31241+ if (!err) {
31242+ if (!path) {
31243+ AuDebugOn(!file);
31244+ path = &file->f_path;
31245+ }
31246+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
31247+ }
31248+ return err;
31249+}
31250+
1facf9fc 31251+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
31252+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31253+ struct file *h_file)
31254+{
31255+ int err;
31256+ struct inode *h_inode;
c06a8ce3 31257+ struct super_block *h_sb;
1facf9fc 31258+
1facf9fc 31259+ if (!h_file) {
c06a8ce3
AM
31260+ err = vfsub_truncate(h_path, length);
31261+ goto out;
1facf9fc 31262+ }
31263+
5527c038 31264+ h_inode = d_inode(h_path->dentry);
c06a8ce3
AM
31265+ h_sb = h_inode->i_sb;
31266+ lockdep_off();
31267+ sb_start_write(h_sb);
31268+ lockdep_on();
1facf9fc 31269+ err = locks_verify_truncate(h_inode, h_file, length);
31270+ if (!err)
953406b4 31271+ err = security_path_truncate(h_path);
2cbb1c4b
JR
31272+ if (!err) {
31273+ lockdep_off();
1facf9fc 31274+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
31275+ lockdep_on();
31276+ }
c06a8ce3
AM
31277+ lockdep_off();
31278+ sb_end_write(h_sb);
31279+ lockdep_on();
1facf9fc 31280+
4f0767ce 31281+out:
1facf9fc 31282+ return err;
31283+}
31284+
31285+/* ---------------------------------------------------------------------- */
31286+
/* argument pack handed to au_call_vfsub_mkdir() through a void pointer */
struct au_vfsub_mkdir_args {
	int *errp;		/* where the vfsub_mkdir() result is stored */
	struct inode *dir;
	struct path *path;
	int mode;
};

/* callback: unpack @args and run vfsub_mkdir(), storing its result */
static void au_call_vfsub_mkdir(void *args)
{
	struct au_vfsub_mkdir_args *mk = args;
	int rc;

	rc = vfsub_mkdir(mk->dir, mk->path, mk->mode);
	*mk->errp = rc;
}
31299+
31300+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
31301+{
31302+ int err, do_sio, wkq_err;
31303+
31304+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
31305+ if (!do_sio) {
31306+ lockdep_off();
1facf9fc 31307+ err = vfsub_mkdir(dir, path, mode);
c1595e42
JR
31308+ lockdep_on();
31309+ } else {
1facf9fc 31310+ struct au_vfsub_mkdir_args args = {
31311+ .errp = &err,
31312+ .dir = dir,
31313+ .path = path,
31314+ .mode = mode
31315+ };
31316+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
31317+ if (unlikely(wkq_err))
31318+ err = wkq_err;
31319+ }
31320+
31321+ return err;
31322+}
31323+
/* argument pack handed to au_call_vfsub_rmdir() through a void pointer */
struct au_vfsub_rmdir_args {
	int *errp;		/* where the vfsub_rmdir() result is stored */
	struct inode *dir;
	struct path *path;
};

/* callback: unpack @args and run vfsub_rmdir(), storing its result */
static void au_call_vfsub_rmdir(void *args)
{
	struct au_vfsub_rmdir_args *rm = args;
	int rc;

	rc = vfsub_rmdir(rm->dir, rm->path);
	*rm->errp = rc;
}
31335+
31336+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
31337+{
31338+ int err, do_sio, wkq_err;
31339+
31340+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
31341+ if (!do_sio) {
31342+ lockdep_off();
1facf9fc 31343+ err = vfsub_rmdir(dir, path);
c1595e42
JR
31344+ lockdep_on();
31345+ } else {
1facf9fc 31346+ struct au_vfsub_rmdir_args args = {
31347+ .errp = &err,
31348+ .dir = dir,
31349+ .path = path
31350+ };
31351+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
31352+ if (unlikely(wkq_err))
31353+ err = wkq_err;
31354+ }
31355+
31356+ return err;
31357+}
31358+
31359+/* ---------------------------------------------------------------------- */
31360+
31361+struct notify_change_args {
31362+ int *errp;
31363+ struct path *path;
31364+ struct iattr *ia;
523b37e3 31365+ struct inode **delegated_inode;
1facf9fc 31366+};
31367+
31368+static void call_notify_change(void *args)
31369+{
31370+ struct notify_change_args *a = args;
31371+ struct inode *h_inode;
31372+
5527c038 31373+ h_inode = d_inode(a->path->dentry);
1facf9fc 31374+ IMustLock(h_inode);
31375+
31376+ *a->errp = -EPERM;
31377+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
c1595e42 31378+ lockdep_off();
523b37e3
AM
31379+ *a->errp = notify_change(a->path->dentry, a->ia,
31380+ a->delegated_inode);
c1595e42 31381+ lockdep_on();
1facf9fc 31382+ if (!*a->errp)
31383+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
31384+ }
31385+ AuTraceErr(*a->errp);
31386+}
31387+
523b37e3
AM
31388+int vfsub_notify_change(struct path *path, struct iattr *ia,
31389+ struct inode **delegated_inode)
1facf9fc 31390+{
31391+ int err;
31392+ struct notify_change_args args = {
523b37e3
AM
31393+ .errp = &err,
31394+ .path = path,
31395+ .ia = ia,
31396+ .delegated_inode = delegated_inode
1facf9fc 31397+ };
31398+
31399+ call_notify_change(&args);
31400+
31401+ return err;
31402+}
31403+
523b37e3
AM
31404+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31405+ struct inode **delegated_inode)
1facf9fc 31406+{
31407+ int err, wkq_err;
31408+ struct notify_change_args args = {
523b37e3
AM
31409+ .errp = &err,
31410+ .path = path,
31411+ .ia = ia,
31412+ .delegated_inode = delegated_inode
1facf9fc 31413+ };
31414+
31415+ wkq_err = au_wkq_wait(call_notify_change, &args);
31416+ if (unlikely(wkq_err))
31417+ err = wkq_err;
31418+
31419+ return err;
31420+}
31421+
31422+/* ---------------------------------------------------------------------- */
31423+
31424+struct unlink_args {
31425+ int *errp;
31426+ struct inode *dir;
31427+ struct path *path;
523b37e3 31428+ struct inode **delegated_inode;
1facf9fc 31429+};
31430+
31431+static void call_unlink(void *args)
31432+{
31433+ struct unlink_args *a = args;
31434+ struct dentry *d = a->path->dentry;
31435+ struct inode *h_inode;
31436+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
c1595e42 31437+ && au_dcount(d) == 1);
1facf9fc 31438+
31439+ IMustLock(a->dir);
31440+
31441+ a->path->dentry = d->d_parent;
31442+ *a->errp = security_path_unlink(a->path, d);
31443+ a->path->dentry = d;
31444+ if (unlikely(*a->errp))
31445+ return;
31446+
31447+ if (!stop_sillyrename)
31448+ dget(d);
5527c038
JR
31449+ h_inode = NULL;
31450+ if (d_is_positive(d)) {
31451+ h_inode = d_inode(d);
027c5e7a 31452+ ihold(h_inode);
5527c038 31453+ }
1facf9fc 31454+
2cbb1c4b 31455+ lockdep_off();
523b37e3 31456+ *a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
2cbb1c4b 31457+ lockdep_on();
1facf9fc 31458+ if (!*a->errp) {
31459+ struct path tmp = {
31460+ .dentry = d->d_parent,
31461+ .mnt = a->path->mnt
31462+ };
31463+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
31464+ }
31465+
31466+ if (!stop_sillyrename)
31467+ dput(d);
31468+ if (h_inode)
31469+ iput(h_inode);
31470+
31471+ AuTraceErr(*a->errp);
31472+}
31473+
31474+/*
31475+ * @dir: must be locked.
31476+ * @dentry: target dentry.
31477+ */
523b37e3
AM
31478+int vfsub_unlink(struct inode *dir, struct path *path,
31479+ struct inode **delegated_inode, int force)
1facf9fc 31480+{
31481+ int err;
31482+ struct unlink_args args = {
523b37e3
AM
31483+ .errp = &err,
31484+ .dir = dir,
31485+ .path = path,
31486+ .delegated_inode = delegated_inode
1facf9fc 31487+ };
31488+
31489+ if (!force)
31490+ call_unlink(&args);
31491+ else {
31492+ int wkq_err;
31493+
31494+ wkq_err = au_wkq_wait(call_unlink, &args);
31495+ if (unlikely(wkq_err))
31496+ err = wkq_err;
31497+ }
31498+
31499+ return err;
31500+}
7f207e10
AM
31501diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
31502--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
f2c43d5f 31503+++ linux/fs/aufs/vfsub.h 2016-12-17 12:28:17.598545045 +0100
f0c0a007 31504@@ -0,0 +1,316 @@
1facf9fc 31505+/*
8cdd5066 31506+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 31507+ *
31508+ * This program, aufs is free software; you can redistribute it and/or modify
31509+ * it under the terms of the GNU General Public License as published by
31510+ * the Free Software Foundation; either version 2 of the License, or
31511+ * (at your option) any later version.
dece6358
AM
31512+ *
31513+ * This program is distributed in the hope that it will be useful,
31514+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31515+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31516+ * GNU General Public License for more details.
31517+ *
31518+ * You should have received a copy of the GNU General Public License
523b37e3 31519+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31520+ */
31521+
31522+/*
31523+ * sub-routines for VFS
31524+ */
31525+
31526+#ifndef __AUFS_VFSUB_H__
31527+#define __AUFS_VFSUB_H__
31528+
31529+#ifdef __KERNEL__
31530+
31531+#include <linux/fs.h>
b4510431 31532+#include <linux/mount.h>
8cdd5066 31533+#include <linux/posix_acl.h>
c1595e42 31534+#include <linux/xattr.h>
7f207e10 31535+#include "debug.h"
1facf9fc 31536+
7f207e10 31537+/* copied from linux/fs/internal.h */
2cbb1c4b 31538+/* todo: BAD approach!! */
c06a8ce3 31539+extern void __mnt_drop_write(struct vfsmount *);
b912730e 31540+extern int open_check_o_direct(struct file *f);
7f207e10
AM
31541+
31542+/* ---------------------------------------------------------------------- */
1facf9fc 31543+
31544+/* lock subclass for lower inode */
31545+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
31546+/* reduce? gave up. */
31547+enum {
c1595e42 31548+ AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
1facf9fc 31549+ AuLsc_I_PARENT, /* lower inode, parent first */
31550+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 31551+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 31552+ AuLsc_I_CHILD,
31553+ AuLsc_I_CHILD2,
31554+ AuLsc_I_End
31555+};
31556+
31557+/* to debug easier, do not make them inlined functions */
31558+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
febd17d6 31559+#define IMustLock(i) AuDebugOn(!inode_is_locked(i))
1facf9fc 31560+
31561+/* ---------------------------------------------------------------------- */
31562+
7f207e10
AM
31563+static inline void vfsub_drop_nlink(struct inode *inode)
31564+{
31565+ AuDebugOn(!inode->i_nlink);
31566+ drop_nlink(inode);
31567+}
31568+
027c5e7a
AM
31569+static inline void vfsub_dead_dir(struct inode *inode)
31570+{
31571+ AuDebugOn(!S_ISDIR(inode->i_mode));
31572+ inode->i_flags |= S_DEAD;
31573+ clear_nlink(inode);
31574+}
31575+
392086de
AM
31576+static inline int vfsub_native_ro(struct inode *inode)
31577+{
31578+ return (inode->i_sb->s_flags & MS_RDONLY)
31579+ || IS_RDONLY(inode)
31580+ /* || IS_APPEND(inode) */
31581+ || IS_IMMUTABLE(inode);
31582+}
31583+
8cdd5066
JR
31584+#ifdef CONFIG_AUFS_BR_FUSE
31585+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
31586+#else
31587+AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
31588+#endif
31589+
7f207e10
AM
31590+/* ---------------------------------------------------------------------- */
31591+
31592+int vfsub_update_h_iattr(struct path *h_path, int *did);
31593+struct file *vfsub_dentry_open(struct path *path, int flags);
31594+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
b912730e
AM
31595+struct vfsub_aopen_args {
31596+ struct file *file;
31597+ unsigned int open_flag;
31598+ umode_t create_mode;
31599+ int *opened;
31600+};
31601+struct au_branch;
31602+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
31603+ struct vfsub_aopen_args *args, struct au_branch *br);
1facf9fc 31604+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
b4510431 31605+
febd17d6
JR
31606+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
31607+ struct dentry *parent, int len);
1facf9fc 31608+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
31609+ int len);
b4510431
AM
31610+
31611+struct vfsub_lkup_one_args {
31612+ struct dentry **errp;
31613+ struct qstr *name;
31614+ struct dentry *parent;
31615+};
31616+
31617+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
31618+ struct dentry *parent)
31619+{
31620+ return vfsub_lookup_one_len(name->name, parent, name->len);
31621+}
31622+
31623+void vfsub_call_lkup_one(void *args);
31624+
31625+/* ---------------------------------------------------------------------- */
31626+
/* mnt_want_write() with lockdep disabled around the call */
static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
{
	int rc;

	lockdep_off();
	rc = mnt_want_write(mnt);
	lockdep_on();

	return rc;
}
31636+
/* mnt_drop_write() with lockdep disabled around the call */
static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
{
	lockdep_off();
	mnt_drop_write(mnt);
	lockdep_on();
}
1facf9fc 31643+
#if 0 /* reserved */
/* mnt_drop_write_file() with lockdep disabled around the call */
static inline void vfsub_mnt_drop_write_file(struct file *file)
{
	lockdep_off();
	mnt_drop_write_file(file);
	lockdep_on();
}
#endif
c06a8ce3 31652+
1facf9fc 31653+/* ---------------------------------------------------------------------- */
31654+
31655+struct au_hinode;
31656+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
31657+ struct dentry *d2, struct au_hinode *hdir2);
31658+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
31659+ struct dentry *d2, struct au_hinode *hdir2);
31660+
537831f9
AM
31661+int vfsub_create(struct inode *dir, struct path *path, int mode,
31662+ bool want_excl);
1facf9fc 31663+int vfsub_symlink(struct inode *dir, struct path *path,
31664+ const char *symname);
31665+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
31666+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
523b37e3 31667+ struct path *path, struct inode **delegated_inode);
1facf9fc 31668+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
523b37e3 31669+ struct inode *hdir, struct path *path,
f2c43d5f 31670+ struct inode **delegated_inode, unsigned int flags);
1facf9fc 31671+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
31672+int vfsub_rmdir(struct inode *dir, struct path *path);
31673+
31674+/* ---------------------------------------------------------------------- */
31675+
31676+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
31677+ loff_t *ppos);
31678+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31679+ loff_t *ppos);
31680+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31681+ loff_t *ppos);
31682+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
31683+ loff_t *ppos);
4a4d8108 31684+int vfsub_flush(struct file *file, fl_owner_t id);
392086de
AM
31685+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
31686+
c06a8ce3
AM
31687+static inline loff_t vfsub_f_size_read(struct file *file)
31688+{
31689+ return i_size_read(file_inode(file));
31690+}
31691+
4a4d8108
AM
31692+static inline unsigned int vfsub_file_flags(struct file *file)
31693+{
31694+ unsigned int flags;
31695+
31696+ spin_lock(&file->f_lock);
31697+ flags = file->f_flags;
31698+ spin_unlock(&file->f_lock);
31699+
31700+ return flags;
31701+}
1308ab2a 31702+
f0c0a007
AM
31703+static inline int vfsub_file_execed(struct file *file)
31704+{
31705+ /* todo: direct access f_flags */
31706+ return !!(vfsub_file_flags(file) & __FMODE_EXEC);
31707+}
31708+
#if 0 /* reserved */
/* file_accessed() followed by vfsub_update_h_iattr() on the file's path */
static inline void vfsub_file_accessed(struct file *h_file)
{
	struct path *h_path = &h_file->f_path;

	file_accessed(h_file);
	vfsub_update_h_iattr(h_path, /*did*/NULL); /*ignore*/
}
#endif
1facf9fc 31716+
#if 0 /* reserved */
/* touch_atime() followed by vfsub_update_h_iattr() on the same lower path */
static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
				     struct dentry *h_dentry)
{
	struct path h_path;

	h_path.dentry = h_dentry;
	h_path.mnt = h_mnt;
	touch_atime(&h_path);
	vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
}
#endif
1facf9fc 31729+
0c3ec466
AM
/*
 * Thin wrapper around update_time().
 * no vfsub_update_h_iattr() since we don't have struct path
 */
static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
				    int flags)
{
	int rc;

	rc = update_time(h_inode, ts, flags);
	return rc;
}
31736+
8cdd5066
JR
31737+#ifdef CONFIG_FS_POSIX_ACL
31738+static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
31739+{
31740+ int err;
31741+
31742+ err = posix_acl_chmod(h_inode, h_mode);
31743+ if (err == -EOPNOTSUPP)
31744+ err = 0;
31745+ return err;
31746+}
31747+#else
31748+AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
31749+#endif
31750+
4a4d8108
AM
31751+long vfsub_splice_to(struct file *in, loff_t *ppos,
31752+ struct pipe_inode_info *pipe, size_t len,
31753+ unsigned int flags);
31754+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31755+ loff_t *ppos, size_t len, unsigned int flags);
c06a8ce3
AM
31756+
31757+static inline long vfsub_truncate(struct path *path, loff_t length)
31758+{
31759+ long err;
076b876e 31760+
c06a8ce3
AM
31761+ lockdep_off();
31762+ err = vfs_truncate(path, length);
31763+ lockdep_on();
31764+ return err;
31765+}
31766+
4a4d8108
AM
31767+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31768+ struct file *h_file);
53392da6 31769+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 31770+
1facf9fc 31771+/* ---------------------------------------------------------------------- */
31772+
31773+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
31774+{
31775+ loff_t err;
31776+
2cbb1c4b 31777+ lockdep_off();
1facf9fc 31778+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 31779+ lockdep_on();
1facf9fc 31780+ return err;
31781+}
31782+
31783+/* ---------------------------------------------------------------------- */
31784+
4a4d8108
AM
31785+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
31786+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
523b37e3
AM
31787+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31788+ struct inode **delegated_inode);
31789+int vfsub_notify_change(struct path *path, struct iattr *ia,
31790+ struct inode **delegated_inode);
31791+int vfsub_unlink(struct inode *dir, struct path *path,
31792+ struct inode **delegated_inode, int force);
4a4d8108 31793+
c1595e42
JR
31794+/* ---------------------------------------------------------------------- */
31795+
/* vfs_setxattr() with lockdep disabled around the call */
static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	int rc;

	lockdep_off();
	rc = vfs_setxattr(dentry, name, value, size, flags);
	lockdep_on();

	return rc;
}
31807+
/* vfs_removexattr() with lockdep disabled around the call */
static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
{
	int rc;

	lockdep_off();
	rc = vfs_removexattr(dentry, name);
	lockdep_on();

	return rc;
}
31818+
1facf9fc 31819+#endif /* __KERNEL__ */
31820+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
31821diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
31822--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
31823+++ linux/fs/aufs/wbr_policy.c 2016-12-17 12:28:17.598545045 +0100
31824@@ -0,0 +1,830 @@
1facf9fc 31825+/*
8cdd5066 31826+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 31827+ *
31828+ * This program, aufs is free software; you can redistribute it and/or modify
31829+ * it under the terms of the GNU General Public License as published by
31830+ * the Free Software Foundation; either version 2 of the License, or
31831+ * (at your option) any later version.
dece6358
AM
31832+ *
31833+ * This program is distributed in the hope that it will be useful,
31834+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31835+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31836+ * GNU General Public License for more details.
31837+ *
31838+ * You should have received a copy of the GNU General Public License
523b37e3 31839+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31840+ */
31841+
31842+/*
31843+ * policies for selecting one among multiple writable branches
31844+ */
31845+
31846+#include <linux/statfs.h>
31847+#include "aufs.h"
31848+
31849+/* subset of cpup_attr() */
31850+static noinline_for_stack
31851+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
31852+{
31853+ int err, sbits;
31854+ struct iattr ia;
31855+ struct inode *h_isrc;
31856+
5527c038 31857+ h_isrc = d_inode(h_src);
1facf9fc 31858+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
31859+ ia.ia_mode = h_isrc->i_mode;
31860+ ia.ia_uid = h_isrc->i_uid;
31861+ ia.ia_gid = h_isrc->i_gid;
31862+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
5527c038 31863+ au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
523b37e3
AM
31864+ /* no delegation since it is just created */
31865+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31866+
31867+ /* is this nfs only? */
31868+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
31869+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
31870+ ia.ia_mode = h_isrc->i_mode;
523b37e3 31871+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31872+ }
31873+
31874+ return err;
31875+}
31876+
/* flag bits for the copy-down helpers; one bit per flag */
#define AuCpdown_PARENT_OPQ	(1 << 0)
#define AuCpdown_WHED		(1 << 1)
#define AuCpdown_MADE_DIR	(1 << 2)
#define AuCpdown_DIROPQ		(1 << 3)

/* test/set/clear one AuCpdown_<name> bit in @flags */
#define au_ftest_cpdown(flags, name)	((flags) & AuCpdown_##name)
#define au_fset_cpdown(flags, name) \
	do { \
		(flags) |= AuCpdown_##name; \
	} while (0)
#define au_fclr_cpdown(flags, name) \
	do { \
		(flags) &= ~AuCpdown_##name; \
	} while (0)
1facf9fc 31886+
1facf9fc 31887+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
c2b27bf2 31888+ unsigned int *flags)
1facf9fc 31889+{
31890+ int err;
31891+ struct dentry *opq_dentry;
31892+
31893+ opq_dentry = au_diropq_create(dentry, bdst);
31894+ err = PTR_ERR(opq_dentry);
31895+ if (IS_ERR(opq_dentry))
31896+ goto out;
31897+ dput(opq_dentry);
c2b27bf2 31898+ au_fset_cpdown(*flags, DIROPQ);
1facf9fc 31899+
4f0767ce 31900+out:
1facf9fc 31901+ return err;
31902+}
31903+
31904+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
31905+ struct inode *dir, aufs_bindex_t bdst)
31906+{
31907+ int err;
31908+ struct path h_path;
31909+ struct au_branch *br;
31910+
31911+ br = au_sbr(dentry->d_sb, bdst);
31912+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
31913+ err = PTR_ERR(h_path.dentry);
31914+ if (IS_ERR(h_path.dentry))
31915+ goto out;
31916+
31917+ err = 0;
5527c038 31918+ if (d_is_positive(h_path.dentry)) {
86dc4139 31919+ h_path.mnt = au_br_mnt(br);
1facf9fc 31920+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
31921+ dentry);
31922+ }
31923+ dput(h_path.dentry);
31924+
4f0767ce 31925+out:
1facf9fc 31926+ return err;
31927+}
31928+
31929+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 31930+ struct au_pin *pin,
1facf9fc 31931+ struct dentry *h_parent, void *arg)
31932+{
31933+ int err, rerr;
5afbbe0d 31934+ aufs_bindex_t bopq, btop;
1facf9fc 31935+ struct path h_path;
31936+ struct dentry *parent;
31937+ struct inode *h_dir, *h_inode, *inode, *dir;
c2b27bf2 31938+ unsigned int *flags = arg;
1facf9fc 31939+
5afbbe0d 31940+ btop = au_dbtop(dentry);
1facf9fc 31941+ /* dentry is di-locked */
31942+ parent = dget_parent(dentry);
5527c038
JR
31943+ dir = d_inode(parent);
31944+ h_dir = d_inode(h_parent);
1facf9fc 31945+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
31946+ IMustLock(h_dir);
31947+
86dc4139 31948+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
1facf9fc 31949+ if (unlikely(err < 0))
31950+ goto out;
31951+ h_path.dentry = au_h_dptr(dentry, bdst);
31952+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
31953+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
31954+ S_IRWXU | S_IRUGO | S_IXUGO);
31955+ if (unlikely(err))
31956+ goto out_put;
c2b27bf2 31957+ au_fset_cpdown(*flags, MADE_DIR);
1facf9fc 31958+
1facf9fc 31959+ bopq = au_dbdiropq(dentry);
c2b27bf2
AM
31960+ au_fclr_cpdown(*flags, WHED);
31961+ au_fclr_cpdown(*flags, DIROPQ);
1facf9fc 31962+ if (au_dbwh(dentry) == bdst)
c2b27bf2
AM
31963+ au_fset_cpdown(*flags, WHED);
31964+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
31965+ au_fset_cpdown(*flags, PARENT_OPQ);
5527c038 31966+ h_inode = d_inode(h_path.dentry);
febd17d6 31967+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
c2b27bf2
AM
31968+ if (au_ftest_cpdown(*flags, WHED)) {
31969+ err = au_cpdown_dir_opq(dentry, bdst, flags);
1facf9fc 31970+ if (unlikely(err)) {
febd17d6 31971+ inode_unlock(h_inode);
1facf9fc 31972+ goto out_dir;
31973+ }
31974+ }
31975+
5afbbe0d 31976+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop));
febd17d6 31977+ inode_unlock(h_inode);
1facf9fc 31978+ if (unlikely(err))
31979+ goto out_opq;
31980+
c2b27bf2 31981+ if (au_ftest_cpdown(*flags, WHED)) {
1facf9fc 31982+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
31983+ if (unlikely(err))
31984+ goto out_opq;
31985+ }
31986+
5527c038 31987+ inode = d_inode(dentry);
5afbbe0d
AM
31988+ if (au_ibbot(inode) < bdst)
31989+ au_set_ibbot(inode, bdst);
1facf9fc 31990+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
31991+ au_hi_flags(inode, /*isdir*/1));
076b876e 31992+ au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
1facf9fc 31993+ goto out; /* success */
31994+
31995+ /* revert */
4f0767ce 31996+out_opq:
c2b27bf2 31997+ if (au_ftest_cpdown(*flags, DIROPQ)) {
febd17d6 31998+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
1facf9fc 31999+ rerr = au_diropq_remove(dentry, bdst);
febd17d6 32000+ inode_unlock(h_inode);
1facf9fc 32001+ if (unlikely(rerr)) {
523b37e3
AM
32002+ AuIOErr("failed removing diropq for %pd b%d (%d)\n",
32003+ dentry, bdst, rerr);
1facf9fc 32004+ err = -EIO;
32005+ goto out;
32006+ }
32007+ }
4f0767ce 32008+out_dir:
c2b27bf2 32009+ if (au_ftest_cpdown(*flags, MADE_DIR)) {
1facf9fc 32010+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
32011+ if (unlikely(rerr)) {
523b37e3
AM
32012+ AuIOErr("failed removing %pd b%d (%d)\n",
32013+ dentry, bdst, rerr);
1facf9fc 32014+ err = -EIO;
32015+ }
32016+ }
4f0767ce 32017+out_put:
1facf9fc 32018+ au_set_h_dptr(dentry, bdst, NULL);
5afbbe0d
AM
32019+ if (au_dbbot(dentry) == bdst)
32020+ au_update_dbbot(dentry);
4f0767ce 32021+out:
1facf9fc 32022+ dput(parent);
32023+ return err;
32024+}
32025+
32026+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
32027+{
32028+ int err;
c2b27bf2 32029+ unsigned int flags;
1facf9fc 32030+
c2b27bf2
AM
32031+ flags = 0;
32032+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
1facf9fc 32033+
32034+ return err;
32035+}
32036+
32037+/* ---------------------------------------------------------------------- */
32038+
32039+/* policies for create */
32040+
c2b27bf2 32041+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
4a4d8108
AM
32042+{
32043+ int err, i, j, ndentry;
32044+ aufs_bindex_t bopq;
32045+ struct au_dcsub_pages dpages;
32046+ struct au_dpage *dpage;
32047+ struct dentry **dentries, *parent, *d;
32048+
32049+ err = au_dpages_init(&dpages, GFP_NOFS);
32050+ if (unlikely(err))
32051+ goto out;
32052+ parent = dget_parent(dentry);
027c5e7a 32053+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
32054+ if (unlikely(err))
32055+ goto out_free;
32056+
32057+ err = bindex;
32058+ for (i = 0; i < dpages.ndpage; i++) {
32059+ dpage = dpages.dpages + i;
32060+ dentries = dpage->dentries;
32061+ ndentry = dpage->ndentry;
32062+ for (j = 0; j < ndentry; j++) {
32063+ d = dentries[j];
32064+ di_read_lock_parent2(d, !AuLock_IR);
32065+ bopq = au_dbdiropq(d);
32066+ di_read_unlock(d, !AuLock_IR);
32067+ if (bopq >= 0 && bopq < err)
32068+ err = bopq;
32069+ }
32070+ }
32071+
32072+out_free:
32073+ dput(parent);
32074+ au_dpages_free(&dpages);
32075+out:
32076+ return err;
32077+}
32078+
1facf9fc 32079+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
32080+{
32081+ for (; bindex >= 0; bindex--)
32082+ if (!au_br_rdonly(au_sbr(sb, bindex)))
32083+ return bindex;
32084+ return -EROFS;
32085+}
32086+
32087+/* top down parent */
392086de
AM
32088+static int au_wbr_create_tdp(struct dentry *dentry,
32089+ unsigned int flags __maybe_unused)
1facf9fc 32090+{
32091+ int err;
5afbbe0d 32092+ aufs_bindex_t btop, bindex;
1facf9fc 32093+ struct super_block *sb;
32094+ struct dentry *parent, *h_parent;
32095+
32096+ sb = dentry->d_sb;
5afbbe0d
AM
32097+ btop = au_dbtop(dentry);
32098+ err = btop;
32099+ if (!au_br_rdonly(au_sbr(sb, btop)))
1facf9fc 32100+ goto out;
32101+
32102+ err = -EROFS;
32103+ parent = dget_parent(dentry);
5afbbe0d 32104+ for (bindex = au_dbtop(parent); bindex < btop; bindex++) {
1facf9fc 32105+ h_parent = au_h_dptr(parent, bindex);
5527c038 32106+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 32107+ continue;
32108+
32109+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
32110+ err = bindex;
32111+ break;
32112+ }
32113+ }
32114+ dput(parent);
32115+
32116+ /* bottom up here */
4a4d8108 32117+ if (unlikely(err < 0)) {
5afbbe0d 32118+ err = au_wbr_bu(sb, btop - 1);
4a4d8108
AM
32119+ if (err >= 0)
32120+ err = au_wbr_nonopq(dentry, err);
32121+ }
1facf9fc 32122+
4f0767ce 32123+out:
1facf9fc 32124+ AuDbg("b%d\n", err);
32125+ return err;
32126+}
32127+
32128+/* ---------------------------------------------------------------------- */
32129+
32130+/* an exception for the policies other than tdp */
32131+static int au_wbr_create_exp(struct dentry *dentry)
32132+{
32133+ int err;
32134+ aufs_bindex_t bwh, bdiropq;
32135+ struct dentry *parent;
32136+
32137+ err = -1;
32138+ bwh = au_dbwh(dentry);
32139+ parent = dget_parent(dentry);
32140+ bdiropq = au_dbdiropq(parent);
32141+ if (bwh >= 0) {
32142+ if (bdiropq >= 0)
32143+ err = min(bdiropq, bwh);
32144+ else
32145+ err = bwh;
32146+ AuDbg("%d\n", err);
32147+ } else if (bdiropq >= 0) {
32148+ err = bdiropq;
32149+ AuDbg("%d\n", err);
32150+ }
32151+ dput(parent);
32152+
4a4d8108
AM
32153+ if (err >= 0)
32154+ err = au_wbr_nonopq(dentry, err);
32155+
1facf9fc 32156+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
32157+ err = -1;
32158+
32159+ AuDbg("%d\n", err);
32160+ return err;
32161+}
32162+
32163+/* ---------------------------------------------------------------------- */
32164+
32165+/* round robin */
32166+static int au_wbr_create_init_rr(struct super_block *sb)
32167+{
32168+ int err;
32169+
5afbbe0d 32170+ err = au_wbr_bu(sb, au_sbbot(sb));
1facf9fc 32171+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 32172+ /* smp_mb(); */
1facf9fc 32173+
32174+ AuDbg("b%d\n", err);
32175+ return err;
32176+}
32177+
392086de 32178+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
1facf9fc 32179+{
32180+ int err, nbr;
32181+ unsigned int u;
5afbbe0d 32182+ aufs_bindex_t bindex, bbot;
1facf9fc 32183+ struct super_block *sb;
32184+ atomic_t *next;
32185+
32186+ err = au_wbr_create_exp(dentry);
32187+ if (err >= 0)
32188+ goto out;
32189+
32190+ sb = dentry->d_sb;
32191+ next = &au_sbi(sb)->si_wbr_rr_next;
5afbbe0d
AM
32192+ bbot = au_sbbot(sb);
32193+ nbr = bbot + 1;
32194+ for (bindex = 0; bindex <= bbot; bindex++) {
392086de 32195+ if (!au_ftest_wbr(flags, DIR)) {
1facf9fc 32196+ err = atomic_dec_return(next) + 1;
32197+ /* modulo for 0 is meaningless */
32198+ if (unlikely(!err))
32199+ err = atomic_dec_return(next) + 1;
32200+ } else
32201+ err = atomic_read(next);
32202+ AuDbg("%d\n", err);
32203+ u = err;
32204+ err = u % nbr;
32205+ AuDbg("%d\n", err);
32206+ if (!au_br_rdonly(au_sbr(sb, err)))
32207+ break;
32208+ err = -EROFS;
32209+ }
32210+
4a4d8108
AM
32211+ if (err >= 0)
32212+ err = au_wbr_nonopq(dentry, err);
32213+
4f0767ce 32214+out:
1facf9fc 32215+ AuDbg("%d\n", err);
32216+ return err;
32217+}
32218+
32219+/* ---------------------------------------------------------------------- */
32220+
32221+/* most free space */
392086de 32222+static void au_mfs(struct dentry *dentry, struct dentry *parent)
1facf9fc 32223+{
32224+ struct super_block *sb;
32225+ struct au_branch *br;
32226+ struct au_wbr_mfs *mfs;
392086de 32227+ struct dentry *h_parent;
5afbbe0d 32228+ aufs_bindex_t bindex, bbot;
1facf9fc 32229+ int err;
32230+ unsigned long long b, bavail;
7f207e10 32231+ struct path h_path;
1facf9fc 32232+ /* reduce the stack usage */
32233+ struct kstatfs *st;
32234+
32235+ st = kmalloc(sizeof(*st), GFP_NOFS);
32236+ if (unlikely(!st)) {
32237+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
32238+ return;
32239+ }
32240+
32241+ bavail = 0;
32242+ sb = dentry->d_sb;
32243+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 32244+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 32245+ mfs->mfs_bindex = -EROFS;
32246+ mfs->mfsrr_bytes = 0;
392086de
AM
32247+ if (!parent) {
32248+ bindex = 0;
5afbbe0d 32249+ bbot = au_sbbot(sb);
392086de 32250+ } else {
5afbbe0d
AM
32251+ bindex = au_dbtop(parent);
32252+ bbot = au_dbtaildir(parent);
392086de
AM
32253+ }
32254+
5afbbe0d 32255+ for (; bindex <= bbot; bindex++) {
392086de
AM
32256+ if (parent) {
32257+ h_parent = au_h_dptr(parent, bindex);
5527c038 32258+ if (!h_parent || d_is_negative(h_parent))
392086de
AM
32259+ continue;
32260+ }
1facf9fc 32261+ br = au_sbr(sb, bindex);
32262+ if (au_br_rdonly(br))
32263+ continue;
32264+
32265+ /* sb->s_root for NFS is unreliable */
86dc4139 32266+ h_path.mnt = au_br_mnt(br);
7f207e10
AM
32267+ h_path.dentry = h_path.mnt->mnt_root;
32268+ err = vfs_statfs(&h_path, st);
1facf9fc 32269+ if (unlikely(err)) {
32270+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
32271+ continue;
32272+ }
32273+
32274+ /* when the available size is equal, select the lower one */
32275+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
32276+ || sizeof(b) < sizeof(st->f_bsize));
32277+ b = st->f_bavail * st->f_bsize;
32278+ br->br_wbr->wbr_bytes = b;
32279+ if (b >= bavail) {
32280+ bavail = b;
32281+ mfs->mfs_bindex = bindex;
32282+ mfs->mfs_jiffy = jiffies;
32283+ }
32284+ }
32285+
32286+ mfs->mfsrr_bytes = bavail;
32287+ AuDbg("b%d\n", mfs->mfs_bindex);
f0c0a007 32288+ au_delayed_kfree(st);
1facf9fc 32289+}
32290+
392086de 32291+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
1facf9fc 32292+{
32293+ int err;
392086de 32294+ struct dentry *parent;
1facf9fc 32295+ struct super_block *sb;
32296+ struct au_wbr_mfs *mfs;
32297+
32298+ err = au_wbr_create_exp(dentry);
32299+ if (err >= 0)
32300+ goto out;
32301+
32302+ sb = dentry->d_sb;
392086de
AM
32303+ parent = NULL;
32304+ if (au_ftest_wbr(flags, PARENT))
32305+ parent = dget_parent(dentry);
1facf9fc 32306+ mfs = &au_sbi(sb)->si_wbr_mfs;
32307+ mutex_lock(&mfs->mfs_lock);
32308+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
32309+ || mfs->mfs_bindex < 0
32310+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
392086de 32311+ au_mfs(dentry, parent);
1facf9fc 32312+ mutex_unlock(&mfs->mfs_lock);
32313+ err = mfs->mfs_bindex;
392086de 32314+ dput(parent);
1facf9fc 32315+
4a4d8108
AM
32316+ if (err >= 0)
32317+ err = au_wbr_nonopq(dentry, err);
32318+
4f0767ce 32319+out:
1facf9fc 32320+ AuDbg("b%d\n", err);
32321+ return err;
32322+}
32323+
32324+static int au_wbr_create_init_mfs(struct super_block *sb)
32325+{
32326+ struct au_wbr_mfs *mfs;
32327+
32328+ mfs = &au_sbi(sb)->si_wbr_mfs;
32329+ mutex_init(&mfs->mfs_lock);
32330+ mfs->mfs_jiffy = 0;
32331+ mfs->mfs_bindex = -EROFS;
32332+
32333+ return 0;
32334+}
32335+
32336+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
32337+{
32338+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
32339+ return 0;
32340+}
32341+
32342+/* ---------------------------------------------------------------------- */
32343+
f2c43d5f
AM
32344+/* top down regardless of parent, and then mfs */
32345+static int au_wbr_create_tdmfs(struct dentry *dentry,
32346+ unsigned int flags __maybe_unused)
32347+{
32348+ int err;
32349+ aufs_bindex_t bwh, btail, bindex, bfound, bmfs;
32350+ unsigned long long watermark;
32351+ struct super_block *sb;
32352+ struct au_wbr_mfs *mfs;
32353+ struct au_branch *br;
32354+ struct dentry *parent;
32355+
32356+ sb = dentry->d_sb;
32357+ mfs = &au_sbi(sb)->si_wbr_mfs;
32358+ mutex_lock(&mfs->mfs_lock);
32359+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
32360+ || mfs->mfs_bindex < 0)
32361+ au_mfs(dentry, /*parent*/NULL);
32362+ watermark = mfs->mfsrr_watermark;
32363+ bmfs = mfs->mfs_bindex;
32364+ mutex_unlock(&mfs->mfs_lock);
32365+
32366+ /* another style of au_wbr_create_exp() */
32367+ bwh = au_dbwh(dentry);
32368+ parent = dget_parent(dentry);
32369+ btail = au_dbtaildir(parent);
32370+ if (bwh >= 0 && bwh < btail)
32371+ btail = bwh;
32372+
32373+ err = au_wbr_nonopq(dentry, btail);
32374+ if (unlikely(err < 0))
32375+ goto out;
32376+ btail = err;
32377+ bfound = -1;
32378+ for (bindex = 0; bindex <= btail; bindex++) {
32379+ br = au_sbr(sb, bindex);
32380+ if (au_br_rdonly(br))
32381+ continue;
32382+ if (br->br_wbr->wbr_bytes > watermark) {
32383+ bfound = bindex;
32384+ break;
32385+ }
32386+ }
32387+ err = bfound;
32388+ if (err < 0)
32389+ err = bmfs;
32390+
32391+out:
32392+ dput(parent);
32393+ AuDbg("b%d\n", err);
32394+ return err;
32395+}
32396+
32397+/* ---------------------------------------------------------------------- */
32398+
1facf9fc 32399+/* most free space and then round robin */
392086de 32400+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
1facf9fc 32401+{
32402+ int err;
32403+ struct au_wbr_mfs *mfs;
32404+
392086de 32405+ err = au_wbr_create_mfs(dentry, flags);
1facf9fc 32406+ if (err >= 0) {
32407+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 32408+ mutex_lock(&mfs->mfs_lock);
1facf9fc 32409+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
392086de 32410+ err = au_wbr_create_rr(dentry, flags);
dece6358 32411+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 32412+ }
32413+
32414+ AuDbg("b%d\n", err);
32415+ return err;
32416+}
32417+
32418+static int au_wbr_create_init_mfsrr(struct super_block *sb)
32419+{
32420+ int err;
32421+
32422+ au_wbr_create_init_mfs(sb); /* ignore */
32423+ err = au_wbr_create_init_rr(sb);
32424+
32425+ return err;
32426+}
32427+
32428+/* ---------------------------------------------------------------------- */
32429+
32430+/* top down parent and most free space */
392086de 32431+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
1facf9fc 32432+{
32433+ int err, e2;
32434+ unsigned long long b;
5afbbe0d 32435+ aufs_bindex_t bindex, btop, bbot;
1facf9fc 32436+ struct super_block *sb;
32437+ struct dentry *parent, *h_parent;
32438+ struct au_branch *br;
32439+
392086de 32440+ err = au_wbr_create_tdp(dentry, flags);
1facf9fc 32441+ if (unlikely(err < 0))
32442+ goto out;
32443+ parent = dget_parent(dentry);
5afbbe0d
AM
32444+ btop = au_dbtop(parent);
32445+ bbot = au_dbtaildir(parent);
32446+ if (btop == bbot)
1facf9fc 32447+ goto out_parent; /* success */
32448+
392086de 32449+ e2 = au_wbr_create_mfs(dentry, flags);
1facf9fc 32450+ if (e2 < 0)
32451+ goto out_parent; /* success */
32452+
32453+ /* when the available size is equal, select upper one */
32454+ sb = dentry->d_sb;
32455+ br = au_sbr(sb, err);
32456+ b = br->br_wbr->wbr_bytes;
32457+ AuDbg("b%d, %llu\n", err, b);
32458+
5afbbe0d 32459+ for (bindex = btop; bindex <= bbot; bindex++) {
1facf9fc 32460+ h_parent = au_h_dptr(parent, bindex);
5527c038 32461+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 32462+ continue;
32463+
32464+ br = au_sbr(sb, bindex);
32465+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
32466+ b = br->br_wbr->wbr_bytes;
32467+ err = bindex;
32468+ AuDbg("b%d, %llu\n", err, b);
32469+ }
32470+ }
32471+
4a4d8108
AM
32472+ if (err >= 0)
32473+ err = au_wbr_nonopq(dentry, err);
32474+
4f0767ce 32475+out_parent:
1facf9fc 32476+ dput(parent);
4f0767ce 32477+out:
1facf9fc 32478+ AuDbg("b%d\n", err);
32479+ return err;
32480+}
32481+
32482+/* ---------------------------------------------------------------------- */
32483+
392086de
AM
32484+/*
32485+ * - top down parent
32486+ * - most free space with parent
32487+ * - most free space round-robin regardless of parent
32488+ */
32489+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
32490+{
32491+ int err;
32492+ unsigned long long watermark;
32493+ struct super_block *sb;
32494+ struct au_branch *br;
32495+ struct au_wbr_mfs *mfs;
32496+
32497+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
32498+ if (unlikely(err < 0))
32499+ goto out;
32500+
32501+ sb = dentry->d_sb;
32502+ br = au_sbr(sb, err);
32503+ mfs = &au_sbi(sb)->si_wbr_mfs;
32504+ mutex_lock(&mfs->mfs_lock);
32505+ watermark = mfs->mfsrr_watermark;
32506+ mutex_unlock(&mfs->mfs_lock);
32507+ if (br->br_wbr->wbr_bytes < watermark)
32508+ /* regardless of the parent dir */
32509+ err = au_wbr_create_mfsrr(dentry, flags);
32510+
32511+out:
32512+ AuDbg("b%d\n", err);
32513+ return err;
32514+}
32515+
32516+/* ---------------------------------------------------------------------- */
32517+
1facf9fc 32518+/* policies for copyup */
32519+
32520+/* top down parent */
32521+static int au_wbr_copyup_tdp(struct dentry *dentry)
32522+{
392086de 32523+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
1facf9fc 32524+}
32525+
32526+/* bottom up parent */
32527+static int au_wbr_copyup_bup(struct dentry *dentry)
32528+{
32529+ int err;
5afbbe0d 32530+ aufs_bindex_t bindex, btop;
1facf9fc 32531+ struct dentry *parent, *h_parent;
32532+ struct super_block *sb;
32533+
32534+ err = -EROFS;
32535+ sb = dentry->d_sb;
32536+ parent = dget_parent(dentry);
5afbbe0d
AM
32537+ btop = au_dbtop(parent);
32538+ for (bindex = au_dbtop(dentry); bindex >= btop; bindex--) {
1facf9fc 32539+ h_parent = au_h_dptr(parent, bindex);
5527c038 32540+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 32541+ continue;
32542+
32543+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
32544+ err = bindex;
32545+ break;
32546+ }
32547+ }
32548+ dput(parent);
32549+
32550+ /* bottom up here */
32551+ if (unlikely(err < 0))
5afbbe0d 32552+ err = au_wbr_bu(sb, btop - 1);
1facf9fc 32553+
32554+ AuDbg("b%d\n", err);
32555+ return err;
32556+}
32557+
32558+/* bottom up */
5afbbe0d 32559+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop)
1facf9fc 32560+{
32561+ int err;
32562+
5afbbe0d 32563+ err = au_wbr_bu(dentry->d_sb, btop);
4a4d8108 32564+ AuDbg("b%d\n", err);
5afbbe0d 32565+ if (err > btop)
4a4d8108 32566+ err = au_wbr_nonopq(dentry, err);
1facf9fc 32567+
32568+ AuDbg("b%d\n", err);
32569+ return err;
32570+}
32571+
076b876e
AM
32572+static int au_wbr_copyup_bu(struct dentry *dentry)
32573+{
32574+ int err;
5afbbe0d 32575+ aufs_bindex_t btop;
076b876e 32576+
5afbbe0d
AM
32577+ btop = au_dbtop(dentry);
32578+ err = au_wbr_do_copyup_bu(dentry, btop);
076b876e
AM
32579+ return err;
32580+}
32581+
1facf9fc 32582+/* ---------------------------------------------------------------------- */
32583+
32584+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
32585+ [AuWbrCopyup_TDP] = {
32586+ .copyup = au_wbr_copyup_tdp
32587+ },
32588+ [AuWbrCopyup_BUP] = {
32589+ .copyup = au_wbr_copyup_bup
32590+ },
32591+ [AuWbrCopyup_BU] = {
32592+ .copyup = au_wbr_copyup_bu
32593+ }
32594+};
32595+
32596+struct au_wbr_create_operations au_wbr_create_ops[] = {
32597+ [AuWbrCreate_TDP] = {
32598+ .create = au_wbr_create_tdp
32599+ },
32600+ [AuWbrCreate_RR] = {
32601+ .create = au_wbr_create_rr,
32602+ .init = au_wbr_create_init_rr
32603+ },
32604+ [AuWbrCreate_MFS] = {
32605+ .create = au_wbr_create_mfs,
32606+ .init = au_wbr_create_init_mfs,
32607+ .fin = au_wbr_create_fin_mfs
32608+ },
32609+ [AuWbrCreate_MFSV] = {
32610+ .create = au_wbr_create_mfs,
32611+ .init = au_wbr_create_init_mfs,
32612+ .fin = au_wbr_create_fin_mfs
32613+ },
32614+ [AuWbrCreate_MFSRR] = {
32615+ .create = au_wbr_create_mfsrr,
32616+ .init = au_wbr_create_init_mfsrr,
32617+ .fin = au_wbr_create_fin_mfs
32618+ },
32619+ [AuWbrCreate_MFSRRV] = {
32620+ .create = au_wbr_create_mfsrr,
32621+ .init = au_wbr_create_init_mfsrr,
32622+ .fin = au_wbr_create_fin_mfs
32623+ },
f2c43d5f
AM
32624+ [AuWbrCreate_TDMFS] = {
32625+ .create = au_wbr_create_tdmfs,
32626+ .init = au_wbr_create_init_mfs,
32627+ .fin = au_wbr_create_fin_mfs
32628+ },
32629+ [AuWbrCreate_TDMFSV] = {
32630+ .create = au_wbr_create_tdmfs,
32631+ .init = au_wbr_create_init_mfs,
32632+ .fin = au_wbr_create_fin_mfs
32633+ },
1facf9fc 32634+ [AuWbrCreate_PMFS] = {
32635+ .create = au_wbr_create_pmfs,
32636+ .init = au_wbr_create_init_mfs,
32637+ .fin = au_wbr_create_fin_mfs
32638+ },
32639+ [AuWbrCreate_PMFSV] = {
32640+ .create = au_wbr_create_pmfs,
32641+ .init = au_wbr_create_init_mfs,
32642+ .fin = au_wbr_create_fin_mfs
392086de
AM
32643+ },
32644+ [AuWbrCreate_PMFSRR] = {
32645+ .create = au_wbr_create_pmfsrr,
32646+ .init = au_wbr_create_init_mfsrr,
32647+ .fin = au_wbr_create_fin_mfs
32648+ },
32649+ [AuWbrCreate_PMFSRRV] = {
32650+ .create = au_wbr_create_pmfsrr,
32651+ .init = au_wbr_create_init_mfsrr,
32652+ .fin = au_wbr_create_fin_mfs
1facf9fc 32653+ }
32654+};
7f207e10
AM
32655diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
32656--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
32657+++ linux/fs/aufs/whout.c 2016-12-17 12:28:17.598545045 +0100
32658@@ -0,0 +1,1061 @@
1facf9fc 32659+/*
8cdd5066 32660+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 32661+ *
32662+ * This program, aufs is free software; you can redistribute it and/or modify
32663+ * it under the terms of the GNU General Public License as published by
32664+ * the Free Software Foundation; either version 2 of the License, or
32665+ * (at your option) any later version.
dece6358
AM
32666+ *
32667+ * This program is distributed in the hope that it will be useful,
32668+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32669+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32670+ * GNU General Public License for more details.
32671+ *
32672+ * You should have received a copy of the GNU General Public License
523b37e3 32673+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 32674+ */
32675+
32676+/*
32677+ * whiteout for logical deletion and opaque directory
32678+ */
32679+
1facf9fc 32680+#include "aufs.h"
32681+
32682+#define WH_MASK S_IRUGO
32683+
32684+/*
32685+ * If a directory contains this file, then it is opaque. We start with the
32686+ * .wh. flag so that it is blocked by lookup.
32687+ */
0c3ec466
AM
32688+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
32689+ sizeof(AUFS_WH_DIROPQ) - 1);
1facf9fc 32690+
32691+/*
32692+ * generate whiteout name, which is NOT terminated by NUL.
32693+ * @name: original d_name.name
32694+ * @len: original d_name.len
32695+ * @wh: whiteout qstr
32696+ * returns zero on success, otherwise an error.
32697+ * on success, wh->name should be freed by kfree().
32698+ */
32699+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
32700+{
32701+ char *p;
32702+
32703+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
32704+ return -ENAMETOOLONG;
32705+
32706+ wh->len = name->len + AUFS_WH_PFX_LEN;
32707+ p = kmalloc(wh->len, GFP_NOFS);
32708+ wh->name = p;
32709+ if (p) {
32710+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32711+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
32712+ /* smp_mb(); */
32713+ return 0;
32714+ }
32715+ return -ENOMEM;
32716+}
32717+
32718+/* ---------------------------------------------------------------------- */
32719+
32720+/*
32721+ * test if the @wh_name exists under @h_parent.
32722+ * @try_sio specifies the necessity of super-io.
32723+ */
076b876e 32724+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
1facf9fc 32725+{
32726+ int err;
32727+ struct dentry *wh_dentry;
1facf9fc 32728+
1facf9fc 32729+ if (!try_sio)
b4510431 32730+ wh_dentry = vfsub_lkup_one(wh_name, h_parent);
1facf9fc 32731+ else
076b876e 32732+ wh_dentry = au_sio_lkup_one(wh_name, h_parent);
1facf9fc 32733+ err = PTR_ERR(wh_dentry);
2000de60
JR
32734+ if (IS_ERR(wh_dentry)) {
32735+ if (err == -ENAMETOOLONG)
32736+ err = 0;
1facf9fc 32737+ goto out;
2000de60 32738+ }
1facf9fc 32739+
32740+ err = 0;
5527c038 32741+ if (d_is_negative(wh_dentry))
1facf9fc 32742+ goto out_wh; /* success */
32743+
32744+ err = 1;
7e9cd9fe 32745+ if (d_is_reg(wh_dentry))
1facf9fc 32746+ goto out_wh; /* success */
32747+
32748+ err = -EIO;
523b37e3 32749+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
5527c038 32750+ wh_dentry, d_inode(wh_dentry)->i_mode);
1facf9fc 32751+
4f0767ce 32752+out_wh:
1facf9fc 32753+ dput(wh_dentry);
4f0767ce 32754+out:
1facf9fc 32755+ return err;
32756+}
32757+
32758+/*
32759+ * test whether the @h_dentry is set opaque or not.
32760+ */
076b876e 32761+int au_diropq_test(struct dentry *h_dentry)
1facf9fc 32762+{
32763+ int err;
32764+ struct inode *h_dir;
32765+
5527c038 32766+ h_dir = d_inode(h_dentry);
076b876e 32767+ err = au_wh_test(h_dentry, &diropq_name,
1facf9fc 32768+ au_test_h_perm_sio(h_dir, MAY_EXEC));
32769+ return err;
32770+}
32771+
32772+/*
32773+ * returns a negative dentry whose name is unique and temporary.
32774+ */
32775+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
32776+ struct qstr *prefix)
32777+{
1facf9fc 32778+ struct dentry *dentry;
32779+ int i;
027c5e7a 32780+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 32781+ *name, *p;
027c5e7a 32782+ /* strict atomic_t is unnecessary here */
1facf9fc 32783+ static unsigned short cnt;
32784+ struct qstr qs;
32785+
4a4d8108
AM
32786+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
32787+
1facf9fc 32788+ name = defname;
027c5e7a
AM
32789+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
32790+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 32791+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 32792+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 32793+ goto out;
32794+ dentry = ERR_PTR(-ENOMEM);
32795+ name = kmalloc(qs.len + 1, GFP_NOFS);
32796+ if (unlikely(!name))
32797+ goto out;
32798+ }
32799+
32800+ /* doubly whiteout-ed */
32801+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
32802+ p = name + AUFS_WH_PFX_LEN * 2;
32803+ memcpy(p, prefix->name, prefix->len);
32804+ p += prefix->len;
32805+ *p++ = '.';
4a4d8108 32806+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 32807+
32808+ qs.name = name;
32809+ for (i = 0; i < 3; i++) {
b752ccd1 32810+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
076b876e 32811+ dentry = au_sio_lkup_one(&qs, h_parent);
5527c038 32812+ if (IS_ERR(dentry) || d_is_negative(dentry))
1facf9fc 32813+ goto out_name;
32814+ dput(dentry);
32815+ }
0c3ec466 32816+ /* pr_warn("could not get random name\n"); */
1facf9fc 32817+ dentry = ERR_PTR(-EEXIST);
32818+ AuDbg("%.*s\n", AuLNPair(&qs));
32819+ BUG();
32820+
4f0767ce 32821+out_name:
1facf9fc 32822+ if (name != defname)
f0c0a007 32823+ au_delayed_kfree(name);
4f0767ce 32824+out:
4a4d8108 32825+ AuTraceErrPtr(dentry);
1facf9fc 32826+ return dentry;
1facf9fc 32827+}
32828+
32829+/*
32830+ * rename the @h_dentry on @br to the whiteouted temporary name.
32831+ */
32832+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
32833+{
32834+ int err;
32835+ struct path h_path = {
86dc4139 32836+ .mnt = au_br_mnt(br)
1facf9fc 32837+ };
523b37e3 32838+ struct inode *h_dir, *delegated;
1facf9fc 32839+ struct dentry *h_parent;
32840+
32841+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 32842+ h_dir = d_inode(h_parent);
1facf9fc 32843+ IMustLock(h_dir);
32844+
32845+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
32846+ err = PTR_ERR(h_path.dentry);
32847+ if (IS_ERR(h_path.dentry))
32848+ goto out;
32849+
32850+ /* under the same dir, no need to lock_rename() */
523b37e3 32851+ delegated = NULL;
f2c43d5f
AM
32852+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated,
32853+ /*flags*/0);
1facf9fc 32854+ AuTraceErr(err);
523b37e3
AM
32855+ if (unlikely(err == -EWOULDBLOCK)) {
32856+ pr_warn("cannot retry for NFSv4 delegation"
32857+ " for an internal rename\n");
32858+ iput(delegated);
32859+ }
1facf9fc 32860+ dput(h_path.dentry);
32861+
4f0767ce 32862+out:
4a4d8108 32863+ AuTraceErr(err);
1facf9fc 32864+ return err;
32865+}
32866+
32867+/* ---------------------------------------------------------------------- */
32868+/*
32869+ * functions for removing a whiteout
32870+ */
32871+
32872+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
32873+{
523b37e3
AM
32874+ int err, force;
32875+ struct inode *delegated;
1facf9fc 32876+
32877+ /*
32878+ * forces superio when the dir has a sticky bit.
32879+ * this may be a violation of unix fs semantics.
32880+ */
32881+ force = (h_dir->i_mode & S_ISVTX)
5527c038 32882+ && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
523b37e3
AM
32883+ delegated = NULL;
32884+ err = vfsub_unlink(h_dir, h_path, &delegated, force);
32885+ if (unlikely(err == -EWOULDBLOCK)) {
32886+ pr_warn("cannot retry for NFSv4 delegation"
32887+ " for an internal unlink\n");
32888+ iput(delegated);
32889+ }
32890+ return err;
1facf9fc 32891+}
32892+
32893+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
32894+ struct dentry *dentry)
32895+{
32896+ int err;
32897+
32898+ err = do_unlink_wh(h_dir, h_path);
32899+ if (!err && dentry)
32900+ au_set_dbwh(dentry, -1);
32901+
32902+ return err;
32903+}
32904+
32905+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
32906+ struct au_branch *br)
32907+{
32908+ int err;
32909+ struct path h_path = {
86dc4139 32910+ .mnt = au_br_mnt(br)
1facf9fc 32911+ };
32912+
32913+ err = 0;
b4510431 32914+ h_path.dentry = vfsub_lkup_one(wh, h_parent);
1facf9fc 32915+ if (IS_ERR(h_path.dentry))
32916+ err = PTR_ERR(h_path.dentry);
32917+ else {
5527c038
JR
32918+ if (d_is_reg(h_path.dentry))
32919+ err = do_unlink_wh(d_inode(h_parent), &h_path);
1facf9fc 32920+ dput(h_path.dentry);
32921+ }
32922+
32923+ return err;
32924+}
32925+
32926+/* ---------------------------------------------------------------------- */
32927+/*
32928+ * initialize/clean whiteout for a branch
32929+ */
32930+
32931+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
32932+ const int isdir)
32933+{
32934+ int err;
523b37e3 32935+ struct inode *delegated;
1facf9fc 32936+
5527c038 32937+ if (d_is_negative(whpath->dentry))
1facf9fc 32938+ return;
32939+
86dc4139
AM
32940+ if (isdir)
32941+ err = vfsub_rmdir(h_dir, whpath);
523b37e3
AM
32942+ else {
32943+ delegated = NULL;
32944+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
32945+ if (unlikely(err == -EWOULDBLOCK)) {
32946+ pr_warn("cannot retry for NFSv4 delegation"
32947+ " for an internal unlink\n");
32948+ iput(delegated);
32949+ }
32950+ }
1facf9fc 32951+ if (unlikely(err))
523b37e3
AM
32952+ pr_warn("failed removing %pd (%d), ignored.\n",
32953+ whpath->dentry, err);
1facf9fc 32954+}
32955+
32956+static int test_linkable(struct dentry *h_root)
32957+{
5527c038 32958+ struct inode *h_dir = d_inode(h_root);
1facf9fc 32959+
32960+ if (h_dir->i_op->link)
32961+ return 0;
32962+
523b37e3
AM
32963+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
32964+ h_root, au_sbtype(h_root->d_sb));
1facf9fc 32965+ return -ENOSYS;
32966+}
32967+
32968+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
32969+static int au_whdir(struct inode *h_dir, struct path *path)
32970+{
32971+ int err;
32972+
32973+ err = -EEXIST;
5527c038 32974+ if (d_is_negative(path->dentry)) {
1facf9fc 32975+ int mode = S_IRWXU;
32976+
32977+ if (au_test_nfs(path->dentry->d_sb))
32978+ mode |= S_IXUGO;
86dc4139 32979+ err = vfsub_mkdir(h_dir, path, mode);
2000de60 32980+ } else if (d_is_dir(path->dentry))
1facf9fc 32981+ err = 0;
32982+ else
523b37e3 32983+ pr_err("unknown %pd exists\n", path->dentry);
1facf9fc 32984+
32985+ return err;
32986+}
32987+
32988+struct au_wh_base {
32989+ const struct qstr *name;
32990+ struct dentry *dentry;
32991+};
32992+
32993+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
32994+ struct path *h_path)
32995+{
32996+ h_path->dentry = base[AuBrWh_BASE].dentry;
32997+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32998+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32999+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33000+ h_path->dentry = base[AuBrWh_ORPH].dentry;
33001+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33002+}
33003+
33004+/*
33005+ * returns tri-state,
c1595e42 33006+ * minus: error, caller should print the message
1facf9fc 33007+ * zero: success
c1595e42 33008+ * plus: error, caller should NOT print the message
1facf9fc 33009+ */
33010+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
33011+ int do_plink, struct au_wh_base base[],
33012+ struct path *h_path)
33013+{
33014+ int err;
33015+ struct inode *h_dir;
33016+
5527c038 33017+ h_dir = d_inode(h_root);
1facf9fc 33018+ h_path->dentry = base[AuBrWh_BASE].dentry;
33019+ au_wh_clean(h_dir, h_path, /*isdir*/0);
33020+ h_path->dentry = base[AuBrWh_PLINK].dentry;
33021+ if (do_plink) {
33022+ err = test_linkable(h_root);
33023+ if (unlikely(err)) {
33024+ err = 1;
33025+ goto out;
33026+ }
33027+
33028+ err = au_whdir(h_dir, h_path);
33029+ if (unlikely(err))
33030+ goto out;
33031+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
33032+ } else
33033+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33034+ h_path->dentry = base[AuBrWh_ORPH].dentry;
33035+ err = au_whdir(h_dir, h_path);
33036+ if (unlikely(err))
33037+ goto out;
33038+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
33039+
4f0767ce 33040+out:
1facf9fc 33041+ return err;
33042+}
33043+
33044+/*
33045+ * for the moment, aufs supports the branch filesystem which does not support
33046+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
33047+ * copyup failed. finally, such filesystem will not be used as the writable
33048+ * branch.
33049+ *
33050+ * returns tri-state, see above.
33051+ */
33052+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
33053+ int do_plink, struct au_wh_base base[],
33054+ struct path *h_path)
33055+{
33056+ int err;
33057+ struct inode *h_dir;
33058+
1308ab2a 33059+ WbrWhMustWriteLock(wbr);
33060+
1facf9fc 33061+ err = test_linkable(h_root);
33062+ if (unlikely(err)) {
33063+ err = 1;
33064+ goto out;
33065+ }
33066+
33067+ /*
33068+ * todo: should this create be done in /sbin/mount.aufs helper?
33069+ */
33070+ err = -EEXIST;
5527c038
JR
33071+ h_dir = d_inode(h_root);
33072+ if (d_is_negative(base[AuBrWh_BASE].dentry)) {
86dc4139
AM
33073+ h_path->dentry = base[AuBrWh_BASE].dentry;
33074+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
7e9cd9fe 33075+ } else if (d_is_reg(base[AuBrWh_BASE].dentry))
1facf9fc 33076+ err = 0;
33077+ else
523b37e3 33078+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
1facf9fc 33079+ if (unlikely(err))
33080+ goto out;
33081+
33082+ h_path->dentry = base[AuBrWh_PLINK].dentry;
33083+ if (do_plink) {
33084+ err = au_whdir(h_dir, h_path);
33085+ if (unlikely(err))
33086+ goto out;
33087+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
33088+ } else
33089+ au_wh_clean(h_dir, h_path, /*isdir*/1);
33090+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
33091+
33092+ h_path->dentry = base[AuBrWh_ORPH].dentry;
33093+ err = au_whdir(h_dir, h_path);
33094+ if (unlikely(err))
33095+ goto out;
33096+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
33097+
4f0767ce 33098+out:
1facf9fc 33099+ return err;
33100+}
33101+
33102+/*
33103+ * initialize the whiteout base file/dir for @br.
33104+ */
86dc4139 33105+int au_wh_init(struct au_branch *br, struct super_block *sb)
1facf9fc 33106+{
33107+ int err, i;
33108+ const unsigned char do_plink
33109+ = !!au_opt_test(au_mntflags(sb), PLINK);
1facf9fc 33110+ struct inode *h_dir;
86dc4139
AM
33111+ struct path path = br->br_path;
33112+ struct dentry *h_root = path.dentry;
1facf9fc 33113+ struct au_wbr *wbr = br->br_wbr;
33114+ static const struct qstr base_name[] = {
0c3ec466
AM
33115+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
33116+ sizeof(AUFS_BASE_NAME) - 1),
33117+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
33118+ sizeof(AUFS_PLINKDIR_NAME) - 1),
33119+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
33120+ sizeof(AUFS_ORPHDIR_NAME) - 1)
1facf9fc 33121+ };
33122+ struct au_wh_base base[] = {
33123+ [AuBrWh_BASE] = {
33124+ .name = base_name + AuBrWh_BASE,
33125+ .dentry = NULL
33126+ },
33127+ [AuBrWh_PLINK] = {
33128+ .name = base_name + AuBrWh_PLINK,
33129+ .dentry = NULL
33130+ },
33131+ [AuBrWh_ORPH] = {
33132+ .name = base_name + AuBrWh_ORPH,
33133+ .dentry = NULL
33134+ }
33135+ };
33136+
1308ab2a 33137+ if (wbr)
33138+ WbrWhMustWriteLock(wbr);
1facf9fc 33139+
1facf9fc 33140+ for (i = 0; i < AuBrWh_Last; i++) {
33141+ /* doubly whiteouted */
33142+ struct dentry *d;
33143+
33144+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
33145+ err = PTR_ERR(d);
33146+ if (IS_ERR(d))
33147+ goto out;
33148+
33149+ base[i].dentry = d;
33150+ AuDebugOn(wbr
33151+ && wbr->wbr_wh[i]
33152+ && wbr->wbr_wh[i] != base[i].dentry);
33153+ }
33154+
33155+ if (wbr)
33156+ for (i = 0; i < AuBrWh_Last; i++) {
33157+ dput(wbr->wbr_wh[i]);
33158+ wbr->wbr_wh[i] = NULL;
33159+ }
33160+
33161+ err = 0;
1e00d052 33162+ if (!au_br_writable(br->br_perm)) {
5527c038 33163+ h_dir = d_inode(h_root);
1facf9fc 33164+ au_wh_init_ro(h_dir, base, &path);
1e00d052 33165+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 33166+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
33167+ if (err > 0)
33168+ goto out;
33169+ else if (err)
33170+ goto out_err;
1e00d052 33171+ } else {
1facf9fc 33172+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
33173+ if (err > 0)
33174+ goto out;
33175+ else if (err)
33176+ goto out_err;
1facf9fc 33177+ }
33178+ goto out; /* success */
33179+
4f0767ce 33180+out_err:
523b37e3
AM
33181+ pr_err("an error(%d) on the writable branch %pd(%s)\n",
33182+ err, h_root, au_sbtype(h_root->d_sb));
4f0767ce 33183+out:
1facf9fc 33184+ for (i = 0; i < AuBrWh_Last; i++)
33185+ dput(base[i].dentry);
33186+ return err;
33187+}
33188+
33189+/* ---------------------------------------------------------------------- */
33190+/*
33191+ * whiteouts are all hard-linked usually.
33192+ * when its link count reaches a ceiling, we create a new whiteout base
33193+ * asynchronously.
33194+ */
33195+
33196+struct reinit_br_wh {
33197+ struct super_block *sb;
33198+ struct au_branch *br;
33199+};
33200+
33201+static void reinit_br_wh(void *arg)
33202+{
33203+ int err;
33204+ aufs_bindex_t bindex;
33205+ struct path h_path;
33206+ struct reinit_br_wh *a = arg;
33207+ struct au_wbr *wbr;
523b37e3 33208+ struct inode *dir, *delegated;
1facf9fc 33209+ struct dentry *h_root;
33210+ struct au_hinode *hdir;
33211+
33212+ err = 0;
33213+ wbr = a->br->br_wbr;
33214+ /* big aufs lock */
33215+ si_noflush_write_lock(a->sb);
33216+ if (!au_br_writable(a->br->br_perm))
33217+ goto out;
33218+ bindex = au_br_index(a->sb, a->br->br_id);
33219+ if (unlikely(bindex < 0))
33220+ goto out;
33221+
1308ab2a 33222+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
5527c038 33223+ dir = d_inode(a->sb->s_root);
1facf9fc 33224+ hdir = au_hi(dir, bindex);
33225+ h_root = au_h_dptr(a->sb->s_root, bindex);
86dc4139 33226+ AuDebugOn(h_root != au_br_dentry(a->br));
1facf9fc 33227+
5afbbe0d 33228+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 33229+ wbr_wh_write_lock(wbr);
33230+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
33231+ h_root, a->br);
33232+ if (!err) {
86dc4139
AM
33233+ h_path.dentry = wbr->wbr_whbase;
33234+ h_path.mnt = au_br_mnt(a->br);
523b37e3
AM
33235+ delegated = NULL;
33236+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
33237+ /*force*/0);
33238+ if (unlikely(err == -EWOULDBLOCK)) {
33239+ pr_warn("cannot retry for NFSv4 delegation"
33240+ " for an internal unlink\n");
33241+ iput(delegated);
33242+ }
1facf9fc 33243+ } else {
523b37e3 33244+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
1facf9fc 33245+ err = 0;
33246+ }
33247+ dput(wbr->wbr_whbase);
33248+ wbr->wbr_whbase = NULL;
33249+ if (!err)
86dc4139 33250+ err = au_wh_init(a->br, a->sb);
1facf9fc 33251+ wbr_wh_write_unlock(wbr);
5afbbe0d 33252+ au_hn_inode_unlock(hdir);
1308ab2a 33253+ di_read_unlock(a->sb->s_root, AuLock_IR);
076b876e
AM
33254+ if (!err)
33255+ au_fhsm_wrote(a->sb, bindex, /*force*/0);
1facf9fc 33256+
4f0767ce 33257+out:
1facf9fc 33258+ if (wbr)
33259+ atomic_dec(&wbr->wbr_wh_running);
5afbbe0d 33260+ au_br_put(a->br);
1facf9fc 33261+ si_write_unlock(a->sb);
027c5e7a 33262+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
f0c0a007 33263+ au_delayed_kfree(arg);
1facf9fc 33264+ if (unlikely(err))
33265+ AuIOErr("err %d\n", err);
33266+}
33267+
33268+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
33269+{
33270+ int do_dec, wkq_err;
33271+ struct reinit_br_wh *arg;
33272+
33273+ do_dec = 1;
33274+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
33275+ goto out;
33276+
33277+ /* ignore ENOMEM */
33278+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
33279+ if (arg) {
33280+ /*
33281+ * dec(wh_running), kfree(arg) and dec(br_count)
33282+ * in reinit function
33283+ */
33284+ arg->sb = sb;
33285+ arg->br = br;
5afbbe0d 33286+ au_br_get(br);
53392da6 33287+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 33288+ if (unlikely(wkq_err)) {
33289+ atomic_dec(&br->br_wbr->wbr_wh_running);
5afbbe0d 33290+ au_br_put(br);
f0c0a007 33291+ au_delayed_kfree(arg);
1facf9fc 33292+ }
33293+ do_dec = 0;
33294+ }
33295+
4f0767ce 33296+out:
1facf9fc 33297+ if (do_dec)
33298+ atomic_dec(&br->br_wbr->wbr_wh_running);
33299+}
33300+
33301+/* ---------------------------------------------------------------------- */
33302+
33303+/*
33304+ * create the whiteout @wh.
33305+ */
33306+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
33307+ struct dentry *wh)
33308+{
33309+ int err;
33310+ struct path h_path = {
33311+ .dentry = wh
33312+ };
33313+ struct au_branch *br;
33314+ struct au_wbr *wbr;
33315+ struct dentry *h_parent;
523b37e3 33316+ struct inode *h_dir, *delegated;
1facf9fc 33317+
33318+ h_parent = wh->d_parent; /* dir inode is locked */
5527c038 33319+ h_dir = d_inode(h_parent);
1facf9fc 33320+ IMustLock(h_dir);
33321+
33322+ br = au_sbr(sb, bindex);
86dc4139 33323+ h_path.mnt = au_br_mnt(br);
1facf9fc 33324+ wbr = br->br_wbr;
33325+ wbr_wh_read_lock(wbr);
33326+ if (wbr->wbr_whbase) {
523b37e3
AM
33327+ delegated = NULL;
33328+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
33329+ if (unlikely(err == -EWOULDBLOCK)) {
33330+ pr_warn("cannot retry for NFSv4 delegation"
33331+ " for an internal link\n");
33332+ iput(delegated);
33333+ }
1facf9fc 33334+ if (!err || err != -EMLINK)
33335+ goto out;
33336+
33337+ /* link count full. re-initialize br_whbase. */
33338+ kick_reinit_br_wh(sb, br);
33339+ }
33340+
33341+ /* return this error in this context */
b4510431 33342+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
076b876e
AM
33343+ if (!err)
33344+ au_fhsm_wrote(sb, bindex, /*force*/0);
1facf9fc 33345+
4f0767ce 33346+out:
1facf9fc 33347+ wbr_wh_read_unlock(wbr);
33348+ return err;
33349+}
33350+
33351+/* ---------------------------------------------------------------------- */
33352+
33353+/*
33354+ * create or remove the diropq.
33355+ */
33356+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
33357+ unsigned int flags)
33358+{
33359+ struct dentry *opq_dentry, *h_dentry;
33360+ struct super_block *sb;
33361+ struct au_branch *br;
33362+ int err;
33363+
33364+ sb = dentry->d_sb;
33365+ br = au_sbr(sb, bindex);
33366+ h_dentry = au_h_dptr(dentry, bindex);
b4510431 33367+ opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
1facf9fc 33368+ if (IS_ERR(opq_dentry))
33369+ goto out;
33370+
33371+ if (au_ftest_diropq(flags, CREATE)) {
33372+ err = link_or_create_wh(sb, bindex, opq_dentry);
33373+ if (!err) {
33374+ au_set_dbdiropq(dentry, bindex);
33375+ goto out; /* success */
33376+ }
33377+ } else {
33378+ struct path tmp = {
33379+ .dentry = opq_dentry,
86dc4139 33380+ .mnt = au_br_mnt(br)
1facf9fc 33381+ };
5527c038 33382+ err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
1facf9fc 33383+ if (!err)
33384+ au_set_dbdiropq(dentry, -1);
33385+ }
33386+ dput(opq_dentry);
33387+ opq_dentry = ERR_PTR(err);
33388+
4f0767ce 33389+out:
1facf9fc 33390+ return opq_dentry;
33391+}
33392+
33393+struct do_diropq_args {
33394+ struct dentry **errp;
33395+ struct dentry *dentry;
33396+ aufs_bindex_t bindex;
33397+ unsigned int flags;
33398+};
33399+
33400+static void call_do_diropq(void *args)
33401+{
33402+ struct do_diropq_args *a = args;
33403+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
33404+}
33405+
33406+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33407+ unsigned int flags)
33408+{
33409+ struct dentry *diropq, *h_dentry;
33410+
33411+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 33412+ if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
1facf9fc 33413+ diropq = do_diropq(dentry, bindex, flags);
33414+ else {
33415+ int wkq_err;
33416+ struct do_diropq_args args = {
33417+ .errp = &diropq,
33418+ .dentry = dentry,
33419+ .bindex = bindex,
33420+ .flags = flags
33421+ };
33422+
33423+ wkq_err = au_wkq_wait(call_do_diropq, &args);
33424+ if (unlikely(wkq_err))
33425+ diropq = ERR_PTR(wkq_err);
33426+ }
33427+
33428+ return diropq;
33429+}
33430+
33431+/* ---------------------------------------------------------------------- */
33432+
33433+/*
33434+ * lookup whiteout dentry.
33435+ * @h_parent: lower parent dentry which must exist and be locked
33436+ * @base_name: name of dentry which will be whiteouted
33437+ * returns dentry for whiteout.
33438+ */
33439+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33440+ struct au_branch *br)
33441+{
33442+ int err;
33443+ struct qstr wh_name;
33444+ struct dentry *wh_dentry;
33445+
33446+ err = au_wh_name_alloc(&wh_name, base_name);
33447+ wh_dentry = ERR_PTR(err);
33448+ if (!err) {
b4510431 33449+ wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
f0c0a007 33450+ au_delayed_kfree(wh_name.name);
1facf9fc 33451+ }
33452+ return wh_dentry;
33453+}
33454+
33455+/*
33456+ * link/create a whiteout for @dentry on @bindex.
33457+ */
33458+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33459+ struct dentry *h_parent)
33460+{
33461+ struct dentry *wh_dentry;
33462+ struct super_block *sb;
33463+ int err;
33464+
33465+ sb = dentry->d_sb;
33466+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
5527c038 33467+ if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) {
1facf9fc 33468+ err = link_or_create_wh(sb, bindex, wh_dentry);
076b876e 33469+ if (!err) {
1facf9fc 33470+ au_set_dbwh(dentry, bindex);
076b876e
AM
33471+ au_fhsm_wrote(sb, bindex, /*force*/0);
33472+ } else {
1facf9fc 33473+ dput(wh_dentry);
33474+ wh_dentry = ERR_PTR(err);
33475+ }
33476+ }
33477+
33478+ return wh_dentry;
33479+}
33480+
33481+/* ---------------------------------------------------------------------- */
33482+
33483+/* Delete all whiteouts in this directory on branch bindex. */
33484+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
33485+ aufs_bindex_t bindex, struct au_branch *br)
33486+{
33487+ int err;
33488+ unsigned long ul, n;
33489+ struct qstr wh_name;
33490+ char *p;
33491+ struct hlist_head *head;
c06a8ce3 33492+ struct au_vdir_wh *pos;
1facf9fc 33493+ struct au_vdir_destr *str;
33494+
33495+ err = -ENOMEM;
537831f9 33496+ p = (void *)__get_free_page(GFP_NOFS);
1facf9fc 33497+ wh_name.name = p;
33498+ if (unlikely(!wh_name.name))
33499+ goto out;
33500+
33501+ err = 0;
33502+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
33503+ p += AUFS_WH_PFX_LEN;
33504+ n = whlist->nh_num;
33505+ head = whlist->nh_head;
33506+ for (ul = 0; !err && ul < n; ul++, head++) {
c06a8ce3
AM
33507+ hlist_for_each_entry(pos, head, wh_hash) {
33508+ if (pos->wh_bindex != bindex)
1facf9fc 33509+ continue;
33510+
c06a8ce3 33511+ str = &pos->wh_str;
1facf9fc 33512+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
33513+ memcpy(p, str->name, str->len);
33514+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
33515+ err = unlink_wh_name(h_dentry, &wh_name, br);
33516+ if (!err)
33517+ continue;
33518+ break;
33519+ }
33520+ AuIOErr("whiteout name too long %.*s\n",
33521+ str->len, str->name);
33522+ err = -EIO;
33523+ break;
33524+ }
33525+ }
f0c0a007 33526+ au_delayed_free_page((unsigned long)wh_name.name);
1facf9fc 33527+
4f0767ce 33528+out:
1facf9fc 33529+ return err;
33530+}
33531+
33532+struct del_wh_children_args {
33533+ int *errp;
33534+ struct dentry *h_dentry;
1308ab2a 33535+ struct au_nhash *whlist;
1facf9fc 33536+ aufs_bindex_t bindex;
33537+ struct au_branch *br;
33538+};
33539+
33540+static void call_del_wh_children(void *args)
33541+{
33542+ struct del_wh_children_args *a = args;
1308ab2a 33543+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 33544+}
33545+
33546+/* ---------------------------------------------------------------------- */
33547+
33548+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
33549+{
33550+ struct au_whtmp_rmdir *whtmp;
dece6358 33551+ int err;
1308ab2a 33552+ unsigned int rdhash;
dece6358
AM
33553+
33554+ SiMustAnyLock(sb);
1facf9fc 33555+
be52b249 33556+ whtmp = kzalloc(sizeof(*whtmp), gfp);
dece6358
AM
33557+ if (unlikely(!whtmp)) {
33558+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 33559+ goto out;
dece6358 33560+ }
1facf9fc 33561+
1308ab2a 33562+ /* no estimation for dir size */
33563+ rdhash = au_sbi(sb)->si_rdhash;
33564+ if (!rdhash)
33565+ rdhash = AUFS_RDHASH_DEF;
33566+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
33567+ if (unlikely(err)) {
f0c0a007 33568+ au_delayed_kfree(whtmp);
1308ab2a 33569+ whtmp = ERR_PTR(err);
33570+ }
dece6358 33571+
4f0767ce 33572+out:
dece6358 33573+ return whtmp;
1facf9fc 33574+}
33575+
33576+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
33577+{
027c5e7a 33578+ if (whtmp->br)
5afbbe0d 33579+ au_br_put(whtmp->br);
1facf9fc 33580+ dput(whtmp->wh_dentry);
33581+ iput(whtmp->dir);
dece6358 33582+ au_nhash_wh_free(&whtmp->whlist);
f0c0a007 33583+ au_delayed_kfree(whtmp);
1facf9fc 33584+}
33585+
33586+/*
33587+ * rmdir the whiteouted temporary named dir @h_dentry.
33588+ * @whlist: whiteouted children.
33589+ */
33590+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33591+ struct dentry *wh_dentry, struct au_nhash *whlist)
33592+{
33593+ int err;
2000de60 33594+ unsigned int h_nlink;
1facf9fc 33595+ struct path h_tmp;
33596+ struct inode *wh_inode, *h_dir;
33597+ struct au_branch *br;
33598+
5527c038 33599+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
1facf9fc 33600+ IMustLock(h_dir);
33601+
33602+ br = au_sbr(dir->i_sb, bindex);
5527c038 33603+ wh_inode = d_inode(wh_dentry);
febd17d6 33604+ inode_lock_nested(wh_inode, AuLsc_I_CHILD);
1facf9fc 33605+
33606+ /*
33607+ * someone else might change some whiteouts while we were sleeping.
33608+ * it means this whlist may have an obsoleted entry.
33609+ */
33610+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
33611+ err = del_wh_children(wh_dentry, whlist, bindex, br);
33612+ else {
33613+ int wkq_err;
33614+ struct del_wh_children_args args = {
33615+ .errp = &err,
33616+ .h_dentry = wh_dentry,
1308ab2a 33617+ .whlist = whlist,
1facf9fc 33618+ .bindex = bindex,
33619+ .br = br
33620+ };
33621+
33622+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
33623+ if (unlikely(wkq_err))
33624+ err = wkq_err;
33625+ }
febd17d6 33626+ inode_unlock(wh_inode);
1facf9fc 33627+
33628+ if (!err) {
33629+ h_tmp.dentry = wh_dentry;
86dc4139 33630+ h_tmp.mnt = au_br_mnt(br);
2000de60 33631+ h_nlink = h_dir->i_nlink;
1facf9fc 33632+ err = vfsub_rmdir(h_dir, &h_tmp);
2000de60
JR
33633+ /* some fs doesn't change the parent nlink in some cases */
33634+ h_nlink -= h_dir->i_nlink;
1facf9fc 33635+ }
33636+
33637+ if (!err) {
5afbbe0d 33638+ if (au_ibtop(dir) == bindex) {
7f207e10 33639+ /* todo: dir->i_mutex is necessary */
1facf9fc 33640+ au_cpup_attr_timesizes(dir);
2000de60
JR
33641+ if (h_nlink)
33642+ vfsub_drop_nlink(dir);
1facf9fc 33643+ }
33644+ return 0; /* success */
33645+ }
33646+
523b37e3 33647+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
1facf9fc 33648+ return err;
33649+}
33650+
33651+static void call_rmdir_whtmp(void *args)
33652+{
33653+ int err;
e49829fe 33654+ aufs_bindex_t bindex;
1facf9fc 33655+ struct au_whtmp_rmdir *a = args;
33656+ struct super_block *sb;
33657+ struct dentry *h_parent;
33658+ struct inode *h_dir;
1facf9fc 33659+ struct au_hinode *hdir;
33660+
33661+ /* rmdir by nfsd may cause deadlock with this i_mutex */
febd17d6 33662+ /* inode_lock(a->dir); */
e49829fe 33663+ err = -EROFS;
1facf9fc 33664+ sb = a->dir->i_sb;
e49829fe
JR
33665+ si_read_lock(sb, !AuLock_FLUSH);
33666+ if (!au_br_writable(a->br->br_perm))
33667+ goto out;
33668+ bindex = au_br_index(sb, a->br->br_id);
33669+ if (unlikely(bindex < 0))
1facf9fc 33670+ goto out;
33671+
33672+ err = -EIO;
1facf9fc 33673+ ii_write_lock_parent(a->dir);
33674+ h_parent = dget_parent(a->wh_dentry);
5527c038 33675+ h_dir = d_inode(h_parent);
e49829fe 33676+ hdir = au_hi(a->dir, bindex);
86dc4139
AM
33677+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
33678+ if (unlikely(err))
33679+ goto out_mnt;
5afbbe0d 33680+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
33681+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
33682+ a->br);
86dc4139
AM
33683+ if (!err)
33684+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
5afbbe0d 33685+ au_hn_inode_unlock(hdir);
86dc4139
AM
33686+ vfsub_mnt_drop_write(au_br_mnt(a->br));
33687+
33688+out_mnt:
1facf9fc 33689+ dput(h_parent);
33690+ ii_write_unlock(a->dir);
4f0767ce 33691+out:
febd17d6 33692+ /* inode_unlock(a->dir); */
1facf9fc 33693+ au_whtmp_rmdir_free(a);
027c5e7a
AM
33694+ si_read_unlock(sb);
33695+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33696+ if (unlikely(err))
33697+ AuIOErr("err %d\n", err);
33698+}
33699+
33700+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33701+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
33702+{
33703+ int wkq_err;
e49829fe 33704+ struct super_block *sb;
1facf9fc 33705+
33706+ IMustLock(dir);
33707+
33708+ /* all post-process will be done in call_rmdir_whtmp(). */
e49829fe 33709+ sb = dir->i_sb;
1facf9fc 33710+ args->dir = au_igrab(dir);
e49829fe 33711+ args->br = au_sbr(sb, bindex);
5afbbe0d 33712+ au_br_get(args->br);
1facf9fc 33713+ args->wh_dentry = dget(wh_dentry);
53392da6 33714+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 33715+ if (unlikely(wkq_err)) {
523b37e3 33716+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
1facf9fc 33717+ au_whtmp_rmdir_free(args);
33718+ }
33719+}
7f207e10
AM
33720diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
33721--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 33722+++ linux/fs/aufs/whout.h 2016-10-09 16:55:36.496035060 +0200
076b876e 33723@@ -0,0 +1,85 @@
1facf9fc 33724+/*
8cdd5066 33725+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33726+ *
33727+ * This program, aufs is free software; you can redistribute it and/or modify
33728+ * it under the terms of the GNU General Public License as published by
33729+ * the Free Software Foundation; either version 2 of the License, or
33730+ * (at your option) any later version.
dece6358
AM
33731+ *
33732+ * This program is distributed in the hope that it will be useful,
33733+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33734+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33735+ * GNU General Public License for more details.
33736+ *
33737+ * You should have received a copy of the GNU General Public License
523b37e3 33738+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33739+ */
33740+
33741+/*
33742+ * whiteout for logical deletion and opaque directory
33743+ */
33744+
33745+#ifndef __AUFS_WHOUT_H__
33746+#define __AUFS_WHOUT_H__
33747+
33748+#ifdef __KERNEL__
33749+
1facf9fc 33750+#include "dir.h"
33751+
33752+/* whout.c */
33753+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
076b876e
AM
33754+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
33755+int au_diropq_test(struct dentry *h_dentry);
7e9cd9fe 33756+struct au_branch;
1facf9fc 33757+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
33758+ struct qstr *prefix);
33759+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
33760+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
33761+ struct dentry *dentry);
86dc4139 33762+int au_wh_init(struct au_branch *br, struct super_block *sb);
1facf9fc 33763+
33764+/* diropq flags */
33765+#define AuDiropq_CREATE 1
33766+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
33767+#define au_fset_diropq(flags, name) \
33768+ do { (flags) |= AuDiropq_##name; } while (0)
33769+#define au_fclr_diropq(flags, name) \
33770+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 33771+
33772+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33773+ unsigned int flags);
33774+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33775+ struct au_branch *br);
33776+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33777+ struct dentry *h_parent);
33778+
33779+/* real rmdir for the whiteout-ed dir */
33780+struct au_whtmp_rmdir {
33781+ struct inode *dir;
e49829fe 33782+ struct au_branch *br;
1facf9fc 33783+ struct dentry *wh_dentry;
dece6358 33784+ struct au_nhash whlist;
1facf9fc 33785+};
33786+
33787+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
33788+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
33789+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33790+ struct dentry *wh_dentry, struct au_nhash *whlist);
33791+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33792+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
33793+
33794+/* ---------------------------------------------------------------------- */
33795+
33796+static inline struct dentry *au_diropq_create(struct dentry *dentry,
33797+ aufs_bindex_t bindex)
33798+{
33799+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
33800+}
33801+
33802+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
33803+{
33804+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
33805+}
33806+
33807+#endif /* __KERNEL__ */
33808+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
33809diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
33810--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 33811+++ linux/fs/aufs/wkq.c 2016-10-09 16:55:36.496035060 +0200
f0c0a007 33812@@ -0,0 +1,213 @@
1facf9fc 33813+/*
8cdd5066 33814+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33815+ *
33816+ * This program, aufs is free software; you can redistribute it and/or modify
33817+ * it under the terms of the GNU General Public License as published by
33818+ * the Free Software Foundation; either version 2 of the License, or
33819+ * (at your option) any later version.
dece6358
AM
33820+ *
33821+ * This program is distributed in the hope that it will be useful,
33822+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33823+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33824+ * GNU General Public License for more details.
33825+ *
33826+ * You should have received a copy of the GNU General Public License
523b37e3 33827+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33828+ */
33829+
33830+/*
33831+ * workqueue for asynchronous/super-io operations
33832+ * todo: try new credential scheme
33833+ */
33834+
dece6358 33835+#include <linux/module.h>
1facf9fc 33836+#include "aufs.h"
33837+
9dbd164d 33838+/* internal workqueue named AUFS_WKQ_NAME */
b752ccd1 33839+
9dbd164d 33840+static struct workqueue_struct *au_wkq;
1facf9fc 33841+
33842+struct au_wkinfo {
33843+ struct work_struct wk;
7f207e10 33844+ struct kobject *kobj;
1facf9fc 33845+
33846+ unsigned int flags; /* see wkq.h */
33847+
33848+ au_wkq_func_t func;
33849+ void *args;
33850+
1facf9fc 33851+ struct completion *comp;
33852+};
33853+
33854+/* ---------------------------------------------------------------------- */
33855+
1facf9fc 33856+static void wkq_func(struct work_struct *wk)
33857+{
33858+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
33859+
2dfbb274 33860+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
7f207e10
AM
33861+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
33862+
1facf9fc 33863+ wkinfo->func(wkinfo->args);
1facf9fc 33864+ if (au_ftest_wkq(wkinfo->flags, WAIT))
33865+ complete(wkinfo->comp);
33866+ else {
7f207e10 33867+ kobject_put(wkinfo->kobj);
9dbd164d 33868+ module_put(THIS_MODULE); /* todo: ?? */
f0c0a007 33869+ au_delayed_kfree(wkinfo);
1facf9fc 33870+ }
33871+}
33872+
33873+/*
33874+ * Since struct completion is large, try allocating it dynamically.
33875+ */
c2b27bf2 33876+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
1facf9fc 33877+#define AuWkqCompDeclare(name) struct completion *comp = NULL
33878+
33879+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33880+{
33881+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
33882+ if (*comp) {
33883+ init_completion(*comp);
33884+ wkinfo->comp = *comp;
33885+ return 0;
33886+ }
33887+ return -ENOMEM;
33888+}
33889+
33890+static void au_wkq_comp_free(struct completion *comp)
33891+{
f0c0a007 33892+ au_delayed_kfree(comp);
1facf9fc 33893+}
33894+
33895+#else
33896+
33897+/* no braces */
33898+#define AuWkqCompDeclare(name) \
33899+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
33900+ struct completion *comp = &_ ## name
33901+
33902+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33903+{
33904+ wkinfo->comp = *comp;
33905+ return 0;
33906+}
33907+
33908+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
33909+{
33910+ /* empty */
33911+}
33912+#endif /* 4KSTACKS */
33913+
53392da6 33914+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 33915+{
53392da6
AM
33916+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
33917+ if (au_wkq_test()) {
38d290e6
JR
33918+ AuWarn1("wkq from wkq, unless silly-rename on NFS,"
33919+ " due to a dead dir by UDBA?\n");
53392da6
AM
33920+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
33921+ }
33922+ } else
33923+ au_dbg_verify_kthread();
33924+
33925+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 33926+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
9dbd164d 33927+ queue_work(au_wkq, &wkinfo->wk);
4a4d8108
AM
33928+ } else {
33929+ INIT_WORK(&wkinfo->wk, wkq_func);
33930+ schedule_work(&wkinfo->wk);
33931+ }
1facf9fc 33932+}
33933+
7f207e10
AM
33934+/*
33935+ * Be careful. It is easy to make deadlock happen.
33936+ * processA: lock, wkq and wait
33937+ * processB: wkq and wait, lock in wkq
33938+ * --> deadlock
33939+ */
b752ccd1 33940+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 33941+{
33942+ int err;
33943+ AuWkqCompDeclare(comp);
33944+ struct au_wkinfo wkinfo = {
b752ccd1 33945+ .flags = flags,
1facf9fc 33946+ .func = func,
33947+ .args = args
33948+ };
33949+
33950+ err = au_wkq_comp_alloc(&wkinfo, &comp);
33951+ if (!err) {
53392da6 33952+ au_wkq_run(&wkinfo);
1facf9fc 33953+ /* no timeout, no interrupt */
33954+ wait_for_completion(wkinfo.comp);
33955+ au_wkq_comp_free(comp);
4a4d8108 33956+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 33957+ }
33958+
33959+ return err;
33960+
33961+}
33962+
027c5e7a
AM
33963+/*
33964+ * Note: dget/dput() in func for aufs dentries is not supported. It would be a
33965+ * problem during a concurrent unmount.
33966+ */
53392da6
AM
33967+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33968+ unsigned int flags)
1facf9fc 33969+{
33970+ int err;
33971+ struct au_wkinfo *wkinfo;
33972+
f0c0a007 33973+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
1facf9fc 33974+
33975+ /*
33976+ * wkq_func() must free this wkinfo.
33977+ * it highly depends upon the implementation of workqueue.
33978+ */
33979+ err = 0;
33980+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
33981+ if (wkinfo) {
7f207e10 33982+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 33983+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 33984+ wkinfo->func = func;
33985+ wkinfo->args = args;
33986+ wkinfo->comp = NULL;
7f207e10 33987+ kobject_get(wkinfo->kobj);
9dbd164d 33988+ __module_get(THIS_MODULE); /* todo: ?? */
1facf9fc 33989+
53392da6 33990+ au_wkq_run(wkinfo);
1facf9fc 33991+ } else {
33992+ err = -ENOMEM;
e49829fe 33993+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33994+ }
33995+
33996+ return err;
33997+}
33998+
33999+/* ---------------------------------------------------------------------- */
34000+
34001+void au_nwt_init(struct au_nowait_tasks *nwt)
34002+{
f0c0a007
AM
34003+ atomic_set(&nwt->nw_len, 0);
34004+ /* smp_mb(); */ /* atomic_set */
1facf9fc 34005+ init_waitqueue_head(&nwt->nw_wq);
34006+}
34007+
34008+void au_wkq_fin(void)
34009+{
9dbd164d 34010+ destroy_workqueue(au_wkq);
1facf9fc 34011+}
34012+
34013+int __init au_wkq_init(void)
34014+{
9dbd164d 34015+ int err;
b752ccd1
AM
34016+
34017+ err = 0;
86dc4139 34018+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
9dbd164d
AM
34019+ if (IS_ERR(au_wkq))
34020+ err = PTR_ERR(au_wkq);
34021+ else if (!au_wkq)
34022+ err = -ENOMEM;
b752ccd1
AM
34023+
34024+ return err;
1facf9fc 34025+}
7f207e10
AM
34026diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
34027--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
e2f27e51 34028+++ linux/fs/aufs/wkq.h 2016-10-09 16:55:36.496035060 +0200
f0c0a007 34029@@ -0,0 +1,93 @@
1facf9fc 34030+/*
8cdd5066 34031+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 34032+ *
34033+ * This program, aufs is free software; you can redistribute it and/or modify
34034+ * it under the terms of the GNU General Public License as published by
34035+ * the Free Software Foundation; either version 2 of the License, or
34036+ * (at your option) any later version.
dece6358
AM
34037+ *
34038+ * This program is distributed in the hope that it will be useful,
34039+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34040+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34041+ * GNU General Public License for more details.
34042+ *
34043+ * You should have received a copy of the GNU General Public License
523b37e3 34044+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 34045+ */
34046+
34047+/*
34048+ * workqueue for asynchronous/super-io operations
34049+ * todo: try new credentials management scheme
34050+ */
34051+
34052+#ifndef __AUFS_WKQ_H__
34053+#define __AUFS_WKQ_H__
34054+
34055+#ifdef __KERNEL__
34056+
5afbbe0d
AM
34057+#include <linux/percpu_counter.h>
34058+
dece6358
AM
34059+struct super_block;
34060+
1facf9fc 34061+/* ---------------------------------------------------------------------- */
34062+
34063+/*
34064+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
34065+ */
34066+struct au_nowait_tasks {
f0c0a007 34067+ atomic_t nw_len;
1facf9fc 34068+ wait_queue_head_t nw_wq;
34069+};
34070+
34071+/* ---------------------------------------------------------------------- */
34072+
34073+typedef void (*au_wkq_func_t)(void *args);
34074+
34075+/* wkq flags */
34076+#define AuWkq_WAIT 1
9dbd164d 34077+#define AuWkq_NEST (1 << 1)
1facf9fc 34078+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
34079+#define au_fset_wkq(flags, name) \
34080+ do { (flags) |= AuWkq_##name; } while (0)
34081+#define au_fclr_wkq(flags, name) \
34082+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 34083+
9dbd164d
AM
34084+#ifndef CONFIG_AUFS_HNOTIFY
34085+#undef AuWkq_NEST
34086+#define AuWkq_NEST 0
34087+#endif
34088+
1facf9fc 34089+/* wkq.c */
b752ccd1 34090+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
34091+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
34092+ unsigned int flags);
1facf9fc 34093+void au_nwt_init(struct au_nowait_tasks *nwt);
34094+int __init au_wkq_init(void);
34095+void au_wkq_fin(void);
34096+
34097+/* ---------------------------------------------------------------------- */
34098+
53392da6
AM
34099+static inline int au_wkq_test(void)
34100+{
34101+ return current->flags & PF_WQ_WORKER;
34102+}
34103+
b752ccd1 34104+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 34105+{
b752ccd1 34106+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 34107+}
34108+
34109+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
34110+{
f0c0a007 34111+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 34112+ wake_up_all(&nwt->nw_wq);
34113+}
34114+
34115+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
34116+{
f0c0a007 34117+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
1facf9fc 34118+ return 0;
34119+}
34120+
34121+#endif /* __KERNEL__ */
34122+#endif /* __AUFS_WKQ_H__ */
c1595e42
JR
34123diff -urN /usr/share/empty/fs/aufs/xattr.c linux/fs/aufs/xattr.c
34124--- /usr/share/empty/fs/aufs/xattr.c 1970-01-01 01:00:00.000000000 +0100
f2c43d5f
AM
34125+++ linux/fs/aufs/xattr.c 2016-12-17 12:28:17.598545045 +0100
34126@@ -0,0 +1,332 @@
c1595e42 34127+/*
8cdd5066 34128+ * Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
34129+ *
34130+ * This program, aufs is free software; you can redistribute it and/or modify
34131+ * it under the terms of the GNU General Public License as published by
34132+ * the Free Software Foundation; either version 2 of the License, or
34133+ * (at your option) any later version.
34134+ *
34135+ * This program is distributed in the hope that it will be useful,
34136+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34137+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34138+ * GNU General Public License for more details.
34139+ *
34140+ * You should have received a copy of the GNU General Public License
34141+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
34142+ */
34143+
34144+/*
34145+ * handling xattr functions
34146+ */
34147+
34148+#include <linux/xattr.h>
34149+#include "aufs.h"
34150+
34151+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
34152+{
34153+ if (!ignore_flags)
34154+ goto out;
34155+ switch (err) {
34156+ case -ENOMEM:
34157+ case -EDQUOT:
34158+ goto out;
34159+ }
34160+
34161+ if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
34162+ err = 0;
34163+ goto out;
34164+ }
34165+
34166+#define cmp(brattr, prefix) do { \
34167+ if (!strncmp(name, XATTR_##prefix##_PREFIX, \
34168+ XATTR_##prefix##_PREFIX_LEN)) { \
34169+ if (ignore_flags & AuBrAttr_ICEX_##brattr) \
34170+ err = 0; \
34171+ goto out; \
34172+ } \
34173+ } while (0)
34174+
34175+ cmp(SEC, SECURITY);
34176+ cmp(SYS, SYSTEM);
34177+ cmp(TR, TRUSTED);
34178+ cmp(USR, USER);
34179+#undef cmp
34180+
34181+ if (ignore_flags & AuBrAttr_ICEX_OTH)
34182+ err = 0;
34183+
34184+out:
34185+ return err;
34186+}
34187+
34188+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
34189+
34190+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe
AM
34191+ char *name, char **buf, unsigned int ignore_flags,
34192+ unsigned int verbose)
c1595e42
JR
34193+{
34194+ int err;
34195+ ssize_t ssz;
34196+ struct inode *h_idst;
34197+
34198+ ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
34199+ err = ssz;
34200+ if (unlikely(err <= 0)) {
c1595e42
JR
34201+ if (err == -ENODATA
34202+ || (err == -EOPNOTSUPP
b912730e 34203+ && ((ignore_flags & au_xattr_out_of_list)
5527c038 34204+ || (au_test_nfs_noacl(d_inode(h_src))
b912730e
AM
34205+ && (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
34206+ || !strcmp(name,
34207+ XATTR_NAME_POSIX_ACL_DEFAULT))))
34208+ ))
c1595e42 34209+ err = 0;
b912730e
AM
34210+ if (err && (verbose || au_debug_test()))
34211+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
34212+ goto out;
34213+ }
34214+
34215+ /* unlock it temporarily */
5527c038 34216+ h_idst = d_inode(h_dst);
febd17d6 34217+ inode_unlock(h_idst);
c1595e42 34218+ err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
febd17d6 34219+ inode_lock_nested(h_idst, AuLsc_I_CHILD2);
c1595e42 34220+ if (unlikely(err)) {
7e9cd9fe
AM
34221+ if (verbose || au_debug_test())
34222+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
34223+ err = au_xattr_ignore(err, name, ignore_flags);
34224+ }
34225+
34226+out:
34227+ return err;
34228+}
34229+
7e9cd9fe
AM
34230+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
34231+ unsigned int verbose)
c1595e42
JR
34232+{
34233+ int err, unlocked, acl_access, acl_default;
34234+ ssize_t ssz;
34235+ struct inode *h_isrc, *h_idst;
34236+ char *value, *p, *o, *e;
34237+
34238+ /* try to prevent updates to the source inode while we reference it */
7e9cd9fe 34239+ /* there should be no parent-child relationship between them */
5527c038
JR
34240+ h_isrc = d_inode(h_src);
34241+ h_idst = d_inode(h_dst);
febd17d6
JR
34242+ inode_unlock(h_idst);
34243+ inode_lock_nested(h_isrc, AuLsc_I_CHILD);
34244+ inode_lock_nested(h_idst, AuLsc_I_CHILD2);
c1595e42
JR
34245+ unlocked = 0;
34246+
34247+ /* some filesystems don't list POSIX ACL, for example tmpfs */
34248+ ssz = vfs_listxattr(h_src, NULL, 0);
34249+ err = ssz;
34250+ if (unlikely(err < 0)) {
34251+ AuTraceErr(err);
34252+ if (err == -ENODATA
34253+ || err == -EOPNOTSUPP)
34254+ err = 0; /* ignore */
34255+ goto out;
34256+ }
34257+
34258+ err = 0;
34259+ p = NULL;
34260+ o = NULL;
34261+ if (ssz) {
34262+ err = -ENOMEM;
34263+ p = kmalloc(ssz, GFP_NOFS);
34264+ o = p;
34265+ if (unlikely(!p))
34266+ goto out;
34267+ err = vfs_listxattr(h_src, p, ssz);
34268+ }
febd17d6 34269+ inode_unlock(h_isrc);
c1595e42
JR
34270+ unlocked = 1;
34271+ AuDbg("err %d, ssz %zd\n", err, ssz);
34272+ if (unlikely(err < 0))
34273+ goto out_free;
34274+
34275+ err = 0;
34276+ e = p + ssz;
34277+ value = NULL;
34278+ acl_access = 0;
34279+ acl_default = 0;
34280+ while (!err && p < e) {
34281+ acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
34282+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
34283+ acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
34284+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
34285+ - 1);
7e9cd9fe
AM
34286+ err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
34287+ verbose);
c1595e42
JR
34288+ p += strlen(p) + 1;
34289+ }
34290+ AuTraceErr(err);
34291+ ignore_flags |= au_xattr_out_of_list;
34292+ if (!err && !acl_access) {
34293+ err = au_do_cpup_xattr(h_dst, h_src,
34294+ XATTR_NAME_POSIX_ACL_ACCESS, &value,
7e9cd9fe 34295+ ignore_flags, verbose);
c1595e42
JR
34296+ AuTraceErr(err);
34297+ }
34298+ if (!err && !acl_default) {
34299+ err = au_do_cpup_xattr(h_dst, h_src,
34300+ XATTR_NAME_POSIX_ACL_DEFAULT, &value,
7e9cd9fe 34301+ ignore_flags, verbose);
c1595e42
JR
34302+ AuTraceErr(err);
34303+ }
34304+
f0c0a007
AM
34305+ if (value)
34306+ au_delayed_kfree(value);
c1595e42
JR
34307+
34308+out_free:
f0c0a007
AM
34309+ if (o)
34310+ au_delayed_kfree(o);
c1595e42
JR
34311+out:
34312+ if (!unlocked)
febd17d6 34313+ inode_unlock(h_isrc);
c1595e42
JR
34314+ AuTraceErr(err);
34315+ return err;
34316+}
34317+
34318+/* ---------------------------------------------------------------------- */
34319+
34320+enum {
34321+ AU_XATTR_LIST,
34322+ AU_XATTR_GET
34323+};
34324+
34325+struct au_lgxattr {
34326+ int type;
34327+ union {
34328+ struct {
34329+ char *list;
34330+ size_t size;
34331+ } list;
34332+ struct {
34333+ const char *name;
34334+ void *value;
34335+ size_t size;
34336+ } get;
34337+ } u;
34338+};
34339+
34340+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
34341+{
34342+ ssize_t err;
34343+ struct path h_path;
34344+ struct super_block *sb;
34345+
34346+ sb = dentry->d_sb;
34347+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
34348+ if (unlikely(err))
34349+ goto out;
34350+ err = au_h_path_getattr(dentry, /*force*/1, &h_path);
34351+ if (unlikely(err))
34352+ goto out_si;
34353+ if (unlikely(!h_path.dentry))
34354+ /* illegally overlapped or something */
34355+ goto out_di; /* pretending success */
34356+
34357+ /* always topmost entry only */
34358+ switch (arg->type) {
34359+ case AU_XATTR_LIST:
34360+ err = vfs_listxattr(h_path.dentry,
34361+ arg->u.list.list, arg->u.list.size);
34362+ break;
34363+ case AU_XATTR_GET:
5afbbe0d 34364+ AuDebugOn(d_is_negative(h_path.dentry));
c1595e42
JR
34365+ err = vfs_getxattr(h_path.dentry,
34366+ arg->u.get.name, arg->u.get.value,
34367+ arg->u.get.size);
34368+ break;
34369+ }
34370+
34371+out_di:
34372+ di_read_unlock(dentry, AuLock_IR);
34373+out_si:
34374+ si_read_unlock(sb);
34375+out:
34376+ AuTraceErr(err);
34377+ return err;
34378+}
34379+
34380+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
34381+{
34382+ struct au_lgxattr arg = {
34383+ .type = AU_XATTR_LIST,
34384+ .u.list = {
34385+ .list = list,
34386+ .size = size
34387+ },
34388+ };
34389+
34390+ return au_lgxattr(dentry, &arg);
34391+}
34392+
f2c43d5f
AM
34393+static ssize_t au_getxattr(struct dentry *dentry,
34394+ struct inode *inode __maybe_unused,
34395+ const char *name, void *value, size_t size)
c1595e42
JR
34396+{
34397+ struct au_lgxattr arg = {
34398+ .type = AU_XATTR_GET,
34399+ .u.get = {
34400+ .name = name,
34401+ .value = value,
34402+ .size = size
34403+ },
34404+ };
34405+
34406+ return au_lgxattr(dentry, &arg);
34407+}
34408+
f2c43d5f
AM
34409+static int au_setxattr(struct dentry *dentry, struct inode *inode,
34410+ const char *name, const void *value, size_t size,
34411+ int flags)
c1595e42 34412+{
f2c43d5f 34413+ struct au_sxattr arg = {
c1595e42
JR
34414+ .type = AU_XATTR_SET,
34415+ .u.set = {
34416+ .name = name,
34417+ .value = value,
34418+ .size = size,
34419+ .flags = flags
34420+ },
34421+ };
34422+
f2c43d5f 34423+ return au_sxattr(dentry, inode, &arg);
c1595e42
JR
34424+}
34425+
34426+/* ---------------------------------------------------------------------- */
34427+
f2c43d5f
AM
34428+static int au_xattr_get(const struct xattr_handler *handler,
34429+ struct dentry *dentry, struct inode *inode,
34430+ const char *name, void *buffer, size_t size)
c1595e42 34431+{
f2c43d5f 34432+ return au_getxattr(dentry, inode, name, buffer, size);
c1595e42
JR
34433+}
34434+
f2c43d5f
AM
34435+static int au_xattr_set(const struct xattr_handler *handler,
34436+ struct dentry *dentry, struct inode *inode,
34437+ const char *name, const void *value, size_t size,
34438+ int flags)
c1595e42 34439+{
f2c43d5f 34440+ return au_setxattr(dentry, inode, name, value, size, flags);
c1595e42
JR
34441+}
34442+
34443+static const struct xattr_handler au_xattr_handler = {
f2c43d5f
AM
34444+ .name = "",
34445+ .prefix = "",
c1595e42
JR
34446+ .get = au_xattr_get,
34447+ .set = au_xattr_set
c1595e42
JR
34448+};
34449+
34450+static const struct xattr_handler *au_xattr_handlers[] = {
f2c43d5f
AM
34451+ &au_xattr_handler,
34452+ NULL
c1595e42
JR
34453+};
34454+
34455+void au_xattr_init(struct super_block *sb)
34456+{
f2c43d5f 34457+ sb->s_xattr = au_xattr_handlers;
c1595e42 34458+}
7f207e10
AM
34459diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
34460--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
e2f27e51 34461+++ linux/fs/aufs/xino.c 2016-10-09 16:55:36.496035060 +0200
f0c0a007 34462@@ -0,0 +1,1318 @@
1facf9fc 34463+/*
8cdd5066 34464+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 34465+ *
34466+ * This program, aufs is free software; you can redistribute it and/or modify
34467+ * it under the terms of the GNU General Public License as published by
34468+ * the Free Software Foundation; either version 2 of the License, or
34469+ * (at your option) any later version.
dece6358
AM
34470+ *
34471+ * This program is distributed in the hope that it will be useful,
34472+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34473+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34474+ * GNU General Public License for more details.
34475+ *
34476+ * You should have received a copy of the GNU General Public License
523b37e3 34477+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 34478+ */
34479+
34480+/*
34481+ * external inode number translation table and bitmap
34482+ */
34483+
34484+#include <linux/seq_file.h>
392086de 34485+#include <linux/statfs.h>
1facf9fc 34486+#include "aufs.h"
34487+
9dbd164d 34488+/* todo: unnecessary to support mmap_sem since kernel-space? */
5527c038 34489+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 34490+ loff_t *pos)
34491+{
34492+ ssize_t err;
34493+ mm_segment_t oldfs;
b752ccd1
AM
34494+ union {
34495+ void *k;
34496+ char __user *u;
34497+ } buf;
1facf9fc 34498+
b752ccd1 34499+ buf.k = kbuf;
1facf9fc 34500+ oldfs = get_fs();
34501+ set_fs(KERNEL_DS);
34502+ do {
34503+ /* todo: signal_pending? */
b752ccd1 34504+ err = func(file, buf.u, size, pos);
1facf9fc 34505+ } while (err == -EAGAIN || err == -EINTR);
34506+ set_fs(oldfs);
34507+
34508+#if 0 /* reserved for future use */
34509+ if (err > 0)
2000de60 34510+ fsnotify_access(file->f_path.dentry);
1facf9fc 34511+#endif
34512+
34513+ return err;
34514+}
34515+
34516+/* ---------------------------------------------------------------------- */
34517+
be52b249
AM
34518+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
34519+ size_t size, loff_t *pos);
34520+
5527c038 34521+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
1facf9fc 34522+ size_t size, loff_t *pos)
34523+{
34524+ ssize_t err;
34525+ mm_segment_t oldfs;
b752ccd1
AM
34526+ union {
34527+ void *k;
34528+ const char __user *u;
34529+ } buf;
be52b249
AM
34530+ int i;
34531+ const int prevent_endless = 10;
1facf9fc 34532+
be52b249 34533+ i = 0;
b752ccd1 34534+ buf.k = kbuf;
1facf9fc 34535+ oldfs = get_fs();
34536+ set_fs(KERNEL_DS);
1facf9fc 34537+ do {
b752ccd1 34538+ err = func(file, buf.u, size, pos);
be52b249
AM
34539+ if (err == -EINTR
34540+ && !au_wkq_test()
34541+ && fatal_signal_pending(current)) {
34542+ set_fs(oldfs);
34543+ err = xino_fwrite_wkq(func, file, kbuf, size, pos);
34544+ BUG_ON(err == -EINTR);
34545+ oldfs = get_fs();
34546+ set_fs(KERNEL_DS);
34547+ }
34548+ } while (i++ < prevent_endless
34549+ && (err == -EAGAIN || err == -EINTR));
1facf9fc 34550+ set_fs(oldfs);
34551+
34552+#if 0 /* reserved for future use */
34553+ if (err > 0)
2000de60 34554+ fsnotify_modify(file->f_path.dentry);
1facf9fc 34555+#endif
34556+
34557+ return err;
34558+}
34559+
34560+struct do_xino_fwrite_args {
34561+ ssize_t *errp;
5527c038 34562+ vfs_writef_t func;
1facf9fc 34563+ struct file *file;
34564+ void *buf;
34565+ size_t size;
34566+ loff_t *pos;
34567+};
34568+
34569+static void call_do_xino_fwrite(void *args)
34570+{
34571+ struct do_xino_fwrite_args *a = args;
34572+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
34573+}
34574+
be52b249
AM
34575+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
34576+ size_t size, loff_t *pos)
34577+{
34578+ ssize_t err;
34579+ int wkq_err;
34580+ struct do_xino_fwrite_args args = {
34581+ .errp = &err,
34582+ .func = func,
34583+ .file = file,
34584+ .buf = buf,
34585+ .size = size,
34586+ .pos = pos
34587+ };
34588+
34589+ /*
34590+ * it breaks RLIMIT_FSIZE and normal user's limit,
34591+ * users should care about quota and real 'filesystem full.'
34592+ */
34593+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
34594+ if (unlikely(wkq_err))
34595+ err = wkq_err;
34596+
34597+ return err;
34598+}
34599+
5527c038
JR
34600+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
34601+ size_t size, loff_t *pos)
1facf9fc 34602+{
34603+ ssize_t err;
34604+
b752ccd1
AM
34605+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
34606+ lockdep_off();
34607+ err = do_xino_fwrite(func, file, buf, size, pos);
34608+ lockdep_on();
be52b249
AM
34609+ } else
34610+ err = xino_fwrite_wkq(func, file, buf, size, pos);
1facf9fc 34611+
34612+ return err;
34613+}
34614+
34615+/* ---------------------------------------------------------------------- */
34616+
34617+/*
34618+ * create a new xinofile at the same place/path as @base_file.
34619+ */
34620+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
34621+{
34622+ struct file *file;
4a4d8108 34623+ struct dentry *base, *parent;
523b37e3 34624+ struct inode *dir, *delegated;
1facf9fc 34625+ struct qstr *name;
1308ab2a 34626+ struct path path;
4a4d8108 34627+ int err;
1facf9fc 34628+
2000de60 34629+ base = base_file->f_path.dentry;
1facf9fc 34630+ parent = base->d_parent; /* dir inode is locked */
5527c038 34631+ dir = d_inode(parent);
1facf9fc 34632+ IMustLock(dir);
34633+
34634+ file = ERR_PTR(-EINVAL);
34635+ name = &base->d_name;
4a4d8108
AM
34636+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
34637+ if (IS_ERR(path.dentry)) {
34638+ file = (void *)path.dentry;
523b37e3
AM
34639+ pr_err("%pd lookup err %ld\n",
34640+ base, PTR_ERR(path.dentry));
1facf9fc 34641+ goto out;
34642+ }
34643+
34644+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 34645+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 34646+ if (unlikely(err)) {
34647+ file = ERR_PTR(err);
523b37e3 34648+ pr_err("%pd create err %d\n", base, err);
1facf9fc 34649+ goto out_dput;
34650+ }
34651+
c06a8ce3 34652+ path.mnt = base_file->f_path.mnt;
4a4d8108 34653+ file = vfsub_dentry_open(&path,
7f207e10 34654+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 34655+ /* | __FMODE_NONOTIFY */);
1facf9fc 34656+ if (IS_ERR(file)) {
523b37e3 34657+ pr_err("%pd open err %ld\n", base, PTR_ERR(file));
1facf9fc 34658+ goto out_dput;
34659+ }
34660+
523b37e3
AM
34661+ delegated = NULL;
34662+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
34663+ if (unlikely(err == -EWOULDBLOCK)) {
34664+ pr_warn("cannot retry for NFSv4 delegation"
34665+ " for an internal unlink\n");
34666+ iput(delegated);
34667+ }
1facf9fc 34668+ if (unlikely(err)) {
523b37e3 34669+ pr_err("%pd unlink err %d\n", base, err);
1facf9fc 34670+ goto out_fput;
34671+ }
34672+
34673+ if (copy_src) {
34674+ /* no one can touch copy_src xino */
c06a8ce3 34675+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
1facf9fc 34676+ if (unlikely(err)) {
523b37e3 34677+ pr_err("%pd copy err %d\n", base, err);
1facf9fc 34678+ goto out_fput;
34679+ }
34680+ }
34681+ goto out_dput; /* success */
34682+
4f0767ce 34683+out_fput:
1facf9fc 34684+ fput(file);
34685+ file = ERR_PTR(err);
4f0767ce 34686+out_dput:
4a4d8108 34687+ dput(path.dentry);
4f0767ce 34688+out:
1facf9fc 34689+ return file;
34690+}
34691+
34692+struct au_xino_lock_dir {
34693+ struct au_hinode *hdir;
34694+ struct dentry *parent;
febd17d6 34695+ struct inode *dir;
1facf9fc 34696+};
34697+
34698+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
34699+ struct au_xino_lock_dir *ldir)
34700+{
34701+ aufs_bindex_t brid, bindex;
34702+
34703+ ldir->hdir = NULL;
34704+ bindex = -1;
34705+ brid = au_xino_brid(sb);
34706+ if (brid >= 0)
34707+ bindex = au_br_index(sb, brid);
34708+ if (bindex >= 0) {
5527c038 34709+ ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
5afbbe0d 34710+ au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 34711+ } else {
2000de60 34712+ ldir->parent = dget_parent(xino->f_path.dentry);
febd17d6
JR
34713+ ldir->dir = d_inode(ldir->parent);
34714+ inode_lock_nested(ldir->dir, AuLsc_I_PARENT);
1facf9fc 34715+ }
34716+}
34717+
34718+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
34719+{
34720+ if (ldir->hdir)
5afbbe0d 34721+ au_hn_inode_unlock(ldir->hdir);
1facf9fc 34722+ else {
febd17d6 34723+ inode_unlock(ldir->dir);
1facf9fc 34724+ dput(ldir->parent);
34725+ }
34726+}
34727+
34728+/* ---------------------------------------------------------------------- */
34729+
34730+/* truncate xino files asynchronously */
34731+
34732+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
34733+{
34734+ int err;
392086de
AM
34735+ unsigned long jiffy;
34736+ blkcnt_t blocks;
5afbbe0d 34737+ aufs_bindex_t bi, bbot;
392086de 34738+ struct kstatfs *st;
1facf9fc 34739+ struct au_branch *br;
34740+ struct file *new_xino, *file;
34741+ struct super_block *h_sb;
34742+ struct au_xino_lock_dir ldir;
34743+
392086de 34744+ err = -ENOMEM;
be52b249 34745+ st = kmalloc(sizeof(*st), GFP_NOFS);
392086de
AM
34746+ if (unlikely(!st))
34747+ goto out;
34748+
1facf9fc 34749+ err = -EINVAL;
5afbbe0d
AM
34750+ bbot = au_sbbot(sb);
34751+ if (unlikely(bindex < 0 || bbot < bindex))
392086de 34752+ goto out_st;
1facf9fc 34753+ br = au_sbr(sb, bindex);
34754+ file = br->br_xino.xi_file;
34755+ if (!file)
392086de
AM
34756+ goto out_st;
34757+
34758+ err = vfs_statfs(&file->f_path, st);
34759+ if (unlikely(err))
34760+ AuErr1("statfs err %d, ignored\n", err);
34761+ jiffy = jiffies;
34762+ blocks = file_inode(file)->i_blocks;
34763+ pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34764+ bindex, (u64)blocks, st->f_bfree, st->f_blocks);
1facf9fc 34765+
34766+ au_xino_lock_dir(sb, file, &ldir);
34767+ /* mnt_want_write() is unnecessary here */
34768+ new_xino = au_xino_create2(file, file);
34769+ au_xino_unlock_dir(&ldir);
34770+ err = PTR_ERR(new_xino);
392086de
AM
34771+ if (IS_ERR(new_xino)) {
34772+ pr_err("err %d, ignored\n", err);
34773+ goto out_st;
34774+ }
1facf9fc 34775+ err = 0;
34776+ fput(file);
34777+ br->br_xino.xi_file = new_xino;
34778+
86dc4139 34779+ h_sb = au_br_sb(br);
5afbbe0d 34780+ for (bi = 0; bi <= bbot; bi++) {
1facf9fc 34781+ if (unlikely(bi == bindex))
34782+ continue;
34783+ br = au_sbr(sb, bi);
86dc4139 34784+ if (au_br_sb(br) != h_sb)
1facf9fc 34785+ continue;
34786+
34787+ fput(br->br_xino.xi_file);
34788+ br->br_xino.xi_file = new_xino;
34789+ get_file(new_xino);
34790+ }
34791+
392086de
AM
34792+ err = vfs_statfs(&new_xino->f_path, st);
34793+ if (!err) {
34794+ pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34795+ bindex, (u64)file_inode(new_xino)->i_blocks,
34796+ st->f_bfree, st->f_blocks);
34797+ if (file_inode(new_xino)->i_blocks < blocks)
34798+ au_sbi(sb)->si_xino_jiffy = jiffy;
34799+ } else
34800+ AuErr1("statfs err %d, ignored\n", err);
34801+
34802+out_st:
f0c0a007 34803+ au_delayed_kfree(st);
4f0767ce 34804+out:
1facf9fc 34805+ return err;
34806+}
34807+
34808+struct xino_do_trunc_args {
34809+ struct super_block *sb;
34810+ struct au_branch *br;
34811+};
34812+
34813+static void xino_do_trunc(void *_args)
34814+{
34815+ struct xino_do_trunc_args *args = _args;
34816+ struct super_block *sb;
34817+ struct au_branch *br;
34818+ struct inode *dir;
34819+ int err;
34820+ aufs_bindex_t bindex;
34821+
34822+ err = 0;
34823+ sb = args->sb;
5527c038 34824+ dir = d_inode(sb->s_root);
1facf9fc 34825+ br = args->br;
34826+
34827+ si_noflush_write_lock(sb);
34828+ ii_read_lock_parent(dir);
34829+ bindex = au_br_index(sb, br->br_id);
34830+ err = au_xino_trunc(sb, bindex);
1facf9fc 34831+ ii_read_unlock(dir);
34832+ if (unlikely(err))
392086de 34833+ pr_warn("err b%d, (%d)\n", bindex, err);
1facf9fc 34834+ atomic_dec(&br->br_xino_running);
5afbbe0d 34835+ au_br_put(br);
1facf9fc 34836+ si_write_unlock(sb);
027c5e7a 34837+ au_nwt_done(&au_sbi(sb)->si_nowait);
f0c0a007 34838+ au_delayed_kfree(args);
1facf9fc 34839+}
34840+
392086de
AM
34841+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
34842+{
34843+ int err;
34844+ struct kstatfs st;
34845+ struct au_sbinfo *sbinfo;
34846+
34847+ /* todo: si_xino_expire and the ratio should be customizable */
34848+ sbinfo = au_sbi(sb);
34849+ if (time_before(jiffies,
34850+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
34851+ return 0;
34852+
34853+ /* truncation border */
34854+ err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
34855+ if (unlikely(err)) {
34856+ AuErr1("statfs err %d, ignored\n", err);
34857+ return 0;
34858+ }
34859+ if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
34860+ return 0;
34861+
34862+ return 1;
34863+}
34864+
1facf9fc 34865+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
34866+{
34867+ struct xino_do_trunc_args *args;
34868+ int wkq_err;
34869+
392086de 34870+ if (!xino_trunc_test(sb, br))
1facf9fc 34871+ return;
34872+
34873+ if (atomic_inc_return(&br->br_xino_running) > 1)
34874+ goto out;
34875+
34876+ /* lock and kfree() will be called in xino_do_trunc() */
34877+ args = kmalloc(sizeof(*args), GFP_NOFS);
34878+ if (unlikely(!args)) {
34879+ AuErr1("no memory\n");
f0c0a007 34880+ goto out;
1facf9fc 34881+ }
34882+
5afbbe0d 34883+ au_br_get(br);
1facf9fc 34884+ args->sb = sb;
34885+ args->br = br;
53392da6 34886+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 34887+ if (!wkq_err)
34888+ return; /* success */
34889+
4a4d8108 34890+ pr_err("wkq %d\n", wkq_err);
5afbbe0d 34891+ au_br_put(br);
f0c0a007 34892+ au_delayed_kfree(args);
1facf9fc 34893+
4f0767ce 34894+out:
e49829fe 34895+ atomic_dec(&br->br_xino_running);
1facf9fc 34896+}
34897+
34898+/* ---------------------------------------------------------------------- */
34899+
5527c038 34900+static int au_xino_do_write(vfs_writef_t write, struct file *file,
1facf9fc 34901+ ino_t h_ino, ino_t ino)
34902+{
34903+ loff_t pos;
34904+ ssize_t sz;
34905+
34906+ pos = h_ino;
34907+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
34908+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34909+ return -EFBIG;
34910+ }
34911+ pos *= sizeof(ino);
34912+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
34913+ if (sz == sizeof(ino))
34914+ return 0; /* success */
34915+
34916+ AuIOErr("write failed (%zd)\n", sz);
34917+ return -EIO;
34918+}
34919+
34920+/*
34921+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
34922+ * at the position of @h_ino.
34923+ * even if @ino is zero, it is written to the xinofile and means no entry.
34924+ * if the size of the xino file on a specific filesystem exceeds the watermark,
34925+ * try truncating it.
34926+ */
34927+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34928+ ino_t ino)
34929+{
34930+ int err;
34931+ unsigned int mnt_flags;
34932+ struct au_branch *br;
34933+
34934+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
34935+ || ((loff_t)-1) > 0);
dece6358 34936+ SiMustAnyLock(sb);
1facf9fc 34937+
34938+ mnt_flags = au_mntflags(sb);
34939+ if (!au_opt_test(mnt_flags, XINO))
34940+ return 0;
34941+
34942+ br = au_sbr(sb, bindex);
34943+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34944+ h_ino, ino);
34945+ if (!err) {
34946+ if (au_opt_test(mnt_flags, TRUNC_XINO)
86dc4139 34947+ && au_test_fs_trunc_xino(au_br_sb(br)))
1facf9fc 34948+ xino_try_trunc(sb, br);
34949+ return 0; /* success */
34950+ }
34951+
34952+ AuIOErr("write failed (%d)\n", err);
34953+ return -EIO;
34954+}
34955+
34956+/* ---------------------------------------------------------------------- */
34957+
34958+/* aufs inode number bitmap */
34959+
34960+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
34961+static ino_t xib_calc_ino(unsigned long pindex, int bit)
34962+{
34963+ ino_t ino;
34964+
34965+ AuDebugOn(bit < 0 || page_bits <= bit);
34966+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
34967+ return ino;
34968+}
34969+
34970+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
34971+{
34972+ AuDebugOn(ino < AUFS_FIRST_INO);
34973+ ino -= AUFS_FIRST_INO;
34974+ *pindex = ino / page_bits;
34975+ *bit = ino % page_bits;
34976+}
34977+
34978+static int xib_pindex(struct super_block *sb, unsigned long pindex)
34979+{
34980+ int err;
34981+ loff_t pos;
34982+ ssize_t sz;
34983+ struct au_sbinfo *sbinfo;
34984+ struct file *xib;
34985+ unsigned long *p;
34986+
34987+ sbinfo = au_sbi(sb);
34988+ MtxMustLock(&sbinfo->si_xib_mtx);
34989+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
34990+ || !au_opt_test(sbinfo->si_mntflags, XINO));
34991+
34992+ if (pindex == sbinfo->si_xib_last_pindex)
34993+ return 0;
34994+
34995+ xib = sbinfo->si_xib;
34996+ p = sbinfo->si_xib_buf;
34997+ pos = sbinfo->si_xib_last_pindex;
34998+ pos *= PAGE_SIZE;
34999+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
35000+ if (unlikely(sz != PAGE_SIZE))
35001+ goto out;
35002+
35003+ pos = pindex;
35004+ pos *= PAGE_SIZE;
c06a8ce3 35005+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
1facf9fc 35006+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
35007+ else {
35008+ memset(p, 0, PAGE_SIZE);
35009+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
35010+ }
35011+ if (sz == PAGE_SIZE) {
35012+ sbinfo->si_xib_last_pindex = pindex;
35013+ return 0; /* success */
35014+ }
35015+
4f0767ce 35016+out:
b752ccd1
AM
35017+ AuIOErr1("write failed (%zd)\n", sz);
35018+ err = sz;
35019+ if (sz >= 0)
35020+ err = -EIO;
35021+ return err;
35022+}
35023+
35024+/* ---------------------------------------------------------------------- */
35025+
35026+static void au_xib_clear_bit(struct inode *inode)
35027+{
35028+ int err, bit;
35029+ unsigned long pindex;
35030+ struct super_block *sb;
35031+ struct au_sbinfo *sbinfo;
35032+
35033+ AuDebugOn(inode->i_nlink);
35034+
35035+ sb = inode->i_sb;
35036+ xib_calc_bit(inode->i_ino, &pindex, &bit);
35037+ AuDebugOn(page_bits <= bit);
35038+ sbinfo = au_sbi(sb);
35039+ mutex_lock(&sbinfo->si_xib_mtx);
35040+ err = xib_pindex(sb, pindex);
35041+ if (!err) {
35042+ clear_bit(bit, sbinfo->si_xib_buf);
35043+ sbinfo->si_xib_next_bit = bit;
35044+ }
35045+ mutex_unlock(&sbinfo->si_xib_mtx);
35046+}
35047+
35048+/* for s_op->delete_inode() */
35049+void au_xino_delete_inode(struct inode *inode, const int unlinked)
35050+{
35051+ int err;
35052+ unsigned int mnt_flags;
5afbbe0d 35053+ aufs_bindex_t bindex, bbot, bi;
b752ccd1
AM
35054+ unsigned char try_trunc;
35055+ struct au_iinfo *iinfo;
35056+ struct super_block *sb;
35057+ struct au_hinode *hi;
35058+ struct inode *h_inode;
35059+ struct au_branch *br;
5527c038 35060+ vfs_writef_t xwrite;
b752ccd1 35061+
5afbbe0d
AM
35062+ AuDebugOn(au_is_bad_inode(inode));
35063+
b752ccd1
AM
35064+ sb = inode->i_sb;
35065+ mnt_flags = au_mntflags(sb);
35066+ if (!au_opt_test(mnt_flags, XINO)
35067+ || inode->i_ino == AUFS_ROOT_INO)
35068+ return;
35069+
35070+ if (unlinked) {
35071+ au_xigen_inc(inode);
35072+ au_xib_clear_bit(inode);
35073+ }
35074+
35075+ iinfo = au_ii(inode);
5afbbe0d 35076+ bindex = iinfo->ii_btop;
b752ccd1
AM
35077+ if (bindex < 0)
35078+ return;
1facf9fc 35079+
b752ccd1
AM
35080+ xwrite = au_sbi(sb)->si_xwrite;
35081+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
5afbbe0d
AM
35082+ hi = au_hinode(iinfo, bindex);
35083+ bbot = iinfo->ii_bbot;
35084+ for (; bindex <= bbot; bindex++, hi++) {
b752ccd1
AM
35085+ h_inode = hi->hi_inode;
35086+ if (!h_inode
35087+ || (!unlinked && h_inode->i_nlink))
35088+ continue;
1facf9fc 35089+
b752ccd1
AM
35090+ /* inode may not be revalidated */
35091+ bi = au_br_index(sb, hi->hi_id);
35092+ if (bi < 0)
35093+ continue;
1facf9fc 35094+
b752ccd1
AM
35095+ br = au_sbr(sb, bi);
35096+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
35097+ h_inode->i_ino, /*ino*/0);
35098+ if (!err && try_trunc
86dc4139 35099+ && au_test_fs_trunc_xino(au_br_sb(br)))
b752ccd1 35100+ xino_try_trunc(sb, br);
1facf9fc 35101+ }
1facf9fc 35102+}
35103+
35104+/* get an unused inode number from bitmap */
35105+ino_t au_xino_new_ino(struct super_block *sb)
35106+{
35107+ ino_t ino;
35108+ unsigned long *p, pindex, ul, pend;
35109+ struct au_sbinfo *sbinfo;
35110+ struct file *file;
35111+ int free_bit, err;
35112+
35113+ if (!au_opt_test(au_mntflags(sb), XINO))
35114+ return iunique(sb, AUFS_FIRST_INO);
35115+
35116+ sbinfo = au_sbi(sb);
35117+ mutex_lock(&sbinfo->si_xib_mtx);
35118+ p = sbinfo->si_xib_buf;
35119+ free_bit = sbinfo->si_xib_next_bit;
35120+ if (free_bit < page_bits && !test_bit(free_bit, p))
35121+ goto out; /* success */
35122+ free_bit = find_first_zero_bit(p, page_bits);
35123+ if (free_bit < page_bits)
35124+ goto out; /* success */
35125+
35126+ pindex = sbinfo->si_xib_last_pindex;
35127+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
35128+ err = xib_pindex(sb, ul);
35129+ if (unlikely(err))
35130+ goto out_err;
35131+ free_bit = find_first_zero_bit(p, page_bits);
35132+ if (free_bit < page_bits)
35133+ goto out; /* success */
35134+ }
35135+
35136+ file = sbinfo->si_xib;
c06a8ce3 35137+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
1facf9fc 35138+ for (ul = pindex + 1; ul <= pend; ul++) {
35139+ err = xib_pindex(sb, ul);
35140+ if (unlikely(err))
35141+ goto out_err;
35142+ free_bit = find_first_zero_bit(p, page_bits);
35143+ if (free_bit < page_bits)
35144+ goto out; /* success */
35145+ }
35146+ BUG();
35147+
4f0767ce 35148+out:
1facf9fc 35149+ set_bit(free_bit, p);
7f207e10 35150+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 35151+ pindex = sbinfo->si_xib_last_pindex;
35152+ mutex_unlock(&sbinfo->si_xib_mtx);
35153+ ino = xib_calc_ino(pindex, free_bit);
35154+ AuDbg("i%lu\n", (unsigned long)ino);
35155+ return ino;
4f0767ce 35156+out_err:
1facf9fc 35157+ mutex_unlock(&sbinfo->si_xib_mtx);
35158+ AuDbg("i0\n");
35159+ return 0;
35160+}
35161+
35162+/*
35163+ * read @ino from xinofile for the specified branch{@sb, @bindex}
35164+ * at the position of @h_ino.
35165+ * if no entry exists for @h_ino, *@ino is left as zero and 0 is returned.
35166+ */
35167+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
35168+ ino_t *ino)
35169+{
35170+ int err;
35171+ ssize_t sz;
35172+ loff_t pos;
35173+ struct file *file;
35174+ struct au_sbinfo *sbinfo;
35175+
35176+ *ino = 0;
35177+ if (!au_opt_test(au_mntflags(sb), XINO))
35178+ return 0; /* no xino */
35179+
35180+ err = 0;
35181+ sbinfo = au_sbi(sb);
35182+ pos = h_ino;
35183+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
35184+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
35185+ return -EFBIG;
35186+ }
35187+ pos *= sizeof(*ino);
35188+
35189+ file = au_sbr(sb, bindex)->br_xino.xi_file;
c06a8ce3 35190+ if (vfsub_f_size_read(file) < pos + sizeof(*ino))
1facf9fc 35191+ return 0; /* no ino */
35192+
35193+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
35194+ if (sz == sizeof(*ino))
35195+ return 0; /* success */
35196+
35197+ err = sz;
35198+ if (unlikely(sz >= 0)) {
35199+ err = -EIO;
35200+ AuIOErr("xino read error (%zd)\n", sz);
35201+ }
35202+
35203+ return err;
35204+}
35205+
35206+/* ---------------------------------------------------------------------- */
35207+
35208+/* create and set a new xino file */
35209+
35210+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
35211+{
35212+ struct file *file;
35213+ struct dentry *h_parent, *d;
b912730e 35214+ struct inode *h_dir, *inode;
1facf9fc 35215+ int err;
35216+
35217+ /*
35218+ * at mount-time, and the xino file is the default path,
4a4d8108 35219+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 35220+ * when a user specified the xino, we cannot get au_hdir to be ignored.
35221+ */
7f207e10 35222+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 35223+ /* | __FMODE_NONOTIFY */,
1facf9fc 35224+ S_IRUGO | S_IWUGO);
35225+ if (IS_ERR(file)) {
35226+ if (!silent)
4a4d8108 35227+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 35228+ return file;
35229+ }
35230+
35231+ /* keep file count */
b912730e
AM
35232+ err = 0;
35233+ inode = file_inode(file);
2000de60 35234+ h_parent = dget_parent(file->f_path.dentry);
5527c038 35235+ h_dir = d_inode(h_parent);
febd17d6 35236+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
1facf9fc 35237+ /* mnt_want_write() is unnecessary here */
523b37e3 35238+ /* no delegation since it is just created */
b912730e
AM
35239+ if (inode->i_nlink)
35240+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
35241+ /*force*/0);
febd17d6 35242+ inode_unlock(h_dir);
1facf9fc 35243+ dput(h_parent);
35244+ if (unlikely(err)) {
35245+ if (!silent)
4a4d8108 35246+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 35247+ goto out;
35248+ }
35249+
35250+ err = -EINVAL;
2000de60 35251+ d = file->f_path.dentry;
1facf9fc 35252+ if (unlikely(sb == d->d_sb)) {
35253+ if (!silent)
4a4d8108 35254+ pr_err("%s must be outside\n", fname);
1facf9fc 35255+ goto out;
35256+ }
35257+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
35258+ if (!silent)
4a4d8108
AM
35259+ pr_err("xino doesn't support %s(%s)\n",
35260+ fname, au_sbtype(d->d_sb));
1facf9fc 35261+ goto out;
35262+ }
35263+ return file; /* success */
35264+
4f0767ce 35265+out:
1facf9fc 35266+ fput(file);
35267+ file = ERR_PTR(err);
35268+ return file;
35269+}
35270+
35271+/*
35272+ * find another branch which is on the same filesystem as the specified
5afbbe0d 35273+ * branch{@btgt}. search until @bbot.
1facf9fc 35274+ */
35275+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
5afbbe0d 35276+ aufs_bindex_t bbot)
1facf9fc 35277+{
35278+ aufs_bindex_t bindex;
35279+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
35280+
35281+ for (bindex = 0; bindex < btgt; bindex++)
35282+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
35283+ return bindex;
5afbbe0d 35284+ for (bindex++; bindex <= bbot; bindex++)
1facf9fc 35285+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
35286+ return bindex;
35287+ return -1;
35288+}
35289+
35290+/* ---------------------------------------------------------------------- */
35291+
35292+/*
35293+ * initialize the xinofile for the specified branch @br
35294+ * at the place/path where @base_file indicates.
35295+ * test whether another branch is on the same filesystem or not,
35296+ * if @do_test is true.
35297+ */
35298+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
35299+ struct file *base_file, int do_test)
35300+{
35301+ int err;
35302+ ino_t ino;
5afbbe0d 35303+ aufs_bindex_t bbot, bindex;
1facf9fc 35304+ struct au_branch *shared_br, *b;
35305+ struct file *file;
35306+ struct super_block *tgt_sb;
35307+
35308+ shared_br = NULL;
5afbbe0d 35309+ bbot = au_sbbot(sb);
1facf9fc 35310+ if (do_test) {
86dc4139 35311+ tgt_sb = au_br_sb(br);
5afbbe0d 35312+ for (bindex = 0; bindex <= bbot; bindex++) {
1facf9fc 35313+ b = au_sbr(sb, bindex);
86dc4139 35314+ if (tgt_sb == au_br_sb(b)) {
1facf9fc 35315+ shared_br = b;
35316+ break;
35317+ }
35318+ }
35319+ }
35320+
35321+ if (!shared_br || !shared_br->br_xino.xi_file) {
35322+ struct au_xino_lock_dir ldir;
35323+
35324+ au_xino_lock_dir(sb, base_file, &ldir);
35325+ /* mnt_want_write() is unnecessary here */
35326+ file = au_xino_create2(base_file, NULL);
35327+ au_xino_unlock_dir(&ldir);
35328+ err = PTR_ERR(file);
35329+ if (IS_ERR(file))
35330+ goto out;
35331+ br->br_xino.xi_file = file;
35332+ } else {
35333+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
35334+ get_file(br->br_xino.xi_file);
35335+ }
35336+
35337+ ino = AUFS_ROOT_INO;
35338+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
35339+ h_ino, ino);
b752ccd1
AM
35340+ if (unlikely(err)) {
35341+ fput(br->br_xino.xi_file);
35342+ br->br_xino.xi_file = NULL;
35343+ }
1facf9fc 35344+
4f0767ce 35345+out:
1facf9fc 35346+ return err;
35347+}
35348+
35349+/* ---------------------------------------------------------------------- */
35350+
35351+/* truncate a xino bitmap file */
35352+
35353+/* todo: slow */
35354+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
35355+{
35356+ int err, bit;
35357+ ssize_t sz;
35358+ unsigned long pindex;
35359+ loff_t pos, pend;
35360+ struct au_sbinfo *sbinfo;
5527c038 35361+ vfs_readf_t func;
1facf9fc 35362+ ino_t *ino;
35363+ unsigned long *p;
35364+
35365+ err = 0;
35366+ sbinfo = au_sbi(sb);
dece6358 35367+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 35368+ p = sbinfo->si_xib_buf;
35369+ func = sbinfo->si_xread;
c06a8ce3 35370+ pend = vfsub_f_size_read(file);
1facf9fc 35371+ pos = 0;
35372+ while (pos < pend) {
35373+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
35374+ err = sz;
35375+ if (unlikely(sz <= 0))
35376+ goto out;
35377+
35378+ err = 0;
35379+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
35380+ if (unlikely(*ino < AUFS_FIRST_INO))
35381+ continue;
35382+
35383+ xib_calc_bit(*ino, &pindex, &bit);
35384+ AuDebugOn(page_bits <= bit);
35385+ err = xib_pindex(sb, pindex);
35386+ if (!err)
35387+ set_bit(bit, p);
35388+ else
35389+ goto out;
35390+ }
35391+ }
35392+
4f0767ce 35393+out:
1facf9fc 35394+ return err;
35395+}
35396+
35397+static int xib_restore(struct super_block *sb)
35398+{
35399+ int err;
5afbbe0d 35400+ aufs_bindex_t bindex, bbot;
1facf9fc 35401+ void *page;
35402+
35403+ err = -ENOMEM;
35404+ page = (void *)__get_free_page(GFP_NOFS);
35405+ if (unlikely(!page))
35406+ goto out;
35407+
35408+ err = 0;
5afbbe0d
AM
35409+ bbot = au_sbbot(sb);
35410+ for (bindex = 0; !err && bindex <= bbot; bindex++)
1facf9fc 35411+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
35412+ err = do_xib_restore
35413+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
35414+ else
35415+ AuDbg("b%d\n", bindex);
f0c0a007 35416+ au_delayed_free_page((unsigned long)page);
1facf9fc 35417+
4f0767ce 35418+out:
1facf9fc 35419+ return err;
35420+}
35421+
35422+int au_xib_trunc(struct super_block *sb)
35423+{
35424+ int err;
35425+ ssize_t sz;
35426+ loff_t pos;
35427+ struct au_xino_lock_dir ldir;
35428+ struct au_sbinfo *sbinfo;
35429+ unsigned long *p;
35430+ struct file *file;
35431+
dece6358
AM
35432+ SiMustWriteLock(sb);
35433+
1facf9fc 35434+ err = 0;
35435+ sbinfo = au_sbi(sb);
35436+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
35437+ goto out;
35438+
35439+ file = sbinfo->si_xib;
c06a8ce3 35440+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
1facf9fc 35441+ goto out;
35442+
35443+ au_xino_lock_dir(sb, file, &ldir);
35444+ /* mnt_want_write() is unnecessary here */
35445+ file = au_xino_create2(sbinfo->si_xib, NULL);
35446+ au_xino_unlock_dir(&ldir);
35447+ err = PTR_ERR(file);
35448+ if (IS_ERR(file))
35449+ goto out;
35450+ fput(sbinfo->si_xib);
35451+ sbinfo->si_xib = file;
35452+
35453+ p = sbinfo->si_xib_buf;
35454+ memset(p, 0, PAGE_SIZE);
35455+ pos = 0;
35456+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
35457+ if (unlikely(sz != PAGE_SIZE)) {
35458+ err = sz;
35459+ AuIOErr("err %d\n", err);
35460+ if (sz >= 0)
35461+ err = -EIO;
35462+ goto out;
35463+ }
35464+
35465+ mutex_lock(&sbinfo->si_xib_mtx);
35466+ /* mnt_want_write() is unnecessary here */
35467+ err = xib_restore(sb);
35468+ mutex_unlock(&sbinfo->si_xib_mtx);
35469+
35470+out:
35471+ return err;
35472+}
35473+
35474+/* ---------------------------------------------------------------------- */
35475+
35476+/*
35477+ * xino mount option handlers
35478+ */
1facf9fc 35479+
35480+/* xino bitmap */
35481+static void xino_clear_xib(struct super_block *sb)
35482+{
35483+ struct au_sbinfo *sbinfo;
35484+
dece6358
AM
35485+ SiMustWriteLock(sb);
35486+
1facf9fc 35487+ sbinfo = au_sbi(sb);
35488+ sbinfo->si_xread = NULL;
35489+ sbinfo->si_xwrite = NULL;
35490+ if (sbinfo->si_xib)
35491+ fput(sbinfo->si_xib);
35492+ sbinfo->si_xib = NULL;
f0c0a007
AM
35493+ if (sbinfo->si_xib_buf)
35494+ au_delayed_free_page((unsigned long)sbinfo->si_xib_buf);
1facf9fc 35495+ sbinfo->si_xib_buf = NULL;
35496+}
35497+
35498+static int au_xino_set_xib(struct super_block *sb, struct file *base)
35499+{
35500+ int err;
35501+ loff_t pos;
35502+ struct au_sbinfo *sbinfo;
35503+ struct file *file;
35504+
dece6358
AM
35505+ SiMustWriteLock(sb);
35506+
1facf9fc 35507+ sbinfo = au_sbi(sb);
35508+ file = au_xino_create2(base, sbinfo->si_xib);
35509+ err = PTR_ERR(file);
35510+ if (IS_ERR(file))
35511+ goto out;
35512+ if (sbinfo->si_xib)
35513+ fput(sbinfo->si_xib);
35514+ sbinfo->si_xib = file;
5527c038
JR
35515+ sbinfo->si_xread = vfs_readf(file);
35516+ sbinfo->si_xwrite = vfs_writef(file);
1facf9fc 35517+
35518+ err = -ENOMEM;
35519+ if (!sbinfo->si_xib_buf)
35520+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
35521+ if (unlikely(!sbinfo->si_xib_buf))
35522+ goto out_unset;
35523+
35524+ sbinfo->si_xib_last_pindex = 0;
35525+ sbinfo->si_xib_next_bit = 0;
c06a8ce3 35526+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
1facf9fc 35527+ pos = 0;
35528+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
35529+ PAGE_SIZE, &pos);
35530+ if (unlikely(err != PAGE_SIZE))
35531+ goto out_free;
35532+ }
35533+ err = 0;
35534+ goto out; /* success */
35535+
4f0767ce 35536+out_free:
f0c0a007
AM
35537+ if (sbinfo->si_xib_buf)
35538+ au_delayed_free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
35539+ sbinfo->si_xib_buf = NULL;
35540+ if (err >= 0)
35541+ err = -EIO;
4f0767ce 35542+out_unset:
b752ccd1
AM
35543+ fput(sbinfo->si_xib);
35544+ sbinfo->si_xib = NULL;
35545+ sbinfo->si_xread = NULL;
35546+ sbinfo->si_xwrite = NULL;
4f0767ce 35547+out:
b752ccd1 35548+ return err;
1facf9fc 35549+}
35550+
b752ccd1
AM
35551+/* xino for each branch */
35552+static void xino_clear_br(struct super_block *sb)
35553+{
5afbbe0d 35554+ aufs_bindex_t bindex, bbot;
b752ccd1 35555+ struct au_branch *br;
1facf9fc 35556+
5afbbe0d
AM
35557+ bbot = au_sbbot(sb);
35558+ for (bindex = 0; bindex <= bbot; bindex++) {
b752ccd1
AM
35559+ br = au_sbr(sb, bindex);
35560+ if (!br || !br->br_xino.xi_file)
35561+ continue;
35562+
35563+ fput(br->br_xino.xi_file);
35564+ br->br_xino.xi_file = NULL;
35565+ }
35566+}
35567+
35568+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 35569+{
35570+ int err;
b752ccd1 35571+ ino_t ino;
5afbbe0d 35572+ aufs_bindex_t bindex, bbot, bshared;
b752ccd1
AM
35573+ struct {
35574+ struct file *old, *new;
35575+ } *fpair, *p;
35576+ struct au_branch *br;
35577+ struct inode *inode;
5527c038 35578+ vfs_writef_t writef;
1facf9fc 35579+
b752ccd1
AM
35580+ SiMustWriteLock(sb);
35581+
35582+ err = -ENOMEM;
5afbbe0d
AM
35583+ bbot = au_sbbot(sb);
35584+ fpair = kcalloc(bbot + 1, sizeof(*fpair), GFP_NOFS);
b752ccd1 35585+ if (unlikely(!fpair))
1facf9fc 35586+ goto out;
35587+
5527c038 35588+ inode = d_inode(sb->s_root);
b752ccd1
AM
35589+ ino = AUFS_ROOT_INO;
35590+ writef = au_sbi(sb)->si_xwrite;
5afbbe0d 35591+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
b752ccd1
AM
35592+ bshared = is_sb_shared(sb, bindex, bindex - 1);
35593+ if (bshared >= 0) {
35594+ /* shared xino */
35595+ *p = fpair[bshared];
35596+ get_file(p->new);
35597+ }
35598+
35599+ if (!p->new) {
35600+ /* new xino */
5afbbe0d 35601+ br = au_sbr(sb, bindex);
b752ccd1
AM
35602+ p->old = br->br_xino.xi_file;
35603+ p->new = au_xino_create2(base, br->br_xino.xi_file);
35604+ err = PTR_ERR(p->new);
35605+ if (IS_ERR(p->new)) {
35606+ p->new = NULL;
35607+ goto out_pair;
35608+ }
35609+ }
35610+
35611+ err = au_xino_do_write(writef, p->new,
35612+ au_h_iptr(inode, bindex)->i_ino, ino);
35613+ if (unlikely(err))
35614+ goto out_pair;
35615+ }
35616+
5afbbe0d 35617+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
b752ccd1
AM
35618+ br = au_sbr(sb, bindex);
35619+ if (br->br_xino.xi_file)
35620+ fput(br->br_xino.xi_file);
35621+ get_file(p->new);
35622+ br->br_xino.xi_file = p->new;
35623+ }
1facf9fc 35624+
4f0767ce 35625+out_pair:
5afbbe0d 35626+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++)
b752ccd1
AM
35627+ if (p->new)
35628+ fput(p->new);
35629+ else
35630+ break;
f0c0a007 35631+ au_delayed_kfree(fpair);
4f0767ce 35632+out:
1facf9fc 35633+ return err;
35634+}
b752ccd1
AM
35635+
35636+void au_xino_clr(struct super_block *sb)
35637+{
35638+ struct au_sbinfo *sbinfo;
35639+
35640+ au_xigen_clr(sb);
35641+ xino_clear_xib(sb);
35642+ xino_clear_br(sb);
35643+ sbinfo = au_sbi(sb);
35644+ /* lvalue, do not call au_mntflags() */
35645+ au_opt_clr(sbinfo->si_mntflags, XINO);
35646+}
35647+
35648+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
35649+{
35650+ int err, skip;
35651+ struct dentry *parent, *cur_parent;
35652+ struct qstr *dname, *cur_name;
35653+ struct file *cur_xino;
35654+ struct inode *dir;
35655+ struct au_sbinfo *sbinfo;
35656+
35657+ SiMustWriteLock(sb);
35658+
35659+ err = 0;
35660+ sbinfo = au_sbi(sb);
2000de60 35661+ parent = dget_parent(xino->file->f_path.dentry);
b752ccd1
AM
35662+ if (remount) {
35663+ skip = 0;
2000de60 35664+ dname = &xino->file->f_path.dentry->d_name;
b752ccd1
AM
35665+ cur_xino = sbinfo->si_xib;
35666+ if (cur_xino) {
2000de60
JR
35667+ cur_parent = dget_parent(cur_xino->f_path.dentry);
35668+ cur_name = &cur_xino->f_path.dentry->d_name;
b752ccd1 35669+ skip = (cur_parent == parent
38d290e6 35670+ && au_qstreq(dname, cur_name));
b752ccd1
AM
35671+ dput(cur_parent);
35672+ }
35673+ if (skip)
35674+ goto out;
35675+ }
35676+
35677+ au_opt_set(sbinfo->si_mntflags, XINO);
5527c038 35678+ dir = d_inode(parent);
febd17d6 35679+ inode_lock_nested(dir, AuLsc_I_PARENT);
b752ccd1
AM
35680+ /* mnt_want_write() is unnecessary here */
35681+ err = au_xino_set_xib(sb, xino->file);
35682+ if (!err)
35683+ err = au_xigen_set(sb, xino->file);
35684+ if (!err)
35685+ err = au_xino_set_br(sb, xino->file);
febd17d6 35686+ inode_unlock(dir);
b752ccd1
AM
35687+ if (!err)
35688+ goto out; /* success */
35689+
35690+ /* reset all */
35691+ AuIOErr("failed creating xino(%d).\n", err);
c1595e42
JR
35692+ au_xigen_clr(sb);
35693+ xino_clear_xib(sb);
b752ccd1 35694+
4f0767ce 35695+out:
b752ccd1
AM
35696+ dput(parent);
35697+ return err;
35698+}
35699+
35700+/* ---------------------------------------------------------------------- */
35701+
35702+/*
35703+ * create a xinofile at the default place/path.
35704+ */
35705+struct file *au_xino_def(struct super_block *sb)
35706+{
35707+ struct file *file;
35708+ char *page, *p;
35709+ struct au_branch *br;
35710+ struct super_block *h_sb;
35711+ struct path path;
5afbbe0d 35712+ aufs_bindex_t bbot, bindex, bwr;
b752ccd1
AM
35713+
35714+ br = NULL;
5afbbe0d 35715+ bbot = au_sbbot(sb);
b752ccd1 35716+ bwr = -1;
5afbbe0d 35717+ for (bindex = 0; bindex <= bbot; bindex++) {
b752ccd1
AM
35718+ br = au_sbr(sb, bindex);
35719+ if (au_br_writable(br->br_perm)
86dc4139 35720+ && !au_test_fs_bad_xino(au_br_sb(br))) {
b752ccd1
AM
35721+ bwr = bindex;
35722+ break;
35723+ }
35724+ }
35725+
7f207e10
AM
35726+ if (bwr >= 0) {
35727+ file = ERR_PTR(-ENOMEM);
537831f9 35728+ page = (void *)__get_free_page(GFP_NOFS);
7f207e10
AM
35729+ if (unlikely(!page))
35730+ goto out;
86dc4139 35731+ path.mnt = au_br_mnt(br);
7f207e10
AM
35732+ path.dentry = au_h_dptr(sb->s_root, bwr);
35733+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
35734+ file = (void *)p;
35735+ if (!IS_ERR(p)) {
35736+ strcat(p, "/" AUFS_XINO_FNAME);
35737+ AuDbg("%s\n", p);
35738+ file = au_xino_create(sb, p, /*silent*/0);
35739+ if (!IS_ERR(file))
35740+ au_xino_brid_set(sb, br->br_id);
35741+ }
f0c0a007 35742+ au_delayed_free_page((unsigned long)page);
7f207e10
AM
35743+ } else {
35744+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
35745+ if (IS_ERR(file))
35746+ goto out;
2000de60 35747+ h_sb = file->f_path.dentry->d_sb;
7f207e10
AM
35748+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
35749+ pr_err("xino doesn't support %s(%s)\n",
35750+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
35751+ fput(file);
35752+ file = ERR_PTR(-EINVAL);
35753+ }
35754+ if (!IS_ERR(file))
35755+ au_xino_brid_set(sb, -1);
35756+ }
0c5527e5 35757+
7f207e10
AM
35758+out:
35759+ return file;
35760+}
35761+
35762+/* ---------------------------------------------------------------------- */
35763+
35764+int au_xino_path(struct seq_file *seq, struct file *file)
35765+{
35766+ int err;
35767+
35768+ err = au_seq_path(seq, &file->f_path);
79b8bda9 35769+ if (unlikely(err))
7f207e10
AM
35770+ goto out;
35771+
7f207e10
AM
35772+#define Deleted "\\040(deleted)"
35773+ seq->count -= sizeof(Deleted) - 1;
35774+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
35775+ sizeof(Deleted) - 1));
35776+#undef Deleted
35777+
35778+out:
35779+ return err;
35780+}
537831f9
AM
35781diff -urN /usr/share/empty/include/uapi/linux/aufs_type.h linux/include/uapi/linux/aufs_type.h
35782--- /usr/share/empty/include/uapi/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
f2c43d5f 35783+++ linux/include/uapi/linux/aufs_type.h 2016-12-17 12:28:38.769494865 +0100
c1595e42 35784@@ -0,0 +1,419 @@
7f207e10 35785+/*
8cdd5066 35786+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
35787+ *
35788+ * This program, aufs is free software; you can redistribute it and/or modify
35789+ * it under the terms of the GNU General Public License as published by
35790+ * the Free Software Foundation; either version 2 of the License, or
35791+ * (at your option) any later version.
35792+ *
35793+ * This program is distributed in the hope that it will be useful,
35794+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35795+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35796+ * GNU General Public License for more details.
35797+ *
35798+ * You should have received a copy of the GNU General Public License
523b37e3 35799+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
35800+ */
35801+
35802+#ifndef __AUFS_TYPE_H__
35803+#define __AUFS_TYPE_H__
35804+
f6c5ef8b
AM
35805+#define AUFS_NAME "aufs"
35806+
9dbd164d 35807+#ifdef __KERNEL__
f6c5ef8b
AM
35808+/*
35809+ * define it before including all other headers.
35810+ * sched.h may use pr_* macros before defining "current", so define the
35811+ * no-current version first, and re-define later.
35812+ */
35813+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
35814+#include <linux/sched.h>
35815+#undef pr_fmt
a2a7ad62
AM
35816+#define pr_fmt(fmt) \
35817+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
35818+ (int)sizeof(current->comm), current->comm, current->pid
9dbd164d
AM
35819+#else
35820+#include <stdint.h>
35821+#include <sys/types.h>
f6c5ef8b 35822+#endif /* __KERNEL__ */
7f207e10 35823+
f6c5ef8b
AM
35824+#include <linux/limits.h>
35825+
f2c43d5f 35826+#define AUFS_VERSION "4.9-20161219"
7f207e10
AM
35827+
35828+/* todo? move this to linux-2.6.19/include/magic.h */
35829+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
35830+
35831+/* ---------------------------------------------------------------------- */
35832+
35833+#ifdef CONFIG_AUFS_BRANCH_MAX_127
9dbd164d 35834+typedef int8_t aufs_bindex_t;
7f207e10
AM
35835+#define AUFS_BRANCH_MAX 127
35836+#else
9dbd164d 35837+typedef int16_t aufs_bindex_t;
7f207e10
AM
35838+#ifdef CONFIG_AUFS_BRANCH_MAX_511
35839+#define AUFS_BRANCH_MAX 511
35840+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
35841+#define AUFS_BRANCH_MAX 1023
35842+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
35843+#define AUFS_BRANCH_MAX 32767
35844+#endif
35845+#endif
35846+
35847+#ifdef __KERNEL__
35848+#ifndef AUFS_BRANCH_MAX
35849+#error unknown CONFIG_AUFS_BRANCH_MAX value
35850+#endif
35851+#endif /* __KERNEL__ */
35852+
35853+/* ---------------------------------------------------------------------- */
35854+
7f207e10
AM
35855+#define AUFS_FSTYPE AUFS_NAME
35856+
35857+#define AUFS_ROOT_INO 2
35858+#define AUFS_FIRST_INO 11
35859+
35860+#define AUFS_WH_PFX ".wh."
35861+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
35862+#define AUFS_WH_TMP_LEN 4
86dc4139 35863+/* a limit for rmdir/rename a dir and copyup */
7f207e10
AM
35864+#define AUFS_MAX_NAMELEN (NAME_MAX \
35865+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
35866+ - 1 /* dot */\
35867+ - AUFS_WH_TMP_LEN) /* hex */
35868+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
35869+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
392086de
AM
35870+#define AUFS_XINO_DEF_SEC 30 /* seconds */
35871+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
7f207e10
AM
35872+#define AUFS_DIRWH_DEF 3
35873+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 35874+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
35875+#define AUFS_RDBLK_DEF 512 /* bytes */
35876+#define AUFS_RDHASH_DEF 32
35877+#define AUFS_WKQ_NAME AUFS_NAME "d"
027c5e7a
AM
35878+#define AUFS_MFS_DEF_SEC 30 /* seconds */
35879+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
076b876e 35880+#define AUFS_FHSM_CACHE_DEF_SEC 30 /* seconds */
86dc4139 35881+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
7f207e10
AM
35882+
35883+/* pseudo-link maintenance under /proc */
35884+#define AUFS_PLINK_MAINT_NAME "plink_maint"
35885+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
35886+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
35887+
35888+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
35889+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
35890+
35891+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
35892+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
35893+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
35894+
35895+/* doubly whiteouted */
35896+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
35897+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
35898+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
35899+
1e00d052 35900+/* branch permissions and attributes */
7f207e10
AM
35901+#define AUFS_BRPERM_RW "rw"
35902+#define AUFS_BRPERM_RO "ro"
35903+#define AUFS_BRPERM_RR "rr"
076b876e
AM
35904+#define AUFS_BRATTR_COO_REG "coo_reg"
35905+#define AUFS_BRATTR_COO_ALL "coo_all"
35906+#define AUFS_BRATTR_FHSM "fhsm"
35907+#define AUFS_BRATTR_UNPIN "unpin"
c1595e42
JR
35908+#define AUFS_BRATTR_ICEX "icex"
35909+#define AUFS_BRATTR_ICEX_SEC "icexsec"
35910+#define AUFS_BRATTR_ICEX_SYS "icexsys"
35911+#define AUFS_BRATTR_ICEX_TR "icextr"
35912+#define AUFS_BRATTR_ICEX_USR "icexusr"
35913+#define AUFS_BRATTR_ICEX_OTH "icexoth"
1e00d052
AM
35914+#define AUFS_BRRATTR_WH "wh"
35915+#define AUFS_BRWATTR_NLWH "nolwh"
076b876e
AM
35916+#define AUFS_BRWATTR_MOO "moo"
35917+
35918+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
35919+#define AuBrPerm_RO (1 << 1) /* readonly */
35920+#define AuBrPerm_RR (1 << 2) /* natively readonly */
35921+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
35922+
35923+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */
35924+#define AuBrAttr_COO_ALL (1 << 4)
35925+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
35926+
35927+#define AuBrAttr_FHSM (1 << 5) /* file-based hsm */
35928+#define AuBrAttr_UNPIN (1 << 6) /* rename-able top dir of
c1595e42
JR
35929+ branch. meaningless since
35930+ linux-3.18-rc1 */
35931+
35932+/* ignore error in copying XATTR */
35933+#define AuBrAttr_ICEX_SEC (1 << 7)
35934+#define AuBrAttr_ICEX_SYS (1 << 8)
35935+#define AuBrAttr_ICEX_TR (1 << 9)
35936+#define AuBrAttr_ICEX_USR (1 << 10)
35937+#define AuBrAttr_ICEX_OTH (1 << 11)
35938+#define AuBrAttr_ICEX (AuBrAttr_ICEX_SEC \
35939+ | AuBrAttr_ICEX_SYS \
35940+ | AuBrAttr_ICEX_TR \
35941+ | AuBrAttr_ICEX_USR \
35942+ | AuBrAttr_ICEX_OTH)
35943+
35944+#define AuBrRAttr_WH (1 << 12) /* whiteout-able */
076b876e
AM
35945+#define AuBrRAttr_Mask AuBrRAttr_WH
35946+
c1595e42
JR
35947+#define AuBrWAttr_NoLinkWH (1 << 13) /* un-hardlinkable whiteouts */
35948+#define AuBrWAttr_MOO (1 << 14) /* move-up on open */
076b876e
AM
35949+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
35950+
35951+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | AuBrWAttr_MOO)
35952+
c1595e42 35953+/* #warning test userspace */
076b876e
AM
35954+#ifdef __KERNEL__
35955+#ifndef CONFIG_AUFS_FHSM
35956+#undef AuBrAttr_FHSM
35957+#define AuBrAttr_FHSM 0
35958+#endif
c1595e42
JR
35959+#ifndef CONFIG_AUFS_XATTR
35960+#undef AuBrAttr_ICEX
35961+#define AuBrAttr_ICEX 0
35962+#undef AuBrAttr_ICEX_SEC
35963+#define AuBrAttr_ICEX_SEC 0
35964+#undef AuBrAttr_ICEX_SYS
35965+#define AuBrAttr_ICEX_SYS 0
35966+#undef AuBrAttr_ICEX_TR
35967+#define AuBrAttr_ICEX_TR 0
35968+#undef AuBrAttr_ICEX_USR
35969+#define AuBrAttr_ICEX_USR 0
35970+#undef AuBrAttr_ICEX_OTH
35971+#define AuBrAttr_ICEX_OTH 0
35972+#endif
076b876e
AM
35973+#endif
35974+
35975+/* the longest combination */
c1595e42
JR
35976+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR do not affect this size */
35977+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \
35978+ "+" AUFS_BRATTR_COO_REG \
35979+ "+" AUFS_BRATTR_FHSM \
35980+ "+" AUFS_BRATTR_UNPIN \
7e9cd9fe
AM
35981+ "+" AUFS_BRATTR_ICEX_SEC \
35982+ "+" AUFS_BRATTR_ICEX_SYS \
35983+ "+" AUFS_BRATTR_ICEX_USR \
35984+ "+" AUFS_BRATTR_ICEX_OTH \
076b876e
AM
35985+ "+" AUFS_BRWATTR_NLWH)
35986+
35987+typedef struct {
35988+ char a[AuBrPermStrSz];
35989+} au_br_perm_str_t;
35990+
35991+static inline int au_br_writable(int brperm)
35992+{
35993+ return brperm & AuBrPerm_RW;
35994+}
35995+
35996+static inline int au_br_whable(int brperm)
35997+{
35998+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
35999+}
36000+
36001+static inline int au_br_wh_linkable(int brperm)
36002+{
36003+ return !(brperm & AuBrWAttr_NoLinkWH);
36004+}
36005+
36006+static inline int au_br_cmoo(int brperm)
36007+{
36008+ return brperm & AuBrAttr_CMOO_Mask;
36009+}
36010+
36011+static inline int au_br_fhsm(int brperm)
36012+{
36013+ return brperm & AuBrAttr_FHSM;
36014+}
7f207e10
AM
36015+
36016+/* ---------------------------------------------------------------------- */
36017+
36018+/* ioctl */
36019+enum {
36020+ /* readdir in userspace */
36021+ AuCtl_RDU,
36022+ AuCtl_RDU_INO,
36023+
076b876e
AM
36024+ AuCtl_WBR_FD, /* pathconf wrapper */
36025+ AuCtl_IBUSY, /* busy inode */
36026+ AuCtl_MVDOWN, /* move-down */
36027+ AuCtl_BR, /* info about branches */
36028+ AuCtl_FHSM_FD /* connection for fhsm */
7f207e10
AM
36029+};
36030+
36031+/* borrowed from linux/include/linux/kernel.h */
36032+#ifndef ALIGN
36033+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
36034+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
36035+#endif
36036+
36037+/* borrowed from linux/include/linux/compiler-gcc3.h */
36038+#ifndef __aligned
36039+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
36040+#endif
36041+
36042+#ifdef __KERNEL__
36043+#ifndef __packed
7f207e10
AM
36044+#define __packed __attribute__((packed))
36045+#endif
53392da6 36046+#endif
7f207e10
AM
36047+
36048+struct au_rdu_cookie {
9dbd164d
AM
36049+ uint64_t h_pos;
36050+ int16_t bindex;
36051+ uint8_t flags;
36052+ uint8_t pad;
36053+ uint32_t generation;
7f207e10
AM
36054+} __aligned(8);
36055+
36056+struct au_rdu_ent {
9dbd164d
AM
36057+ uint64_t ino;
36058+ int16_t bindex;
36059+ uint8_t type;
36060+ uint8_t nlen;
36061+ uint8_t wh;
7f207e10
AM
36062+ char name[0];
36063+} __aligned(8);
36064+
36065+static inline int au_rdu_len(int nlen)
36066+{
36067+ /* include the terminating NUL */
36068+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
9dbd164d 36069+ sizeof(uint64_t));
7f207e10
AM
36070+}
36071+
36072+union au_rdu_ent_ul {
36073+ struct au_rdu_ent __user *e;
9dbd164d 36074+ uint64_t ul;
7f207e10
AM
36075+};
36076+
36077+enum {
36078+ AufsCtlRduV_SZ,
36079+ AufsCtlRduV_End
36080+};
36081+
36082+struct aufs_rdu {
36083+ /* input */
36084+ union {
9dbd164d
AM
36085+ uint64_t sz; /* AuCtl_RDU */
36086+ uint64_t nent; /* AuCtl_RDU_INO */
7f207e10
AM
36087+ };
36088+ union au_rdu_ent_ul ent;
9dbd164d 36089+ uint16_t verify[AufsCtlRduV_End];
7f207e10
AM
36090+
36091+ /* input/output */
9dbd164d 36092+ uint32_t blk;
7f207e10
AM
36093+
36094+ /* output */
36095+ union au_rdu_ent_ul tail;
36096+ /* number of entries which were added in a single call */
9dbd164d
AM
36097+ uint64_t rent;
36098+ uint8_t full;
36099+ uint8_t shwh;
7f207e10
AM
36100+
36101+ struct au_rdu_cookie cookie;
36102+} __aligned(8);
36103+
1e00d052
AM
36104+/* ---------------------------------------------------------------------- */
36105+
36106+struct aufs_wbr_fd {
9dbd164d
AM
36107+ uint32_t oflags;
36108+ int16_t brid;
1e00d052
AM
36109+} __aligned(8);
36110+
36111+/* ---------------------------------------------------------------------- */
36112+
027c5e7a 36113+struct aufs_ibusy {
9dbd164d
AM
36114+ uint64_t ino, h_ino;
36115+ int16_t bindex;
027c5e7a
AM
36116+} __aligned(8);
36117+
1e00d052
AM
36118+/* ---------------------------------------------------------------------- */
36119+
392086de
AM
36120+/* error code for move-down */
36121+/* the actual message strings are implemented in aufs-util.git */
36122+enum {
36123+ EAU_MVDOWN_OPAQUE = 1,
36124+ EAU_MVDOWN_WHITEOUT,
36125+ EAU_MVDOWN_UPPER,
36126+ EAU_MVDOWN_BOTTOM,
36127+ EAU_MVDOWN_NOUPPER,
36128+ EAU_MVDOWN_NOLOWERBR,
36129+ EAU_Last
36130+};
36131+
c2b27bf2 36132+/* flags for move-down */
392086de
AM
36133+#define AUFS_MVDOWN_DMSG 1
36134+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */
36135+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */
36136+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */
36137+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */
36138+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */
36139+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */
36140+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */
36141+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */
076b876e
AM
36142+#define AUFS_MVDOWN_FHSM_LOWER (1 << 9) /* find fhsm attr for lower */
36143+#define AUFS_MVDOWN_STFS (1 << 10) /* req. stfs */
36144+#define AUFS_MVDOWN_STFS_FAILED (1 << 11) /* output: stfs is unusable */
36145+#define AUFS_MVDOWN_BOTTOM (1 << 12) /* output: no more lowers */
c2b27bf2 36146+
076b876e 36147+/* index for move-down */
392086de
AM
36148+enum {
36149+ AUFS_MVDOWN_UPPER,
36150+ AUFS_MVDOWN_LOWER,
36151+ AUFS_MVDOWN_NARRAY
36152+};
36153+
076b876e
AM
36154+/*
36155+ * additional info of move-down
36156+ * number of free blocks and inodes.
36157+ * subset of struct kstatfs, but smaller and always 64bit.
36158+ */
36159+struct aufs_stfs {
36160+ uint64_t f_blocks;
36161+ uint64_t f_bavail;
36162+ uint64_t f_files;
36163+ uint64_t f_ffree;
36164+};
36165+
36166+struct aufs_stbr {
36167+ int16_t brid; /* optional input */
36168+ int16_t bindex; /* output */
36169+ struct aufs_stfs stfs; /* output when AUFS_MVDOWN_STFS set */
36170+} __aligned(8);
36171+
c2b27bf2 36172+struct aufs_mvdown {
076b876e
AM
36173+ uint32_t flags; /* input/output */
36174+ struct aufs_stbr stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
36175+ int8_t au_errno; /* output */
36176+} __aligned(8);
36177+
36178+/* ---------------------------------------------------------------------- */
36179+
36180+union aufs_brinfo {
36181+ /* PATH_MAX may differ between kernel-space and user-space */
36182+ char _spacer[4096];
392086de 36183+ struct {
076b876e
AM
36184+ int16_t id;
36185+ int perm;
36186+ char path[0];
36187+ };
c2b27bf2
AM
36188+} __aligned(8);
36189+
36190+/* ---------------------------------------------------------------------- */
36191+
7f207e10
AM
36192+#define AuCtlType 'A'
36193+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
36194+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1e00d052
AM
36195+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
36196+ struct aufs_wbr_fd)
027c5e7a 36197+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
392086de
AM
36198+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \
36199+ struct aufs_mvdown)
076b876e
AM
36200+#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
36201+#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int)
7f207e10
AM
36202+
36203+#endif /* __AUFS_TYPE_H__ */
f2c43d5f 36204aufs4.9 loopback patch
5527c038
JR
36205
36206diff --git a/drivers/block/loop.c b/drivers/block/loop.c
f2c43d5f 36207index 6ee9235..f64161f 100644
5527c038
JR
36208--- a/drivers/block/loop.c
36209+++ b/drivers/block/loop.c
e2f27e51 36210@@ -551,7 +551,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
5527c038
JR
36211 }
36212
36213 struct switch_request {
36214- struct file *file;
36215+ struct file *file, *virt_file;
36216 struct completion wait;
36217 };
36218
e2f27e51 36219@@ -577,6 +577,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
5527c038
JR
36220 mapping = file->f_mapping;
36221 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
36222 lo->lo_backing_file = file;
36223+ lo->lo_backing_virt_file = p->virt_file;
36224 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
36225 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
36226 lo->old_gfp_mask = mapping_gfp_mask(mapping);
e2f27e51 36227@@ -589,11 +590,13 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
5527c038
JR
36228 * First it needs to flush existing IO, it does this by sending a magic
36229 * BIO down the pipe. The completion of this BIO does the actual switch.
36230 */
36231-static int loop_switch(struct loop_device *lo, struct file *file)
36232+static int loop_switch(struct loop_device *lo, struct file *file,
36233+ struct file *virt_file)
36234 {
36235 struct switch_request w;
36236
36237 w.file = file;
36238+ w.virt_file = virt_file;
36239
36240 /* freeze queue and wait for completion of scheduled requests */
36241 blk_mq_freeze_queue(lo->lo_queue);
e2f27e51 36242@@ -612,7 +615,16 @@ static int loop_switch(struct loop_device *lo, struct file *file)
5527c038
JR
36243 */
36244 static int loop_flush(struct loop_device *lo)
36245 {
36246- return loop_switch(lo, NULL);
36247+ return loop_switch(lo, NULL, NULL);
36248+}
36249+
36250+static struct file *loop_real_file(struct file *file)
36251+{
36252+ struct file *f = NULL;
36253+
36254+ if (file->f_path.dentry->d_sb->s_op->real_loop)
36255+ f = file->f_path.dentry->d_sb->s_op->real_loop(file);
36256+ return f;
36257 }
36258
c2c0f25c 36259 static void loop_reread_partitions(struct loop_device *lo,
e2f27e51 36260@@ -649,6 +661,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
36261 unsigned int arg)
36262 {
36263 struct file *file, *old_file;
36264+ struct file *f, *virt_file = NULL, *old_virt_file;
36265 struct inode *inode;
36266 int error;
36267
e2f27e51 36268@@ -665,9 +678,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
36269 file = fget(arg);
36270 if (!file)
36271 goto out;
36272+ f = loop_real_file(file);
36273+ if (f) {
36274+ virt_file = file;
36275+ file = f;
36276+ get_file(file);
36277+ }
36278
36279 inode = file->f_mapping->host;
36280 old_file = lo->lo_backing_file;
36281+ old_virt_file = lo->lo_backing_virt_file;
36282
36283 error = -EINVAL;
36284
e2f27e51 36285@@ -679,17 +699,21 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
36286 goto out_putf;
36287
36288 /* and ... switch */
36289- error = loop_switch(lo, file);
36290+ error = loop_switch(lo, file, virt_file);
36291 if (error)
36292 goto out_putf;
36293
36294 fput(old_file);
36295+ if (old_virt_file)
36296+ fput(old_virt_file);
36297 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
c2c0f25c 36298 loop_reread_partitions(lo, bdev);
5527c038
JR
36299 return 0;
36300
36301 out_putf:
36302 fput(file);
36303+ if (virt_file)
36304+ fput(virt_file);
36305 out:
36306 return error;
36307 }
e2f27e51 36308@@ -876,7 +900,7 @@ static int loop_prepare_queue(struct loop_device *lo)
5527c038
JR
36309 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
36310 struct block_device *bdev, unsigned int arg)
36311 {
36312- struct file *file, *f;
36313+ struct file *file, *f, *virt_file = NULL;
36314 struct inode *inode;
36315 struct address_space *mapping;
36316 unsigned lo_blocksize;
e2f27e51 36317@@ -891,6 +915,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
36318 file = fget(arg);
36319 if (!file)
36320 goto out;
36321+ f = loop_real_file(file);
36322+ if (f) {
36323+ virt_file = file;
36324+ file = f;
36325+ get_file(file);
36326+ }
36327
36328 error = -EBUSY;
36329 if (lo->lo_state != Lo_unbound)
e2f27e51 36330@@ -943,6 +973,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
36331 lo->lo_device = bdev;
36332 lo->lo_flags = lo_flags;
36333 lo->lo_backing_file = file;
36334+ lo->lo_backing_virt_file = virt_file;
36335 lo->transfer = NULL;
36336 lo->ioctl = NULL;
36337 lo->lo_sizelimit = 0;
e2f27e51 36338@@ -975,6 +1006,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
36339
36340 out_putf:
36341 fput(file);
36342+ if (virt_file)
36343+ fput(virt_file);
36344 out:
36345 /* This is safe: open() is still holding a reference. */
36346 module_put(THIS_MODULE);
f2c43d5f 36347@@ -1021,6 +1054,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
36348 static int loop_clr_fd(struct loop_device *lo)
36349 {
36350 struct file *filp = lo->lo_backing_file;
36351+ struct file *virt_filp = lo->lo_backing_virt_file;
36352 gfp_t gfp = lo->old_gfp_mask;
36353 struct block_device *bdev = lo->lo_device;
36354
e2f27e51 36355@@ -1052,6 +1086,7 @@ static int loop_clr_fd(struct loop_device *lo)
5527c038
JR
36356 spin_lock_irq(&lo->lo_lock);
36357 lo->lo_state = Lo_rundown;
36358 lo->lo_backing_file = NULL;
36359+ lo->lo_backing_virt_file = NULL;
36360 spin_unlock_irq(&lo->lo_lock);
36361
36362 loop_release_xfer(lo);
e2f27e51 36363@@ -1096,6 +1131,8 @@ static int loop_clr_fd(struct loop_device *lo)
5527c038
JR
36364 * bd_mutex which is usually taken before lo_ctl_mutex.
36365 */
36366 fput(filp);
36367+ if (virt_filp)
36368+ fput(virt_filp);
36369 return 0;
36370 }
36371
36372diff --git a/drivers/block/loop.h b/drivers/block/loop.h
be52b249 36373index fb2237c..c3888c5 100644
5527c038
JR
36374--- a/drivers/block/loop.h
36375+++ b/drivers/block/loop.h
36376@@ -46,7 +46,7 @@ struct loop_device {
36377 int (*ioctl)(struct loop_device *, int cmd,
36378 unsigned long arg);
36379
36380- struct file * lo_backing_file;
36381+ struct file * lo_backing_file, *lo_backing_virt_file;
36382 struct block_device *lo_device;
36383 unsigned lo_blocksize;
36384 void *key_data;
36385diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
f2c43d5f 36386index d2a9a1d..d7519d0 100644
5527c038
JR
36387--- a/fs/aufs/f_op.c
36388+++ b/fs/aufs/f_op.c
f2c43d5f 36389@@ -351,7 +351,7 @@ static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
5527c038
JR
36390 if (IS_ERR(h_file))
36391 goto out;
36392
36393- if (au_test_loopback_kthread()) {
36394+ if (0 && au_test_loopback_kthread()) {
36395 au_warn_loopback(h_file->f_path.dentry->d_sb);
36396 if (file->f_mapping != h_file->f_mapping) {
36397 file->f_mapping = h_file->f_mapping;
36398diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
e2f27e51 36399index c3ca50f..a3dbdaf 100644
5527c038
JR
36400--- a/fs/aufs/loop.c
36401+++ b/fs/aufs/loop.c
e2f27e51 36402@@ -132,3 +132,19 @@ void au_loopback_fin(void)
79b8bda9 36403 symbol_put(loop_backing_file);
f0c0a007 36404 au_delayed_kfree(au_warn_loopback_array);
5527c038
JR
36405 }
36406+
36407+/* ---------------------------------------------------------------------- */
36408+
36409+/* support the loopback block device inside aufs */
36410+
36411+struct file *aufs_real_loop(struct file *file)
36412+{
36413+ struct file *f;
36414+
36415+ BUG_ON(!au_test_aufs(file->f_path.dentry->d_sb));
36416+ fi_read_lock(file);
36417+ f = au_hf_top(file);
36418+ fi_read_unlock(file);
36419+ AuDebugOn(!f);
36420+ return f;
36421+}
36422diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
8cdd5066 36423index 48bf070..66afec7 100644
5527c038
JR
36424--- a/fs/aufs/loop.h
36425+++ b/fs/aufs/loop.h
f2c43d5f 36426@@ -25,7 +25,11 @@
5527c038
JR
36427
36428 int au_loopback_init(void);
36429 void au_loopback_fin(void);
36430+
36431+struct file *aufs_real_loop(struct file *file);
36432 #else
36433+AuStub(struct file *, loop_backing_file, return NULL)
36434+
36435 AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
36436 struct dentry *h_adding)
36437 AuStubInt0(au_test_loopback_kthread, void)
f2c43d5f 36438@@ -33,6 +37,8 @@
5527c038
JR
36439
36440 AuStubInt0(au_loopback_init, void)
36441 AuStubVoid(au_loopback_fin, void)
36442+
36443+AuStub(struct file *, aufs_real_loop, return NULL, struct file *file)
36444 #endif /* BLK_DEV_LOOP */
36445
36446 #endif /* __KERNEL__ */
36447diff --git a/fs/aufs/super.c b/fs/aufs/super.c
f2c43d5f 36448index 0082ce4..5085378 100644
5527c038
JR
36449--- a/fs/aufs/super.c
36450+++ b/fs/aufs/super.c
f2c43d5f 36451@@ -839,7 +839,10 @@ static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
5527c038
JR
36452 .statfs = aufs_statfs,
36453 .put_super = aufs_put_super,
36454 .sync_fs = aufs_sync_fs,
36455- .remount_fs = aufs_remount_fs
36456+ .remount_fs = aufs_remount_fs,
36457+#ifdef CONFIG_AUFS_BDEV_LOOP
36458+ .real_loop = aufs_real_loop
36459+#endif
36460 };
36461
36462 /* ---------------------------------------------------------------------- */
36463diff --git a/include/linux/fs.h b/include/linux/fs.h
f2c43d5f 36464index a903bc3..db820e3 100644
5527c038
JR
36465--- a/include/linux/fs.h
36466+++ b/include/linux/fs.h
f2c43d5f 36467@@ -1823,6 +1823,10 @@ struct super_operations {
5527c038
JR
36468 struct shrink_control *);
36469 long (*free_cached_objects)(struct super_block *,
36470 struct shrink_control *);
36471+#if defined(CONFIG_BLK_DEV_LOOP) || defined(CONFIG_BLK_DEV_LOOP_MODULE)
36472+ /* and aufs */
36473+ struct file *(*real_loop)(struct file *);
36474+#endif
36475 };
36476
36477 /*
This page took 6.79835 seconds and 4 git commands to generate.