]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs4.patch
- v3 version of the patch (split into two chunks)
[packages/kernel.git] / kernel-aufs4.patch
CommitLineData
79b8bda9 1aufs4.3 kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
79b8bda9 4index da3f32f..b9879fe 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
79b8bda9 7@@ -215,6 +215,7 @@ source "fs/pstore/Kconfig"
5527c038 8 source "fs/sysv/Kconfig"
7e9cd9fe 9 source "fs/ufs/Kconfig"
7f207e10
AM
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
79b8bda9 16index f79cf40..7562a4d 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
79b8bda9 19@@ -125,3 +125,4 @@ obj-y += exofs/ # Multiple modules
7f207e10 20 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 21 obj-$(CONFIG_PSTORE) += pstore/
c06a8ce3 22 obj-$(CONFIG_EFIVAR_FS) += efivarfs/
86dc4139 23+obj-$(CONFIG_AUFS_FS) += aufs/
c06a8ce3 24diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
79b8bda9 25index f7b2db4..47098aed 100644
c06a8ce3
AM
26--- a/include/uapi/linux/Kbuild
27+++ b/include/uapi/linux/Kbuild
5527c038 28@@ -59,6 +59,7 @@ header-y += atmsvc.h
03673fb0
JR
29 header-y += atm_tcp.h
30 header-y += atm_zatm.h
c06a8ce3
AM
31 header-y += audit.h
32+header-y += aufs_type.h
c06a8ce3 33 header-y += auto_fs4.h
03673fb0 34 header-y += auto_fs.h
c06a8ce3 35 header-y += auxvec.h
79b8bda9 36aufs4.3 base patch
7f207e10 37
c1595e42 38diff --git a/MAINTAINERS b/MAINTAINERS
79b8bda9 39index 747c653..53ecc33 100644
c1595e42
JR
40--- a/MAINTAINERS
41+++ b/MAINTAINERS
79b8bda9 42@@ -1985,6 +1985,19 @@ F: include/linux/audit.h
c1595e42
JR
43 F: include/uapi/linux/audit.h
44 F: kernel/audit*
45
46+AUFS (advanced multi layered unification filesystem) FILESYSTEM
47+M: "J. R. Okajima" <hooanon05g@gmail.com>
48+L: linux-unionfs@vger.kernel.org
49+L: aufs-users@lists.sourceforge.net (members only)
50+W: http://aufs.sourceforge.net
5527c038 51+T: git://github.com/sfjro/aufs4-linux.git
c1595e42
JR
52+S: Supported
53+F: Documentation/filesystems/aufs/
54+F: Documentation/ABI/testing/debugfs-aufs
55+F: Documentation/ABI/testing/sysfs-aufs
56+F: fs/aufs/
57+F: include/uapi/linux/aufs_type.h
58+
59 AUXILIARY DISPLAY DRIVERS
60 M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
61 W: http://miguelojeda.es/auxdisplay.htm
392086de 62diff --git a/drivers/block/loop.c b/drivers/block/loop.c
79b8bda9 63index 674f800a..291ec9e 100644
392086de
AM
64--- a/drivers/block/loop.c
65+++ b/drivers/block/loop.c
c2c0f25c 66@@ -560,6 +560,24 @@ static inline int is_loop_device(struct file *file)
392086de
AM
67 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
68 }
69
70+/*
71+ * for AUFS
72+ * no get/put for file.
73+ */
74+struct file *loop_backing_file(struct super_block *sb)
75+{
76+ struct file *ret;
77+ struct loop_device *l;
78+
79+ ret = NULL;
80+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
81+ l = sb->s_bdev->bd_disk->private_data;
82+ ret = l->lo_backing_file;
83+ }
84+ return ret;
85+}
86+EXPORT_SYMBOL(loop_backing_file);
87+
88 /* loop sysfs attributes */
89
90 static ssize_t loop_attr_show(struct device *dev, char *page,
c1595e42 91diff --git a/fs/dcache.c b/fs/dcache.c
79b8bda9 92index 5c33aeb..8aa7f26 100644
c1595e42
JR
93--- a/fs/dcache.c
94+++ b/fs/dcache.c
79b8bda9 95@@ -1167,7 +1167,7 @@ enum d_walk_ret {
c1595e42
JR
96 *
97 * The @enter() and @finish() callbacks are called with d_lock held.
98 */
99-static void d_walk(struct dentry *parent, void *data,
100+void d_walk(struct dentry *parent, void *data,
101 enum d_walk_ret (*enter)(void *, struct dentry *),
102 void (*finish)(void *))
103 {
5527c038
JR
104diff --git a/fs/read_write.c b/fs/read_write.c
105index 819ef3f..fd0414e 100644
106--- a/fs/read_write.c
107+++ b/fs/read_write.c
108@@ -494,6 +494,28 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
109 }
110 EXPORT_SYMBOL(__vfs_write);
111
112+vfs_readf_t vfs_readf(struct file *file)
113+{
114+ const struct file_operations *fop = file->f_op;
115+
116+ if (fop->read)
117+ return fop->read;
118+ if (fop->read_iter)
119+ return new_sync_read;
120+ return ERR_PTR(-ENOSYS);
121+}
122+
123+vfs_writef_t vfs_writef(struct file *file)
124+{
125+ const struct file_operations *fop = file->f_op;
126+
127+ if (fop->write)
128+ return fop->write;
129+ if (fop->write_iter)
130+ return new_sync_write;
131+ return ERR_PTR(-ENOSYS);
132+}
133+
134 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
135 {
136 mm_segment_t old_fs;
7f207e10 137diff --git a/fs/splice.c b/fs/splice.c
c2c0f25c 138index 5fc1e50..5f8385a 100644
7f207e10
AM
139--- a/fs/splice.c
140+++ b/fs/splice.c
c2c0f25c 141@@ -1102,8 +1102,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
7f207e10
AM
142 /*
143 * Attempt to initiate a splice from pipe to file.
144 */
145-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
146- loff_t *ppos, size_t len, unsigned int flags)
147+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
148+ loff_t *ppos, size_t len, unsigned int flags)
149 {
150 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
151 loff_t *, size_t, unsigned int);
c2c0f25c 152@@ -1119,9 +1119,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
153 /*
154 * Attempt to initiate a splice from a file to a pipe.
155 */
156-static long do_splice_to(struct file *in, loff_t *ppos,
157- struct pipe_inode_info *pipe, size_t len,
158- unsigned int flags)
159+long do_splice_to(struct file *in, loff_t *ppos,
160+ struct pipe_inode_info *pipe, size_t len,
161+ unsigned int flags)
162 {
163 ssize_t (*splice_read)(struct file *, loff_t *,
164 struct pipe_inode_info *, size_t, unsigned int);
b912730e
AM
165diff --git a/include/linux/file.h b/include/linux/file.h
166index f87d308..9a290b3 100644
167--- a/include/linux/file.h
168+++ b/include/linux/file.h
169@@ -19,6 +19,7 @@ struct dentry;
170 struct path;
171 extern struct file *alloc_file(struct path *, fmode_t mode,
172 const struct file_operations *fop);
173+extern struct file *get_empty_filp(void);
174
175 static inline void fput_light(struct file *file, int fput_needed)
176 {
5527c038 177diff --git a/include/linux/fs.h b/include/linux/fs.h
79b8bda9 178index 72d8a84..fabd9d7a 100644
5527c038
JR
179--- a/include/linux/fs.h
180+++ b/include/linux/fs.h
79b8bda9 181@@ -1687,6 +1687,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
5527c038
JR
182 struct iovec *fast_pointer,
183 struct iovec **ret_pointer);
184
185+typedef ssize_t (*vfs_readf_t)(struct file *, char __user *, size_t, loff_t *);
186+typedef ssize_t (*vfs_writef_t)(struct file *, const char __user *, size_t,
187+ loff_t *);
188+vfs_readf_t vfs_readf(struct file *file);
189+vfs_writef_t vfs_writef(struct file *file);
190+
191 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
192 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
193 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
1e00d052 194diff --git a/include/linux/splice.h b/include/linux/splice.h
076b876e 195index da2751d..2e0fca6 100644
1e00d052
AM
196--- a/include/linux/splice.h
197+++ b/include/linux/splice.h
076b876e 198@@ -83,4 +83,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
4b3da204
AM
199 extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
200
201 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
1e00d052
AM
202+
203+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
204+ loff_t *ppos, size_t len, unsigned int flags);
205+extern long do_splice_to(struct file *in, loff_t *ppos,
206+ struct pipe_inode_info *pipe, size_t len,
207+ unsigned int flags);
208 #endif
79b8bda9 209aufs4.3 mmap patch
fb47a38f
JR
210
211diff --git a/fs/buffer.c b/fs/buffer.c
79b8bda9 212index 82283ab..477e5f3 100644
fb47a38f
JR
213--- a/fs/buffer.c
214+++ b/fs/buffer.c
c2c0f25c 215@@ -2473,7 +2473,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
fb47a38f
JR
216 * Update file times before taking page lock. We may end up failing the
217 * fault so this update may be superfluous but who really cares...
218 */
219- file_update_time(vma->vm_file);
220+ vma_file_update_time(vma);
221
222 ret = __block_page_mkwrite(vma, vmf, get_block);
223 sb_end_pagefault(sb);
c1595e42 224diff --git a/fs/proc/base.c b/fs/proc/base.c
79b8bda9 225index b25eee4..c83d588 100644
c1595e42
JR
226--- a/fs/proc/base.c
227+++ b/fs/proc/base.c
79b8bda9 228@@ -1914,7 +1914,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
c1595e42
JR
229 down_read(&mm->mmap_sem);
230 vma = find_exact_vma(mm, vm_start, vm_end);
231 if (vma && vma->vm_file) {
232- *path = vma->vm_file->f_path;
233+ *path = vma_pr_or_file(vma)->f_path;
234 path_get(path);
235 rc = 0;
236 }
fb47a38f 237diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
c2c0f25c 238index f8595e8..cb8eda0 100644
fb47a38f
JR
239--- a/fs/proc/nommu.c
240+++ b/fs/proc/nommu.c
076b876e 241@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
fb47a38f
JR
242 file = region->vm_file;
243
244 if (file) {
245- struct inode *inode = file_inode(region->vm_file);
246+ struct inode *inode;
076b876e 247+
fb47a38f
JR
248+ file = vmr_pr_or_file(region);
249+ inode = file_inode(file);
250 dev = inode->i_sb->s_dev;
251 ino = inode->i_ino;
252 }
253diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
79b8bda9 254index e2d46ad..5e7e631 100644
fb47a38f
JR
255--- a/fs/proc/task_mmu.c
256+++ b/fs/proc/task_mmu.c
79b8bda9 257@@ -280,7 +280,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
fb47a38f
JR
258 const char *name = NULL;
259
260 if (file) {
261- struct inode *inode = file_inode(vma->vm_file);
262+ struct inode *inode;
076b876e 263+
fb47a38f
JR
264+ file = vma_pr_or_file(vma);
265+ inode = file_inode(file);
266 dev = inode->i_sb->s_dev;
267 ino = inode->i_ino;
268 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
79b8bda9 269@@ -1465,7 +1468,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
076b876e
AM
270 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
271 struct vm_area_struct *vma = v;
272 struct numa_maps *md = &numa_priv->md;
273- struct file *file = vma->vm_file;
274+ struct file *file = vma_pr_or_file(vma);
076b876e 275 struct mm_struct *mm = vma->vm_mm;
7e9cd9fe
AM
276 struct mm_walk walk = {
277 .hugetlb_entry = gather_hugetlb_stats,
fb47a38f 278diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
c2c0f25c 279index e0d64c9..7aa92db 100644
fb47a38f
JR
280--- a/fs/proc/task_nommu.c
281+++ b/fs/proc/task_nommu.c
c1595e42 282@@ -160,7 +160,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
fb47a38f
JR
283 file = vma->vm_file;
284
285 if (file) {
286- struct inode *inode = file_inode(vma->vm_file);
287+ struct inode *inode;
076b876e 288+
b912730e 289+ file = vma_pr_or_file(vma);
fb47a38f
JR
290+ inode = file_inode(file);
291 dev = inode->i_sb->s_dev;
292 ino = inode->i_ino;
293 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
294diff --git a/include/linux/mm.h b/include/linux/mm.h
79b8bda9 295index 80001de..9248b97 100644
fb47a38f
JR
296--- a/include/linux/mm.h
297+++ b/include/linux/mm.h
79b8bda9 298@@ -1211,6 +1211,28 @@ static inline int fixup_user_fault(struct task_struct *tsk,
fb47a38f
JR
299 }
300 #endif
301
076b876e
AM
302+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
303+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
304+ int);
305+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
306+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
fb47a38f 307+
fb47a38f
JR
308+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \
309+ __LINE__)
310+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \
311+ __LINE__)
312+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__)
313+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__)
b912730e
AM
314+
315+#ifndef CONFIG_MMU
076b876e
AM
316+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
317+extern void vmr_do_fput(struct vm_region *, const char[], int);
318+
319+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \
320+ __LINE__)
321+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__)
b912730e 322+#endif /* !CONFIG_MMU */
fb47a38f
JR
323+
324 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
325 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
326 void *buf, int len, int write);
327diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
79b8bda9 328index 3d6baa7..750ca95 100644
fb47a38f
JR
329--- a/include/linux/mm_types.h
330+++ b/include/linux/mm_types.h
79b8bda9 331@@ -250,6 +250,7 @@ struct vm_region {
fb47a38f
JR
332 unsigned long vm_top; /* region allocated to here */
333 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
334 struct file *vm_file; /* the backing file or NULL */
335+ struct file *vm_prfile; /* the virtual backing file or NULL */
336
337 int vm_usage; /* region usage count (access under nommu_region_sem) */
338 bool vm_icache_flushed : 1; /* true if the icache has been flushed for
79b8bda9 339@@ -324,6 +325,7 @@ struct vm_area_struct {
fb47a38f
JR
340 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
341 units, *not* PAGE_CACHE_SIZE */
342 struct file * vm_file; /* File we map to (can be NULL). */
343+ struct file *vm_prfile; /* shadow of vm_file */
344 void * vm_private_data; /* was vm_pte (shared mem) */
345
346 #ifndef CONFIG_MMU
347diff --git a/kernel/fork.c b/kernel/fork.c
79b8bda9 348index 2845623..71004bd 100644
fb47a38f
JR
349--- a/kernel/fork.c
350+++ b/kernel/fork.c
79b8bda9 351@@ -462,7 +462,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
fb47a38f
JR
352 struct inode *inode = file_inode(file);
353 struct address_space *mapping = file->f_mapping;
354
355- get_file(file);
356+ vma_get_file(tmp);
357 if (tmp->vm_flags & VM_DENYWRITE)
358 atomic_dec(&inode->i_writecount);
2000de60 359 i_mmap_lock_write(mapping);
076b876e 360diff --git a/mm/Makefile b/mm/Makefile
79b8bda9 361index 2ed4319..e3a53f5 100644
076b876e
AM
362--- a/mm/Makefile
363+++ b/mm/Makefile
7e9cd9fe 364@@ -21,7 +21,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
076b876e 365 mm_init.o mmu_context.o percpu.o slab_common.o \
c1595e42 366 compaction.o vmacache.o \
076b876e 367 interval_tree.o list_lru.o workingset.o \
7e9cd9fe
AM
368- debug.o $(mmu-y)
369+ prfile.o debug.o $(mmu-y)
076b876e
AM
370
371 obj-y += init-mm.o
372
fb47a38f 373diff --git a/mm/filemap.c b/mm/filemap.c
79b8bda9 374index 327910c..7bbc372 100644
fb47a38f
JR
375--- a/mm/filemap.c
376+++ b/mm/filemap.c
c2c0f25c 377@@ -2089,7 +2089,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
fb47a38f
JR
378 int ret = VM_FAULT_LOCKED;
379
380 sb_start_pagefault(inode->i_sb);
381- file_update_time(vma->vm_file);
382+ vma_file_update_time(vma);
383 lock_page(page);
384 if (page->mapping != inode->i_mapping) {
385 unlock_page(page);
fb47a38f 386diff --git a/mm/memory.c b/mm/memory.c
79b8bda9 387index deb679c..df2ce3e 100644
fb47a38f
JR
388--- a/mm/memory.c
389+++ b/mm/memory.c
79b8bda9 390@@ -2035,7 +2035,7 @@ static inline int wp_page_reuse(struct mm_struct *mm,
fb47a38f 391 }
7e9cd9fe 392
b912730e
AM
393 if (!page_mkwrite)
394- file_update_time(vma->vm_file);
395+ vma_file_update_time(vma);
396 }
397
398 return VM_FAULT_WRITE;
fb47a38f 399diff --git a/mm/mmap.c b/mm/mmap.c
79b8bda9 400index 79bcc9f..da28c8a 100644
fb47a38f
JR
401--- a/mm/mmap.c
402+++ b/mm/mmap.c
79b8bda9 403@@ -275,7 +275,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
fb47a38f
JR
404 if (vma->vm_ops && vma->vm_ops->close)
405 vma->vm_ops->close(vma);
406 if (vma->vm_file)
407- fput(vma->vm_file);
408+ vma_fput(vma);
409 mpol_put(vma_policy(vma));
410 kmem_cache_free(vm_area_cachep, vma);
411 return next;
79b8bda9 412@@ -887,7 +887,7 @@ again: remove_next = 1 + (end > next->vm_end);
fb47a38f
JR
413 if (remove_next) {
414 if (file) {
415 uprobe_munmap(next, next->vm_start, next->vm_end);
416- fput(file);
417+ vma_fput(vma);
418 }
419 if (next->anon_vma)
420 anon_vma_merge(vma, next);
79b8bda9 421@@ -1683,8 +1683,8 @@ out:
35939ee7
JR
422 return addr;
423
fb47a38f 424 unmap_and_free_vma:
fb47a38f
JR
425+ vma_fput(vma);
426 vma->vm_file = NULL;
427- fput(file);
428
429 /* Undo any partial mapping done by a device driver. */
430 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
79b8bda9 431@@ -2485,7 +2485,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
432 goto out_free_mpol;
433
434 if (new->vm_file)
435- get_file(new->vm_file);
436+ vma_get_file(new);
437
438 if (new->vm_ops && new->vm_ops->open)
439 new->vm_ops->open(new);
79b8bda9 440@@ -2504,7 +2504,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
441 if (new->vm_ops && new->vm_ops->close)
442 new->vm_ops->close(new);
443 if (new->vm_file)
444- fput(new->vm_file);
445+ vma_fput(new);
446 unlink_anon_vmas(new);
447 out_free_mpol:
448 mpol_put(vma_policy(new));
79b8bda9 449@@ -2646,7 +2646,6 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
7e9cd9fe
AM
450 struct vm_area_struct *vma;
451 unsigned long populate = 0;
452 unsigned long ret = -EINVAL;
453- struct file *file;
454
455 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
456 "See Documentation/vm/remap_file_pages.txt.\n",
79b8bda9 457@@ -2690,10 +2689,10 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
7e9cd9fe
AM
458 munlock_vma_pages_range(vma, start, start + size);
459 }
460
461- file = get_file(vma->vm_file);
462+ vma_get_file(vma);
463 ret = do_mmap_pgoff(vma->vm_file, start, size,
464 prot, flags, pgoff, &populate);
465- fput(file);
466+ vma_fput(vma);
467 out:
468 up_write(&mm->mmap_sem);
469 if (populate)
79b8bda9
AM
470@@ -2963,7 +2962,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
471 if (anon_vma_clone(new_vma, vma))
472 goto out_free_mempol;
473 if (new_vma->vm_file)
474- get_file(new_vma->vm_file);
475+ vma_get_file(new_vma);
476 if (new_vma->vm_ops && new_vma->vm_ops->open)
477 new_vma->vm_ops->open(new_vma);
478 vma_link(mm, new_vma, prev, rb_link, rb_parent);
fb47a38f 479diff --git a/mm/nommu.c b/mm/nommu.c
79b8bda9 480index ab14a20..fffc566 100644
fb47a38f
JR
481--- a/mm/nommu.c
482+++ b/mm/nommu.c
c2c0f25c 483@@ -671,7 +671,7 @@ static void __put_nommu_region(struct vm_region *region)
fb47a38f
JR
484 up_write(&nommu_region_sem);
485
486 if (region->vm_file)
487- fput(region->vm_file);
488+ vmr_fput(region);
489
490 /* IO memory and memory shared directly out of the pagecache
491 * from ramfs/tmpfs mustn't be released here */
c2c0f25c 492@@ -829,7 +829,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
fb47a38f
JR
493 if (vma->vm_ops && vma->vm_ops->close)
494 vma->vm_ops->close(vma);
495 if (vma->vm_file)
496- fput(vma->vm_file);
497+ vma_fput(vma);
498 put_nommu_region(vma->vm_region);
499 kmem_cache_free(vm_area_cachep, vma);
500 }
79b8bda9 501@@ -1355,7 +1355,7 @@ unsigned long do_mmap(struct file *file,
fb47a38f
JR
502 goto error_just_free;
503 }
504 }
505- fput(region->vm_file);
506+ vmr_fput(region);
507 kmem_cache_free(vm_region_jar, region);
508 region = pregion;
509 result = start;
79b8bda9 510@@ -1430,10 +1430,10 @@ error_just_free:
fb47a38f
JR
511 up_write(&nommu_region_sem);
512 error:
513 if (region->vm_file)
514- fput(region->vm_file);
515+ vmr_fput(region);
516 kmem_cache_free(vm_region_jar, region);
517 if (vma->vm_file)
518- fput(vma->vm_file);
519+ vma_fput(vma);
520 kmem_cache_free(vm_area_cachep, vma);
fb47a38f 521 return ret;
c2c0f25c 522
076b876e
AM
523diff --git a/mm/prfile.c b/mm/prfile.c
524new file mode 100644
c2c0f25c 525index 0000000..b323b8a
076b876e
AM
526--- /dev/null
527+++ b/mm/prfile.c
528@@ -0,0 +1,86 @@
529+/*
530+ * Mainly for aufs which mmap(2) different file and wants to print different path
531+ * in /proc/PID/maps.
532+ * Call these functions via macros defined in linux/mm.h.
533+ *
534+ * See Documentation/filesystems/aufs/design/06mmap.txt
535+ *
536+ * Copyright (c) 2014 Junjro R. Okajima
537+ * Copyright (c) 2014 Ian Campbell
538+ */
539+
540+#include <linux/mm.h>
541+#include <linux/file.h>
542+#include <linux/fs.h>
543+
544+/* #define PRFILE_TRACE */
545+static inline void prfile_trace(struct file *f, struct file *pr,
546+ const char func[], int line, const char func2[])
547+{
548+#ifdef PRFILE_TRACE
549+ if (pr)
c2c0f25c 550+ pr_info("%s:%d: %s, %s\n", func, line, func2,
7e9cd9fe 551+ f ? (char *)f->f_path.dentry->d_name.name : "(null)");
076b876e
AM
552+#endif
553+}
554+
076b876e
AM
555+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
556+ int line)
557+{
558+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
559+
560+ prfile_trace(f, pr, func, line, __func__);
561+ file_update_time(f);
562+ if (f && pr)
563+ file_update_time(pr);
564+}
565+
566+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
567+ int line)
568+{
569+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
570+
571+ prfile_trace(f, pr, func, line, __func__);
572+ return (f && pr) ? pr : f;
573+}
574+
575+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
576+{
577+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
578+
579+ prfile_trace(f, pr, func, line, __func__);
580+ get_file(f);
581+ if (f && pr)
582+ get_file(pr);
583+}
584+
585+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
586+{
587+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
588+
589+ prfile_trace(f, pr, func, line, __func__);
590+ fput(f);
591+ if (f && pr)
592+ fput(pr);
593+}
b912730e
AM
594+
595+#ifndef CONFIG_MMU
076b876e
AM
596+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
597+ int line)
598+{
599+ struct file *f = region->vm_file, *pr = region->vm_prfile;
600+
601+ prfile_trace(f, pr, func, line, __func__);
602+ return (f && pr) ? pr : f;
603+}
604+
605+void vmr_do_fput(struct vm_region *region, const char func[], int line)
606+{
607+ struct file *f = region->vm_file, *pr = region->vm_prfile;
608+
609+ prfile_trace(f, pr, func, line, __func__);
610+ fput(f);
611+ if (f && pr)
612+ fput(pr);
613+}
b912730e 614+#endif /* !CONFIG_MMU */
79b8bda9 615aufs4.3 standalone patch
7f207e10 616
c1595e42 617diff --git a/fs/dcache.c b/fs/dcache.c
79b8bda9 618index 8aa7f26..f997345 100644
c1595e42
JR
619--- a/fs/dcache.c
620+++ b/fs/dcache.c
79b8bda9 621@@ -1272,6 +1272,7 @@ rename_retry:
c1595e42
JR
622 seq = 1;
623 goto again;
624 }
625+EXPORT_SYMBOL(d_walk);
626
627 /*
628 * Search for at least 1 mount point in the dentry's subdirs.
79b8bda9
AM
629diff --git a/fs/exec.c b/fs/exec.c
630index b06623a..b9206c5 100644
631--- a/fs/exec.c
632+++ b/fs/exec.c
633@@ -103,6 +103,7 @@ bool path_noexec(const struct path *path)
634 return (path->mnt->mnt_flags & MNT_NOEXEC) ||
635 (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
636 }
637+EXPORT_SYMBOL(path_noexec);
638
639 #ifdef CONFIG_USELIB
640 /*
b912730e 641diff --git a/fs/file_table.c b/fs/file_table.c
79b8bda9 642index ad17e05..df66450 100644
b912730e
AM
643--- a/fs/file_table.c
644+++ b/fs/file_table.c
79b8bda9 645@@ -147,6 +147,7 @@ over:
b912730e
AM
646 }
647 return ERR_PTR(-ENFILE);
648 }
649+EXPORT_SYMBOL(get_empty_filp);
650
651 /**
652 * alloc_file - allocate and initialize a 'struct file'
79b8bda9 653@@ -308,6 +309,7 @@ void put_filp(struct file *file)
b912730e
AM
654 file_free(file);
655 }
656 }
657+EXPORT_SYMBOL(put_filp);
658
79b8bda9 659 void __init files_init(void)
b912730e 660 {
7f207e10 661diff --git a/fs/namespace.c b/fs/namespace.c
79b8bda9 662index 0570729..ec560d8 100644
7f207e10
AM
663--- a/fs/namespace.c
664+++ b/fs/namespace.c
7e9cd9fe 665@@ -463,6 +463,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
c06a8ce3
AM
666 mnt_dec_writers(real_mount(mnt));
667 preempt_enable();
668 }
669+EXPORT_SYMBOL_GPL(__mnt_drop_write);
670
671 /**
672 * mnt_drop_write - give up write access to a mount
79b8bda9 673@@ -1803,6 +1804,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
674 }
675 return 0;
676 }
677+EXPORT_SYMBOL(iterate_mounts);
678
7eafdf33 679 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
7f207e10
AM
680 {
681diff --git a/fs/notify/group.c b/fs/notify/group.c
c1595e42 682index d16b62c..06ca6bc 100644
7f207e10
AM
683--- a/fs/notify/group.c
684+++ b/fs/notify/group.c
685@@ -22,6 +22,7 @@
686 #include <linux/srcu.h>
687 #include <linux/rculist.h>
688 #include <linux/wait.h>
689+#include <linux/module.h>
690
691 #include <linux/fsnotify_backend.h>
692 #include "fsnotify.h"
fb47a38f 693@@ -72,6 +73,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
1716fcea
AM
694 {
695 atomic_inc(&group->refcnt);
696 }
697+EXPORT_SYMBOL(fsnotify_get_group);
698
699 /*
700 * Drop a reference to a group. Free it if it's through.
fb47a38f 701@@ -81,6 +83,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
7f207e10 702 if (atomic_dec_and_test(&group->refcnt))
1716fcea 703 fsnotify_final_destroy_group(group);
7f207e10
AM
704 }
705+EXPORT_SYMBOL(fsnotify_put_group);
706
707 /*
708 * Create a new fsnotify_group and hold a reference for the group returned.
fb47a38f 709@@ -109,6 +112,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
7f207e10
AM
710
711 return group;
712 }
713+EXPORT_SYMBOL(fsnotify_alloc_group);
1716fcea
AM
714
715 int fsnotify_fasync(int fd, struct file *file, int on)
716 {
7f207e10 717diff --git a/fs/notify/mark.c b/fs/notify/mark.c
79b8bda9 718index fc0df44..325b5c6 100644
7f207e10
AM
719--- a/fs/notify/mark.c
720+++ b/fs/notify/mark.c
392086de 721@@ -109,6 +109,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10 722 mark->free_mark(mark);
1716fcea 723 }
7f207e10
AM
724 }
725+EXPORT_SYMBOL(fsnotify_put_mark);
726
2000de60
JR
727 /* Calculate mask of events for a list of marks */
728 u32 fsnotify_recalc_mask(struct hlist_head *head)
79b8bda9 729@@ -208,6 +209,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
1716fcea 730 mutex_unlock(&group->mark_mutex);
79b8bda9 731 fsnotify_free_mark(mark);
7f207e10
AM
732 }
733+EXPORT_SYMBOL(fsnotify_destroy_mark);
734
79b8bda9
AM
735 void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
736 {
737@@ -392,6 +394,7 @@ err:
7f207e10
AM
738
739 return ret;
740 }
741+EXPORT_SYMBOL(fsnotify_add_mark);
742
1716fcea
AM
743 int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
744 struct inode *inode, struct vfsmount *mnt, int allow_dups)
79b8bda9 745@@ -492,6 +495,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
746 atomic_set(&mark->refcnt, 1);
747 mark->free_mark = free_mark;
748 }
749+EXPORT_SYMBOL(fsnotify_init_mark);
750
751 static int fsnotify_mark_destroy(void *ignored)
752 {
753diff --git a/fs/open.c b/fs/open.c
79b8bda9 754index b6f1e96..4ab0d4e 100644
7f207e10
AM
755--- a/fs/open.c
756+++ b/fs/open.c
c2c0f25c 757@@ -64,6 +64,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
7f207e10
AM
758 mutex_unlock(&dentry->d_inode->i_mutex);
759 return ret;
760 }
761+EXPORT_SYMBOL(do_truncate);
762
1716fcea 763 long vfs_truncate(struct path *path, loff_t length)
7f207e10 764 {
c2c0f25c 765@@ -678,6 +679,7 @@ int open_check_o_direct(struct file *f)
b912730e
AM
766 }
767 return 0;
768 }
769+EXPORT_SYMBOL(open_check_o_direct);
770
771 static int do_dentry_open(struct file *f,
c2c0f25c 772 struct inode *inode,
5527c038
JR
773diff --git a/fs/read_write.c b/fs/read_write.c
774index fd0414e..8ace6ec 100644
775--- a/fs/read_write.c
776+++ b/fs/read_write.c
777@@ -504,6 +504,7 @@ vfs_readf_t vfs_readf(struct file *file)
778 return new_sync_read;
779 return ERR_PTR(-ENOSYS);
780 }
781+EXPORT_SYMBOL(vfs_readf);
782
783 vfs_writef_t vfs_writef(struct file *file)
784 {
785@@ -515,6 +516,7 @@ vfs_writef_t vfs_writef(struct file *file)
786 return new_sync_write;
787 return ERR_PTR(-ENOSYS);
788 }
789+EXPORT_SYMBOL(vfs_writef);
790
791 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
792 {
7f207e10 793diff --git a/fs/splice.c b/fs/splice.c
c2c0f25c 794index 5f8385a..f76067e 100644
7f207e10
AM
795--- a/fs/splice.c
796+++ b/fs/splice.c
c2c0f25c 797@@ -1115,6 +1115,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
392086de
AM
798
799 return splice_write(pipe, out, ppos, len, flags);
7f207e10
AM
800 }
801+EXPORT_SYMBOL(do_splice_from);
802
803 /*
804 * Attempt to initiate a splice from a file to a pipe.
c2c0f25c 805@@ -1141,6 +1142,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
806
807 return splice_read(in, ppos, pipe, len, flags);
808 }
809+EXPORT_SYMBOL(do_splice_to);
810
811 /**
812 * splice_direct_to_actor - splices data directly between two non-pipes
c1595e42 813diff --git a/fs/xattr.c b/fs/xattr.c
c2c0f25c 814index 072fee1..a7677af 100644
c1595e42
JR
815--- a/fs/xattr.c
816+++ b/fs/xattr.c
817@@ -207,6 +207,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
818 *xattr_value = value;
819 return error;
820 }
821+EXPORT_SYMBOL(vfs_getxattr_alloc);
822
823 /* Compare an extended attribute value with the given value */
824 int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
7f207e10 825diff --git a/security/commoncap.c b/security/commoncap.c
79b8bda9 826index 1832cf7..987ff5f 100644
7f207e10
AM
827--- a/security/commoncap.c
828+++ b/security/commoncap.c
79b8bda9 829@@ -1053,12 +1053,14 @@ int cap_mmap_addr(unsigned long addr)
94337f0d 830 }
7f207e10
AM
831 return ret;
832 }
0c3ec466
AM
833+EXPORT_SYMBOL(cap_mmap_addr);
834
835 int cap_mmap_file(struct file *file, unsigned long reqprot,
836 unsigned long prot, unsigned long flags)
837 {
838 return 0;
839 }
840+EXPORT_SYMBOL(cap_mmap_file);
c2c0f25c
AM
841
842 #ifdef CONFIG_SECURITY
843
7f207e10 844diff --git a/security/device_cgroup.c b/security/device_cgroup.c
79b8bda9 845index 03c1652..b00aa76 100644
7f207e10
AM
846--- a/security/device_cgroup.c
847+++ b/security/device_cgroup.c
f6c5ef8b
AM
848@@ -7,6 +7,7 @@
849 #include <linux/device_cgroup.h>
850 #include <linux/cgroup.h>
851 #include <linux/ctype.h>
852+#include <linux/export.h>
853 #include <linux/list.h>
854 #include <linux/uaccess.h>
855 #include <linux/seq_file.h>
076b876e 856@@ -849,6 +850,7 @@ int __devcgroup_inode_permission(struct inode *inode, int mask)
537831f9
AM
857 return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
858 access);
7f207e10 859 }
2cbb1c4b 860+EXPORT_SYMBOL(__devcgroup_inode_permission);
7f207e10
AM
861
862 int devcgroup_inode_mknod(int mode, dev_t dev)
863 {
864diff --git a/security/security.c b/security/security.c
79b8bda9 865index 46f405c..54488b0 100644
7f207e10
AM
866--- a/security/security.c
867+++ b/security/security.c
79b8bda9 868@@ -433,6 +433,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
7f207e10 869 return 0;
c2c0f25c 870 return call_int_hook(path_rmdir, 0, dir, dentry);
7f207e10
AM
871 }
872+EXPORT_SYMBOL(security_path_rmdir);
873
874 int security_path_unlink(struct path *dir, struct dentry *dentry)
875 {
79b8bda9 876@@ -449,6 +450,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
7f207e10 877 return 0;
c2c0f25c 878 return call_int_hook(path_symlink, 0, dir, dentry, old_name);
7f207e10
AM
879 }
880+EXPORT_SYMBOL(security_path_symlink);
881
882 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
883 struct dentry *new_dentry)
79b8bda9 884@@ -457,6 +459,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
7f207e10 885 return 0;
c2c0f25c 886 return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
7f207e10
AM
887 }
888+EXPORT_SYMBOL(security_path_link);
889
890 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
38d290e6 891 struct path *new_dir, struct dentry *new_dentry,
79b8bda9 892@@ -484,6 +487,7 @@ int security_path_truncate(struct path *path)
7f207e10 893 return 0;
c2c0f25c 894 return call_int_hook(path_truncate, 0, path);
7f207e10
AM
895 }
896+EXPORT_SYMBOL(security_path_truncate);
897
7eafdf33
AM
898 int security_path_chmod(struct path *path, umode_t mode)
899 {
79b8bda9 900@@ -491,6 +495,7 @@ int security_path_chmod(struct path *path, umode_t mode)
7f207e10 901 return 0;
c2c0f25c 902 return call_int_hook(path_chmod, 0, path, mode);
7f207e10
AM
903 }
904+EXPORT_SYMBOL(security_path_chmod);
905
537831f9 906 int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
7f207e10 907 {
79b8bda9 908@@ -498,6 +503,7 @@ int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
7f207e10 909 return 0;
c2c0f25c 910 return call_int_hook(path_chown, 0, path, uid, gid);
7f207e10
AM
911 }
912+EXPORT_SYMBOL(security_path_chown);
913
914 int security_path_chroot(struct path *path)
915 {
79b8bda9 916@@ -583,6 +589,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10 917 return 0;
c2c0f25c 918 return call_int_hook(inode_readlink, 0, dentry);
7f207e10
AM
919 }
920+EXPORT_SYMBOL(security_inode_readlink);
921
c2c0f25c
AM
922 int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
923 bool rcu)
79b8bda9 924@@ -598,6 +605,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 925 return 0;
c2c0f25c 926 return call_int_hook(inode_permission, 0, inode, mask);
7f207e10
AM
927 }
928+EXPORT_SYMBOL(security_inode_permission);
929
1e00d052 930 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
7f207e10 931 {
79b8bda9 932@@ -736,6 +744,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
933
934 return fsnotify_perm(file, mask);
935 }
936+EXPORT_SYMBOL(security_file_permission);
937
938 int security_file_alloc(struct file *file)
939 {
79b8bda9 940@@ -795,6 +804,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
7f207e10
AM
941 return ret;
942 return ima_file_mmap(file, prot);
943 }
0c3ec466 944+EXPORT_SYMBOL(security_mmap_file);
7f207e10 945
0c3ec466
AM
946 int security_mmap_addr(unsigned long addr)
947 {
7f207e10
AM
948diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
949--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 950+++ linux/Documentation/ABI/testing/debugfs-aufs 2015-09-24 10:47:58.244719488 +0200
86dc4139 951@@ -0,0 +1,50 @@
7f207e10
AM
952+What: /debug/aufs/si_<id>/
953+Date: March 2009
f6b6e03d 954+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
955+Description:
956+ Under /debug/aufs, a directory named si_<id> is created
957+ per aufs mount, where <id> is a unique id generated
958+ internally.
1facf9fc 959+
86dc4139
AM
960+What: /debug/aufs/si_<id>/plink
961+Date: Apr 2013
f6b6e03d 962+Contact: J. R. Okajima <hooanon05g@gmail.com>
86dc4139
AM
963+Description:
964+ It has three lines and shows the information about the
965+ pseudo-link. The first line is a single number
966+ representing a number of buckets. The second line is a
967+ number of pseudo-links per buckets (separated by a
968+ blank). The last line is a single number representing a
969+ total number of pseudo-links.
970+ When the aufs mount option 'noplink' is specified, it
971+ will show "1\n0\n0\n".
972+
7f207e10
AM
973+What: /debug/aufs/si_<id>/xib
974+Date: March 2009
f6b6e03d 975+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
976+Description:
977+ It shows the consumed blocks by xib (External Inode Number
978+ Bitmap), its block size and file size.
979+ When the aufs mount option 'noxino' is specified, it
980+ will be empty. About XINO files, see the aufs manual.
981+
982+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
983+Date: March 2009
f6b6e03d 984+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
985+Description:
986+ It shows the consumed blocks by xino (External Inode Number
987+ Translation Table), its link count, block size and file
988+ size.
989+ When the aufs mount option 'noxino' is specified, it
990+ will be empty. About XINO files, see the aufs manual.
991+
992+What: /debug/aufs/si_<id>/xigen
993+Date: March 2009
f6b6e03d 994+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
995+Description:
996+ It shows the consumed blocks by xigen (External Inode
997+ Generation Table), its block size and file size.
998+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
999+ be created.
1000+ When the aufs mount option 'noxino' is specified, it
1001+ will be empty. About XINO files, see the aufs manual.
1002diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
1003--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 1004+++ linux/Documentation/ABI/testing/sysfs-aufs 2015-09-24 10:47:58.244719488 +0200
392086de 1005@@ -0,0 +1,31 @@
7f207e10
AM
1006+What: /sys/fs/aufs/si_<id>/
1007+Date: March 2009
f6b6e03d 1008+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1009+Description:
1010+ Under /sys/fs/aufs, a directory named si_<id> is created
1011+ per aufs mount, where <id> is a unique id generated
1012+ internally.
1013+
1014+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
1015+Date: March 2009
f6b6e03d 1016+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1017+Description:
1018+ It shows the absolute path of a member directory (which
1019+ is called branch) in aufs, and its permission.
1020+
392086de
AM
1021+What: /sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
1022+Date: July 2013
f6b6e03d 1023+Contact: J. R. Okajima <hooanon05g@gmail.com>
392086de
AM
1024+Description:
1025+ It shows the id of a member directory (which is called
1026+ branch) in aufs.
1027+
7f207e10
AM
1028+What: /sys/fs/aufs/si_<id>/xi_path
1029+Date: March 2009
f6b6e03d 1030+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1031+Description:
1032+ It shows the absolute path of XINO (External Inode Number
1033+ Bitmap, Translation Table and Generation Table) file
1034+ even if it is the default path.
1035+ When the aufs mount option 'noxino' is specified, it
1036+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
1037diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
1038--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 1039+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2015-09-24 10:47:58.244719488 +0200
7e9cd9fe 1040@@ -0,0 +1,170 @@
53392da6 1041+
2000de60 1042+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1043+#
1044+# This program is free software; you can redistribute it and/or modify
1045+# it under the terms of the GNU General Public License as published by
1046+# the Free Software Foundation; either version 2 of the License, or
1047+# (at your option) any later version.
1048+#
1049+# This program is distributed in the hope that it will be useful,
1050+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1051+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1052+# GNU General Public License for more details.
1053+#
1054+# You should have received a copy of the GNU General Public License
523b37e3 1055+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1056+
1057+Introduction
1058+----------------------------------------
1059+
1060+aufs [ei ju: ef es] | [a u f s]
1061+1. abbrev. for "advanced multi-layered unification filesystem".
1062+2. abbrev. for "another unionfs".
1063+3. abbrev. for "auf das" in German which means "on the" in English.
1064+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
1065+ But "Filesystem aufs Filesystem" is hard to understand.
1066+
1067+AUFS is a filesystem with features:
1068+- multi layered stackable unification filesystem, the member directory
1069+ is called as a branch.
1070+- branch permission and attribute, 'readonly', 'real-readonly',
7e9cd9fe 1071+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
53392da6
AM
1072+ combination.
1073+- internal "file copy-on-write".
1074+- logical deletion, whiteout.
1075+- dynamic branch manipulation, adding, deleting and changing permission.
1076+- allow bypassing aufs, user's direct branch access.
1077+- external inode number translation table and bitmap which maintains the
1078+ persistent aufs inode number.
1079+- seekable directory, including NFS readdir.
1080+- file mapping, mmap and sharing pages.
1081+- pseudo-link, hardlink over branches.
1082+- loopback mounted filesystem as a branch.
1083+- several policies to select one among multiple writable branches.
1084+- revert a single systemcall when an error occurs in aufs.
1085+- and more...
1086+
1087+
1088+Multi Layered Stackable Unification Filesystem
1089+----------------------------------------------------------------------
1090+Most people already know what it is.
1091+It is a filesystem which unifies several directories and provides a
1092+merged single directory. When users access a file, the access will be
1093+passed/re-directed/converted (sorry, I am not sure which English word is
1094+correct) to the real file on the member filesystem. The member
1095+filesystem is called 'lower filesystem' or 'branch' and has a mode
1096+'readonly' and 'readwrite.' And the deletion for a file on the lower
1097+readonly branch is handled by creating 'whiteout' on the upper writable
1098+branch.
1099+
1100+On LKML, there have been discussions about UnionMount (Jan Blunck,
1101+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
1102+different approaches to implement the merged-view.
1103+The former tries putting it into VFS, and the latter implements as a
1104+separate filesystem.
1105+(If I misunderstand about these implementations, please let me know and
1106+I shall correct it. Because it is a long time ago when I read their
1107+source files last time).
1108+
1109+UnionMount's approach will be able to be small, but may be hard to share
1110+branches between several UnionMount since the whiteout in it is
1111+implemented in the inode on branch filesystem and always
1112+shared. According to Bharata's post, readdir does not seem to be
1113+finished yet.
1114+There are several missing features known in this implementation such as
1115+- for users, the inode number may change silently. eg. copy-up.
1116+- link(2) may break by copy-up.
1117+- read(2) may get an obsoleted filedata (fstat(2) too).
1118+- fcntl(F_SETLK) may be broken by copy-up.
1119+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
1120+ open(O_RDWR).
1121+
7e9cd9fe
AM
1122+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
1123+merged into mainline. This is another implementation of UnionMount as a
1124+separated filesystem. All the limitations and known problems which
1125+UnionMount has are equally inherited by "overlay" filesystem.
1126+
1127+Unionfs has a longer history. When I started implementing a stackable
1128+filesystem (Aug 2005), it already existed. It has virtual super_block,
1129+inode, dentry and file objects and they have an array pointing lower
1130+same kind objects. After contributing many patches for Unionfs, I
1131+re-started my project AUFS (Jun 2006).
53392da6
AM
1132+
1133+In AUFS, the structure of filesystem resembles to Unionfs, but I
1134+implemented my own ideas, approaches and enhancements and it became
1135+totally different one.
1136+
1137+Comparing DM snapshot and fs based implementation
1138+- the number of bytes to be copied between devices is much smaller.
1139+- the type of filesystem must be one and only.
1140+- the fs must be writable, no readonly fs, even for the lower original
1141+ device. so the compression fs will not be usable. but if we use
1142+ loopback mount, we may address this issue.
1143+ for instance,
1144+ mount /cdrom/squashfs.img /sq
1145+ losetup /sq/ext2.img
1146+ losetup /somewhere/cow
1147+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
1148+- it will be difficult (or needs more operations) to extract the
1149+ difference between the original device and COW.
1150+- DM snapshot-merge may help a lot when users try merging. in the
1151+ fs-layer union, users will use rsync(1).
1152+
7e9cd9fe
AM
1153+You may want to read my old paper "Filesystems in LiveCD"
1154+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
53392da6 1155+
7e9cd9fe
AM
1156+
1157+Several characters/aspects/persona of aufs
53392da6
AM
1158+----------------------------------------------------------------------
1159+
7e9cd9fe 1160+Aufs has several characters, aspects or persona.
53392da6
AM
1161+1. a filesystem, callee of VFS helper
1162+2. sub-VFS, caller of VFS helper for branches
1163+3. a virtual filesystem which maintains persistent inode number
1164+4. reader/writer of files on branches such like an application
1165+
1166+1. Callee of VFS Helper
1167+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
1168+unlink(2) from an application reaches sys_unlink() kernel function and
1169+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
1170+calls filesystem specific unlink operation. Actually aufs implements the
1171+unlink operation but it behaves like a redirector.
1172+
1173+2. Caller of VFS Helper for Branches
1174+aufs_unlink() passes the unlink request to the branch filesystem as if
1175+it were called from VFS. So the called unlink operation of the branch
1176+filesystem acts as usual. As a caller of VFS helper, aufs should handle
1177+every necessary pre/post operation for the branch filesystem.
1178+- acquire the lock for the parent dir on a branch
1179+- lookup in a branch
1180+- revalidate dentry on a branch
1181+- mnt_want_write() for a branch
1182+- vfs_unlink() for a branch
1183+- mnt_drop_write() for a branch
1184+- release the lock on a branch
1185+
1186+3. Persistent Inode Number
1187+One of the most important issue for a filesystem is to maintain inode
1188+numbers. This is particularly important to support exporting a
1189+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
1190+backend block device for its own. But some storage is necessary to
7e9cd9fe
AM
1191+keep and maintain the inode numbers. It may be a large space and may not
1192+suit to keep in memory. Aufs rents some space from its first writable
1193+branch filesystem (by default) and creates file(s) on it. These files
1194+are created by aufs internally and removed soon (currently) keeping
1195+opened.
53392da6
AM
1196+Note: Because these files are removed, they are totally gone after
1197+ unmounting aufs. It means the inode numbers are not persistent
1198+ across unmount or reboot. I have a plan to make them really
1199+ persistent which will be important for aufs on NFS server.
1200+
1201+4. Read/Write Files Internally (copy-on-write)
1202+Because a branch can be readonly, when you write a file on it, aufs will
1203+"copy-up" it to the upper writable branch internally. And then write the
1204+originally requested thing to the file. Generally kernel doesn't
1205+open/read/write file actively. In aufs, even a single write may cause an
1206+internal "file copy". This behaviour is very similar to cp(1) command.
1207+
1208+Some people may think it is better to pass such work to user space
1209+helper, instead of doing in kernel space. Actually I am still thinking
1210+about it. But currently I have implemented it in kernel space.
1211diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
1212--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 1213+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2015-09-24 10:47:58.244719488 +0200
7e9cd9fe 1214@@ -0,0 +1,258 @@
53392da6 1215+
2000de60 1216+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1217+#
1218+# This program is free software; you can redistribute it and/or modify
1219+# it under the terms of the GNU General Public License as published by
1220+# the Free Software Foundation; either version 2 of the License, or
1221+# (at your option) any later version.
1222+#
1223+# This program is distributed in the hope that it will be useful,
1224+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1225+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1226+# GNU General Public License for more details.
1227+#
1228+# You should have received a copy of the GNU General Public License
523b37e3 1229+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1230+
1231+Basic Aufs Internal Structure
1232+
1233+Superblock/Inode/Dentry/File Objects
1234+----------------------------------------------------------------------
1235+As like an ordinary filesystem, aufs has its own
1236+superblock/inode/dentry/file objects. All these objects have a
1237+dynamically allocated array and store the same kind of pointers to the
1238+lower filesystem, branch.
1239+For example, when you build a union with one readwrite branch and one
1240+readonly, mounted /au, /rw and /ro respectively.
1241+- /au = /rw + /ro
1242+- /ro/fileA exists but /rw/fileA does not
1243+
1244+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
1245+pointers are stored in an aufs dentry. The array in aufs dentry will be,
7e9cd9fe 1246+- [0] = NULL (because /rw/fileA doesn't exist)
53392da6
AM
1247+- [1] = /ro/fileA
1248+
1249+This style of an array is essentially same to the aufs
1250+superblock/inode/dentry/file objects.
1251+
1252+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1253+branches dynamically, these objects has its own generation. When
1254+branches are changed, the generation in aufs superblock is
1255+incremented. And a generation in other object are compared when it is
1256+accessed. When a generation in other objects are obsoleted, aufs
1257+refreshes the internal array.
53392da6
AM
1258+
1259+
1260+Superblock
1261+----------------------------------------------------------------------
1262+Additionally aufs superblock has some data for policies to select one
1263+among multiple writable branches, XIB files, pseudo-links and kobject.
1264+See below in detail.
7e9cd9fe
AM
1265+About the policies which supports copy-down a directory, see
1266+wbr_policy.txt too.
53392da6
AM
1267+
1268+
1269+Branch and XINO(External Inode Number Translation Table)
1270+----------------------------------------------------------------------
1271+Every branch has its own xino (external inode number translation table)
1272+file. The xino file is created and unlinked by aufs internally. When two
1273+members of a union exist on the same filesystem, they share the single
1274+xino file.
1275+The struct of a xino file is simple, just a sequence of aufs inode
1276+numbers which is indexed by the lower inode number.
1277+In the above sample, assume the inode number of /ro/fileA is i111 and
1278+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
1279+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
1280+
1281+When the inode numbers are not contiguous, the xino file will be sparse
1282+which has a hole in it and doesn't consume as much disk space as it
1283+might appear. If your branch filesystem consumes disk space for such
1284+holes, then you should specify 'xino=' option at mounting aufs.
1285+
7e9cd9fe
AM
1286+Aufs has a mount option to free the disk blocks for such holes in XINO
1287+files on tmpfs or ramdisk. But it is not so effective actually. If you
1288+meet a problem of disk shortage due to XINO files, then you should try
1289+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
1290+The patch localizes the assignment inumbers per tmpfs-mount and avoid
1291+the holes in XINO files.
1292+
53392da6 1293+Also a writable branch has three kinds of "whiteout bases". All these
7e9cd9fe 1294+are existed when the branch is joined to aufs, and their names are
53392da6
AM
1295+whiteout-ed doubly, so that users will never see their names in aufs
1296+hierarchy.
7e9cd9fe 1297+1. a regular file which will be hardlinked to all whiteouts.
53392da6 1298+2. a directory to store a pseudo-link.
7e9cd9fe 1299+3. a directory to store an "orphan"-ed file temporary.
53392da6
AM
1300+
1301+1. Whiteout Base
1302+ When you remove a file on a readonly branch, aufs handles it as a
1303+ logical deletion and creates a whiteout on the upper writable branch
1304+ as a hardlink of this file in order not to consume inode on the
1305+ writable branch.
1306+2. Pseudo-link Dir
1307+ See below, Pseudo-link.
1308+3. Step-Parent Dir
1309+ When "fileC" exists on the lower readonly branch only and it is
1310+ opened and removed with its parent dir, and then user writes
1311+ something into it, then aufs copies-up fileC to this
1312+ directory. Because there is no other dir to store fileC. After
1313+ creating a file under this dir, the file is unlinked.
1314+
1315+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1316+dynamically, a branch has its own id. When the branch order changes,
1317+aufs finds the new index by searching the branch id.
53392da6
AM
1318+
1319+
1320+Pseudo-link
1321+----------------------------------------------------------------------
1322+Assume "fileA" exists on the lower readonly branch only and it is
1323+hardlinked to "fileB" on the branch. When you write something to fileA,
1324+aufs copies-up it to the upper writable branch. Additionally aufs
1325+creates a hardlink under the Pseudo-link Directory of the writable
1326+branch. The inode of a pseudo-link is kept in aufs super_block as a
1327+simple list. If fileB is read after unlinking fileA, aufs returns
1328+filedata from the pseudo-link instead of the lower readonly
1329+branch. Because the pseudo-link is based upon the inode, to keep the
7e9cd9fe 1330+inode number by xino (see above) is essentially necessary.
53392da6
AM
1331+
1332+All the hardlinks under the Pseudo-link Directory of the writable branch
1333+should be restored in a proper location later. Aufs provides a utility
1334+to do this. The userspace helpers are executed at remounting and unmounting
1335+aufs by default.
1336+While this utility is running, it puts aufs into the pseudo-link
1337+maintenance mode. In this mode, only the process which began the
1338+maintenance mode (and its child processes) is allowed to operate in
1339+aufs. Some other processes which are not related to the pseudo-link will
1340+be allowed to run too, but the rest have to return an error or wait
1341+until the maintenance mode ends. If a process already acquires an inode
1342+mutex (in VFS), it has to return an error.
1343+
1344+
1345+XIB(external inode number bitmap)
1346+----------------------------------------------------------------------
1347+In addition to the xino file per branch, aufs has an external inode number
7e9cd9fe
AM
1348+bitmap in a superblock object. It is also an internal file such like a
1349+xino file.
53392da6
AM
1350+It is a simple bitmap to mark whether the aufs inode number is in-use or
1351+not.
1352+To reduce the file I/O, aufs prepares a single memory page to cache xib.
1353+
7e9cd9fe 1354+As well as XINO files, aufs has a feature to truncate/refresh XIB to
53392da6
AM
1355+reduce the number of consumed disk blocks for these files.
1356+
1357+
1358+Virtual or Vertical Dir, and Readdir in Userspace
1359+----------------------------------------------------------------------
1360+In order to support multiple layers (branches), aufs readdir operation
1361+constructs a virtual dir block on memory. For readdir, aufs calls
1362+vfs_readdir() internally for each dir on branches, merges their entries
1363+with eliminating the whiteout-ed ones, and sets it to file (dir)
1364+object. So the file object has its entry list until it is closed. The
1365+entry list will be updated when the file position is zero and becomes
7e9cd9fe 1366+obsoleted. This decision is made in aufs automatically.
53392da6
AM
1367+
1368+The dynamically allocated memory block for the name of entries has a
1369+unit of 512 bytes (by default) and stores the names contiguously (no
1370+padding). Another block for each entry is handled by kmem_cache too.
1371+During building dir blocks, aufs creates hash list and judging whether
1372+the entry is whiteouted by its upper branch or already listed.
1373+The merged result is cached in the corresponding inode object and
1374+maintained by a customizable life-time option.
1375+
1376+Some people may call it can be a security hole or invite DoS attack
1377+since the opened and once readdir-ed dir (file object) holds its entry
1378+list and becomes a pressure for system memory. But I'd say it is similar
1379+to files under /proc or /sys. The virtual files in them also holds a
1380+memory page (generally) while they are opened. When an idea to reduce
1381+memory for them is introduced, it will be applied to aufs too.
1382+For those who really hate this situation, I've developed readdir(3)
1383+library which operates this merging in userspace. You just need to set
1384+LD_PRELOAD environment variable, and aufs will not consume any memory in
1385+kernel space for readdir(3).
1386+
1387+
1388+Workqueue
1389+----------------------------------------------------------------------
1390+Aufs sometimes requires privilege access to a branch. For instance,
1391+in copy-up/down operation. When a user process is going to make changes
1392+to a file which exists in the lower readonly branch only, and the mode
1393+of one of ancestor directories may not be writable by a user
1394+process. Here aufs copy-up the file with its ancestors and they may
1395+require privilege to set its owner/group/mode/etc.
1396+This is a typical case of an application character of aufs (see
1397+Introduction).
1398+
1399+Aufs uses workqueue synchronously for this case. It creates its own
1400+workqueue. The workqueue is a kernel thread and has privilege. Aufs
1401+passes the request to call mkdir or write (for example), and wait for
1402+its completion. This approach solves a problem of a signal handler
1403+simply.
1404+If aufs didn't adopt the workqueue and changed the privilege of the
7e9cd9fe
AM
1405+process, then the process may receive the unexpected SIGXFSZ or other
1406+signals.
53392da6
AM
1407+
1408+Also aufs uses the system global workqueue ("events" kernel thread) too
1409+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
1410+whiteout base and etc. This is unrelated to a privilege.
1411+Most of aufs operation tries acquiring a rw_semaphore for aufs
1412+superblock at the beginning, at the same time waits for the completion
1413+of all queued asynchronous tasks.
1414+
1415+
1416+Whiteout
1417+----------------------------------------------------------------------
1418+The whiteout in aufs is very similar to Unionfs's. That is represented
1419+by its filename. UnionMount takes an approach of a file mode, but I am
1420+afraid several utilities (find(1) or something) will have to support it.
1421+
1422+Basically the whiteout represents "logical deletion" which stops aufs to
1423+lookup further, but also it represents "dir is opaque" which also stop
7e9cd9fe 1424+further lookup.
53392da6
AM
1425+
1426+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
1427+In order to make several functions in a single systemcall to be
1428+revertible, aufs adopts an approach to rename a directory to a temporary
1429+unique whiteouted name.
1430+For example, in rename(2) dir where the target dir already existed, aufs
1431+renames the target dir to a temporary unique whiteouted name before the
7e9cd9fe 1432+actual rename on a branch, and then handles other actions (make it opaque,
53392da6
AM
1433+update the attributes, etc). If an error happens in these actions, aufs
1434+simply renames the whiteouted name back and returns an error. If all are
1435+succeeded, aufs registers a function to remove the whiteouted unique
1436+temporary name completely and asynchronously to the system global
1437+workqueue.
1438+
1439+
1440+Copy-up
1441+----------------------------------------------------------------------
1442+It is a well-known feature or concept.
1443+When a user modifies a file on a readonly branch, aufs operates "copy-up"
1444+internally and makes change to the new file on the upper writable branch.
1445+When the trigger systemcall does not update the timestamps of the parent
1446+dir, aufs reverts it after copy-up.
c2b27bf2
AM
1447+
1448+
1449+Move-down (aufs3.9 and later)
1450+----------------------------------------------------------------------
1451+"Copy-up" is one of the essential feature in aufs. It copies a file from
1452+the lower readonly branch to the upper writable branch when a user
1453+changes something about the file.
1454+"Move-down" is an opposite action of copy-up. Basically this action is
1455+run manually instead of automatically and internally.
076b876e
AM
1456+For design and implementation, aufs has to consider these issues.
1457+- whiteout for the file may exist on the lower branch.
1458+- ancestor directories may not exist on the lower branch.
1459+- diropq for the ancestor directories may exist on the upper branch.
1460+- free space on the lower branch will reduce.
1461+- another access to the file may happen during moving-down, including
7e9cd9fe 1462+ UDBA (see "Revalidate Dentry and UDBA").
076b876e
AM
1463+- the file should not be hard-linked nor pseudo-linked. they should be
1464+ handled by auplink utility later.
c2b27bf2
AM
1465+
1466+Sometimes users want to move-down a file from the upper writable branch
1467+to the lower readonly or writable branch. For instance,
1468+- the free space of the upper writable branch is going to run out.
1469+- create a new intermediate branch between the upper and lower branch.
1470+- etc.
1471+
1472+For this purpose, use "aumvdown" command in aufs-util.git.
b912730e
AM
1473diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt linux/Documentation/filesystems/aufs/design/03atomic_open.txt
1474--- /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 1475+++ linux/Documentation/filesystems/aufs/design/03atomic_open.txt 2015-09-24 10:47:58.244719488 +0200
b912730e
AM
1476@@ -0,0 +1,85 @@
1477+
1478+# Copyright (C) 2015 Junjiro R. Okajima
1479+#
1480+# This program is free software; you can redistribute it and/or modify
1481+# it under the terms of the GNU General Public License as published by
1482+# the Free Software Foundation; either version 2 of the License, or
1483+# (at your option) any later version.
1484+#
1485+# This program is distributed in the hope that it will be useful,
1486+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1487+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1488+# GNU General Public License for more details.
1489+#
1490+# You should have received a copy of the GNU General Public License
1491+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1492+
1493+Support for a branch who has its ->atomic_open()
1494+----------------------------------------------------------------------
1495+The filesystems who implement its ->atomic_open() are not majority. For
1496+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
1497+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
1498+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
1499+sure whether all filesystems who have ->atomic_open() behave like this,
1500+but NFSv4 surely returns the error.
1501+
1502+In order to support ->atomic_open() for aufs, there are a few
1503+approaches.
1504+
1505+A. Introduce aufs_atomic_open()
1506+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
1507+ branch fs.
1508+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
1509+ an aufs user Pip Cet's approach
1510+ - calls aufs_create(), VFS finish_open() and notify_change().
1511+ - pass fake-mode to finish_open(), and then correct the mode by
1512+ notify_change().
1513+C. Extend aufs_open() to call branch fs's ->atomic_open()
1514+ - no aufs_atomic_open().
1515+ - aufs_lookup() registers the TID to an aufs internal object.
1516+ - aufs_create() does nothing when the matching TID is registered, but
1517+ registers the mode.
1518+ - aufs_open() calls branch fs's ->atomic_open() when the matching
1519+ TID is registered.
1520+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
1521+ credential
1522+ - no aufs_atomic_open().
1523+ - aufs_create() registers the TID to an internal object. this info
1524+ represents "this process created this file just now."
1525+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
1526+ registered TID and re-try open() with superuser's credential.
1527+
1528+Pros and cons for each approach.
1529+
1530+A.
1531+ - straightforward but highly depends upon VFS internal.
1532+ - the atomic behaviour is kept.
1533+ - some of parameters such as nameidata are hard to reproduce for
1534+ branch fs.
1535+ - large overhead.
1536+B.
1537+ - easy to implement.
1538+ - the atomic behaviour is lost.
1539+C.
1540+ - the atomic behaviour is kept.
1541+ - dirty and tricky.
1542+ - VFS checks whether the file is created correctly after calling
1543+ ->create(), which means this approach doesn't work.
1544+D.
1545+ - easy to implement.
1546+ - the atomic behaviour is lost.
1547+ - to open a file with superuser's credential and give it to a user
1548+ process is a bad idea, since the file object keeps the credential
1549+ in it. It may affect LSM or something. This approach doesn't work
1550+ either.
1551+
1552+The approach A is ideal, but it is hard to implement. So here is a
1553+variation of A, which is to be implemented.
1554+
1555+A-1. Introduce aufs_atomic_open()
1556+ - calls branch fs ->atomic_open() if exists. otherwise calls
1557+ vfs_create() and finish_open().
1558+ - the demerit is that the several checks after branch fs
1559+ ->atomic_open() are lost. in the ordinary case, the checks are
1560+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
1561+ be implemented in aufs, but not all I am afraid.
53392da6
AM
1562diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
1563--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 1564+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2015-09-24 10:47:58.244719488 +0200
7e9cd9fe 1565@@ -0,0 +1,113 @@
53392da6 1566+
2000de60 1567+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1568+#
1569+# This program is free software; you can redistribute it and/or modify
1570+# it under the terms of the GNU General Public License as published by
1571+# the Free Software Foundation; either version 2 of the License, or
1572+# (at your option) any later version.
1573+#
1574+# This program is distributed in the hope that it will be useful,
1575+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1576+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1577+# GNU General Public License for more details.
1578+#
1579+# You should have received a copy of the GNU General Public License
523b37e3 1580+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1581+
1582+Lookup in a Branch
1583+----------------------------------------------------------------------
1584+Since aufs has a character of sub-VFS (see Introduction), it operates
7e9cd9fe
AM
1585+lookup for branches as VFS does. It may be a heavy work. But almost all
1586+lookup operation in aufs is the simplest case, ie. lookup only an entry
1587+directly connected to its parent. Digging down the directory hierarchy
1588+is unnecessary. VFS has a function lookup_one_len() for that use, and
1589+aufs calls it.
1590+
1591+When a branch is a remote filesystem, aufs basically relies upon its
53392da6
AM
1592+->d_revalidate(), also aufs forces the hardest revalidate tests for
1593+them.
1594+For d_revalidate, aufs implements three levels of revalidate tests. See
1595+"Revalidate Dentry and UDBA" in detail.
1596+
1597+
076b876e
AM
1598+Test Only the Highest One for the Directory Permission (dirperm1 option)
1599+----------------------------------------------------------------------
1600+Let's try case study.
1601+- aufs has two branches, upper readwrite and lower readonly.
1602+ /au = /rw + /ro
1603+- "dirA" exists under /ro, but /rw. and its mode is 0700.
1604+- user invoked "chmod a+rx /au/dirA"
1605+- the internal copy-up is activated and "/rw/dirA" is created and its
7e9cd9fe 1606+ permission bits are set to world readable.
076b876e
AM
1607+- then "/au/dirA" becomes world readable?
1608+
1609+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1610+or it may be a natively readonly filesystem. If aufs respects the lower
1611+branch, it should not respond readdir request from other users. But user
1612+allowed it by chmod. Should aufs really reject showing the entries
1613+under /ro/dirA?
1614+
7e9cd9fe
AM
1615+To be honest, I don't have a good solution for this case. So aufs
1616+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
1617+users.
076b876e
AM
1618+When dirperm1 is specified, aufs checks only the highest one for the
1619+directory permission, and shows the entries. Otherwise, as usual, checks
1620+every dir existing on all branches and rejects the request.
1621+
1622+As a side effect, dirperm1 option improves the performance of aufs
1623+because the number of permission check is reduced when the number of
1624+branch is many.
1625+
1626+
53392da6
AM
1627+Revalidate Dentry and UDBA (User's Direct Branch Access)
1628+----------------------------------------------------------------------
1629+Generally VFS helpers re-validate a dentry as a part of lookup.
1630+0. digging down the directory hierarchy.
1631+1. lock the parent dir by its i_mutex.
1632+2. lookup the final (child) entry.
1633+3. revalidate it.
1634+4. call the actual operation (create, unlink, etc.)
1635+5. unlock the parent dir
1636+
1637+If the filesystem implements its ->d_revalidate() (step 3), then it is
1638+called. Actually aufs implements it and checks the dentry on a branch is
1639+still valid.
1640+But it is not enough. Because aufs has to release the lock for the
1641+parent dir on a branch at the end of ->lookup() (step 2) and
1642+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1643+held by VFS.
1644+If the file on a branch is changed directly, eg. bypassing aufs, after
1645+aufs released the lock, then the subsequent operation may cause
1646+some unpleasant result.
1647+
1648+This situation is a result of VFS architecture, ->lookup() and
1649+->d_revalidate() is separated. But I never say it is wrong. It is a good
1650+design from VFS's point of view. It is just not suitable for sub-VFS
1651+character in aufs.
1652+
1653+Aufs supports such case by three level of revalidation which is
1654+selectable by user.
1655+1. Simple Revalidate
1656+ Addition to the native flow in VFS's, confirm the child-parent
1657+ relationship on the branch just after locking the parent dir on the
1658+ branch in the "actual operation" (step 4). When this validation
1659+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1660+ checks the validation of the dentry on branches.
1661+2. Monitor Changes Internally by Inotify/Fsnotify
1662+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
1663+ the dentry on the branch, and returns EBUSY if it finds different
1664+ dentry.
1665+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
1666+ during it is in cache. When the event is notified, aufs registers a
1667+ function to kernel 'events' thread by schedule_work(). And the
1668+ function sets some special status to the cached aufs dentry and inode
1669+ private data. If they are not cached, then aufs has nothing to
1670+ do. When the same file is accessed through aufs (step 0-3) later,
1671+ aufs will detect the status and refresh all necessary data.
1672+ In this mode, aufs has to ignore the event which is fired by aufs
1673+ itself.
1674+3. No Extra Validation
1675+ This is the simplest test and doesn't add any additional revalidation
7e9cd9fe 1676+ test, and skip the revalidation in step 4. It is useful and improves
53392da6
AM
1677+ aufs performance when system surely hide the aufs branches from user,
1678+ by over-mounting something (or another method).
1679diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
1680--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 1681+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2015-09-24 10:47:58.244719488 +0200
7e9cd9fe 1682@@ -0,0 +1,74 @@
53392da6 1683+
2000de60 1684+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1685+#
1686+# This program is free software; you can redistribute it and/or modify
1687+# it under the terms of the GNU General Public License as published by
1688+# the Free Software Foundation; either version 2 of the License, or
1689+# (at your option) any later version.
1690+#
1691+# This program is distributed in the hope that it will be useful,
1692+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1693+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1694+# GNU General Public License for more details.
1695+#
1696+# You should have received a copy of the GNU General Public License
523b37e3 1697+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1698+
1699+Branch Manipulation
1700+
1701+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1702+and changing its permission/attribute, there are a lot of works to do.
1703+
1704+
1705+Add a Branch
1706+----------------------------------------------------------------------
1707+o Confirm the adding dir exists outside of aufs, including loopback
7e9cd9fe 1708+ mount, and its various attributes.
53392da6
AM
1709+o Initialize the xino file and whiteout bases if necessary.
1710+ See struct.txt.
1711+
1712+o Check the owner/group/mode of the directory
1713+ When the owner/group/mode of the adding directory differs from the
1714+ existing branch, aufs issues a warning because it may impose a
1715+ security risk.
1716+ For example, when a upper writable branch has a world writable empty
1717+ top directory, a malicious user can create any files on the writable
1718+ branch directly, like copy-up and modify manually. If something like
1719+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper
1720+ writable branch, and the writable branch is world-writable, then a
1721+ malicious guy may create /etc/passwd on the writable branch directly
1722+ and the infected file will be valid in aufs.
7e9cd9fe 1723+ I am afraid it can be a security issue, but aufs can do nothing except
53392da6
AM
1724+ producing a warning.
1725+
1726+
1727+Delete a Branch
1728+----------------------------------------------------------------------
1729+o Confirm the deleting branch is not busy
1730+ To be general, there is one merit to adopt "remount" interface to
1731+ manipulate branches. It is to discard caches. At deleting a branch,
1732+ aufs checks the still cached (and connected) dentries and inodes. If
1733+ there are any, then they are all in-use. An inode without its
1734+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
1735+
1736+ For the cached one, aufs checks whether the same named entry exists on
1737+ other branches.
1738+ If the cached one is a directory, because aufs provides a merged view
1739+ to users, as long as one dir is left on any branch aufs can show the
1740+ dir to users. In this case, the branch can be removed from aufs.
1741+ Otherwise aufs rejects deleting the branch.
1742+
1743+ If any file on the deleting branch is opened by aufs, then aufs
1744+ rejects deleting.
1745+
1746+
1747+Modify the Permission of a Branch
1748+----------------------------------------------------------------------
1749+o Re-initialize or remove the xino file and whiteout bases if necessary.
1750+ See struct.txt.
1751+
1752+o rw --> ro: Confirm the modifying branch is not busy
1753+ Aufs rejects the request if any of these conditions are true.
1754+ - a file on the branch is mmap-ed.
1755+ - a regular file on the branch is opened for write and there is no
1756+ same named entry on the upper branch.
1757diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1758--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 1759+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2015-09-24 10:47:58.244719488 +0200
523b37e3 1760@@ -0,0 +1,64 @@
53392da6 1761+
2000de60 1762+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1763+#
1764+# This program is free software; you can redistribute it and/or modify
1765+# it under the terms of the GNU General Public License as published by
1766+# the Free Software Foundation; either version 2 of the License, or
1767+# (at your option) any later version.
1768+#
1769+# This program is distributed in the hope that it will be useful,
1770+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1771+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1772+# GNU General Public License for more details.
1773+#
1774+# You should have received a copy of the GNU General Public License
523b37e3 1775+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1776+
1777+Policies to Select One among Multiple Writable Branches
1778+----------------------------------------------------------------------
1779+When the number of writable branch is more than one, aufs has to decide
1780+the target branch for file creation or copy-up. By default, the highest
1781+writable branch which has the parent (or ancestor) dir of the target
1782+file is chosen (top-down-parent policy).
1783+By user's request, aufs implements some other policies to select the
7e9cd9fe
AM
1784+writable branch, for file creation several policies, round-robin,
1785+most-free-space, and other policies. For copy-up, top-down-parent,
1786+bottom-up-parent, bottom-up and others.
53392da6
AM
1787+
1788+As expected, the round-robin policy selects the branch in circular. When
1789+you have two writable branches and creates 10 new files, 5 files will be
1790+created for each branch. mkdir(2) systemcall is an exception. When you
1791+create 10 new directories, all will be created on the same branch.
1792+And the most-free-space policy selects the one which has most free
1793+space among the writable branches. The amount of free space will be
1794+checked by aufs internally, and users can specify its time interval.
1795+
1796+The policies for copy-up is more simple,
1797+top-down-parent is equivalent to the same named one in create policy,
1798+bottom-up-parent selects the writable branch where the parent dir
1799+exists and the nearest upper one from the copyup-source,
1800+bottom-up selects the nearest upper writable branch from the
1801+copyup-source, regardless the existence of the parent dir.
1802+
1803+There are some rules or exceptions to apply these policies.
1804+- If there is a readonly branch above the policy-selected branch and
1805+ the parent dir is marked as opaque (a variation of whiteout), or the
1806+ target (creating) file is whiteout-ed on the upper readonly branch,
1807+ then the result of the policy is ignored and the target file will be
1808+ created on the nearest upper writable branch than the readonly branch.
1809+- If there is a writable branch above the policy-selected branch and
1810+ the parent dir is marked as opaque or the target file is whiteouted
1811+ on the branch, then the result of the policy is ignored and the target
1812+ file will be created on the highest one among the upper writable
1813+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1814+ it as usual.
1815+- link(2) and rename(2) systemcalls are exceptions in every policy.
1816+ They try selecting the branch where the source exists as possible
1817+ since copyup a large file will take long time. If it can't be,
1818+ ie. the branch where the source exists is readonly, then they will
1819+ follow the copyup policy.
1820+- There is an exception for rename(2) when the target exists.
1821+ If the rename target exists, aufs compares the index of the branches
1822+ where the source and the target exists and selects the higher
1823+ one. If the selected branch is readonly, then aufs follows the
1824+ copyup policy.
076b876e
AM
1825diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt linux/Documentation/filesystems/aufs/design/06fhsm.txt
1826--- /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 1827+++ linux/Documentation/filesystems/aufs/design/06fhsm.txt 2015-09-24 10:47:58.244719488 +0200
076b876e
AM
1828@@ -0,0 +1,120 @@
1829+
2000de60 1830+# Copyright (C) 2011-2015 Junjiro R. Okajima
076b876e
AM
1831+#
1832+# This program is free software; you can redistribute it and/or modify
1833+# it under the terms of the GNU General Public License as published by
1834+# the Free Software Foundation; either version 2 of the License, or
1835+# (at your option) any later version.
1836+#
1837+# This program is distributed in the hope that it will be useful,
1838+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1839+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1840+# GNU General Public License for more details.
1841+#
1842+# You should have received a copy of the GNU General Public License
1843+# along with this program; if not, write to the Free Software
1844+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1845+
1846+
1847+File-based Hierarchical Storage Management (FHSM)
1848+----------------------------------------------------------------------
1849+Hierarchical Storage Management (or HSM) is a well-known feature in the
1850+storage world. Aufs provides this feature as file-based with multiple
7e9cd9fe 1851+writable branches, based upon the principle of "Colder, the Lower".
076b876e 1852+Here the word "colder" means that the less used files, and "lower" means
7e9cd9fe 1853+that the position in the order of the stacked branches vertically.
076b876e
AM
1854+These multiple writable branches are prioritized, ie. the topmost one
1855+should be the fastest drive and be used heavily.
1856+
1857+o Characters in aufs FHSM story
1858+- aufs itself and a new branch attribute.
1859+- a new ioctl interface to move-down and to establish a connection with
1860+ the daemon ("move-down" is a converse of "copy-up").
1861+- userspace tool and daemon.
1862+
1863+The userspace daemon establishes a connection with aufs and waits for
1864+the notification. The notified information is very similar to struct
1865+statfs containing the number of consumed blocks and inodes.
1866+When the consumed blocks/inodes of a branch exceeds the user-specified
1867+upper watermark, the daemon activates its move-down process until the
1868+consumed blocks/inodes reaches the user-specified lower watermark.
1869+
1870+The actual move-down is done by aufs based upon the request from
1871+user-space since we need to maintain the inode number and the internal
1872+pointer arrays in aufs.
1873+
1874+Currently aufs FHSM handles the regular files only. Additionally they
1875+must not be hard-linked nor pseudo-linked.
1876+
1877+
1878+o Cowork of aufs and the user-space daemon
1879+ During the userspace daemon established the connection, aufs sends a
1880+ small notification to it whenever aufs writes something into the
1881+ writable branch. But it may cost high since aufs issues statfs(2)
1882+ internally. So user can specify a new option to cache the
1883+ info. Actually the notification is controlled by these factors.
1884+ + the specified cache time.
1885+ + classified as "force" by aufs internally.
1886+ Until the specified time expires, aufs doesn't send the info
1887+ except the forced cases. When aufs decide forcing, the info is always
1888+ notified to userspace.
1889+ For example, the number of free inodes is generally large enough and
1890+ the shortage of it happens rarely. So aufs doesn't force the
1891+ notification when creating a new file, directory and others. This is
1892+ the typical case which aufs doesn't force.
1893+ When aufs writes the actual filedata and the files consumes any of new
1894+ blocks, the aufs forces notifying.
1895+
1896+
1897+o Interfaces in aufs
1898+- New branch attribute.
1899+ + fhsm
1900+ Specifies that the branch is managed by FHSM feature. In other word,
1901+ participant in the FHSM.
1902+ When nofhsm is set to the branch, it will not be the source/target
1903+ branch of the move-down operation. This attribute is set
1904+ independently from coo and moo attributes, and if you want full
1905+ FHSM, you should specify them as well.
1906+- New mount option.
1907+ + fhsm_sec
1908+ Specifies a second to suppress many less important info to be
1909+ notified.
1910+- New ioctl.
1911+ + AUFS_CTL_FHSM_FD
1912+ create a new file descriptor which userspace can read the notification
1913+ (a subset of struct statfs) from aufs.
1914+- Module parameter 'brs'
1915+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
1916+ be set.
1917+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
1918+ When there are two or more branches with fhsm attributes,
1919+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
1920+ terminates it. As a result of remounting and branch-manipulation, the
1921+ number of branches with fhsm attribute can be one. In this case,
1922+ /sbin/mount.aufs will terminate the user-space daemon.
1923+
1924+
1925+Finally the operation is done as these steps in kernel-space.
1926+- make sure that,
1927+ + no one else is using the file.
1928+ + the file is not hard-linked.
1929+ + the file is not pseudo-linked.
1930+ + the file is a regular file.
1931+ + the parent dir is not opaqued.
1932+- find the target writable branch.
1933+- make sure the file is not whiteout-ed by the upper (than the target)
1934+ branch.
1935+- make the parent dir on the target branch.
1936+- mutex lock the inode on the branch.
1937+- unlink the whiteout on the target branch (if exists).
1938+- lookup and create the whiteout-ed temporary name on the target branch.
1939+- copy the file as the whiteout-ed temporary name on the target branch.
1940+- rename the whiteout-ed temporary name to the original name.
1941+- unlink the file on the source branch.
1942+- maintain the internal pointer array and the external inode number
1943+ table (XINO).
1944+- maintain the timestamps and other attributes of the parent dir and the
1945+ file.
1946+
1947+And of course, in every step, an error may happen. So the operation
1948+should restore the original file state after an error happens.
53392da6
AM
1949diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
1950--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 1951+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2015-09-24 10:47:58.244719488 +0200
b912730e 1952@@ -0,0 +1,72 @@
53392da6 1953+
2000de60 1954+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1955+#
1956+# This program is free software; you can redistribute it and/or modify
1957+# it under the terms of the GNU General Public License as published by
1958+# the Free Software Foundation; either version 2 of the License, or
1959+# (at your option) any later version.
1960+#
1961+# This program is distributed in the hope that it will be useful,
1962+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1963+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1964+# GNU General Public License for more details.
1965+#
1966+# You should have received a copy of the GNU General Public License
523b37e3 1967+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1968+
1969+mmap(2) -- File Memory Mapping
1970+----------------------------------------------------------------------
1971+In aufs, the file-mapped pages are handled by a branch fs directly, no
1972+interaction with aufs. It means aufs_mmap() calls the branch fs's
1973+->mmap().
1974+This approach is simple and good, but there is one problem.
7e9cd9fe 1975+Under /proc, several entries show the mmapped files by its path (with
53392da6
AM
1976+device and inode number), and the printed path will be the path on the
1977+branch fs's instead of virtual aufs's.
1978+This is not a problem in most cases, but some utilities lsof(1) (and its
1979+user) may expect the path on aufs.
1980+
1981+To address this issue, aufs adds a new member called vm_prfile in struct
1982+vm_area_struct (and struct vm_region). The original vm_file points to
1983+the file on the branch fs in order to handle everything correctly as
1984+usual. The new vm_prfile points to a virtual file in aufs, and the
1985+show-functions in procfs refers to vm_prfile if it is set.
1986+Also we need to maintain several other places where touching vm_file
1987+such like
1988+- fork()/clone() copies vma and the reference count of vm_file is
1989+ incremented.
1990+- merging vma maintains the ref count too.
1991+
7e9cd9fe 1992+This is not a good approach. It just fakes the printed path. But it
53392da6
AM
1993+leaves all behaviour around f_mapping unchanged. This is surely an
1994+advantage.
1995+Actually aufs had adopted another complicated approach which calls
1996+generic_file_mmap() and handles struct vm_operations_struct. In this
1997+approach, aufs met a hard problem and I could not solve it without
1998+switching the approach.
b912730e
AM
1999+
2000+There may be one more approach, which is
2001+- bind-mount the branch-root onto the aufs-root internally
2002+- grab the new vfsmount (ie. struct mount)
2003+- lazy-umount the branch-root internally
2004+- in open(2) the aufs-file, open the branch-file with the hidden
2005+ vfsmount (instead of the original branch's vfsmount)
2006+- ideally this "bind-mount and lazy-umount" should be done atomically,
2007+ but it may be possible from userspace by the mount helper.
2008+
2009+Adding the internal hidden vfsmount and using it in opening a file, the
2010+file path under /proc will be printed correctly. This approach looks
2011+smarter, but is not possible I am afraid.
2012+- aufs-root may be bind-mount later. when it happens, another hidden
2013+ vfsmount will be required.
2014+- it is hard to get the chance to bind-mount and lazy-umount
2015+ + in kernel-space, FS can have vfsmount in open(2) via
2016+ file->f_path, and aufs can know its vfsmount. But several locks are
2017+ already acquired, and if aufs tries to bind-mount and lazy-umount
2018+ here, then it may cause a deadlock.
2019+ + in user-space, bind-mount doesn't invoke the mount helper.
2020+- since /proc shows dev and ino, aufs has to give vma these info. it
2021+ means a new member vm_prinode will be necessary. this is essentially
2022+ equivalent to vm_prfile described above.
2023+
2024+I have to give up this "looks-smarter" approach.
c1595e42
JR
2025diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt linux/Documentation/filesystems/aufs/design/06xattr.txt
2026--- /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 2027+++ linux/Documentation/filesystems/aufs/design/06xattr.txt 2015-09-24 10:47:58.244719488 +0200
c1595e42
JR
2028@@ -0,0 +1,96 @@
2029+
2000de60 2030+# Copyright (C) 2014-2015 Junjiro R. Okajima
c1595e42
JR
2031+#
2032+# This program is free software; you can redistribute it and/or modify
2033+# it under the terms of the GNU General Public License as published by
2034+# the Free Software Foundation; either version 2 of the License, or
2035+# (at your option) any later version.
2036+#
2037+# This program is distributed in the hope that it will be useful,
2038+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2039+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2040+# GNU General Public License for more details.
2041+#
2042+# You should have received a copy of the GNU General Public License
2043+# along with this program; if not, write to the Free Software
2044+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2045+
2046+
2047+Listing XATTR/EA and getting the value
2048+----------------------------------------------------------------------
2049+For the inode standard attributes (owner, group, timestamps, etc.), aufs
2050+shows the values from the topmost existing file. This behaviour is good
7e9cd9fe 2051+for the non-dir entries since the behaviour exactly matches the shown
c1595e42
JR
2052+information. But for the directories, aufs considers all the same named
2053+entries on the lower branches. Which means, if one of the lower entry
2054+rejects readdir call, then aufs returns an error even if the topmost
2055+entry allows it. This behaviour is necessary to respect the branch fs's
2056+security, but can make users confused since the user-visible standard
2057+attributes don't match the behaviour.
2058+To address this issue, aufs has a mount option called dirperm1 which
2059+checks the permission for the topmost entry only, and ignores the lower
2060+entry's permission.
2061+
2062+A similar issue can happen around XATTR.
2063+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
7e9cd9fe
AM
2064+always set. Otherwise these very unpleasant situations would happen.
2065+- listxattr(2) may return the duplicated entries.
c1595e42
JR
2066+- users may not be able to remove or reset the XATTR forever.
2067+
2068+
2069+XATTR/EA support in the internal (copy,move)-(up,down)
2070+----------------------------------------------------------------------
7e9cd9fe 2071+Generally the extended attributes of inode are categorized as these.
c1595e42
JR
2072+- "security" for LSM and capability.
2073+- "system" for posix ACL, 'acl' mount option is required for the branch
2074+ fs generally.
2075+- "trusted" for userspace, CAP_SYS_ADMIN is required.
2076+- "user" for userspace, 'user_xattr' mount option is required for the
2077+ branch fs generally.
2078+
2079+Moreover there are some other categories. Aufs handles these rather
2080+unpopular categories as the ordinary ones, ie. there is no special
2081+condition nor exception.
2082+
2083+In copy-up, the support for XATTR on the dst branch may differ from the
2084+src branch. In this case, the copy-up operation will get an error and
7e9cd9fe
AM
2085+the original user operation which triggered the copy-up will fail. It
2086+can happen that even all copy-up will fail.
c1595e42
JR
2087+When both of src and dst branches support XATTR and if an error occurs
2088+during copying XATTR, then the copy-up should fail obviously. That is a
2089+good reason and aufs should return an error to userspace. But when only
7e9cd9fe 2090+the src branch supports that XATTR, aufs should not return an error.
c1595e42
JR
2091+For example, the src branch supports ACL but the dst branch doesn't
2092+because the dst branch may natively un-support it or temporarily
2093+un-support it due to "noacl" mount option. Of course, the dst branch fs
2094+may NOT return an error even if the XATTR is not supported. It is
2095+totally up to the branch fs.
2096+
2097+Anyway when the aufs internal copy-up gets an error from the dst branch
2098+fs, then aufs tries removing the just copied entry and returns the error
2099+to the userspace. The worst case of this situation will be all copy-up
2100+will fail.
2101+
2102+For the copy-up operation, there are two basic approaches.
2103+- copy the specified XATTR only (by category above), and return the
7e9cd9fe 2104+ error unconditionally if it happens.
c1595e42
JR
2105+- copy all XATTR, and ignore the error on the specified category only.
2106+
2107+In order to support XATTR and to implement the correct behaviour, aufs
7e9cd9fe
AM
2108+chooses the latter approach and introduces some new branch attributes,
2109+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
c1595e42 2110+They correspond to the XATTR namespaces (see above). Additionally, to be
7e9cd9fe
AM
2111+convenient, "icex" is also provided which means all "icex*" attributes
2112+are set (here the word "icex" stands for "ignore copy-error on XATTR").
c1595e42
JR
2113+
2114+The meaning of these attributes is to ignore the error from setting
2115+XATTR on that branch.
2116+Note that aufs tries copying all XATTR unconditionally, and ignores the
2117+error from the dst branch according to the specified attributes.
2118+
2119+Some XATTR may have its default value. The default value may come from
2120+the parent dir or the environment. If the default value is set at the
2121+file creating-time, it will be overwritten by copy-up.
2122+Some contradiction may happen I am afraid.
2123+Do we need another attribute to stop copying XATTR? I am unsure. For
2124+now, aufs implements the branch attributes to ignore the error.
53392da6
AM
2125diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
2126--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 2127+++ linux/Documentation/filesystems/aufs/design/07export.txt 2015-09-24 10:47:58.248052907 +0200
523b37e3 2128@@ -0,0 +1,58 @@
53392da6 2129+
2000de60 2130+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
2131+#
2132+# This program is free software; you can redistribute it and/or modify
2133+# it under the terms of the GNU General Public License as published by
2134+# the Free Software Foundation; either version 2 of the License, or
2135+# (at your option) any later version.
2136+#
2137+# This program is distributed in the hope that it will be useful,
2138+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2139+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2140+# GNU General Public License for more details.
2141+#
2142+# You should have received a copy of the GNU General Public License
523b37e3 2143+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2144+
2145+Export Aufs via NFS
2146+----------------------------------------------------------------------
2147+Here is an approach.
2148+- like xino/xib, add a new file 'xigen' which stores aufs inode
2149+ generation.
2150+- iget_locked(): initialize aufs inode generation for a new inode, and
2151+ store it in xigen file.
2152+- destroy_inode(): increment aufs inode generation and store it in xigen
2153+ file. it is necessary even if it is not unlinked, because any data of
2154+ inode may be changed by UDBA.
2155+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
2156+ build file handle by
2157+ + branch id (4 bytes)
2158+ + superblock generation (4 bytes)
2159+ + inode number (4 or 8 bytes)
2160+ + parent dir inode number (4 or 8 bytes)
2161+ + inode generation (4 bytes)
2162+ + return value of exportfs_encode_fh() for the parent on a branch (4
2163+ bytes)
2164+ + file handle for a branch (by exportfs_encode_fh())
2165+- fh_to_dentry():
2166+ + find the index of a branch from its id in handle, and check it
2167+ still exists in aufs.
2168+ + 1st level: get the inode number from handle and search it in cache.
7e9cd9fe
AM
2169+ + 2nd level: if not found in cache, get the parent inode number from
2170+ the handle and search it in cache. and then open the found parent
2171+ dir, find the matching inode number by vfs_readdir() and get its
2172+ name, and call lookup_one_len() for the target dentry.
53392da6
AM
2173+ + 3rd level: if the parent dir is not cached, call
2174+ exportfs_decode_fh() for a branch and get the parent on a branch,
2175+ build a pathname of it, convert it into a pathname in aufs, call
2176+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
2177+ the 2nd level.
2178+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
2179+ for every branch, but not itself. to get this, (currently) aufs
2180+ searches in current->nsproxy->mnt_ns list. it may not be a good
2181+ idea, but I didn't get other approach.
2182+ + test the generation of the gotten inode.
2183+- every inode operation: they may get EBUSY due to UDBA. in this case,
2184+ convert it into ESTALE for NFSD.
2185+- readdir(): call lockdep_on/off() because filldir in NFSD calls
2186+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
2187diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
2188--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 2189+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2015-09-24 10:47:58.248052907 +0200
523b37e3 2190@@ -0,0 +1,52 @@
53392da6 2191+
2000de60 2192+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
2193+#
2194+# This program is free software; you can redistribute it and/or modify
2195+# it under the terms of the GNU General Public License as published by
2196+# the Free Software Foundation; either version 2 of the License, or
2197+# (at your option) any later version.
2198+#
2199+# This program is distributed in the hope that it will be useful,
2200+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2201+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2202+# GNU General Public License for more details.
2203+#
2204+# You should have received a copy of the GNU General Public License
523b37e3 2205+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2206+
2207+Show Whiteout Mode (shwh)
2208+----------------------------------------------------------------------
2209+Generally aufs hides the name of whiteouts. But in some cases, to show
2210+them is very useful for users. For instance, creating a new middle layer
2211+(branch) by merging existing layers.
2212+
2213+(borrowing aufs1 HOW-TO from a user, Michael Towers)
2214+When you have three branches,
2215+- Bottom: 'system', squashfs (underlying base system), read-only
2216+- Middle: 'mods', squashfs, read-only
2217+- Top: 'overlay', ram (tmpfs), read-write
2218+
2219+The top layer is loaded at boot time and saved at shutdown, to preserve
2220+the changes made to the system during the session.
2221+When larger changes have been made, or smaller changes have accumulated,
2222+the size of the saved top layer data grows. At this point, it would be
2223+nice to be able to merge the two overlay branches ('mods' and 'overlay')
2224+and rewrite the 'mods' squashfs, clearing the top layer and thus
2225+restoring save and load speed.
2226+
2227+This merging is simplified by the use of another aufs mount, of just the
2228+two overlay branches using the 'shwh' option.
2229+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
2230+ aufs /livesys/merge_union
2231+
2232+A merged view of these two branches is then available at
2233+/livesys/merge_union, and the new feature is that the whiteouts are
2234+visible!
2235+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
2236+writing to all branches. Also the default mode for all branches is 'ro'.
2237+It is now possible to save the combined contents of the two overlay
2238+branches to a new squashfs, e.g.:
2239+# mksquashfs /livesys/merge_union /path/to/newmods.squash
2240+
2241+This new squashfs archive can be stored on the boot device and the
2242+initramfs will use it to replace the old one at the next boot.
2243diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
2244--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 2245+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2015-09-24 10:47:58.248052907 +0200
7e9cd9fe 2246@@ -0,0 +1,47 @@
53392da6 2247+
2000de60 2248+# Copyright (C) 2010-2015 Junjiro R. Okajima
53392da6
AM
2249+#
2250+# This program is free software; you can redistribute it and/or modify
2251+# it under the terms of the GNU General Public License as published by
2252+# the Free Software Foundation; either version 2 of the License, or
2253+# (at your option) any later version.
2254+#
2255+# This program is distributed in the hope that it will be useful,
2256+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2257+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2258+# GNU General Public License for more details.
2259+#
2260+# You should have received a copy of the GNU General Public License
523b37e3 2261+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2262+
2263+Dynamically customizable FS operations
2264+----------------------------------------------------------------------
2265+Generally FS operations (struct inode_operations, struct
2266+address_space_operations, struct file_operations, etc.) are defined as
2267+"static const", but it never means that FS have only one set of
2268+operation. Some FS have multiple sets of them. For instance, ext2 has
2269+three sets, one for XIP, for NOBH, and for normal.
2270+Since aufs overrides and redirects these operations, sometimes aufs has
7e9cd9fe 2271+to change its behaviour according to the branch FS type. More importantly
53392da6
AM
2272+VFS acts differently if a function (member in the struct) is set or
2273+not. It means aufs should have several sets of operations and select one
2274+among them according to the branch FS definition.
2275+
7e9cd9fe 2276+In order to solve this problem and not to affect the behaviour of VFS,
53392da6 2277+aufs defines these operations dynamically. For instance, aufs defines
7e9cd9fe
AM
2278+dummy direct_IO function for struct address_space_operations, but it may
2279+not be set to the address_space_operations actually. When the branch FS
2280+doesn't have it, aufs doesn't set it to its address_space_operations
2281+while the function definition itself is still alive. So the behaviour
2282+itself will not change, and it will return an error when direct_IO is
2283+not set.
53392da6
AM
2284+
2285+The lifetime of these dynamically generated operation object is
2286+maintained by aufs branch object. When the branch is removed from aufs,
2287+the reference counter of the object is decremented. When it reaches
2288+zero, the dynamically generated operation object will be freed.
2289+
7e9cd9fe
AM
2290+This approach is designed to support AIO (io_submit), Direct I/O and
2291+XIP (DAX) mainly.
2292+Currently this approach is applied to address_space_operations for
2293+regular files only.
53392da6
AM
2294diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
2295--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 2296+++ linux/Documentation/filesystems/aufs/README 2015-09-24 10:47:58.244719488 +0200
5527c038 2297@@ -0,0 +1,383 @@
53392da6 2298+
5527c038 2299+Aufs4 -- advanced multi layered unification filesystem version 4.x
53392da6
AM
2300+http://aufs.sf.net
2301+Junjiro R. Okajima
2302+
2303+
2304+0. Introduction
2305+----------------------------------------
2306+In the early days, aufs was entirely re-designed and re-implemented
7e9cd9fe 2307+Unionfs Version 1.x series. Adding many original ideas, approaches,
53392da6
AM
2308+improvements and implementations, it becomes totally different from
2309+Unionfs while keeping the basic features.
2310+Recently, Unionfs Version 2.x series begin taking some of the same
2311+approaches to aufs1's.
2312+Unionfs is being developed by Professor Erez Zadok at Stony Brook
2313+University and his team.
2314+
5527c038 2315+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
53392da6
AM
2316+If you want older kernel version support, try aufs2-2.6.git or
2317+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
2318+
2319+Note: it becomes clear that "Aufs was rejected. Let's give it up."
38d290e6
JR
2320+ According to Christoph Hellwig, linux rejects all union-type
2321+ filesystems but UnionMount.
53392da6
AM
2322+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
2323+
38d290e6
JR
2324+PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
2325+ UnionMount, and he pointed out an issue around a directory mutex
2326+ lock and aufs addressed it. But it is still unsure whether aufs will
2327+ be merged (or any other union solution).
076b876e 2328+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
38d290e6 2329+
53392da6
AM
2330+
2331+1. Features
2332+----------------------------------------
2333+- unite several directories into a single virtual filesystem. The member
2334+ directory is called as a branch.
2335+- you can specify the permission flags to the branch, which are 'readonly',
2336+ 'readwrite' and 'whiteout-able.'
2337+- by upper writable branch, internal copyup and whiteout, files/dirs on
2338+ readonly branch are modifiable logically.
2339+- dynamic branch manipulation, add, del.
2340+- etc...
2341+
7e9cd9fe
AM
2342+Also there are many enhancements in aufs, such as:
2343+- test only the highest one for the directory permission (dirperm1)
2344+- copyup on open (coo=)
2345+- 'move' policy for copy-up between two writable branches, after
2346+ checking free space.
2347+- xattr, acl
53392da6
AM
2348+- readdir(3) in userspace.
2349+- keep inode number by external inode number table
2350+- keep the timestamps of file/dir in internal copyup operation
2351+- seekable directory, supporting NFS readdir.
2352+- whiteout is hardlinked in order to reduce the consumption of inodes
2353+ on branch
2354+- do not copyup, nor create a whiteout when it is unnecessary
2355+- revert a single systemcall when an error occurs in aufs
2356+- remount interface instead of ioctl
2357+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
2358+- loopback mounted filesystem as a branch
2359+- kernel thread for removing the dir who has a plenty of whiteouts
2360+- support copyup sparse file (a file which has a 'hole' in it)
2361+- default permission flags for branches
2362+- selectable permission flags for ro branch, whether whiteout can
2363+ exist or not
2364+- export via NFS.
2365+- support <sysfs>/fs/aufs and <debugfs>/aufs.
2366+- support multiple writable branches, some policies to select one
2367+ among multiple writable branches.
2368+- a new semantics for link(2) and rename(2) to support multiple
2369+ writable branches.
2370+- no glibc changes are required.
2371+- pseudo hardlink (hardlink over branches)
2372+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
2373+ including NFS or remote filesystem branch.
2374+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
2375+- and more...
2376+
5527c038 2377+Currently these features are dropped temporarily from aufs4.
53392da6 2378+See design/08plan.txt in detail.
53392da6
AM
2379+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
2380+ (robr)
2381+- statistics of aufs thread (/sys/fs/aufs/stat)
53392da6
AM
2382+
2383+Features or just an idea in the future (see also design/*.txt),
2384+- reorder the branch index without del/re-add.
2385+- permanent xino files for NFSD
2386+- an option for refreshing the opened files after add/del branches
53392da6
AM
2387+- light version, without branch manipulation. (unnecessary?)
2388+- copyup in userspace
2389+- inotify in userspace
2390+- readv/writev
53392da6
AM
2391+
2392+
2393+2. Download
2394+----------------------------------------
5527c038
JR
2395+There are three GIT trees for aufs4, aufs4-linux.git,
2396+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
1e00d052 2397+"aufs-util.git."
5527c038
JR
2398+While the aufs-util is always necessary, you need either of aufs4-linux
2399+or aufs4-standalone.
1e00d052 2400+
5527c038 2401+The aufs4-linux tree includes the whole linux mainline GIT tree,
1e00d052
AM
2402+git://git.kernel.org/.../torvalds/linux.git.
2403+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
5527c038 2404+build aufs4 as an external kernel module.
2000de60 2405+Several extra patches are not included in this tree. Only
5527c038 2406+aufs4-standalone tree contains them. They are described in the later
2000de60 2407+section "Configuration and Compilation."
1e00d052 2408+
5527c038 2409+On the other hand, the aufs4-standalone tree has only aufs source files
53392da6 2410+and necessary patches, and you can select CONFIG_AUFS_FS=m.
2000de60 2411+But you need to apply all aufs patches manually.
53392da6 2412+
5527c038
JR
2413+You will find GIT branches whose name is in form of "aufs4.x" where "x"
2414+represents the linux kernel version, "linux-4.x". For instance,
2415+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
2416+"aufs4.x-rcN" branch.
1e00d052 2417+
5527c038 2418+o aufs4-linux tree
1e00d052 2419+$ git clone --reference /your/linux/git/tree \
5527c038 2420+ git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
1e00d052 2421+- if you don't have linux GIT tree, then remove "--reference ..."
5527c038
JR
2422+$ cd aufs4-linux.git
2423+$ git checkout origin/aufs4.0
53392da6 2424+
2000de60
JR
2425+Or You may want to directly git-pull aufs into your linux GIT tree, and
2426+leave the patch-work to GIT.
2427+$ cd /your/linux/git/tree
5527c038
JR
2428+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
2429+$ git fetch aufs4
2430+$ git checkout -b my4.0 v4.0
2431+$ (add your local change...)
2432+$ git pull aufs4 aufs4.0
2433+- now you have v4.0 + your_changes + aufs4.0 in your my4.0 branch.
2000de60 2434+- you may need to solve some conflicts between your_changes and
5527c038
JR
2435+ aufs4.0. in this case, git-rerere is recommended so that you can
2436+ solve the similar conflicts automatically when you upgrade to 4.1 or
2000de60
JR
2437+ later in the future.
2438+
5527c038
JR
2439+o aufs4-standalone tree
2440+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
2441+$ cd aufs4-standalone.git
2442+$ git checkout origin/aufs4.0
53392da6
AM
2443+
2444+o aufs-util tree
5527c038
JR
2445+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
2446+- note that the public aufs-util.git is on SourceForge instead of
2447+ GitHUB.
53392da6 2448+$ cd aufs-util.git
5527c038 2449+$ git checkout origin/aufs4.0
53392da6 2450+
5527c038
JR
2451+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
2452+The minor version number, 'x' in '4.x', of aufs may not always
9dbd164d
AM
2453+follow the minor version number of the kernel.
2454+Because changes in the kernel that cause the use of a new
2455+minor version number do not always require changes to aufs-util.
2456+
2457+Since aufs-util has its own minor version number, you may not be
2458+able to find a GIT branch in aufs-util for your kernel's
2459+exact minor version number.
2460+In this case, you should git-checkout the branch for the
53392da6 2461+nearest lower number.
9dbd164d
AM
2462+
2463+For (an unreleased) example:
5527c038
JR
2464+If you are using "linux-4.10" and the "aufs4.10" branch
2465+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
9dbd164d
AM
2466+or something numerically smaller is the branch for your kernel.
2467+
53392da6
AM
2468+Also you can view all branches by
2469+ $ git branch -a
2470+
2471+
2472+3. Configuration and Compilation
2473+----------------------------------------
2474+Make sure you have git-checkout'ed the correct branch.
2475+
5527c038 2476+For aufs4-linux tree,
c06a8ce3 2477+- enable CONFIG_AUFS_FS.
1e00d052
AM
2478+- set other aufs configurations if necessary.
2479+
5527c038 2480+For aufs4-standalone tree,
53392da6
AM
2481+There are several ways to build.
2482+
2483+1.
5527c038
JR
2484+- apply ./aufs4-kbuild.patch to your kernel source files.
2485+- apply ./aufs4-base.patch too.
2486+- apply ./aufs4-mmap.patch too.
2487+- apply ./aufs4-standalone.patch too, if you have a plan to set
2488+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
537831f9
AM
2489+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
2490+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
c06a8ce3 2491+- enable CONFIG_AUFS_FS, you can select either
53392da6
AM
2492+ =m or =y.
2493+- and build your kernel as usual.
2494+- install the built kernel.
c06a8ce3
AM
2495+ Note: Since linux-3.9, every filesystem module requires an alias
2496+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2497+ modules.aliases file if you set CONFIG_AUFS_FS=m.
7eafdf33
AM
2498+- install the header files too by "make headers_install" to the
2499+ directory where you specify. By default, it is $PWD/usr.
b4510431 2500+ "make help" shows a brief note for headers_install.
53392da6
AM
2501+- and reboot your system.
2502+
2503+2.
2504+- module only (CONFIG_AUFS_FS=m).
5527c038
JR
2505+- apply ./aufs4-base.patch to your kernel source files.
2506+- apply ./aufs4-mmap.patch too.
2507+- apply ./aufs4-standalone.patch too.
53392da6
AM
2508+- build your kernel, don't forget "make headers_install", and reboot.
2509+- edit ./config.mk and set other aufs configurations if necessary.
b4510431 2510+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
53392da6
AM
2511+ every aufs configurations.
2512+- build the module by simple "make".
c06a8ce3
AM
2513+ Note: Since linux-3.9, every filesystem module requires an alias
2514+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2515+ modules.aliases file.
53392da6
AM
2516+- you can specify ${KDIR} make variable which points to your kernel
2517+ source tree.
2518+- install the files
2519+ + run "make install" to install the aufs module, or copy the built
b4510431
AM
2520+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
2521+ + run "make install_headers" (instead of headers_install) to install
2522+ the modified aufs header file (you can specify DESTDIR which is
2523+ available in aufs standalone version's Makefile only), or copy
2524+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
2525+ you like manually. By default, the target directory is $PWD/usr.
5527c038 2526+- no need to apply aufs4-kbuild.patch, nor copying source files to your
53392da6
AM
2527+ kernel source tree.
2528+
b4510431 2529+Note: The header file aufs_type.h is necessary to build aufs-util
53392da6
AM
2530+ as well as "make headers_install" in the kernel source tree.
2531+ headers_install is subject to be forgotten, but it is essentially
2532+ necessary, not only for building aufs-util.
2533+ You may not meet problems without headers_install in some older
2534+ version though.
2535+
2536+And then,
2537+- read README in aufs-util, build and install it
9dbd164d
AM
2538+- note that your distribution may contain an obsoleted version of
2539+ aufs_type.h in /usr/include/linux or something. When you build aufs
2540+ utilities, make sure that your compiler refers to the correct aufs header
2541+ file which is built by "make headers_install."
53392da6
AM
2542+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
2543+ then run "make install_ulib" too. And refer to the aufs manual in
2544+ detail.
2545+
5527c038 2546+There are several other patches in aufs4-standalone.git. They are all
38d290e6 2547+optional. When you meet some problems, they will help you.
5527c038 2548+- aufs4-loopback.patch
38d290e6
JR
2549+ Supports a nested loopback mount in a branch-fs. This patch is
2550+ unnecessary until aufs produces a message like "you may want to try
2551+ another patch for loopback file".
2552+- vfs-ino.patch
2553+ Modifies a system global kernel internal function get_next_ino() in
2554+ order to stop assigning 0 for an inode-number. Not directly related to
2555+ aufs, but recommended generally.
2556+- tmpfs-idr.patch
2557+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
2558+ the size of aufs XINO files for tmpfs branch. Also it prevents the
2559+ duplication of inode number, which is important for backup tools and
2560+ other utilities. When you find aufs XINO files for tmpfs branch
2561+ growing too much, try this patch.
2562+
53392da6
AM
2563+
2564+4. Usage
2565+----------------------------------------
2566+At first, make sure aufs-util are installed, and please read the aufs
2567+manual, aufs.5 in aufs-util.git tree.
2568+$ man -l aufs.5
2569+
2570+And then,
2571+$ mkdir /tmp/rw /tmp/aufs
2572+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
2573+
2574+Here is another example. The result is equivalent.
2575+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
2576+ Or
2577+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
2578+# mount -o remount,append:${HOME} /tmp/aufs
2579+
2580+Then, you can see whole tree of your home dir through /tmp/aufs. If
2581+you modify a file under /tmp/aufs, the one on your home directory is
2582+not affected, instead the same named file will be newly created under
2583+/tmp/rw. And all of your modification to a file will be applied to
2584+the one under /tmp/rw. This is called the file based Copy on Write
2585+(COW) method.
2586+Aufs mount options are described in aufs.5.
2587+If you run chroot or something and make your aufs as a root directory,
2588+then you need to customize the shutdown script. See the aufs manual in
2589+detail.
2590+
2591+Additionally, there are some sample usages of aufs which are a
2592+diskless system with network booting, and LiveCD over NFS.
2593+See sample dir in CVS tree on SourceForge.
2594+
2595+
2596+5. Contact
2597+----------------------------------------
2598+When you have any problems or strange behaviour in aufs, please let me
2599+know with:
2600+- /proc/mounts (instead of the output of mount(8))
2601+- /sys/module/aufs/*
2602+- /sys/fs/aufs/* (if you have them)
2603+- /debug/aufs/* (if you have them)
2604+- linux kernel version
2605+ if your kernel is not plain, for example modified by distributor,
2606+ the url where i can download its source is necessary too.
2607+- aufs version which was printed at loading the module or booting the
2608+ system, instead of the date you downloaded.
2609+- configuration (define/undefine CONFIG_AUFS_xxx)
2610+- kernel configuration or /proc/config.gz (if you have it)
2611+- behaviour which you think to be incorrect
2612+- actual operation, reproducible one is better
2613+- mailto: aufs-users at lists.sourceforge.net
2614+
2615+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
2616+and Feature Requests) on SourceForge. Please join and write to
2617+aufs-users ML.
2618+
2619+
2620+6. Acknowledgements
2621+----------------------------------------
2622+Thanks to everyone who have tried and are using aufs, whoever
2623+have reported a bug or any feedback.
2624+
2625+Especially donators:
2626+Tomas Matejicek(slax.org) made a donation (much more than once).
2627+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
2628+ scripts) is making "doubling" donations.
2629+ Unfortunately I cannot list all of the donators, but I really
b4510431 2630+ appreciate.
53392da6
AM
2631+ It ends Aug 2010, but the ordinary donation URL is still available.
2632+ <http://sourceforge.net/donate/index.php?group_id=167503>
2633+Dai Itasaka made a donation (2007/8).
2634+Chuck Smith made a donation (2008/4, 10 and 12).
2635+Henk Schoneveld made a donation (2008/9).
2636+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
2637+Francois Dupoux made a donation (2008/11).
2638+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
2639+ aufs2 GIT tree (2009/2).
2640+William Grant made a donation (2009/3).
2641+Patrick Lane made a donation (2009/4).
2642+The Mail Archive (mail-archive.com) made donations (2009/5).
2643+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
2644+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
2645+Pavel Pronskiy made a donation (2011/2).
2646+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
2647+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
537831f9
AM
2648+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
2649+11).
1e00d052 2650+Sam Liddicott made a donation (2011/9).
86dc4139
AM
2651+Era Scarecrow made a donation (2013/4).
2652+Bor Ratajc made a donation (2013/4).
2653+Alessandro Gorreta made a donation (2013/4).
2654+POIRETTE Marc made a donation (2013/4).
2655+Alessandro Gorreta made a donation (2013/4).
2656+lauri kasvandik made a donation (2013/5).
392086de 2657+"pemasu from Finland" made a donation (2013/7).
523b37e3
AM
2658+The Parted Magic Project made a donation (2013/9 and 11).
2659+Pavel Barta made a donation (2013/10).
38d290e6 2660+Nikolay Pertsev made a donation (2014/5).
c2c0f25c 2661+James B made a donation (2014/7 and 2015/7).
076b876e 2662+Stefano Di Biase made a donation (2014/8).
2000de60 2663+Daniel Epellei made a donation (2015/1).
53392da6
AM
2664+
2665+Thank you very much.
2666+Donations are always, including future donations, very important and
2667+helpful for me to keep on developing aufs.
2668+
2669+
2670+7.
2671+----------------------------------------
2672+If you are an experienced user, no explanation is needed. Aufs is
2673+just a linux filesystem.
2674+
2675+
2676+Enjoy!
2677+
2678+# Local variables: ;
2679+# mode: text;
2680+# End: ;
7f207e10
AM
2681diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
2682--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 2683+++ linux/fs/aufs/aufs.h 2015-09-24 10:47:58.248052907 +0200
523b37e3 2684@@ -0,0 +1,59 @@
7f207e10 2685+/*
2000de60 2686+ * Copyright (C) 2005-2015 Junjiro R. Okajima
7f207e10
AM
2687+ *
2688+ * This program, aufs is free software; you can redistribute it and/or modify
2689+ * it under the terms of the GNU General Public License as published by
2690+ * the Free Software Foundation; either version 2 of the License, or
2691+ * (at your option) any later version.
2692+ *
2693+ * This program is distributed in the hope that it will be useful,
2694+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2695+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2696+ * GNU General Public License for more details.
2697+ *
2698+ * You should have received a copy of the GNU General Public License
523b37e3 2699+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2700+ */
2701+
2702+/*
2703+ * all header files
2704+ */
2705+
2706+#ifndef __AUFS_H__
2707+#define __AUFS_H__
2708+
2709+#ifdef __KERNEL__
2710+
2711+#define AuStub(type, name, body, ...) \
2712+ static inline type name(__VA_ARGS__) { body; }
2713+
2714+#define AuStubVoid(name, ...) \
2715+ AuStub(void, name, , __VA_ARGS__)
2716+#define AuStubInt0(name, ...) \
2717+ AuStub(int, name, return 0, __VA_ARGS__)
2718+
2719+#include "debug.h"
2720+
2721+#include "branch.h"
2722+#include "cpup.h"
2723+#include "dcsub.h"
2724+#include "dbgaufs.h"
2725+#include "dentry.h"
2726+#include "dir.h"
2727+#include "dynop.h"
2728+#include "file.h"
2729+#include "fstype.h"
2730+#include "inode.h"
2731+#include "loop.h"
2732+#include "module.h"
7f207e10
AM
2733+#include "opts.h"
2734+#include "rwsem.h"
2735+#include "spl.h"
2736+#include "super.h"
2737+#include "sysaufs.h"
2738+#include "vfsub.h"
2739+#include "whout.h"
2740+#include "wkq.h"
2741+
2742+#endif /* __KERNEL__ */
2743+#endif /* __AUFS_H__ */
2744diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
2745--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
79b8bda9
AM
2746+++ linux/fs/aufs/branch.c 2015-11-11 17:21:46.915530388 +0100
2747@@ -0,0 +1,1413 @@
7f207e10 2748+/*
2000de60 2749+ * Copyright (C) 2005-2015 Junjiro R. Okajima
7f207e10
AM
2750+ *
2751+ * This program, aufs is free software; you can redistribute it and/or modify
2752+ * it under the terms of the GNU General Public License as published by
2753+ * the Free Software Foundation; either version 2 of the License, or
2754+ * (at your option) any later version.
2755+ *
2756+ * This program is distributed in the hope that it will be useful,
2757+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2758+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2759+ * GNU General Public License for more details.
2760+ *
2761+ * You should have received a copy of the GNU General Public License
523b37e3 2762+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2763+ */
2764+
2765+/*
2766+ * branch management
2767+ */
2768+
027c5e7a 2769+#include <linux/compat.h>
7f207e10
AM
2770+#include <linux/statfs.h>
2771+#include "aufs.h"
2772+
2773+/*
2774+ * free a single branch
1facf9fc 2775+ */
2776+static void au_br_do_free(struct au_branch *br)
2777+{
2778+ int i;
2779+ struct au_wbr *wbr;
4a4d8108 2780+ struct au_dykey **key;
1facf9fc 2781+
027c5e7a
AM
2782+ au_hnotify_fin_br(br);
2783+
1facf9fc 2784+ if (br->br_xino.xi_file)
2785+ fput(br->br_xino.xi_file);
2786+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
2787+
2788+ AuDebugOn(atomic_read(&br->br_count));
2789+
2790+ wbr = br->br_wbr;
2791+ if (wbr) {
2792+ for (i = 0; i < AuBrWh_Last; i++)
2793+ dput(wbr->wbr_wh[i]);
2794+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 2795+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 2796+ }
2797+
076b876e
AM
2798+ if (br->br_fhsm) {
2799+ au_br_fhsm_fin(br->br_fhsm);
2800+ kfree(br->br_fhsm);
2801+ }
2802+
4a4d8108
AM
2803+ key = br->br_dykey;
2804+ for (i = 0; i < AuBrDynOp; i++, key++)
2805+ if (*key)
2806+ au_dy_put(*key);
2807+ else
2808+ break;
2809+
537831f9
AM
2810+ /* recursive lock, s_umount of branch's */
2811+ lockdep_off();
86dc4139 2812+ path_put(&br->br_path);
537831f9 2813+ lockdep_on();
1facf9fc 2814+ kfree(wbr);
2815+ kfree(br);
2816+}
2817+
2818+/*
2819+ * frees all branches
2820+ */
2821+void au_br_free(struct au_sbinfo *sbinfo)
2822+{
2823+ aufs_bindex_t bmax;
2824+ struct au_branch **br;
2825+
dece6358
AM
2826+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2827+
1facf9fc 2828+ bmax = sbinfo->si_bend + 1;
2829+ br = sbinfo->si_branch;
2830+ while (bmax--)
2831+ au_br_do_free(*br++);
2832+}
2833+
2834+/*
2835+ * find the index of a branch which is specified by @br_id.
2836+ */
2837+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
2838+{
2839+ aufs_bindex_t bindex, bend;
2840+
2841+ bend = au_sbend(sb);
2842+ for (bindex = 0; bindex <= bend; bindex++)
2843+ if (au_sbr_id(sb, bindex) == br_id)
2844+ return bindex;
2845+ return -1;
2846+}
2847+
2848+/* ---------------------------------------------------------------------- */
2849+
2850+/*
2851+ * add a branch
2852+ */
2853+
b752ccd1
AM
2854+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
2855+ struct dentry *h_root)
1facf9fc 2856+{
b752ccd1
AM
2857+ if (unlikely(h_adding == h_root
2858+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 2859+ return 1;
b752ccd1
AM
2860+ if (h_adding->d_sb != h_root->d_sb)
2861+ return 0;
2862+ return au_test_subdir(h_adding, h_root)
2863+ || au_test_subdir(h_root, h_adding);
1facf9fc 2864+}
2865+
2866+/*
2867+ * returns a newly allocated branch. @new_nbranch is the number of branches
2868+ * after adding the branch.
2869+ */
2870+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
2871+ int perm)
2872+{
2873+ struct au_branch *add_branch;
2874+ struct dentry *root;
5527c038 2875+ struct inode *inode;
4a4d8108 2876+ int err;
1facf9fc 2877+
4a4d8108 2878+ err = -ENOMEM;
1facf9fc 2879+ root = sb->s_root;
2880+ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS);
2881+ if (unlikely(!add_branch))
2882+ goto out;
2883+
027c5e7a
AM
2884+ err = au_hnotify_init_br(add_branch, perm);
2885+ if (unlikely(err))
2886+ goto out_br;
2887+
1facf9fc 2888+ add_branch->br_wbr = NULL;
2889+ if (au_br_writable(perm)) {
2890+ /* may be freed separately when the branch permission changes */
2891+ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
2892+ GFP_NOFS);
2893+ if (unlikely(!add_branch->br_wbr))
027c5e7a 2894+ goto out_hnotify;
1facf9fc 2895+ }
2896+
076b876e
AM
2897+ add_branch->br_fhsm = NULL;
2898+ if (au_br_fhsm(perm)) {
2899+ err = au_fhsm_br_alloc(add_branch);
2900+ if (unlikely(err))
2901+ goto out_wbr;
2902+ }
2903+
4a4d8108
AM
2904+ err = au_sbr_realloc(au_sbi(sb), new_nbranch);
2905+ if (!err)
2906+ err = au_di_realloc(au_di(root), new_nbranch);
5527c038
JR
2907+ if (!err) {
2908+ inode = d_inode(root);
2909+ err = au_ii_realloc(au_ii(inode), new_nbranch);
2910+ }
4a4d8108
AM
2911+ if (!err)
2912+ return add_branch; /* success */
1facf9fc 2913+
076b876e 2914+out_wbr:
1facf9fc 2915+ kfree(add_branch->br_wbr);
027c5e7a
AM
2916+out_hnotify:
2917+ au_hnotify_fin_br(add_branch);
4f0767ce 2918+out_br:
1facf9fc 2919+ kfree(add_branch);
4f0767ce 2920+out:
4a4d8108 2921+ return ERR_PTR(err);
1facf9fc 2922+}
2923+
2924+/*
2925+ * test if the branch permission is legal or not.
2926+ */
2927+static int test_br(struct inode *inode, int brperm, char *path)
2928+{
2929+ int err;
2930+
4a4d8108
AM
2931+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
2932+ if (!err)
2933+ goto out;
1facf9fc 2934+
4a4d8108
AM
2935+ err = -EINVAL;
2936+ pr_err("write permission for readonly mount or inode, %s\n", path);
2937+
4f0767ce 2938+out:
1facf9fc 2939+ return err;
2940+}
2941+
2942+/*
2943+ * returns:
2944+ * 0: success, the caller will add it
2945+ * plus: success, it is already unified, the caller should ignore it
2946+ * minus: error
2947+ */
2948+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
2949+{
2950+ int err;
2951+ aufs_bindex_t bend, bindex;
5527c038 2952+ struct dentry *root, *h_dentry;
1facf9fc 2953+ struct inode *inode, *h_inode;
2954+
2955+ root = sb->s_root;
2956+ bend = au_sbend(sb);
2957+ if (unlikely(bend >= 0
2958+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
2959+ err = 1;
2960+ if (!remount) {
2961+ err = -EINVAL;
4a4d8108 2962+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 2963+ }
2964+ goto out;
2965+ }
2966+
2967+ err = -ENOSPC; /* -E2BIG; */
2968+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
2969+ || AUFS_BRANCH_MAX - 1 <= bend)) {
4a4d8108 2970+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 2971+ goto out;
2972+ }
2973+
2974+ err = -EDOM;
2975+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
4a4d8108 2976+ pr_err("bad index %d\n", add->bindex);
1facf9fc 2977+ goto out;
2978+ }
2979+
5527c038 2980+ inode = d_inode(add->path.dentry);
1facf9fc 2981+ err = -ENOENT;
2982+ if (unlikely(!inode->i_nlink)) {
4a4d8108 2983+ pr_err("no existence %s\n", add->pathname);
1facf9fc 2984+ goto out;
2985+ }
2986+
2987+ err = -EINVAL;
2988+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 2989+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 2990+ goto out;
2991+ }
2992+
2993+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
2994+ pr_err("unsupported filesystem, %s (%s)\n",
2995+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 2996+ goto out;
2997+ }
2998+
c1595e42
JR
2999+ if (unlikely(inode->i_sb->s_stack_depth)) {
3000+ pr_err("already stacked, %s (%s)\n",
3001+ add->pathname, au_sbtype(inode->i_sb));
3002+ goto out;
3003+ }
3004+
5527c038 3005+ err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
1facf9fc 3006+ if (unlikely(err))
3007+ goto out;
3008+
3009+ if (bend < 0)
3010+ return 0; /* success */
3011+
3012+ err = -EINVAL;
3013+ for (bindex = 0; bindex <= bend; bindex++)
3014+ if (unlikely(test_overlap(sb, add->path.dentry,
3015+ au_h_dptr(root, bindex)))) {
4a4d8108 3016+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 3017+ goto out;
3018+ }
3019+
3020+ err = 0;
3021+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
5527c038
JR
3022+ h_dentry = au_h_dptr(root, 0);
3023+ h_inode = d_inode(h_dentry);
1facf9fc 3024+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
0c3ec466
AM
3025+ || !uid_eq(h_inode->i_uid, inode->i_uid)
3026+ || !gid_eq(h_inode->i_gid, inode->i_gid))
3027+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
3028+ add->pathname,
3029+ i_uid_read(inode), i_gid_read(inode),
3030+ (inode->i_mode & S_IALLUGO),
3031+ i_uid_read(h_inode), i_gid_read(h_inode),
3032+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 3033+ }
3034+
4f0767ce 3035+out:
1facf9fc 3036+ return err;
3037+}
3038+
3039+/*
3040+ * initialize or clean the whiteouts for an adding branch
3041+ */
3042+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
86dc4139 3043+ int new_perm)
1facf9fc 3044+{
3045+ int err, old_perm;
3046+ aufs_bindex_t bindex;
3047+ struct mutex *h_mtx;
3048+ struct au_wbr *wbr;
3049+ struct au_hinode *hdir;
5527c038 3050+ struct dentry *h_dentry;
1facf9fc 3051+
86dc4139
AM
3052+ err = vfsub_mnt_want_write(au_br_mnt(br));
3053+ if (unlikely(err))
3054+ goto out;
3055+
1facf9fc 3056+ wbr = br->br_wbr;
3057+ old_perm = br->br_perm;
3058+ br->br_perm = new_perm;
3059+ hdir = NULL;
3060+ h_mtx = NULL;
3061+ bindex = au_br_index(sb, br->br_id);
3062+ if (0 <= bindex) {
5527c038 3063+ hdir = au_hi(d_inode(sb->s_root), bindex);
4a4d8108 3064+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 3065+ } else {
5527c038
JR
3066+ h_dentry = au_br_dentry(br);
3067+ h_mtx = &d_inode(h_dentry)->i_mutex;
1facf9fc 3068+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
3069+ }
3070+ if (!wbr)
86dc4139 3071+ err = au_wh_init(br, sb);
1facf9fc 3072+ else {
3073+ wbr_wh_write_lock(wbr);
86dc4139 3074+ err = au_wh_init(br, sb);
1facf9fc 3075+ wbr_wh_write_unlock(wbr);
3076+ }
3077+ if (hdir)
4a4d8108 3078+ au_hn_imtx_unlock(hdir);
1facf9fc 3079+ else
3080+ mutex_unlock(h_mtx);
86dc4139 3081+ vfsub_mnt_drop_write(au_br_mnt(br));
1facf9fc 3082+ br->br_perm = old_perm;
3083+
3084+ if (!err && wbr && !au_br_writable(new_perm)) {
3085+ kfree(wbr);
3086+ br->br_wbr = NULL;
3087+ }
3088+
86dc4139 3089+out:
1facf9fc 3090+ return err;
3091+}
3092+
3093+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
86dc4139 3094+ int perm)
1facf9fc 3095+{
3096+ int err;
4a4d8108 3097+ struct kstatfs kst;
1facf9fc 3098+ struct au_wbr *wbr;
3099+
3100+ wbr = br->br_wbr;
dece6358 3101+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 3102+ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
3103+ atomic_set(&wbr->wbr_wh_running, 0);
3104+ wbr->wbr_bytes = 0;
3105+
4a4d8108
AM
3106+ /*
3107+ * a limit for rmdir/rename a dir
523b37e3 3108+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
4a4d8108 3109+ */
86dc4139 3110+ err = vfs_statfs(&br->br_path, &kst);
4a4d8108
AM
3111+ if (unlikely(err))
3112+ goto out;
3113+ err = -EINVAL;
3114+ if (kst.f_namelen >= NAME_MAX)
86dc4139 3115+ err = au_br_init_wh(sb, br, perm);
4a4d8108 3116+ else
523b37e3
AM
3117+ pr_err("%pd(%s), unsupported namelen %ld\n",
3118+ au_br_dentry(br),
86dc4139 3119+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
1facf9fc 3120+
4f0767ce 3121+out:
1facf9fc 3122+ return err;
3123+}
3124+
c1595e42 3125+/* initialize a new branch */
1facf9fc 3126+static int au_br_init(struct au_branch *br, struct super_block *sb,
3127+ struct au_opt_add *add)
3128+{
3129+ int err;
5527c038 3130+ struct inode *h_inode;
1facf9fc 3131+
3132+ err = 0;
3133+ memset(&br->br_xino, 0, sizeof(br->br_xino));
3134+ mutex_init(&br->br_xino.xi_nondir_mtx);
3135+ br->br_perm = add->perm;
86dc4139 3136+ br->br_path = add->path; /* set first, path_get() later */
4a4d8108
AM
3137+ spin_lock_init(&br->br_dykey_lock);
3138+ memset(br->br_dykey, 0, sizeof(br->br_dykey));
1facf9fc 3139+ atomic_set(&br->br_count, 0);
1facf9fc 3140+ atomic_set(&br->br_xino_running, 0);
3141+ br->br_id = au_new_br_id(sb);
7f207e10 3142+ AuDebugOn(br->br_id < 0);
1facf9fc 3143+
3144+ if (au_br_writable(add->perm)) {
86dc4139 3145+ err = au_wbr_init(br, sb, add->perm);
1facf9fc 3146+ if (unlikely(err))
b752ccd1 3147+ goto out_err;
1facf9fc 3148+ }
3149+
3150+ if (au_opt_test(au_mntflags(sb), XINO)) {
5527c038
JR
3151+ h_inode = d_inode(add->path.dentry);
3152+ err = au_xino_br(sb, br, h_inode->i_ino,
1facf9fc 3153+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
3154+ if (unlikely(err)) {
3155+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 3156+ goto out_err;
1facf9fc 3157+ }
3158+ }
3159+
3160+ sysaufs_br_init(br);
86dc4139 3161+ path_get(&br->br_path);
b752ccd1 3162+ goto out; /* success */
1facf9fc 3163+
4f0767ce 3164+out_err:
86dc4139 3165+ memset(&br->br_path, 0, sizeof(br->br_path));
4f0767ce 3166+out:
1facf9fc 3167+ return err;
3168+}
3169+
3170+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
3171+ struct au_branch *br, aufs_bindex_t bend,
3172+ aufs_bindex_t amount)
3173+{
3174+ struct au_branch **brp;
3175+
dece6358
AM
3176+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3177+
1facf9fc 3178+ brp = sbinfo->si_branch + bindex;
3179+ memmove(brp + 1, brp, sizeof(*brp) * amount);
3180+ *brp = br;
3181+ sbinfo->si_bend++;
3182+ if (unlikely(bend < 0))
3183+ sbinfo->si_bend = 0;
3184+}
3185+
3186+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
3187+ aufs_bindex_t bend, aufs_bindex_t amount)
3188+{
3189+ struct au_hdentry *hdp;
3190+
1308ab2a 3191+ AuRwMustWriteLock(&dinfo->di_rwsem);
3192+
1facf9fc 3193+ hdp = dinfo->di_hdentry + bindex;
3194+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
3195+ au_h_dentry_init(hdp);
3196+ dinfo->di_bend++;
3197+ if (unlikely(bend < 0))
3198+ dinfo->di_bstart = 0;
3199+}
3200+
3201+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
3202+ aufs_bindex_t bend, aufs_bindex_t amount)
3203+{
3204+ struct au_hinode *hip;
3205+
1308ab2a 3206+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3207+
1facf9fc 3208+ hip = iinfo->ii_hinode + bindex;
3209+ memmove(hip + 1, hip, sizeof(*hip) * amount);
3210+ hip->hi_inode = NULL;
4a4d8108 3211+ au_hn_init(hip);
1facf9fc 3212+ iinfo->ii_bend++;
3213+ if (unlikely(bend < 0))
3214+ iinfo->ii_bstart = 0;
3215+}
3216+
86dc4139
AM
3217+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
3218+ aufs_bindex_t bindex)
1facf9fc 3219+{
86dc4139 3220+ struct dentry *root, *h_dentry;
5527c038 3221+ struct inode *root_inode, *h_inode;
1facf9fc 3222+ aufs_bindex_t bend, amount;
3223+
3224+ root = sb->s_root;
5527c038 3225+ root_inode = d_inode(root);
1facf9fc 3226+ bend = au_sbend(sb);
3227+ amount = bend + 1 - bindex;
86dc4139 3228+ h_dentry = au_br_dentry(br);
53392da6 3229+ au_sbilist_lock();
1facf9fc 3230+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
3231+ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
3232+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
3233+ au_set_h_dptr(root, bindex, dget(h_dentry));
5527c038
JR
3234+ h_inode = d_inode(h_dentry);
3235+ au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
53392da6 3236+ au_sbilist_unlock();
1facf9fc 3237+}
3238+
3239+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
3240+{
3241+ int err;
1facf9fc 3242+ aufs_bindex_t bend, add_bindex;
3243+ struct dentry *root, *h_dentry;
3244+ struct inode *root_inode;
3245+ struct au_branch *add_branch;
3246+
3247+ root = sb->s_root;
5527c038 3248+ root_inode = d_inode(root);
1facf9fc 3249+ IMustLock(root_inode);
3250+ err = test_add(sb, add, remount);
3251+ if (unlikely(err < 0))
3252+ goto out;
3253+ if (err) {
3254+ err = 0;
3255+ goto out; /* success */
3256+ }
3257+
3258+ bend = au_sbend(sb);
3259+ add_branch = au_br_alloc(sb, bend + 2, add->perm);
3260+ err = PTR_ERR(add_branch);
3261+ if (IS_ERR(add_branch))
3262+ goto out;
3263+
3264+ err = au_br_init(add_branch, sb, add);
3265+ if (unlikely(err)) {
3266+ au_br_do_free(add_branch);
3267+ goto out;
3268+ }
3269+
3270+ add_bindex = add->bindex;
1facf9fc 3271+ if (!remount)
86dc4139 3272+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3273+ else {
3274+ sysaufs_brs_del(sb, add_bindex);
86dc4139 3275+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3276+ sysaufs_brs_add(sb, add_bindex);
3277+ }
3278+
86dc4139 3279+ h_dentry = add->path.dentry;
1308ab2a 3280+ if (!add_bindex) {
1facf9fc 3281+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 3282+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
3283+ } else
5527c038 3284+ au_add_nlink(root_inode, d_inode(h_dentry));
1facf9fc 3285+
3286+ /*
4a4d8108 3287+ * this test/set prevents aufs from handling unnecesary notify events
027c5e7a 3288+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 3289+ * once detached from aufs.
3290+ */
3291+ if (au_xino_brid(sb) < 0
3292+ && au_br_writable(add_branch->br_perm)
3293+ && !au_test_fs_bad_xino(h_dentry->d_sb)
3294+ && add_branch->br_xino.xi_file
2000de60 3295+ && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
1facf9fc 3296+ au_xino_brid_set(sb, add_branch->br_id);
3297+
4f0767ce 3298+out:
1facf9fc 3299+ return err;
3300+}
3301+
3302+/* ---------------------------------------------------------------------- */
3303+
79b8bda9 3304+static unsigned long long au_farray_cb(struct super_block *sb, void *a,
076b876e
AM
3305+ unsigned long long max __maybe_unused,
3306+ void *arg)
3307+{
3308+ unsigned long long n;
3309+ struct file **p, *f;
3310+ struct au_sphlhead *files;
3311+ struct au_finfo *finfo;
076b876e
AM
3312+
3313+ n = 0;
3314+ p = a;
3315+ files = &au_sbi(sb)->si_files;
3316+ spin_lock(&files->spin);
3317+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
3318+ f = finfo->fi_file;
3319+ if (file_count(f)
3320+ && !special_file(file_inode(f)->i_mode)) {
3321+ get_file(f);
3322+ *p++ = f;
3323+ n++;
3324+ AuDebugOn(n > max);
3325+ }
3326+ }
3327+ spin_unlock(&files->spin);
3328+
3329+ return n;
3330+}
3331+
3332+static struct file **au_farray_alloc(struct super_block *sb,
3333+ unsigned long long *max)
3334+{
3335+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
79b8bda9 3336+ return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
076b876e
AM
3337+}
3338+
3339+static void au_farray_free(struct file **a, unsigned long long max)
3340+{
3341+ unsigned long long ull;
3342+
3343+ for (ull = 0; ull < max; ull++)
3344+ if (a[ull])
3345+ fput(a[ull]);
3346+ au_array_free(a);
3347+}
3348+
3349+/* ---------------------------------------------------------------------- */
3350+
1facf9fc 3351+/*
3352+ * delete a branch
3353+ */
3354+
3355+/* to show the line number, do not make it an inline function */
4a4d8108 3356+#define AuVerbose(do_info, fmt, ...) do { \
1facf9fc 3357+ if (do_info) \
4a4d8108 3358+ pr_info(fmt, ##__VA_ARGS__); \
1facf9fc 3359+} while (0)
3360+
027c5e7a
AM
3361+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
3362+ aufs_bindex_t bend)
3363+{
3364+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
3365+}
3366+
3367+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
3368+ aufs_bindex_t bend)
3369+{
5527c038 3370+ return au_test_ibusy(d_inode(dentry), bstart, bend);
027c5e7a
AM
3371+}
3372+
1facf9fc 3373+/*
3374+ * test if the branch is deletable or not.
3375+ */
3376+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 3377+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3378+{
3379+ int err, i, j, ndentry;
3380+ aufs_bindex_t bstart, bend;
1facf9fc 3381+ struct au_dcsub_pages dpages;
3382+ struct au_dpage *dpage;
3383+ struct dentry *d;
1facf9fc 3384+
3385+ err = au_dpages_init(&dpages, GFP_NOFS);
3386+ if (unlikely(err))
3387+ goto out;
3388+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
3389+ if (unlikely(err))
3390+ goto out_dpages;
3391+
1facf9fc 3392+ for (i = 0; !err && i < dpages.ndpage; i++) {
3393+ dpage = dpages.dpages + i;
3394+ ndentry = dpage->ndentry;
3395+ for (j = 0; !err && j < ndentry; j++) {
3396+ d = dpage->dentries[j];
c1595e42 3397+ AuDebugOn(au_dcount(d) <= 0);
027c5e7a 3398+ if (!au_digen_test(d, sigen)) {
1facf9fc 3399+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
3400+ if (unlikely(au_dbrange_test(d))) {
3401+ di_read_unlock(d, AuLock_IR);
3402+ continue;
3403+ }
3404+ } else {
1facf9fc 3405+ di_write_lock_child(d);
027c5e7a
AM
3406+ if (unlikely(au_dbrange_test(d))) {
3407+ di_write_unlock(d);
3408+ continue;
3409+ }
1facf9fc 3410+ err = au_reval_dpath(d, sigen);
3411+ if (!err)
3412+ di_downgrade_lock(d, AuLock_IR);
3413+ else {
3414+ di_write_unlock(d);
3415+ break;
3416+ }
3417+ }
3418+
027c5e7a 3419+ /* AuDbgDentry(d); */
1facf9fc 3420+ bstart = au_dbstart(d);
3421+ bend = au_dbend(d);
3422+ if (bstart <= bindex
3423+ && bindex <= bend
3424+ && au_h_dptr(d, bindex)
027c5e7a 3425+ && au_test_dbusy(d, bstart, bend)) {
1facf9fc 3426+ err = -EBUSY;
523b37e3 3427+ AuVerbose(verbose, "busy %pd\n", d);
027c5e7a 3428+ AuDbgDentry(d);
1facf9fc 3429+ }
3430+ di_read_unlock(d, AuLock_IR);
3431+ }
3432+ }
3433+
4f0767ce 3434+out_dpages:
1facf9fc 3435+ au_dpages_free(&dpages);
4f0767ce 3436+out:
1facf9fc 3437+ return err;
3438+}
3439+
3440+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 3441+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3442+{
3443+ int err;
7f207e10
AM
3444+ unsigned long long max, ull;
3445+ struct inode *i, **array;
1facf9fc 3446+ aufs_bindex_t bstart, bend;
1facf9fc 3447+
7f207e10
AM
3448+ array = au_iarray_alloc(sb, &max);
3449+ err = PTR_ERR(array);
3450+ if (IS_ERR(array))
3451+ goto out;
3452+
1facf9fc 3453+ err = 0;
7f207e10
AM
3454+ AuDbg("b%d\n", bindex);
3455+ for (ull = 0; !err && ull < max; ull++) {
3456+ i = array[ull];
076b876e
AM
3457+ if (unlikely(!i))
3458+ break;
7f207e10 3459+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 3460+ continue;
3461+
7f207e10 3462+ /* AuDbgInode(i); */
537831f9 3463+ if (au_iigen(i, NULL) == sigen)
1facf9fc 3464+ ii_read_lock_child(i);
3465+ else {
3466+ ii_write_lock_child(i);
027c5e7a
AM
3467+ err = au_refresh_hinode_self(i);
3468+ au_iigen_dec(i);
1facf9fc 3469+ if (!err)
3470+ ii_downgrade_lock(i);
3471+ else {
3472+ ii_write_unlock(i);
3473+ break;
3474+ }
3475+ }
3476+
3477+ bstart = au_ibstart(i);
3478+ bend = au_ibend(i);
3479+ if (bstart <= bindex
3480+ && bindex <= bend
3481+ && au_h_iptr(i, bindex)
027c5e7a 3482+ && au_test_ibusy(i, bstart, bend)) {
1facf9fc 3483+ err = -EBUSY;
3484+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 3485+ AuDbgInode(i);
1facf9fc 3486+ }
3487+ ii_read_unlock(i);
3488+ }
7f207e10 3489+ au_iarray_free(array, max);
1facf9fc 3490+
7f207e10 3491+out:
1facf9fc 3492+ return err;
3493+}
3494+
b752ccd1
AM
3495+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
3496+ const unsigned int verbose)
1facf9fc 3497+{
3498+ int err;
3499+ unsigned int sigen;
3500+
3501+ sigen = au_sigen(root->d_sb);
3502+ DiMustNoWaiters(root);
5527c038 3503+ IiMustNoWaiters(d_inode(root));
1facf9fc 3504+ di_write_unlock(root);
b752ccd1 3505+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 3506+ if (!err)
b752ccd1 3507+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 3508+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
3509+
3510+ return err;
3511+}
3512+
076b876e
AM
3513+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
3514+ struct file **to_free, int *idx)
3515+{
3516+ int err;
c1595e42 3517+ unsigned char matched, root;
076b876e
AM
3518+ aufs_bindex_t bindex, bend;
3519+ struct au_fidir *fidir;
3520+ struct au_hfile *hfile;
3521+
3522+ err = 0;
2000de60 3523+ root = IS_ROOT(file->f_path.dentry);
c1595e42
JR
3524+ if (root) {
3525+ get_file(file);
3526+ to_free[*idx] = file;
3527+ (*idx)++;
3528+ goto out;
3529+ }
3530+
076b876e 3531+ matched = 0;
076b876e
AM
3532+ fidir = au_fi(file)->fi_hdir;
3533+ AuDebugOn(!fidir);
3534+ bend = au_fbend_dir(file);
3535+ for (bindex = au_fbstart(file); bindex <= bend; bindex++) {
3536+ hfile = fidir->fd_hfile + bindex;
3537+ if (!hfile->hf_file)
3538+ continue;
3539+
c1595e42 3540+ if (hfile->hf_br->br_id == br_id) {
076b876e 3541+ matched = 1;
076b876e 3542+ break;
c1595e42 3543+ }
076b876e 3544+ }
c1595e42 3545+ if (matched)
076b876e
AM
3546+ err = -EBUSY;
3547+
3548+out:
3549+ return err;
3550+}
3551+
3552+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
3553+ struct file **to_free, int opened)
3554+{
3555+ int err, idx;
3556+ unsigned long long ull, max;
3557+ aufs_bindex_t bstart;
3558+ struct file *file, **array;
076b876e
AM
3559+ struct dentry *root;
3560+ struct au_hfile *hfile;
3561+
3562+ array = au_farray_alloc(sb, &max);
3563+ err = PTR_ERR(array);
3564+ if (IS_ERR(array))
3565+ goto out;
3566+
3567+ err = 0;
3568+ idx = 0;
3569+ root = sb->s_root;
3570+ di_write_unlock(root);
3571+ for (ull = 0; ull < max; ull++) {
3572+ file = array[ull];
3573+ if (unlikely(!file))
3574+ break;
3575+
3576+ /* AuDbg("%pD\n", file); */
3577+ fi_read_lock(file);
3578+ bstart = au_fbstart(file);
2000de60 3579+ if (!d_is_dir(file->f_path.dentry)) {
076b876e
AM
3580+ hfile = &au_fi(file)->fi_htop;
3581+ if (hfile->hf_br->br_id == br_id)
3582+ err = -EBUSY;
3583+ } else
3584+ err = test_dir_busy(file, br_id, to_free, &idx);
3585+ fi_read_unlock(file);
3586+ if (unlikely(err))
3587+ break;
3588+ }
3589+ di_write_lock_child(root);
3590+ au_farray_free(array, max);
3591+ AuDebugOn(idx > opened);
3592+
3593+out:
3594+ return err;
3595+}
3596+
3597+static void br_del_file(struct file **to_free, unsigned long long opened,
3598+ aufs_bindex_t br_id)
3599+{
3600+ unsigned long long ull;
3601+ aufs_bindex_t bindex, bstart, bend, bfound;
3602+ struct file *file;
3603+ struct au_fidir *fidir;
3604+ struct au_hfile *hfile;
3605+
3606+ for (ull = 0; ull < opened; ull++) {
3607+ file = to_free[ull];
3608+ if (unlikely(!file))
3609+ break;
3610+
3611+ /* AuDbg("%pD\n", file); */
2000de60 3612+ AuDebugOn(!d_is_dir(file->f_path.dentry));
076b876e
AM
3613+ bfound = -1;
3614+ fidir = au_fi(file)->fi_hdir;
3615+ AuDebugOn(!fidir);
3616+ fi_write_lock(file);
3617+ bstart = au_fbstart(file);
3618+ bend = au_fbend_dir(file);
3619+ for (bindex = bstart; bindex <= bend; bindex++) {
3620+ hfile = fidir->fd_hfile + bindex;
3621+ if (!hfile->hf_file)
3622+ continue;
3623+
3624+ if (hfile->hf_br->br_id == br_id) {
3625+ bfound = bindex;
3626+ break;
3627+ }
3628+ }
3629+ AuDebugOn(bfound < 0);
3630+ au_set_h_fptr(file, bfound, NULL);
3631+ if (bfound == bstart) {
3632+ for (bstart++; bstart <= bend; bstart++)
3633+ if (au_hf_dir(file, bstart)) {
3634+ au_set_fbstart(file, bstart);
3635+ break;
3636+ }
3637+ }
3638+ fi_write_unlock(file);
3639+ }
3640+}
3641+
1facf9fc 3642+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
3643+ const aufs_bindex_t bindex,
3644+ const aufs_bindex_t bend)
3645+{
3646+ struct au_branch **brp, **p;
3647+
dece6358
AM
3648+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3649+
1facf9fc 3650+ brp = sbinfo->si_branch + bindex;
3651+ if (bindex < bend)
3652+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
3653+ sbinfo->si_branch[0 + bend] = NULL;
3654+ sbinfo->si_bend--;
3655+
53392da6 3656+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3657+ if (p)
3658+ sbinfo->si_branch = p;
4a4d8108 3659+ /* harmless error */
1facf9fc 3660+}
3661+
3662+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
3663+ const aufs_bindex_t bend)
3664+{
3665+ struct au_hdentry *hdp, *p;
3666+
1308ab2a 3667+ AuRwMustWriteLock(&dinfo->di_rwsem);
3668+
4a4d8108 3669+ hdp = dinfo->di_hdentry;
1facf9fc 3670+ if (bindex < bend)
4a4d8108
AM
3671+ memmove(hdp + bindex, hdp + bindex + 1,
3672+ sizeof(*hdp) * (bend - bindex));
3673+ hdp[0 + bend].hd_dentry = NULL;
1facf9fc 3674+ dinfo->di_bend--;
3675+
53392da6 3676+ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3677+ if (p)
3678+ dinfo->di_hdentry = p;
4a4d8108 3679+ /* harmless error */
1facf9fc 3680+}
3681+
3682+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
3683+ const aufs_bindex_t bend)
3684+{
3685+ struct au_hinode *hip, *p;
3686+
1308ab2a 3687+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3688+
1facf9fc 3689+ hip = iinfo->ii_hinode + bindex;
3690+ if (bindex < bend)
3691+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
3692+ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
4a4d8108 3693+ au_hn_init(iinfo->ii_hinode + bend);
1facf9fc 3694+ iinfo->ii_bend--;
3695+
53392da6 3696+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3697+ if (p)
3698+ iinfo->ii_hinode = p;
4a4d8108 3699+ /* harmless error */
1facf9fc 3700+}
3701+
3702+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
3703+ struct au_branch *br)
3704+{
3705+ aufs_bindex_t bend;
3706+ struct au_sbinfo *sbinfo;
53392da6
AM
3707+ struct dentry *root, *h_root;
3708+ struct inode *inode, *h_inode;
3709+ struct au_hinode *hinode;
1facf9fc 3710+
dece6358
AM
3711+ SiMustWriteLock(sb);
3712+
1facf9fc 3713+ root = sb->s_root;
5527c038 3714+ inode = d_inode(root);
1facf9fc 3715+ sbinfo = au_sbi(sb);
3716+ bend = sbinfo->si_bend;
3717+
53392da6
AM
3718+ h_root = au_h_dptr(root, bindex);
3719+ hinode = au_hi(inode, bindex);
3720+ h_inode = au_igrab(hinode->hi_inode);
3721+ au_hiput(hinode);
1facf9fc 3722+
53392da6 3723+ au_sbilist_lock();
1facf9fc 3724+ au_br_do_del_brp(sbinfo, bindex, bend);
3725+ au_br_do_del_hdp(au_di(root), bindex, bend);
3726+ au_br_do_del_hip(au_ii(inode), bindex, bend);
53392da6
AM
3727+ au_sbilist_unlock();
3728+
3729+ dput(h_root);
3730+ iput(h_inode);
3731+ au_br_do_free(br);
1facf9fc 3732+}
3733+
79b8bda9
AM
3734+static unsigned long long empty_cb(struct super_block *sb, void *array,
3735+ unsigned long long max, void *arg)
076b876e
AM
3736+{
3737+ return max;
3738+}
3739+
1facf9fc 3740+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
3741+{
3742+ int err, rerr, i;
076b876e 3743+ unsigned long long opened;
1facf9fc 3744+ unsigned int mnt_flags;
3745+ aufs_bindex_t bindex, bend, br_id;
3746+ unsigned char do_wh, verbose;
3747+ struct au_branch *br;
3748+ struct au_wbr *wbr;
076b876e
AM
3749+ struct dentry *root;
3750+ struct file **to_free;
1facf9fc 3751+
3752+ err = 0;
076b876e
AM
3753+ opened = 0;
3754+ to_free = NULL;
3755+ root = sb->s_root;
3756+ bindex = au_find_dbindex(root, del->h_path.dentry);
1facf9fc 3757+ if (bindex < 0) {
3758+ if (remount)
3759+ goto out; /* success */
3760+ err = -ENOENT;
4a4d8108 3761+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 3762+ goto out;
3763+ }
3764+ AuDbg("bindex b%d\n", bindex);
3765+
3766+ err = -EBUSY;
3767+ mnt_flags = au_mntflags(sb);
3768+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3769+ bend = au_sbend(sb);
3770+ if (unlikely(!bend)) {
3771+ AuVerbose(verbose, "no more branches left\n");
3772+ goto out;
3773+ }
3774+ br = au_sbr(sb, bindex);
86dc4139 3775+ AuDebugOn(!path_equal(&br->br_path, &del->h_path));
076b876e
AM
3776+
3777+ br_id = br->br_id;
3778+ opened = atomic_read(&br->br_count);
3779+ if (unlikely(opened)) {
79b8bda9 3780+ to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
076b876e
AM
3781+ err = PTR_ERR(to_free);
3782+ if (IS_ERR(to_free))
3783+ goto out;
3784+
3785+ err = test_file_busy(sb, br_id, to_free, opened);
3786+ if (unlikely(err)) {
3787+ AuVerbose(verbose, "%llu file(s) opened\n", opened);
3788+ goto out;
3789+ }
1facf9fc 3790+ }
3791+
3792+ wbr = br->br_wbr;
3793+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
3794+ if (do_wh) {
1308ab2a 3795+ /* instead of WbrWhMustWriteLock(wbr) */
3796+ SiMustWriteLock(sb);
1facf9fc 3797+ for (i = 0; i < AuBrWh_Last; i++) {
3798+ dput(wbr->wbr_wh[i]);
3799+ wbr->wbr_wh[i] = NULL;
3800+ }
3801+ }
3802+
076b876e 3803+ err = test_children_busy(root, bindex, verbose);
1facf9fc 3804+ if (unlikely(err)) {
3805+ if (do_wh)
3806+ goto out_wh;
3807+ goto out;
3808+ }
3809+
3810+ err = 0;
076b876e
AM
3811+ if (to_free) {
3812+ /*
3813+ * now we confirmed the branch is deletable.
3814+ * let's free the remaining opened dirs on the branch.
3815+ */
3816+ di_write_unlock(root);
3817+ br_del_file(to_free, opened, br_id);
3818+ di_write_lock_child(root);
3819+ }
3820+
1facf9fc 3821+ if (!remount)
3822+ au_br_do_del(sb, bindex, br);
3823+ else {
3824+ sysaufs_brs_del(sb, bindex);
3825+ au_br_do_del(sb, bindex, br);
3826+ sysaufs_brs_add(sb, bindex);
3827+ }
3828+
1308ab2a 3829+ if (!bindex) {
5527c038 3830+ au_cpup_attr_all(d_inode(root), /*force*/1);
1308ab2a 3831+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
3832+ } else
5527c038 3833+ au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
1facf9fc 3834+ if (au_opt_test(mnt_flags, PLINK))
3835+ au_plink_half_refresh(sb, br_id);
3836+
b752ccd1 3837+ if (au_xino_brid(sb) == br_id)
1facf9fc 3838+ au_xino_brid_set(sb, -1);
3839+ goto out; /* success */
3840+
4f0767ce 3841+out_wh:
1facf9fc 3842+ /* revert */
86dc4139 3843+ rerr = au_br_init_wh(sb, br, br->br_perm);
1facf9fc 3844+ if (rerr)
0c3ec466
AM
3845+ pr_warn("failed re-creating base whiteout, %s. (%d)\n",
3846+ del->pathname, rerr);
4f0767ce 3847+out:
076b876e
AM
3848+ if (to_free)
3849+ au_farray_free(to_free, opened);
1facf9fc 3850+ return err;
3851+}
3852+
3853+/* ---------------------------------------------------------------------- */
3854+
027c5e7a
AM
3855+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
3856+{
3857+ int err;
3858+ aufs_bindex_t bstart, bend;
3859+ struct aufs_ibusy ibusy;
3860+ struct inode *inode, *h_inode;
3861+
3862+ err = -EPERM;
3863+ if (unlikely(!capable(CAP_SYS_ADMIN)))
3864+ goto out;
3865+
3866+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
3867+ if (!err)
3868+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
3869+ if (unlikely(err)) {
3870+ err = -EFAULT;
3871+ AuTraceErr(err);
3872+ goto out;
3873+ }
3874+
3875+ err = -EINVAL;
3876+ si_read_lock(sb, AuLock_FLUSH);
3877+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
3878+ goto out_unlock;
3879+
3880+ err = 0;
3881+ ibusy.h_ino = 0; /* invalid */
3882+ inode = ilookup(sb, ibusy.ino);
3883+ if (!inode
3884+ || inode->i_ino == AUFS_ROOT_INO
3885+ || is_bad_inode(inode))
3886+ goto out_unlock;
3887+
3888+ ii_read_lock_child(inode);
3889+ bstart = au_ibstart(inode);
3890+ bend = au_ibend(inode);
3891+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
3892+ h_inode = au_h_iptr(inode, ibusy.bindex);
3893+ if (h_inode && au_test_ibusy(inode, bstart, bend))
3894+ ibusy.h_ino = h_inode->i_ino;
3895+ }
3896+ ii_read_unlock(inode);
3897+ iput(inode);
3898+
3899+out_unlock:
3900+ si_read_unlock(sb);
3901+ if (!err) {
3902+ err = __put_user(ibusy.h_ino, &arg->h_ino);
3903+ if (unlikely(err)) {
3904+ err = -EFAULT;
3905+ AuTraceErr(err);
3906+ }
3907+ }
3908+out:
3909+ return err;
3910+}
3911+
3912+long au_ibusy_ioctl(struct file *file, unsigned long arg)
3913+{
2000de60 3914+ return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
027c5e7a
AM
3915+}
3916+
3917+#ifdef CONFIG_COMPAT
3918+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
3919+{
2000de60 3920+ return au_ibusy(file->f_path.dentry->d_sb, compat_ptr(arg));
027c5e7a
AM
3921+}
3922+#endif
3923+
3924+/* ---------------------------------------------------------------------- */
3925+
1facf9fc 3926+/*
3927+ * change a branch permission
3928+ */
3929+
dece6358
AM
3930+static void au_warn_ima(void)
3931+{
3932+#ifdef CONFIG_IMA
1308ab2a 3933+ /* since it doesn't support mark_files_ro() */
027c5e7a 3934+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
dece6358
AM
3935+#endif
3936+}
3937+
1facf9fc 3938+static int do_need_sigen_inc(int a, int b)
3939+{
3940+ return au_br_whable(a) && !au_br_whable(b);
3941+}
3942+
3943+static int need_sigen_inc(int old, int new)
3944+{
3945+ return do_need_sigen_inc(old, new)
3946+ || do_need_sigen_inc(new, old);
3947+}
3948+
3949+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
3950+{
7f207e10 3951+ int err, do_warn;
027c5e7a 3952+ unsigned int mnt_flags;
7f207e10 3953+ unsigned long long ull, max;
e49829fe 3954+ aufs_bindex_t br_id;
38d290e6 3955+ unsigned char verbose, writer;
7f207e10 3956+ struct file *file, *hf, **array;
e49829fe 3957+ struct au_hfile *hfile;
1facf9fc 3958+
027c5e7a
AM
3959+ mnt_flags = au_mntflags(sb);
3960+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3961+
7f207e10
AM
3962+ array = au_farray_alloc(sb, &max);
3963+ err = PTR_ERR(array);
3964+ if (IS_ERR(array))
1facf9fc 3965+ goto out;
3966+
7f207e10 3967+ do_warn = 0;
e49829fe 3968+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
3969+ for (ull = 0; ull < max; ull++) {
3970+ file = array[ull];
076b876e
AM
3971+ if (unlikely(!file))
3972+ break;
1facf9fc 3973+
523b37e3 3974+ /* AuDbg("%pD\n", file); */
1facf9fc 3975+ fi_read_lock(file);
3976+ if (unlikely(au_test_mmapped(file))) {
3977+ err = -EBUSY;
523b37e3 3978+ AuVerbose(verbose, "mmapped %pD\n", file);
7f207e10 3979+ AuDbgFile(file);
1facf9fc 3980+ FiMustNoWaiters(file);
3981+ fi_read_unlock(file);
7f207e10 3982+ goto out_array;
1facf9fc 3983+ }
3984+
e49829fe
JR
3985+ hfile = &au_fi(file)->fi_htop;
3986+ hf = hfile->hf_file;
7e9cd9fe 3987+ if (!d_is_reg(file->f_path.dentry)
1facf9fc 3988+ || !(file->f_mode & FMODE_WRITE)
e49829fe 3989+ || hfile->hf_br->br_id != br_id
7f207e10
AM
3990+ || !(hf->f_mode & FMODE_WRITE))
3991+ array[ull] = NULL;
3992+ else {
3993+ do_warn = 1;
3994+ get_file(file);
1facf9fc 3995+ }
3996+
1facf9fc 3997+ FiMustNoWaiters(file);
3998+ fi_read_unlock(file);
7f207e10
AM
3999+ fput(file);
4000+ }
1facf9fc 4001+
4002+ err = 0;
7f207e10 4003+ if (do_warn)
dece6358 4004+ au_warn_ima();
7f207e10
AM
4005+
4006+ for (ull = 0; ull < max; ull++) {
4007+ file = array[ull];
4008+ if (!file)
4009+ continue;
4010+
1facf9fc 4011+ /* todo: already flushed? */
523b37e3
AM
4012+ /*
4013+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its
4014+ * approach which resets f_mode and calls mnt_drop_write() and
4015+ * file_release_write() for each file, because the branch
4016+ * attribute in aufs world is totally different from the native
4017+ * fs rw/ro mode.
4018+ */
7f207e10
AM
4019+ /* fi_read_lock(file); */
4020+ hfile = &au_fi(file)->fi_htop;
4021+ hf = hfile->hf_file;
4022+ /* fi_read_unlock(file); */
027c5e7a 4023+ spin_lock(&hf->f_lock);
38d290e6
JR
4024+ writer = !!(hf->f_mode & FMODE_WRITER);
4025+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
027c5e7a 4026+ spin_unlock(&hf->f_lock);
38d290e6
JR
4027+ if (writer) {
4028+ put_write_access(file_inode(hf));
c06a8ce3 4029+ __mnt_drop_write(hf->f_path.mnt);
1facf9fc 4030+ }
4031+ }
4032+
7f207e10
AM
4033+out_array:
4034+ au_farray_free(array, max);
4f0767ce 4035+out:
7f207e10 4036+ AuTraceErr(err);
1facf9fc 4037+ return err;
4038+}
4039+
4040+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4041+ int *do_refresh)
1facf9fc 4042+{
4043+ int err, rerr;
4044+ aufs_bindex_t bindex;
4045+ struct dentry *root;
4046+ struct au_branch *br;
076b876e 4047+ struct au_br_fhsm *bf;
1facf9fc 4048+
4049+ root = sb->s_root;
1facf9fc 4050+ bindex = au_find_dbindex(root, mod->h_root);
4051+ if (bindex < 0) {
4052+ if (remount)
4053+ return 0; /* success */
4054+ err = -ENOENT;
4a4d8108 4055+ pr_err("%s no such branch\n", mod->path);
1facf9fc 4056+ goto out;
4057+ }
4058+ AuDbg("bindex b%d\n", bindex);
4059+
5527c038 4060+ err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
1facf9fc 4061+ if (unlikely(err))
4062+ goto out;
4063+
4064+ br = au_sbr(sb, bindex);
86dc4139 4065+ AuDebugOn(mod->h_root != au_br_dentry(br));
1facf9fc 4066+ if (br->br_perm == mod->perm)
4067+ return 0; /* success */
4068+
076b876e
AM
4069+ /* pre-allocate for non-fhsm --> fhsm */
4070+ bf = NULL;
4071+ if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
4072+ err = au_fhsm_br_alloc(br);
4073+ if (unlikely(err))
4074+ goto out;
4075+ bf = br->br_fhsm;
4076+ br->br_fhsm = NULL;
4077+ }
4078+
1facf9fc 4079+ if (au_br_writable(br->br_perm)) {
4080+ /* remove whiteout base */
86dc4139 4081+ err = au_br_init_wh(sb, br, mod->perm);
1facf9fc 4082+ if (unlikely(err))
076b876e 4083+ goto out_bf;
1facf9fc 4084+
4085+ if (!au_br_writable(mod->perm)) {
4086+ /* rw --> ro, file might be mmapped */
4087+ DiMustNoWaiters(root);
5527c038 4088+ IiMustNoWaiters(d_inode(root));
1facf9fc 4089+ di_write_unlock(root);
4090+ err = au_br_mod_files_ro(sb, bindex);
4091+ /* aufs_write_lock() calls ..._child() */
4092+ di_write_lock_child(root);
4093+
4094+ if (unlikely(err)) {
4095+ rerr = -ENOMEM;
4096+ br->br_wbr = kmalloc(sizeof(*br->br_wbr),
4097+ GFP_NOFS);
86dc4139
AM
4098+ if (br->br_wbr)
4099+ rerr = au_wbr_init(br, sb, br->br_perm);
1facf9fc 4100+ if (unlikely(rerr)) {
4101+ AuIOErr("nested error %d (%d)\n",
4102+ rerr, err);
4103+ br->br_perm = mod->perm;
4104+ }
4105+ }
4106+ }
4107+ } else if (au_br_writable(mod->perm)) {
4108+ /* ro --> rw */
4109+ err = -ENOMEM;
4110+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
4111+ if (br->br_wbr) {
86dc4139 4112+ err = au_wbr_init(br, sb, mod->perm);
1facf9fc 4113+ if (unlikely(err)) {
4114+ kfree(br->br_wbr);
4115+ br->br_wbr = NULL;
4116+ }
4117+ }
4118+ }
076b876e
AM
4119+ if (unlikely(err))
4120+ goto out_bf;
4121+
4122+ if (au_br_fhsm(br->br_perm)) {
4123+ if (!au_br_fhsm(mod->perm)) {
4124+ /* fhsm --> non-fhsm */
4125+ au_br_fhsm_fin(br->br_fhsm);
4126+ kfree(br->br_fhsm);
4127+ br->br_fhsm = NULL;
4128+ }
4129+ } else if (au_br_fhsm(mod->perm))
4130+ /* non-fhsm --> fhsm */
4131+ br->br_fhsm = bf;
4132+
076b876e
AM
4133+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
4134+ br->br_perm = mod->perm;
4135+ goto out; /* success */
1facf9fc 4136+
076b876e
AM
4137+out_bf:
4138+ kfree(bf);
4139+out:
4140+ AuTraceErr(err);
4141+ return err;
4142+}
4143+
4144+/* ---------------------------------------------------------------------- */
4145+
4146+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
4147+{
4148+ int err;
4149+ struct kstatfs kstfs;
4150+
4151+ err = vfs_statfs(&br->br_path, &kstfs);
1facf9fc 4152+ if (!err) {
076b876e
AM
4153+ stfs->f_blocks = kstfs.f_blocks;
4154+ stfs->f_bavail = kstfs.f_bavail;
4155+ stfs->f_files = kstfs.f_files;
4156+ stfs->f_ffree = kstfs.f_ffree;
1facf9fc 4157+ }
4158+
1facf9fc 4159+ return err;
4160+}
7f207e10
AM
4161diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
4162--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
79b8bda9 4163+++ linux/fs/aufs/branch.h 2015-11-11 17:21:46.915530388 +0100
b912730e 4164@@ -0,0 +1,279 @@
1facf9fc 4165+/*
2000de60 4166+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 4167+ *
4168+ * This program, aufs is free software; you can redistribute it and/or modify
4169+ * it under the terms of the GNU General Public License as published by
4170+ * the Free Software Foundation; either version 2 of the License, or
4171+ * (at your option) any later version.
dece6358
AM
4172+ *
4173+ * This program is distributed in the hope that it will be useful,
4174+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4175+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4176+ * GNU General Public License for more details.
4177+ *
4178+ * You should have received a copy of the GNU General Public License
523b37e3 4179+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4180+ */
4181+
4182+/*
4183+ * branch filesystems and xino for them
4184+ */
4185+
4186+#ifndef __AUFS_BRANCH_H__
4187+#define __AUFS_BRANCH_H__
4188+
4189+#ifdef __KERNEL__
4190+
1facf9fc 4191+#include <linux/mount.h>
4a4d8108 4192+#include "dynop.h"
1facf9fc 4193+#include "rwsem.h"
4194+#include "super.h"
4195+
4196+/* ---------------------------------------------------------------------- */
4197+
4198+/* a xino file */
4199+struct au_xino_file {
4200+ struct file *xi_file;
4201+ struct mutex xi_nondir_mtx;
4202+
4203+ /* todo: make xino files an array to support huge inode number */
4204+
4205+#ifdef CONFIG_DEBUG_FS
4206+ struct dentry *xi_dbgaufs;
4207+#endif
4208+};
4209+
076b876e
AM
4210+/* File-based Hierarchical Storage Management */
4211+struct au_br_fhsm {
4212+#ifdef CONFIG_AUFS_FHSM
4213+ struct mutex bf_lock;
4214+ unsigned long bf_jiffy;
4215+ struct aufs_stfs bf_stfs;
4216+ int bf_readable;
4217+#endif
4218+};
4219+
1facf9fc 4220+/* members for writable branch only */
4221+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
4222+struct au_wbr {
dece6358 4223+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 4224+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 4225+ atomic_t wbr_wh_running;
1facf9fc 4226+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
4227+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
4228+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
4229+
4230+ /* mfs mode */
4231+ unsigned long long wbr_bytes;
4232+};
4233+
4a4d8108
AM
4234+/* ext2 has 3 types of operations at least, ext3 has 4 */
4235+#define AuBrDynOp (AuDyLast * 4)
4236+
1716fcea
AM
4237+#ifdef CONFIG_AUFS_HFSNOTIFY
4238+/* support for asynchronous destruction */
4239+struct au_br_hfsnotify {
4240+ struct fsnotify_group *hfsn_group;
4241+};
4242+#endif
4243+
392086de
AM
4244+/* sysfs entries */
4245+struct au_brsysfs {
4246+ char name[16];
4247+ struct attribute attr;
4248+};
4249+
4250+enum {
4251+ AuBrSysfs_BR,
4252+ AuBrSysfs_BRID,
4253+ AuBrSysfs_Last
4254+};
4255+
1facf9fc 4256+/* protected by superblock rwsem */
4257+struct au_branch {
4258+ struct au_xino_file br_xino;
4259+
4260+ aufs_bindex_t br_id;
4261+
4262+ int br_perm;
86dc4139 4263+ struct path br_path;
4a4d8108
AM
4264+ spinlock_t br_dykey_lock;
4265+ struct au_dykey *br_dykey[AuBrDynOp];
1facf9fc 4266+ atomic_t br_count;
4267+
4268+ struct au_wbr *br_wbr;
076b876e 4269+ struct au_br_fhsm *br_fhsm;
1facf9fc 4270+
4271+ /* xino truncation */
1facf9fc 4272+ atomic_t br_xino_running;
4273+
027c5e7a 4274+#ifdef CONFIG_AUFS_HFSNOTIFY
1716fcea 4275+ struct au_br_hfsnotify *br_hfsn;
027c5e7a
AM
4276+#endif
4277+
1facf9fc 4278+#ifdef CONFIG_SYSFS
392086de
AM
4279+ /* entries under sysfs per mount-point */
4280+ struct au_brsysfs br_sysfs[AuBrSysfs_Last];
1facf9fc 4281+#endif
4282+};
4283+
4284+/* ---------------------------------------------------------------------- */
4285+
86dc4139
AM
4286+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
4287+{
4288+ return br->br_path.mnt;
4289+}
4290+
4291+static inline struct dentry *au_br_dentry(struct au_branch *br)
4292+{
4293+ return br->br_path.dentry;
4294+}
4295+
4296+static inline struct super_block *au_br_sb(struct au_branch *br)
4297+{
4298+ return au_br_mnt(br)->mnt_sb;
4299+}
4300+
1facf9fc 4301+static inline int au_br_rdonly(struct au_branch *br)
4302+{
86dc4139 4303+ return ((au_br_sb(br)->s_flags & MS_RDONLY)
1facf9fc 4304+ || !au_br_writable(br->br_perm))
4305+ ? -EROFS : 0;
4306+}
4307+
4a4d8108 4308+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 4309+{
4a4d8108 4310+#ifdef CONFIG_AUFS_HNOTIFY
1e00d052 4311+ return !(brperm & AuBrPerm_RR);
1facf9fc 4312+#else
4313+ return 0;
4314+#endif
4315+}
4316+
b912730e
AM
4317+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
4318+{
4319+ int err, exec_flag;
4320+
4321+ err = 0;
4322+ exec_flag = oflag & __FMODE_EXEC;
79b8bda9 4323+ if (unlikely(exec_flag && path_noexec(&br->br_path)))
b912730e
AM
4324+ err = -EACCES;
4325+
4326+ return err;
4327+}
4328+
1facf9fc 4329+/* ---------------------------------------------------------------------- */
4330+
4331+/* branch.c */
4332+struct au_sbinfo;
4333+void au_br_free(struct au_sbinfo *sinfo);
4334+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
4335+struct au_opt_add;
4336+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
4337+struct au_opt_del;
4338+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
4339+long au_ibusy_ioctl(struct file *file, unsigned long arg);
4340+#ifdef CONFIG_COMPAT
4341+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
4342+#endif
1facf9fc 4343+struct au_opt_mod;
4344+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4345+ int *do_refresh);
076b876e
AM
4346+struct aufs_stfs;
4347+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
1facf9fc 4348+
4349+/* xino.c */
4350+static const loff_t au_loff_max = LLONG_MAX;
4351+
4352+int au_xib_trunc(struct super_block *sb);
5527c038 4353+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
1facf9fc 4354+ loff_t *pos);
5527c038
JR
4355+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
4356+ size_t size, loff_t *pos);
1facf9fc 4357+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
4358+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
4359+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 4360+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 4361+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4362+ ino_t ino);
4363+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4364+ ino_t *ino);
4365+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
4366+ struct file *base_file, int do_test);
4367+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
4368+
4369+struct au_opt_xino;
4370+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
4371+void au_xino_clr(struct super_block *sb);
4372+struct file *au_xino_def(struct super_block *sb);
4373+int au_xino_path(struct seq_file *seq, struct file *file);
4374+
4375+/* ---------------------------------------------------------------------- */
4376+
4377+/* Superblock to branch */
4378+static inline
4379+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
4380+{
4381+ return au_sbr(sb, bindex)->br_id;
4382+}
4383+
4384+static inline
4385+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
4386+{
86dc4139 4387+ return au_br_mnt(au_sbr(sb, bindex));
1facf9fc 4388+}
4389+
4390+static inline
4391+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
4392+{
86dc4139 4393+ return au_br_sb(au_sbr(sb, bindex));
1facf9fc 4394+}
4395+
4396+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
4397+{
e49829fe 4398+ atomic_dec(&au_sbr(sb, bindex)->br_count);
1facf9fc 4399+}
4400+
4401+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
4402+{
4403+ return au_sbr(sb, bindex)->br_perm;
4404+}
4405+
4406+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
4407+{
4408+ return au_br_whable(au_sbr_perm(sb, bindex));
4409+}
4410+
4411+/* ---------------------------------------------------------------------- */
4412+
4413+/*
4414+ * wbr_wh_read_lock, wbr_wh_write_lock
4415+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
4416+ */
4417+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
4418+
dece6358
AM
4419+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
4420+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
4421+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
4422+
076b876e
AM
4423+/* ---------------------------------------------------------------------- */
4424+
4425+#ifdef CONFIG_AUFS_FHSM
4426+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
4427+{
4428+ mutex_init(&brfhsm->bf_lock);
4429+ brfhsm->bf_jiffy = 0;
4430+ brfhsm->bf_readable = 0;
4431+}
4432+
4433+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
4434+{
4435+ mutex_destroy(&brfhsm->bf_lock);
4436+}
4437+#else
4438+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
4439+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
4440+#endif
4441+
1facf9fc 4442+#endif /* __KERNEL__ */
4443+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
4444diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
4445--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 4446+++ linux/fs/aufs/conf.mk 2015-09-24 10:47:58.248052907 +0200
c1595e42 4447@@ -0,0 +1,38 @@
4a4d8108
AM
4448+
4449+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
4450+
4451+define AuConf
4452+ifdef ${1}
4453+AuConfStr += ${1}=${${1}}
4454+endif
4455+endef
4456+
b752ccd1 4457+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 4458+ SBILIST \
7f207e10 4459+ HNOTIFY HFSNOTIFY \
4a4d8108 4460+ EXPORT INO_T_64 \
c1595e42 4461+ XATTR \
076b876e 4462+ FHSM \
4a4d8108 4463+ RDU \
4a4d8108
AM
4464+ SHWH \
4465+ BR_RAMFS \
4466+ BR_FUSE POLL \
4467+ BR_HFSPLUS \
4468+ BDEV_LOOP \
b752ccd1
AM
4469+ DEBUG MAGIC_SYSRQ
4470+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
4471+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
4472+
4473+AuConfName = ${obj}/conf.str
4474+${AuConfName}.tmp: FORCE
4475+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
4476+${AuConfName}: ${AuConfName}.tmp
4477+ @diff -q $< $@ > /dev/null 2>&1 || { \
4478+ echo ' GEN ' $@; \
4479+ cp -p $< $@; \
4480+ }
4481+FORCE:
4482+clean-files += ${AuConfName} ${AuConfName}.tmp
4483+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
4484+
4485+-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
4486diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
4487--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
79b8bda9 4488+++ linux/fs/aufs/cpup.c 2015-11-11 17:21:46.915530388 +0100
5527c038 4489@@ -0,0 +1,1319 @@
1facf9fc 4490+/*
2000de60 4491+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 4492+ *
4493+ * This program, aufs is free software; you can redistribute it and/or modify
4494+ * it under the terms of the GNU General Public License as published by
4495+ * the Free Software Foundation; either version 2 of the License, or
4496+ * (at your option) any later version.
dece6358
AM
4497+ *
4498+ * This program is distributed in the hope that it will be useful,
4499+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4500+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4501+ * GNU General Public License for more details.
4502+ *
4503+ * You should have received a copy of the GNU General Public License
523b37e3 4504+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4505+ */
4506+
4507+/*
4508+ * copy-up functions, see wbr_policy.c for copy-down
4509+ */
4510+
4511+#include <linux/fs_stack.h>
dece6358 4512+#include <linux/mm.h>
1facf9fc 4513+#include "aufs.h"
4514+
86dc4139 4515+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
1facf9fc 4516+{
4517+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
367653fa 4518+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
1facf9fc 4519+
86dc4139
AM
4520+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
4521+
4522+ dst->i_flags |= iflags & ~mask;
1facf9fc 4523+ if (au_test_fs_notime(dst->i_sb))
4524+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
4525+}
4526+
4527+void au_cpup_attr_timesizes(struct inode *inode)
4528+{
4529+ struct inode *h_inode;
4530+
4531+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4532+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 4533+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 4534+}
4535+
4536+void au_cpup_attr_nlink(struct inode *inode, int force)
4537+{
4538+ struct inode *h_inode;
4539+ struct super_block *sb;
4540+ aufs_bindex_t bindex, bend;
4541+
4542+ sb = inode->i_sb;
4543+ bindex = au_ibstart(inode);
4544+ h_inode = au_h_iptr(inode, bindex);
4545+ if (!force
4546+ && !S_ISDIR(h_inode->i_mode)
4547+ && au_opt_test(au_mntflags(sb), PLINK)
4548+ && au_plink_test(inode))
4549+ return;
4550+
7eafdf33
AM
4551+ /*
4552+ * 0 can happen in revalidating.
38d290e6
JR
4553+ * h_inode->i_mutex may not be held here, but it is harmless since once
4554+ * i_nlink reaches 0, it will never become positive except O_TMPFILE
4555+ * case.
4556+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
4557+ * the incorrect link count.
7eafdf33 4558+ */
92d182d2 4559+ set_nlink(inode, h_inode->i_nlink);
1facf9fc 4560+
4561+ /*
4562+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
4563+ * it may includes whplink directory.
4564+ */
4565+ if (S_ISDIR(h_inode->i_mode)) {
4566+ bend = au_ibend(inode);
4567+ for (bindex++; bindex <= bend; bindex++) {
4568+ h_inode = au_h_iptr(inode, bindex);
4569+ if (h_inode)
4570+ au_add_nlink(inode, h_inode);
4571+ }
4572+ }
4573+}
4574+
4575+void au_cpup_attr_changeable(struct inode *inode)
4576+{
4577+ struct inode *h_inode;
4578+
4579+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4580+ inode->i_mode = h_inode->i_mode;
4581+ inode->i_uid = h_inode->i_uid;
4582+ inode->i_gid = h_inode->i_gid;
4583+ au_cpup_attr_timesizes(inode);
86dc4139 4584+ au_cpup_attr_flags(inode, h_inode->i_flags);
1facf9fc 4585+}
4586+
4587+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
4588+{
4589+ struct au_iinfo *iinfo = au_ii(inode);
4590+
1308ab2a 4591+ IiMustWriteLock(inode);
4592+
1facf9fc 4593+ iinfo->ii_higen = h_inode->i_generation;
4594+ iinfo->ii_hsb1 = h_inode->i_sb;
4595+}
4596+
4597+void au_cpup_attr_all(struct inode *inode, int force)
4598+{
4599+ struct inode *h_inode;
4600+
4601+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4602+ au_cpup_attr_changeable(inode);
4603+ if (inode->i_nlink > 0)
4604+ au_cpup_attr_nlink(inode, force);
4605+ inode->i_rdev = h_inode->i_rdev;
4606+ inode->i_blkbits = h_inode->i_blkbits;
4607+ au_cpup_igen(inode, h_inode);
4608+}
4609+
4610+/* ---------------------------------------------------------------------- */
4611+
4612+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
4613+
4614+/* keep the timestamps of the parent dir when cpup */
4615+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4616+ struct path *h_path)
4617+{
4618+ struct inode *h_inode;
4619+
4620+ dt->dt_dentry = dentry;
4621+ dt->dt_h_path = *h_path;
5527c038 4622+ h_inode = d_inode(h_path->dentry);
1facf9fc 4623+ dt->dt_atime = h_inode->i_atime;
4624+ dt->dt_mtime = h_inode->i_mtime;
4625+ /* smp_mb(); */
4626+}
4627+
4628+void au_dtime_revert(struct au_dtime *dt)
4629+{
4630+ struct iattr attr;
4631+ int err;
4632+
4633+ attr.ia_atime = dt->dt_atime;
4634+ attr.ia_mtime = dt->dt_mtime;
4635+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
4636+ | ATTR_ATIME | ATTR_ATIME_SET;
4637+
523b37e3
AM
4638+ /* no delegation since this is a directory */
4639+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
1facf9fc 4640+ if (unlikely(err))
0c3ec466 4641+ pr_warn("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 4642+}
4643+
4644+/* ---------------------------------------------------------------------- */
4645+
86dc4139
AM
4646+/* internal use only */
4647+struct au_cpup_reg_attr {
4648+ int valid;
4649+ struct kstat st;
4650+ unsigned int iflags; /* inode->i_flags */
4651+};
4652+
1facf9fc 4653+static noinline_for_stack
86dc4139
AM
4654+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
4655+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4656+{
c1595e42 4657+ int err, sbits, icex;
7e9cd9fe
AM
4658+ unsigned int mnt_flags;
4659+ unsigned char verbose;
1facf9fc 4660+ struct iattr ia;
4661+ struct path h_path;
1308ab2a 4662+ struct inode *h_isrc, *h_idst;
86dc4139 4663+ struct kstat *h_st;
c1595e42 4664+ struct au_branch *br;
1facf9fc 4665+
4666+ h_path.dentry = au_h_dptr(dst, bindex);
5527c038 4667+ h_idst = d_inode(h_path.dentry);
c1595e42
JR
4668+ br = au_sbr(dst->d_sb, bindex);
4669+ h_path.mnt = au_br_mnt(br);
5527c038 4670+ h_isrc = d_inode(h_src);
1308ab2a 4671+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 4672+ | ATTR_ATIME | ATTR_MTIME
4673+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
86dc4139
AM
4674+ if (h_src_attr && h_src_attr->valid) {
4675+ h_st = &h_src_attr->st;
4676+ ia.ia_uid = h_st->uid;
4677+ ia.ia_gid = h_st->gid;
4678+ ia.ia_atime = h_st->atime;
4679+ ia.ia_mtime = h_st->mtime;
4680+ if (h_idst->i_mode != h_st->mode
4681+ && !S_ISLNK(h_idst->i_mode)) {
4682+ ia.ia_valid |= ATTR_MODE;
4683+ ia.ia_mode = h_st->mode;
4684+ }
4685+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
4686+ au_cpup_attr_flags(h_idst, h_src_attr->iflags);
4687+ } else {
4688+ ia.ia_uid = h_isrc->i_uid;
4689+ ia.ia_gid = h_isrc->i_gid;
4690+ ia.ia_atime = h_isrc->i_atime;
4691+ ia.ia_mtime = h_isrc->i_mtime;
4692+ if (h_idst->i_mode != h_isrc->i_mode
4693+ && !S_ISLNK(h_idst->i_mode)) {
4694+ ia.ia_valid |= ATTR_MODE;
4695+ ia.ia_mode = h_isrc->i_mode;
4696+ }
4697+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
4698+ au_cpup_attr_flags(h_idst, h_isrc->i_flags);
1308ab2a 4699+ }
523b37e3
AM
4700+ /* no delegation since it is just created */
4701+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4702+
4703+ /* is this nfs only? */
4704+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
4705+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
4706+ ia.ia_mode = h_isrc->i_mode;
523b37e3 4707+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4708+ }
4709+
c1595e42 4710+ icex = br->br_perm & AuBrAttr_ICEX;
7e9cd9fe
AM
4711+ if (!err) {
4712+ mnt_flags = au_mntflags(dst->d_sb);
4713+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4714+ err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
4715+ }
c1595e42 4716+
1facf9fc 4717+ return err;
4718+}
4719+
4720+/* ---------------------------------------------------------------------- */
4721+
4722+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
4723+ char *buf, unsigned long blksize)
4724+{
4725+ int err;
4726+ size_t sz, rbytes, wbytes;
4727+ unsigned char all_zero;
4728+ char *p, *zp;
4729+ struct mutex *h_mtx;
4730+ /* reduce stack usage */
4731+ struct iattr *ia;
4732+
4733+ zp = page_address(ZERO_PAGE(0));
4734+ if (unlikely(!zp))
4735+ return -ENOMEM; /* possible? */
4736+
4737+ err = 0;
4738+ all_zero = 0;
4739+ while (len) {
4740+ AuDbg("len %lld\n", len);
4741+ sz = blksize;
4742+ if (len < blksize)
4743+ sz = len;
4744+
4745+ rbytes = 0;
4746+ /* todo: signal_pending? */
4747+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
4748+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
4749+ err = rbytes;
4750+ }
4751+ if (unlikely(err < 0))
4752+ break;
4753+
4754+ all_zero = 0;
4755+ if (len >= rbytes && rbytes == blksize)
4756+ all_zero = !memcmp(buf, zp, rbytes);
4757+ if (!all_zero) {
4758+ wbytes = rbytes;
4759+ p = buf;
4760+ while (wbytes) {
4761+ size_t b;
4762+
4763+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
4764+ err = b;
4765+ /* todo: signal_pending? */
4766+ if (unlikely(err == -EAGAIN || err == -EINTR))
4767+ continue;
4768+ if (unlikely(err < 0))
4769+ break;
4770+ wbytes -= b;
4771+ p += b;
4772+ }
392086de
AM
4773+ if (unlikely(err < 0))
4774+ break;
1facf9fc 4775+ } else {
4776+ loff_t res;
4777+
4778+ AuLabel(hole);
4779+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
4780+ err = res;
4781+ if (unlikely(res < 0))
4782+ break;
4783+ }
4784+ len -= rbytes;
4785+ err = 0;
4786+ }
4787+
4788+ /* the last block may be a hole */
4789+ if (!err && all_zero) {
4790+ AuLabel(last hole);
4791+
4792+ err = 1;
2000de60 4793+ if (au_test_nfs(dst->f_path.dentry->d_sb)) {
1facf9fc 4794+ /* nfs requires this step to make last hole */
4795+ /* is this only nfs? */
4796+ do {
4797+ /* todo: signal_pending? */
4798+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
4799+ } while (err == -EAGAIN || err == -EINTR);
4800+ if (err == 1)
4801+ dst->f_pos--;
4802+ }
4803+
4804+ if (err == 1) {
4805+ ia = (void *)buf;
4806+ ia->ia_size = dst->f_pos;
4807+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
4808+ ia->ia_file = dst;
c06a8ce3 4809+ h_mtx = &file_inode(dst)->i_mutex;
1facf9fc 4810+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
523b37e3
AM
4811+ /* no delegation since it is just created */
4812+ err = vfsub_notify_change(&dst->f_path, ia,
4813+ /*delegated*/NULL);
1facf9fc 4814+ mutex_unlock(h_mtx);
4815+ }
4816+ }
4817+
4818+ return err;
4819+}
4820+
4821+int au_copy_file(struct file *dst, struct file *src, loff_t len)
4822+{
4823+ int err;
4824+ unsigned long blksize;
4825+ unsigned char do_kfree;
4826+ char *buf;
4827+
4828+ err = -ENOMEM;
2000de60 4829+ blksize = dst->f_path.dentry->d_sb->s_blocksize;
1facf9fc 4830+ if (!blksize || PAGE_SIZE < blksize)
4831+ blksize = PAGE_SIZE;
4832+ AuDbg("blksize %lu\n", blksize);
4833+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
4834+ if (do_kfree)
4835+ buf = kmalloc(blksize, GFP_NOFS);
4836+ else
4837+ buf = (void *)__get_free_page(GFP_NOFS);
4838+ if (unlikely(!buf))
4839+ goto out;
4840+
4841+ if (len > (1 << 22))
4842+ AuDbg("copying a large file %lld\n", (long long)len);
4843+
4844+ src->f_pos = 0;
4845+ dst->f_pos = 0;
4846+ err = au_do_copy_file(dst, src, len, buf, blksize);
4847+ if (do_kfree)
4848+ kfree(buf);
4849+ else
4850+ free_page((unsigned long)buf);
4851+
4f0767ce 4852+out:
1facf9fc 4853+ return err;
4854+}
4855+
4856+/*
4857+ * to support a sparse file which is opened with O_APPEND,
4858+ * we need to close the file.
4859+ */
c2b27bf2 4860+static int au_cp_regular(struct au_cp_generic *cpg)
1facf9fc 4861+{
4862+ int err, i;
4863+ enum { SRC, DST };
4864+ struct {
4865+ aufs_bindex_t bindex;
4866+ unsigned int flags;
4867+ struct dentry *dentry;
392086de 4868+ int force_wr;
1facf9fc 4869+ struct file *file;
523b37e3 4870+ void *label;
1facf9fc 4871+ } *f, file[] = {
4872+ {
c2b27bf2 4873+ .bindex = cpg->bsrc,
1facf9fc 4874+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
523b37e3 4875+ .label = &&out
1facf9fc 4876+ },
4877+ {
c2b27bf2 4878+ .bindex = cpg->bdst,
1facf9fc 4879+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
392086de 4880+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
523b37e3 4881+ .label = &&out_src
1facf9fc 4882+ }
4883+ };
4884+ struct super_block *sb;
4885+
4886+ /* bsrc branch can be ro/rw. */
c2b27bf2 4887+ sb = cpg->dentry->d_sb;
1facf9fc 4888+ f = file;
4889+ for (i = 0; i < 2; i++, f++) {
c2b27bf2
AM
4890+ f->dentry = au_h_dptr(cpg->dentry, f->bindex);
4891+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
392086de 4892+ /*file*/NULL, f->force_wr);
1facf9fc 4893+ err = PTR_ERR(f->file);
4894+ if (IS_ERR(f->file))
4895+ goto *f->label;
1facf9fc 4896+ }
4897+
4898+ /* try to stop updates while we copy up */
5527c038 4899+ IMustLock(d_inode(file[SRC].dentry));
c2b27bf2 4900+ err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
1facf9fc 4901+
1facf9fc 4902+ fput(file[DST].file);
4903+ au_sbr_put(sb, file[DST].bindex);
523b37e3 4904+
4f0767ce 4905+out_src:
1facf9fc 4906+ fput(file[SRC].file);
4907+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 4908+out:
1facf9fc 4909+ return err;
4910+}
4911+
c2b27bf2 4912+static int au_do_cpup_regular(struct au_cp_generic *cpg,
86dc4139 4913+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4914+{
4915+ int err, rerr;
4916+ loff_t l;
86dc4139 4917+ struct path h_path;
38d290e6 4918+ struct inode *h_src_inode, *h_dst_inode;
1facf9fc 4919+
4920+ err = 0;
5527c038 4921+ h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
86dc4139 4922+ l = i_size_read(h_src_inode);
c2b27bf2
AM
4923+ if (cpg->len == -1 || l < cpg->len)
4924+ cpg->len = l;
4925+ if (cpg->len) {
86dc4139
AM
4926+ /* try to stop updates while we are referencing it */
4927+ mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
c2b27bf2 4928+ au_pin_hdir_unlock(cpg->pin);
1facf9fc 4929+
c2b27bf2
AM
4930+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
4931+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
86dc4139 4932+ h_src_attr->iflags = h_src_inode->i_flags;
5527c038
JR
4933+ if (!au_test_nfs(h_src_inode->i_sb))
4934+ err = vfs_getattr(&h_path, &h_src_attr->st);
4935+ else {
4936+ mutex_unlock(&h_src_inode->i_mutex);
4937+ err = vfs_getattr(&h_path, &h_src_attr->st);
4938+ mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
4939+ }
86dc4139
AM
4940+ if (unlikely(err)) {
4941+ mutex_unlock(&h_src_inode->i_mutex);
4942+ goto out;
4943+ }
4944+ h_src_attr->valid = 1;
c2b27bf2 4945+ err = au_cp_regular(cpg);
86dc4139 4946+ mutex_unlock(&h_src_inode->i_mutex);
c2b27bf2 4947+ rerr = au_pin_hdir_relock(cpg->pin);
86dc4139
AM
4948+ if (!err && rerr)
4949+ err = rerr;
1facf9fc 4950+ }
38d290e6
JR
4951+ if (!err && (h_src_inode->i_state & I_LINKABLE)) {
4952+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
5527c038 4953+ h_dst_inode = d_inode(h_path.dentry);
38d290e6
JR
4954+ spin_lock(&h_dst_inode->i_lock);
4955+ h_dst_inode->i_state |= I_LINKABLE;
4956+ spin_unlock(&h_dst_inode->i_lock);
4957+ }
1facf9fc 4958+
4f0767ce 4959+out:
1facf9fc 4960+ return err;
4961+}
4962+
4963+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
4964+ struct inode *h_dir)
4965+{
4966+ int err, symlen;
4967+ mm_segment_t old_fs;
b752ccd1
AM
4968+ union {
4969+ char *k;
4970+ char __user *u;
4971+ } sym;
5527c038
JR
4972+ struct inode *h_inode = d_inode(h_src);
4973+ const struct inode_operations *h_iop = h_inode->i_op;
1facf9fc 4974+
4975+ err = -ENOSYS;
5527c038 4976+ if (unlikely(!h_iop->readlink))
1facf9fc 4977+ goto out;
4978+
4979+ err = -ENOMEM;
537831f9 4980+ sym.k = (void *)__get_free_page(GFP_NOFS);
b752ccd1 4981+ if (unlikely(!sym.k))
1facf9fc 4982+ goto out;
4983+
9dbd164d 4984+ /* unnecessary to support mmap_sem since symlink is not mmap-able */
1facf9fc 4985+ old_fs = get_fs();
4986+ set_fs(KERNEL_DS);
5527c038 4987+ symlen = h_iop->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 4988+ err = symlen;
4989+ set_fs(old_fs);
4990+
4991+ if (symlen > 0) {
b752ccd1
AM
4992+ sym.k[symlen] = 0;
4993+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 4994+ }
537831f9 4995+ free_page((unsigned long)sym.k);
1facf9fc 4996+
4f0767ce 4997+out:
1facf9fc 4998+ return err;
4999+}
5000+
1facf9fc 5001+static noinline_for_stack
c2b27bf2 5002+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
86dc4139 5003+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 5004+{
5005+ int err;
5006+ umode_t mode;
5007+ unsigned int mnt_flags;
076b876e 5008+ unsigned char isdir, isreg, force;
c2b27bf2 5009+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5010+ struct au_dtime dt;
5011+ struct path h_path;
5012+ struct dentry *h_src, *h_dst, *h_parent;
5527c038 5013+ struct inode *h_inode, *h_dir, *dir, *inode;
1facf9fc 5014+ struct super_block *sb;
5015+
5016+ /* bsrc branch can be ro/rw. */
c2b27bf2 5017+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038
JR
5018+ h_inode = d_inode(h_src);
5019+ AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
1facf9fc 5020+
5021+ /* try stopping to be referenced while we are creating */
c2b27bf2
AM
5022+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5023+ if (au_ftest_cpup(cpg->flags, RENAME))
86dc4139
AM
5024+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
5025+ AUFS_WH_PFX_LEN));
1facf9fc 5026+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5027+ h_dir = d_inode(h_parent);
1facf9fc 5028+ IMustLock(h_dir);
5029+ AuDebugOn(h_parent != h_dst->d_parent);
5030+
c2b27bf2
AM
5031+ sb = cpg->dentry->d_sb;
5032+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
1facf9fc 5033+ if (do_dt) {
5034+ h_path.dentry = h_parent;
5035+ au_dtime_store(&dt, dst_parent, &h_path);
5036+ }
5037+ h_path.dentry = h_dst;
5038+
076b876e 5039+ isreg = 0;
1facf9fc 5040+ isdir = 0;
5041+ mode = h_inode->i_mode;
5042+ switch (mode & S_IFMT) {
5043+ case S_IFREG:
076b876e 5044+ isreg = 1;
b4510431
AM
5045+ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR,
5046+ /*want_excl*/true);
1facf9fc 5047+ if (!err)
c2b27bf2 5048+ err = au_do_cpup_regular(cpg, h_src_attr);
1facf9fc 5049+ break;
5050+ case S_IFDIR:
5051+ isdir = 1;
5052+ err = vfsub_mkdir(h_dir, &h_path, mode);
5053+ if (!err) {
5054+ /*
5055+ * strange behaviour from the user's view,
5056+ * particularly in the setattr case
5057+ */
5527c038
JR
5058+ dir = d_inode(dst_parent);
5059+ if (au_ibstart(dir) == cpg->bdst)
5060+ au_cpup_attr_nlink(dir, /*force*/1);
5061+ inode = d_inode(cpg->dentry);
5062+ au_cpup_attr_nlink(inode, /*force*/1);
1facf9fc 5063+ }
5064+ break;
5065+ case S_IFLNK:
5066+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
5067+ break;
5068+ case S_IFCHR:
5069+ case S_IFBLK:
5070+ AuDebugOn(!capable(CAP_MKNOD));
5071+ /*FALLTHROUGH*/
5072+ case S_IFIFO:
5073+ case S_IFSOCK:
5074+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
5075+ break;
5076+ default:
5077+ AuIOErr("Unknown inode type 0%o\n", mode);
5078+ err = -EIO;
5079+ }
5080+
5081+ mnt_flags = au_mntflags(sb);
5082+ if (!au_opt_test(mnt_flags, UDBA_NONE)
5083+ && !isdir
5084+ && au_opt_test(mnt_flags, XINO)
38d290e6
JR
5085+ && (h_inode->i_nlink == 1
5086+ || (h_inode->i_state & I_LINKABLE))
1facf9fc 5087+ /* todo: unnecessary? */
5527c038 5088+ /* && d_inode(cpg->dentry)->i_nlink == 1 */
c2b27bf2
AM
5089+ && cpg->bdst < cpg->bsrc
5090+ && !au_ftest_cpup(cpg->flags, KEEPLINO))
5091+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 5092+ /* ignore this error */
5093+
076b876e
AM
5094+ if (!err) {
5095+ force = 0;
5096+ if (isreg) {
5097+ force = !!cpg->len;
5098+ if (cpg->len == -1)
5099+ force = !!i_size_read(h_inode);
5100+ }
5101+ au_fhsm_wrote(sb, cpg->bdst, force);
5102+ }
5103+
1facf9fc 5104+ if (do_dt)
5105+ au_dtime_revert(&dt);
5106+ return err;
5107+}
5108+
392086de 5109+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
86dc4139
AM
5110+{
5111+ int err;
392086de 5112+ struct dentry *dentry, *h_dentry, *h_parent, *parent;
86dc4139 5113+ struct inode *h_dir;
392086de 5114+ aufs_bindex_t bdst;
86dc4139 5115+
392086de
AM
5116+ dentry = cpg->dentry;
5117+ bdst = cpg->bdst;
5118+ h_dentry = au_h_dptr(dentry, bdst);
5119+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
5120+ dget(h_dentry);
5121+ au_set_h_dptr(dentry, bdst, NULL);
5122+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
5123+ if (!err)
5124+ h_path->dentry = dget(au_h_dptr(dentry, bdst));
86dc4139 5125+ au_set_h_dptr(dentry, bdst, h_dentry);
392086de
AM
5126+ } else {
5127+ err = 0;
5128+ parent = dget_parent(dentry);
5129+ h_parent = au_h_dptr(parent, bdst);
5130+ dput(parent);
5131+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
5132+ if (IS_ERR(h_path->dentry))
5133+ err = PTR_ERR(h_path->dentry);
86dc4139 5134+ }
392086de
AM
5135+ if (unlikely(err))
5136+ goto out;
86dc4139 5137+
86dc4139 5138+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 5139+ h_dir = d_inode(h_parent);
86dc4139 5140+ IMustLock(h_dir);
523b37e3
AM
5141+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
5142+ /* no delegation since it is just created */
5143+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL);
86dc4139
AM
5144+ dput(h_path->dentry);
5145+
5146+out:
5147+ return err;
5148+}
5149+
1facf9fc 5150+/*
5151+ * copyup the @dentry from @bsrc to @bdst.
5152+ * the caller must set both of the lower dentries.
5153+ * @len is for truncating; when it is -1, copy up the entire file.
5154+ * in link/rename cases, @dst_parent may be different from the real one.
c2b27bf2 5155+ * cpg->bsrc can be larger than cpg->bdst.
1facf9fc 5156+ */
c2b27bf2 5157+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5158+{
5159+ int err, rerr;
5160+ aufs_bindex_t old_ibstart;
5161+ unsigned char isdir, plink;
1facf9fc 5162+ struct dentry *h_src, *h_dst, *h_parent;
5527c038 5163+ struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
1facf9fc 5164+ struct super_block *sb;
86dc4139 5165+ struct au_branch *br;
c2b27bf2
AM
5166+ /* to reduce stack size */
5167+ struct {
5168+ struct au_dtime dt;
5169+ struct path h_path;
5170+ struct au_cpup_reg_attr h_src_attr;
5171+ } *a;
1facf9fc 5172+
c2b27bf2
AM
5173+ err = -ENOMEM;
5174+ a = kmalloc(sizeof(*a), GFP_NOFS);
5175+ if (unlikely(!a))
5176+ goto out;
5177+ a->h_src_attr.valid = 0;
1facf9fc 5178+
c2b27bf2
AM
5179+ sb = cpg->dentry->d_sb;
5180+ br = au_sbr(sb, cpg->bdst);
5181+ a->h_path.mnt = au_br_mnt(br);
5182+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
1facf9fc 5183+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5184+ h_dir = d_inode(h_parent);
1facf9fc 5185+ IMustLock(h_dir);
5186+
c2b27bf2 5187+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5188+ inode = d_inode(cpg->dentry);
1facf9fc 5189+
5190+ if (!dst_parent)
c2b27bf2 5191+ dst_parent = dget_parent(cpg->dentry);
1facf9fc 5192+ else
5193+ dget(dst_parent);
5194+
5195+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
c2b27bf2 5196+ dst_inode = au_h_iptr(inode, cpg->bdst);
1facf9fc 5197+ if (dst_inode) {
5198+ if (unlikely(!plink)) {
5199+ err = -EIO;
027c5e7a
AM
5200+ AuIOErr("hi%lu(i%lu) exists on b%d "
5201+ "but plink is disabled\n",
c2b27bf2
AM
5202+ dst_inode->i_ino, inode->i_ino, cpg->bdst);
5203+ goto out_parent;
1facf9fc 5204+ }
5205+
5206+ if (dst_inode->i_nlink) {
c2b27bf2 5207+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5208+
c2b27bf2 5209+ h_src = au_plink_lkup(inode, cpg->bdst);
1facf9fc 5210+ err = PTR_ERR(h_src);
5211+ if (IS_ERR(h_src))
c2b27bf2 5212+ goto out_parent;
5527c038 5213+ if (unlikely(d_is_negative(h_src))) {
1facf9fc 5214+ err = -EIO;
79b8bda9 5215+ AuIOErr("i%lu exists on b%d "
027c5e7a 5216+ "but not pseudo-linked\n",
79b8bda9 5217+ inode->i_ino, cpg->bdst);
1facf9fc 5218+ dput(h_src);
c2b27bf2 5219+ goto out_parent;
1facf9fc 5220+ }
5221+
5222+ if (do_dt) {
c2b27bf2
AM
5223+ a->h_path.dentry = h_parent;
5224+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
1facf9fc 5225+ }
86dc4139 5226+
c2b27bf2 5227+ a->h_path.dentry = h_dst;
523b37e3
AM
5228+ delegated = NULL;
5229+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
c2b27bf2 5230+ if (!err && au_ftest_cpup(cpg->flags, RENAME))
392086de 5231+ err = au_do_ren_after_cpup(cpg, &a->h_path);
1facf9fc 5232+ if (do_dt)
c2b27bf2 5233+ au_dtime_revert(&a->dt);
523b37e3
AM
5234+ if (unlikely(err == -EWOULDBLOCK)) {
5235+ pr_warn("cannot retry for NFSv4 delegation"
5236+ " for an internal link\n");
5237+ iput(delegated);
5238+ }
1facf9fc 5239+ dput(h_src);
c2b27bf2 5240+ goto out_parent;
1facf9fc 5241+ } else
5242+ /* todo: cpup_wh_file? */
5243+ /* udba work */
4a4d8108 5244+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 5245+ }
5246+
86dc4139 5247+ isdir = S_ISDIR(inode->i_mode);
1facf9fc 5248+ old_ibstart = au_ibstart(inode);
c2b27bf2 5249+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
1facf9fc 5250+ if (unlikely(err))
86dc4139 5251+ goto out_rev;
5527c038 5252+ dst_inode = d_inode(h_dst);
1facf9fc 5253+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
86dc4139 5254+ /* todo: necessary? */
c2b27bf2 5255+ /* au_pin_hdir_unlock(cpg->pin); */
1facf9fc 5256+
c2b27bf2 5257+ err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
86dc4139
AM
5258+ if (unlikely(err)) {
5259+ /* todo: necessary? */
c2b27bf2 5260+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
86dc4139
AM
5261+ mutex_unlock(&dst_inode->i_mutex);
5262+ goto out_rev;
5263+ }
5264+
c2b27bf2 5265+ if (cpg->bdst < old_ibstart) {
86dc4139 5266+ if (S_ISREG(inode->i_mode)) {
c2b27bf2 5267+ err = au_dy_iaop(inode, cpg->bdst, dst_inode);
86dc4139 5268+ if (unlikely(err)) {
c2b27bf2
AM
5269+ /* ignore an error */
5270+ /* au_pin_hdir_relock(cpg->pin); */
86dc4139
AM
5271+ mutex_unlock(&dst_inode->i_mutex);
5272+ goto out_rev;
4a4d8108 5273+ }
4a4d8108 5274+ }
c2b27bf2
AM
5275+ au_set_ibstart(inode, cpg->bdst);
5276+ } else
5277+ au_set_ibend(inode, cpg->bdst);
5278+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
86dc4139
AM
5279+ au_hi_flags(inode, isdir));
5280+
5281+ /* todo: necessary? */
c2b27bf2 5282+ /* err = au_pin_hdir_relock(cpg->pin); */
86dc4139
AM
5283+ mutex_unlock(&dst_inode->i_mutex);
5284+ if (unlikely(err))
5285+ goto out_rev;
5286+
5527c038 5287+ src_inode = d_inode(h_src);
86dc4139 5288+ if (!isdir
5527c038
JR
5289+ && (src_inode->i_nlink > 1
5290+ || src_inode->i_state & I_LINKABLE)
86dc4139 5291+ && plink)
c2b27bf2 5292+ au_plink_append(inode, cpg->bdst, h_dst);
86dc4139 5293+
c2b27bf2
AM
5294+ if (au_ftest_cpup(cpg->flags, RENAME)) {
5295+ a->h_path.dentry = h_dst;
392086de 5296+ err = au_do_ren_after_cpup(cpg, &a->h_path);
86dc4139
AM
5297+ }
5298+ if (!err)
c2b27bf2 5299+ goto out_parent; /* success */
1facf9fc 5300+
5301+ /* revert */
4a4d8108 5302+out_rev:
c2b27bf2
AM
5303+ a->h_path.dentry = h_parent;
5304+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5305+ a->h_path.dentry = h_dst;
86dc4139 5306+ rerr = 0;
5527c038 5307+ if (d_is_positive(h_dst)) {
523b37e3
AM
5308+ if (!isdir) {
5309+ /* no delegation since it is just created */
5310+ rerr = vfsub_unlink(h_dir, &a->h_path,
5311+ /*delegated*/NULL, /*force*/0);
5312+ } else
c2b27bf2 5313+ rerr = vfsub_rmdir(h_dir, &a->h_path);
86dc4139 5314+ }
c2b27bf2 5315+ au_dtime_revert(&a->dt);
1facf9fc 5316+ if (rerr) {
5317+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
5318+ err = -EIO;
5319+ }
c2b27bf2 5320+out_parent:
1facf9fc 5321+ dput(dst_parent);
c2b27bf2
AM
5322+ kfree(a);
5323+out:
1facf9fc 5324+ return err;
5325+}
5326+
7e9cd9fe 5327+#if 0 /* reserved */
1facf9fc 5328+struct au_cpup_single_args {
5329+ int *errp;
c2b27bf2 5330+ struct au_cp_generic *cpg;
1facf9fc 5331+ struct dentry *dst_parent;
5332+};
5333+
5334+static void au_call_cpup_single(void *args)
5335+{
5336+ struct au_cpup_single_args *a = args;
86dc4139 5337+
c2b27bf2
AM
5338+ au_pin_hdir_acquire_nest(a->cpg->pin);
5339+ *a->errp = au_cpup_single(a->cpg, a->dst_parent);
5340+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5341+}
c2b27bf2 5342+#endif
1facf9fc 5343+
53392da6
AM
5344+/*
5345+ * prevent SIGXFSZ in copy-up.
5346+ * testing CAP_MKNOD is for generic fs,
5347+ * but CAP_FSETID is for xfs only, currently.
5348+ */
86dc4139 5349+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
53392da6
AM
5350+{
5351+ int do_sio;
86dc4139
AM
5352+ struct super_block *sb;
5353+ struct inode *h_dir;
53392da6
AM
5354+
5355+ do_sio = 0;
86dc4139 5356+ sb = au_pinned_parent(pin)->d_sb;
53392da6
AM
5357+ if (!au_wkq_test()
5358+ && (!au_sbi(sb)->si_plink_maint_pid
5359+ || au_plink_maint(sb, AuLock_NOPLM))) {
5360+ switch (mode & S_IFMT) {
5361+ case S_IFREG:
5362+ /* no condition about RLIMIT_FSIZE and the file size */
5363+ do_sio = 1;
5364+ break;
5365+ case S_IFCHR:
5366+ case S_IFBLK:
5367+ do_sio = !capable(CAP_MKNOD);
5368+ break;
5369+ }
5370+ if (!do_sio)
5371+ do_sio = ((mode & (S_ISUID | S_ISGID))
5372+ && !capable(CAP_FSETID));
86dc4139
AM
5373+ /* this workaround may be removed in the future */
5374+ if (!do_sio) {
5375+ h_dir = au_pinned_h_dir(pin);
5376+ do_sio = h_dir->i_mode & S_ISVTX;
5377+ }
53392da6
AM
5378+ }
5379+
5380+ return do_sio;
5381+}
5382+
7e9cd9fe 5383+#if 0 /* reserved */
c2b27bf2 5384+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5385+{
5386+ int err, wkq_err;
1facf9fc 5387+ struct dentry *h_dentry;
5388+
c2b27bf2 5389+ h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5390+ if (!au_cpup_sio_test(pin, d_inode(h_dentry)->i_mode))
c2b27bf2 5391+ err = au_cpup_single(cpg, dst_parent);
1facf9fc 5392+ else {
5393+ struct au_cpup_single_args args = {
5394+ .errp = &err,
c2b27bf2
AM
5395+ .cpg = cpg,
5396+ .dst_parent = dst_parent
1facf9fc 5397+ };
5398+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
5399+ if (unlikely(wkq_err))
5400+ err = wkq_err;
5401+ }
5402+
5403+ return err;
5404+}
c2b27bf2 5405+#endif
1facf9fc 5406+
5407+/*
5408+ * copyup the @dentry from the first active lower branch to @bdst,
5409+ * using au_cpup_single().
5410+ */
c2b27bf2 5411+static int au_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5412+{
5413+ int err;
c2b27bf2
AM
5414+ unsigned int flags_orig;
5415+ struct dentry *dentry;
5416+
5417+ AuDebugOn(cpg->bsrc < 0);
1facf9fc 5418+
c2b27bf2 5419+ dentry = cpg->dentry;
86dc4139 5420+ DiMustWriteLock(dentry);
1facf9fc 5421+
c2b27bf2 5422+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
1facf9fc 5423+ if (!err) {
c2b27bf2
AM
5424+ flags_orig = cpg->flags;
5425+ au_fset_cpup(cpg->flags, RENAME);
5426+ err = au_cpup_single(cpg, NULL);
5427+ cpg->flags = flags_orig;
1facf9fc 5428+ if (!err)
5429+ return 0; /* success */
5430+
5431+ /* revert */
c2b27bf2
AM
5432+ au_set_h_dptr(dentry, cpg->bdst, NULL);
5433+ au_set_dbstart(dentry, cpg->bsrc);
1facf9fc 5434+ }
5435+
5436+ return err;
5437+}
5438+
5439+struct au_cpup_simple_args {
5440+ int *errp;
c2b27bf2 5441+ struct au_cp_generic *cpg;
1facf9fc 5442+};
5443+
5444+static void au_call_cpup_simple(void *args)
5445+{
5446+ struct au_cpup_simple_args *a = args;
86dc4139 5447+
c2b27bf2
AM
5448+ au_pin_hdir_acquire_nest(a->cpg->pin);
5449+ *a->errp = au_cpup_simple(a->cpg);
5450+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5451+}
5452+
c2b27bf2 5453+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5454+{
5455+ int err, wkq_err;
c2b27bf2
AM
5456+ struct dentry *dentry, *parent;
5457+ struct file *h_file;
1facf9fc 5458+ struct inode *h_dir;
5459+
c2b27bf2
AM
5460+ dentry = cpg->dentry;
5461+ h_file = NULL;
5462+ if (au_ftest_cpup(cpg->flags, HOPEN)) {
5463+ AuDebugOn(cpg->bsrc < 0);
392086de 5464+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
c2b27bf2
AM
5465+ err = PTR_ERR(h_file);
5466+ if (IS_ERR(h_file))
5467+ goto out;
5468+ }
5469+
1facf9fc 5470+ parent = dget_parent(dentry);
5527c038 5471+ h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
53392da6 5472+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
5527c038 5473+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5474+ err = au_cpup_simple(cpg);
1facf9fc 5475+ else {
5476+ struct au_cpup_simple_args args = {
5477+ .errp = &err,
c2b27bf2 5478+ .cpg = cpg
1facf9fc 5479+ };
5480+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
5481+ if (unlikely(wkq_err))
5482+ err = wkq_err;
5483+ }
5484+
5485+ dput(parent);
c2b27bf2
AM
5486+ if (h_file)
5487+ au_h_open_post(dentry, cpg->bsrc, h_file);
5488+
5489+out:
1facf9fc 5490+ return err;
5491+}
5492+
c2b27bf2 5493+int au_sio_cpup_simple(struct au_cp_generic *cpg)
367653fa 5494+{
c2b27bf2
AM
5495+ aufs_bindex_t bsrc, bend;
5496+ struct dentry *dentry, *h_dentry;
367653fa 5497+
c2b27bf2
AM
5498+ if (cpg->bsrc < 0) {
5499+ dentry = cpg->dentry;
5500+ bend = au_dbend(dentry);
5501+ for (bsrc = cpg->bdst + 1; bsrc <= bend; bsrc++) {
5502+ h_dentry = au_h_dptr(dentry, bsrc);
5503+ if (h_dentry) {
5527c038 5504+ AuDebugOn(d_is_negative(h_dentry));
c2b27bf2
AM
5505+ break;
5506+ }
5507+ }
5508+ AuDebugOn(bsrc > bend);
5509+ cpg->bsrc = bsrc;
367653fa 5510+ }
c2b27bf2
AM
5511+ AuDebugOn(cpg->bsrc <= cpg->bdst);
5512+ return au_do_sio_cpup_simple(cpg);
5513+}
367653fa 5514+
c2b27bf2
AM
5515+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
5516+{
5517+ AuDebugOn(cpg->bdst <= cpg->bsrc);
5518+ return au_do_sio_cpup_simple(cpg);
367653fa
AM
5519+}
5520+
1facf9fc 5521+/* ---------------------------------------------------------------------- */
5522+
5523+/*
5524+ * copyup the deleted file for writing.
5525+ */
c2b27bf2
AM
5526+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
5527+ struct file *file)
1facf9fc 5528+{
5529+ int err;
c2b27bf2
AM
5530+ unsigned int flags_orig;
5531+ aufs_bindex_t bsrc_orig;
1facf9fc 5532+ struct dentry *h_d_dst, *h_d_start;
c2b27bf2 5533+ struct au_dinfo *dinfo;
4a4d8108 5534+ struct au_hdentry *hdp;
1facf9fc 5535+
c2b27bf2 5536+ dinfo = au_di(cpg->dentry);
1308ab2a 5537+ AuRwMustWriteLock(&dinfo->di_rwsem);
5538+
c2b27bf2
AM
5539+ bsrc_orig = cpg->bsrc;
5540+ cpg->bsrc = dinfo->di_bstart;
4a4d8108 5541+ hdp = dinfo->di_hdentry;
c2b27bf2
AM
5542+ h_d_dst = hdp[0 + cpg->bdst].hd_dentry;
5543+ dinfo->di_bstart = cpg->bdst;
5544+ hdp[0 + cpg->bdst].hd_dentry = wh_dentry;
86dc4139 5545+ h_d_start = NULL;
027c5e7a 5546+ if (file) {
c2b27bf2 5547+ h_d_start = hdp[0 + cpg->bsrc].hd_dentry;
2000de60 5548+ hdp[0 + cpg->bsrc].hd_dentry = au_hf_top(file)->f_path.dentry;
027c5e7a 5549+ }
c2b27bf2
AM
5550+ flags_orig = cpg->flags;
5551+ cpg->flags = !AuCpup_DTIME;
5552+ err = au_cpup_single(cpg, /*h_parent*/NULL);
5553+ cpg->flags = flags_orig;
027c5e7a
AM
5554+ if (file) {
5555+ if (!err)
5556+ err = au_reopen_nondir(file);
c2b27bf2 5557+ hdp[0 + cpg->bsrc].hd_dentry = h_d_start;
1facf9fc 5558+ }
c2b27bf2
AM
5559+ hdp[0 + cpg->bdst].hd_dentry = h_d_dst;
5560+ dinfo->di_bstart = cpg->bsrc;
5561+ cpg->bsrc = bsrc_orig;
1facf9fc 5562+
5563+ return err;
5564+}
5565+
c2b27bf2 5566+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5567+{
5568+ int err;
c2b27bf2 5569+ aufs_bindex_t bdst;
1facf9fc 5570+ struct au_dtime dt;
c2b27bf2 5571+ struct dentry *dentry, *parent, *h_parent, *wh_dentry;
1facf9fc 5572+ struct au_branch *br;
5573+ struct path h_path;
5574+
c2b27bf2
AM
5575+ dentry = cpg->dentry;
5576+ bdst = cpg->bdst;
1facf9fc 5577+ br = au_sbr(dentry->d_sb, bdst);
5578+ parent = dget_parent(dentry);
5579+ h_parent = au_h_dptr(parent, bdst);
5580+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
5581+ err = PTR_ERR(wh_dentry);
5582+ if (IS_ERR(wh_dentry))
5583+ goto out;
5584+
5585+ h_path.dentry = h_parent;
86dc4139 5586+ h_path.mnt = au_br_mnt(br);
1facf9fc 5587+ au_dtime_store(&dt, parent, &h_path);
c2b27bf2 5588+ err = au_do_cpup_wh(cpg, wh_dentry, file);
1facf9fc 5589+ if (unlikely(err))
5590+ goto out_wh;
5591+
5592+ dget(wh_dentry);
5593+ h_path.dentry = wh_dentry;
2000de60 5594+ if (!d_is_dir(wh_dentry)) {
523b37e3 5595+ /* no delegation since it is just created */
5527c038 5596+ err = vfsub_unlink(d_inode(h_parent), &h_path,
523b37e3
AM
5597+ /*delegated*/NULL, /*force*/0);
5598+ } else
5527c038 5599+ err = vfsub_rmdir(d_inode(h_parent), &h_path);
1facf9fc 5600+ if (unlikely(err)) {
523b37e3
AM
5601+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
5602+ wh_dentry, err);
1facf9fc 5603+ err = -EIO;
5604+ }
5605+ au_dtime_revert(&dt);
5527c038 5606+ au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
1facf9fc 5607+
4f0767ce 5608+out_wh:
1facf9fc 5609+ dput(wh_dentry);
4f0767ce 5610+out:
1facf9fc 5611+ dput(parent);
5612+ return err;
5613+}
5614+
5615+struct au_cpup_wh_args {
5616+ int *errp;
c2b27bf2 5617+ struct au_cp_generic *cpg;
1facf9fc 5618+ struct file *file;
5619+};
5620+
5621+static void au_call_cpup_wh(void *args)
5622+{
5623+ struct au_cpup_wh_args *a = args;
86dc4139 5624+
c2b27bf2
AM
5625+ au_pin_hdir_acquire_nest(a->cpg->pin);
5626+ *a->errp = au_cpup_wh(a->cpg, a->file);
5627+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5628+}
5629+
c2b27bf2 5630+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5631+{
5632+ int err, wkq_err;
c2b27bf2 5633+ aufs_bindex_t bdst;
c1595e42 5634+ struct dentry *dentry, *parent, *h_orph, *h_parent;
86dc4139 5635+ struct inode *dir, *h_dir, *h_tmpdir;
1facf9fc 5636+ struct au_wbr *wbr;
c2b27bf2 5637+ struct au_pin wh_pin, *pin_orig;
1facf9fc 5638+
c2b27bf2
AM
5639+ dentry = cpg->dentry;
5640+ bdst = cpg->bdst;
1facf9fc 5641+ parent = dget_parent(dentry);
5527c038 5642+ dir = d_inode(parent);
1facf9fc 5643+ h_orph = NULL;
5644+ h_parent = NULL;
5645+ h_dir = au_igrab(au_h_iptr(dir, bdst));
5646+ h_tmpdir = h_dir;
c2b27bf2 5647+ pin_orig = NULL;
1facf9fc 5648+ if (!h_dir->i_nlink) {
5649+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
5650+ h_orph = wbr->wbr_orph;
5651+
5652+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 5653+ au_set_h_dptr(parent, bdst, dget(h_orph));
5527c038 5654+ h_tmpdir = d_inode(h_orph);
1facf9fc 5655+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
5656+
dece6358 5657+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
4a4d8108 5658+ /* todo: au_h_open_pre()? */
86dc4139 5659+
c2b27bf2 5660+ pin_orig = cpg->pin;
86dc4139 5661+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
c2b27bf2
AM
5662+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
5663+ cpg->pin = &wh_pin;
1facf9fc 5664+ }
5665+
53392da6 5666+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
5527c038 5667+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5668+ err = au_cpup_wh(cpg, file);
1facf9fc 5669+ else {
5670+ struct au_cpup_wh_args args = {
5671+ .errp = &err,
c2b27bf2
AM
5672+ .cpg = cpg,
5673+ .file = file
1facf9fc 5674+ };
5675+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
5676+ if (unlikely(wkq_err))
5677+ err = wkq_err;
5678+ }
5679+
5680+ if (h_orph) {
5681+ mutex_unlock(&h_tmpdir->i_mutex);
4a4d8108 5682+ /* todo: au_h_open_post()? */
1facf9fc 5683+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 5684+ au_set_h_dptr(parent, bdst, h_parent);
c2b27bf2
AM
5685+ AuDebugOn(!pin_orig);
5686+ cpg->pin = pin_orig;
1facf9fc 5687+ }
5688+ iput(h_dir);
5689+ dput(parent);
5690+
5691+ return err;
5692+}
5693+
5694+/* ---------------------------------------------------------------------- */
5695+
5696+/*
5697+ * generic routine for both copy-up and copy-down.
5698+ */
5699+/* cf. revalidate function in file.c */
5700+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5701+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5702+ struct au_pin *pin,
1facf9fc 5703+ struct dentry *h_parent, void *arg),
5704+ void *arg)
5705+{
5706+ int err;
5707+ struct au_pin pin;
5527c038 5708+ struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
1facf9fc 5709+
5710+ err = 0;
5711+ parent = dget_parent(dentry);
5712+ if (IS_ROOT(parent))
5713+ goto out;
5714+
5715+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
5716+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
5717+
5718+ /* do not use au_dpage */
5719+ real_parent = parent;
5720+ while (1) {
5721+ dput(parent);
5722+ parent = dget_parent(dentry);
5723+ h_parent = au_h_dptr(parent, bdst);
5724+ if (h_parent)
5725+ goto out; /* success */
5726+
5727+ /* find top dir which is necessary to cpup */
5728+ do {
5729+ d = parent;
5730+ dput(parent);
5731+ parent = dget_parent(d);
5732+ di_read_lock_parent3(parent, !AuLock_IR);
5733+ h_parent = au_h_dptr(parent, bdst);
5734+ di_read_unlock(parent, !AuLock_IR);
5735+ } while (!h_parent);
5736+
5737+ if (d != real_parent)
5738+ di_write_lock_child3(d);
5739+
5740+ /* somebody else might have created it while we slept */
5527c038
JR
5741+ h_dentry = au_h_dptr(d, bdst);
5742+ if (!h_dentry || d_is_negative(h_dentry)) {
5743+ if (h_dentry)
1facf9fc 5744+ au_update_dbstart(d);
5745+
5746+ au_pin_set_dentry(&pin, d);
5747+ err = au_do_pin(&pin);
5748+ if (!err) {
86dc4139 5749+ err = cp(d, bdst, &pin, h_parent, arg);
1facf9fc 5750+ au_unpin(&pin);
5751+ }
5752+ }
5753+
5754+ if (d != real_parent)
5755+ di_write_unlock(d);
5756+ if (unlikely(err))
5757+ break;
5758+ }
5759+
4f0767ce 5760+out:
1facf9fc 5761+ dput(parent);
5762+ return err;
5763+}
5764+
5765+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5766+ struct au_pin *pin,
2000de60 5767+ struct dentry *h_parent __maybe_unused,
1facf9fc 5768+ void *arg __maybe_unused)
5769+{
c2b27bf2
AM
5770+ struct au_cp_generic cpg = {
5771+ .dentry = dentry,
5772+ .bdst = bdst,
5773+ .bsrc = -1,
5774+ .len = 0,
5775+ .pin = pin,
5776+ .flags = AuCpup_DTIME
5777+ };
5778+ return au_sio_cpup_simple(&cpg);
1facf9fc 5779+}
5780+
5781+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5782+{
5783+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
5784+}
5785+
5786+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5787+{
5788+ int err;
5789+ struct dentry *parent;
5790+ struct inode *dir;
5791+
5792+ parent = dget_parent(dentry);
5527c038 5793+ dir = d_inode(parent);
1facf9fc 5794+ err = 0;
5795+ if (au_h_iptr(dir, bdst))
5796+ goto out;
5797+
5798+ di_read_unlock(parent, AuLock_IR);
5799+ di_write_lock_parent(parent);
5800+ /* someone else might change our inode while we were sleeping */
5801+ if (!au_h_iptr(dir, bdst))
5802+ err = au_cpup_dirs(dentry, bdst);
5803+ di_downgrade_lock(parent, AuLock_IR);
5804+
4f0767ce 5805+out:
1facf9fc 5806+ dput(parent);
5807+ return err;
5808+}
7f207e10
AM
5809diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
5810--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 5811+++ linux/fs/aufs/cpup.h 2015-09-24 10:47:58.248052907 +0200
523b37e3 5812@@ -0,0 +1,94 @@
1facf9fc 5813+/*
2000de60 5814+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 5815+ *
5816+ * This program, aufs is free software; you can redistribute it and/or modify
5817+ * it under the terms of the GNU General Public License as published by
5818+ * the Free Software Foundation; either version 2 of the License, or
5819+ * (at your option) any later version.
dece6358
AM
5820+ *
5821+ * This program is distributed in the hope that it will be useful,
5822+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5823+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5824+ * GNU General Public License for more details.
5825+ *
5826+ * You should have received a copy of the GNU General Public License
523b37e3 5827+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 5828+ */
5829+
5830+/*
5831+ * copy-up/down functions
5832+ */
5833+
5834+#ifndef __AUFS_CPUP_H__
5835+#define __AUFS_CPUP_H__
5836+
5837+#ifdef __KERNEL__
5838+
dece6358 5839+#include <linux/path.h>
1facf9fc 5840+
dece6358
AM
5841+struct inode;
5842+struct file;
86dc4139 5843+struct au_pin;
dece6358 5844+
86dc4139 5845+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
1facf9fc 5846+void au_cpup_attr_timesizes(struct inode *inode);
5847+void au_cpup_attr_nlink(struct inode *inode, int force);
5848+void au_cpup_attr_changeable(struct inode *inode);
5849+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
5850+void au_cpup_attr_all(struct inode *inode, int force);
5851+
5852+/* ---------------------------------------------------------------------- */
5853+
c2b27bf2
AM
5854+struct au_cp_generic {
5855+ struct dentry *dentry;
5856+ aufs_bindex_t bdst, bsrc;
5857+ loff_t len;
5858+ struct au_pin *pin;
5859+ unsigned int flags;
5860+};
5861+
1facf9fc 5862+/* cpup flags */
392086de
AM
5863+#define AuCpup_DTIME 1 /* do dtime_store/revert */
5864+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
5865+ for link(2) */
5866+#define AuCpup_RENAME (1 << 2) /* rename after cpup */
5867+#define AuCpup_HOPEN (1 << 3) /* call h_open_pre/post() in
5868+ cpup */
5869+#define AuCpup_OVERWRITE (1 << 4) /* allow overwriting the
5870+ existing entry */
5871+#define AuCpup_RWDST (1 << 5) /* force write target even if
5872+ the branch is marked as RO */
c2b27bf2 5873+
1facf9fc 5874+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
7f207e10
AM
5875+#define au_fset_cpup(flags, name) \
5876+ do { (flags) |= AuCpup_##name; } while (0)
5877+#define au_fclr_cpup(flags, name) \
5878+ do { (flags) &= ~AuCpup_##name; } while (0)
1facf9fc 5879+
5880+int au_copy_file(struct file *dst, struct file *src, loff_t len);
c2b27bf2
AM
5881+int au_sio_cpup_simple(struct au_cp_generic *cpg);
5882+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
5883+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
1facf9fc 5884+
5885+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5886+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5887+ struct au_pin *pin,
1facf9fc 5888+ struct dentry *h_parent, void *arg),
5889+ void *arg);
5890+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
5891+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
5892+
5893+/* ---------------------------------------------------------------------- */
5894+
5895+/* keep timestamps when copyup */
5896+struct au_dtime {
5897+ struct dentry *dt_dentry;
5898+ struct path dt_h_path;
5899+ struct timespec dt_atime, dt_mtime;
5900+};
5901+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
5902+ struct path *h_path);
5903+void au_dtime_revert(struct au_dtime *dt);
5904+
5905+#endif /* __KERNEL__ */
5906+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
5907diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
5908--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 5909+++ linux/fs/aufs/dbgaufs.c 2015-09-24 10:47:58.248052907 +0200
523b37e3 5910@@ -0,0 +1,432 @@
1facf9fc 5911+/*
2000de60 5912+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 5913+ *
5914+ * This program, aufs is free software; you can redistribute it and/or modify
5915+ * it under the terms of the GNU General Public License as published by
5916+ * the Free Software Foundation; either version 2 of the License, or
5917+ * (at your option) any later version.
dece6358
AM
5918+ *
5919+ * This program is distributed in the hope that it will be useful,
5920+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5921+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5922+ * GNU General Public License for more details.
5923+ *
5924+ * You should have received a copy of the GNU General Public License
523b37e3 5925+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 5926+ */
5927+
5928+/*
5929+ * debugfs interface
5930+ */
5931+
5932+#include <linux/debugfs.h>
5933+#include "aufs.h"
5934+
5935+#ifndef CONFIG_SYSFS
5936+#error DEBUG_FS depends upon SYSFS
5937+#endif
5938+
5939+static struct dentry *dbgaufs;
5940+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
5941+
5942+/* 20 is max digits length of ulong 64 */
5943+struct dbgaufs_arg {
5944+ int n;
5945+ char a[20 * 4];
5946+};
5947+
5948+/*
5949+ * common function for all XINO files
5950+ */
5951+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
5952+ struct file *file)
5953+{
5954+ kfree(file->private_data);
5955+ return 0;
5956+}
5957+
5958+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
5959+{
5960+ int err;
5961+ struct kstat st;
5962+ struct dbgaufs_arg *p;
5963+
5964+ err = -ENOMEM;
5965+ p = kmalloc(sizeof(*p), GFP_NOFS);
5966+ if (unlikely(!p))
5967+ goto out;
5968+
5969+ err = 0;
5970+ p->n = 0;
5971+ file->private_data = p;
5972+ if (!xf)
5973+ goto out;
5974+
c06a8ce3 5975+ err = vfs_getattr(&xf->f_path, &st);
1facf9fc 5976+ if (!err) {
5977+ if (do_fcnt)
5978+ p->n = snprintf
5979+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
5980+ (long)file_count(xf), st.blocks, st.blksize,
5981+ (long long)st.size);
5982+ else
5983+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
5984+ st.blocks, st.blksize,
5985+ (long long)st.size);
5986+ AuDebugOn(p->n >= sizeof(p->a));
5987+ } else {
5988+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
5989+ err = 0;
5990+ }
5991+
4f0767ce 5992+out:
1facf9fc 5993+ return err;
5994+
5995+}
5996+
5997+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
5998+ size_t count, loff_t *ppos)
5999+{
6000+ struct dbgaufs_arg *p;
6001+
6002+ p = file->private_data;
6003+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6004+}
6005+
6006+/* ---------------------------------------------------------------------- */
6007+
86dc4139
AM
6008+struct dbgaufs_plink_arg {
6009+ int n;
6010+ char a[];
6011+};
6012+
6013+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
6014+ struct file *file)
6015+{
6016+ free_page((unsigned long)file->private_data);
6017+ return 0;
6018+}
6019+
6020+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
6021+{
6022+ int err, i, limit;
6023+ unsigned long n, sum;
6024+ struct dbgaufs_plink_arg *p;
6025+ struct au_sbinfo *sbinfo;
6026+ struct super_block *sb;
6027+ struct au_sphlhead *sphl;
6028+
6029+ err = -ENOMEM;
6030+ p = (void *)get_zeroed_page(GFP_NOFS);
6031+ if (unlikely(!p))
6032+ goto out;
6033+
6034+ err = -EFBIG;
6035+ sbinfo = inode->i_private;
6036+ sb = sbinfo->si_sb;
6037+ si_noflush_read_lock(sb);
6038+ if (au_opt_test(au_mntflags(sb), PLINK)) {
6039+ limit = PAGE_SIZE - sizeof(p->n);
6040+
6041+ /* the number of buckets */
6042+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
6043+ p->n += n;
6044+ limit -= n;
6045+
6046+ sum = 0;
6047+ for (i = 0, sphl = sbinfo->si_plink;
6048+ i < AuPlink_NHASH;
6049+ i++, sphl++) {
6050+ n = au_sphl_count(sphl);
6051+ sum += n;
6052+
6053+ n = snprintf(p->a + p->n, limit, "%lu ", n);
6054+ p->n += n;
6055+ limit -= n;
6056+ if (unlikely(limit <= 0))
6057+ goto out_free;
6058+ }
6059+ p->a[p->n - 1] = '\n';
6060+
6061+ /* the sum of plinks */
6062+ n = snprintf(p->a + p->n, limit, "%lu\n", sum);
6063+ p->n += n;
6064+ limit -= n;
6065+ if (unlikely(limit <= 0))
6066+ goto out_free;
6067+ } else {
6068+#define str "1\n0\n0\n"
6069+ p->n = sizeof(str) - 1;
6070+ strcpy(p->a, str);
6071+#undef str
6072+ }
6073+ si_read_unlock(sb);
6074+
6075+ err = 0;
6076+ file->private_data = p;
6077+ goto out; /* success */
6078+
6079+out_free:
6080+ free_page((unsigned long)p);
6081+out:
6082+ return err;
6083+}
6084+
6085+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
6086+ size_t count, loff_t *ppos)
6087+{
6088+ struct dbgaufs_plink_arg *p;
6089+
6090+ p = file->private_data;
6091+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6092+}
6093+
6094+static const struct file_operations dbgaufs_plink_fop = {
6095+ .owner = THIS_MODULE,
6096+ .open = dbgaufs_plink_open,
6097+ .release = dbgaufs_plink_release,
6098+ .read = dbgaufs_plink_read
6099+};
6100+
6101+/* ---------------------------------------------------------------------- */
6102+
1facf9fc 6103+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
6104+{
6105+ int err;
6106+ struct au_sbinfo *sbinfo;
6107+ struct super_block *sb;
6108+
6109+ sbinfo = inode->i_private;
6110+ sb = sbinfo->si_sb;
6111+ si_noflush_read_lock(sb);
6112+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
6113+ si_read_unlock(sb);
6114+ return err;
6115+}
6116+
6117+static const struct file_operations dbgaufs_xib_fop = {
4a4d8108 6118+ .owner = THIS_MODULE,
1facf9fc 6119+ .open = dbgaufs_xib_open,
6120+ .release = dbgaufs_xi_release,
6121+ .read = dbgaufs_xi_read
6122+};
6123+
6124+/* ---------------------------------------------------------------------- */
6125+
6126+#define DbgaufsXi_PREFIX "xi"
6127+
6128+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
6129+{
6130+ int err;
6131+ long l;
6132+ struct au_sbinfo *sbinfo;
6133+ struct super_block *sb;
6134+ struct file *xf;
6135+ struct qstr *name;
6136+
6137+ err = -ENOENT;
6138+ xf = NULL;
2000de60 6139+ name = &file->f_path.dentry->d_name;
1facf9fc 6140+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
6141+ || memcmp(name->name, DbgaufsXi_PREFIX,
6142+ sizeof(DbgaufsXi_PREFIX) - 1)))
6143+ goto out;
9dbd164d 6144+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
1facf9fc 6145+ if (unlikely(err))
6146+ goto out;
6147+
6148+ sbinfo = inode->i_private;
6149+ sb = sbinfo->si_sb;
6150+ si_noflush_read_lock(sb);
6151+ if (l <= au_sbend(sb)) {
6152+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
6153+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
6154+ } else
6155+ err = -ENOENT;
6156+ si_read_unlock(sb);
6157+
4f0767ce 6158+out:
1facf9fc 6159+ return err;
6160+}
6161+
6162+static const struct file_operations dbgaufs_xino_fop = {
4a4d8108 6163+ .owner = THIS_MODULE,
1facf9fc 6164+ .open = dbgaufs_xino_open,
6165+ .release = dbgaufs_xi_release,
6166+ .read = dbgaufs_xi_read
6167+};
6168+
6169+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
6170+{
6171+ aufs_bindex_t bend;
6172+ struct au_branch *br;
6173+ struct au_xino_file *xi;
6174+
6175+ if (!au_sbi(sb)->si_dbgaufs)
6176+ return;
6177+
6178+ bend = au_sbend(sb);
6179+ for (; bindex <= bend; bindex++) {
6180+ br = au_sbr(sb, bindex);
6181+ xi = &br->br_xino;
c06a8ce3
AM
6182+ debugfs_remove(xi->xi_dbgaufs);
6183+ xi->xi_dbgaufs = NULL;
1facf9fc 6184+ }
6185+}
6186+
6187+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
6188+{
6189+ struct au_sbinfo *sbinfo;
6190+ struct dentry *parent;
6191+ struct au_branch *br;
6192+ struct au_xino_file *xi;
6193+ aufs_bindex_t bend;
6194+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
6195+
6196+ sbinfo = au_sbi(sb);
6197+ parent = sbinfo->si_dbgaufs;
6198+ if (!parent)
6199+ return;
6200+
6201+ bend = au_sbend(sb);
6202+ for (; bindex <= bend; bindex++) {
6203+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
6204+ br = au_sbr(sb, bindex);
6205+ xi = &br->br_xino;
6206+ AuDebugOn(xi->xi_dbgaufs);
6207+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
6208+ sbinfo, &dbgaufs_xino_fop);
6209+ /* ignore an error */
6210+ if (unlikely(!xi->xi_dbgaufs))
6211+ AuWarn1("failed %s under debugfs\n", name);
6212+ }
6213+}
6214+
6215+/* ---------------------------------------------------------------------- */
6216+
6217+#ifdef CONFIG_AUFS_EXPORT
6218+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
6219+{
6220+ int err;
6221+ struct au_sbinfo *sbinfo;
6222+ struct super_block *sb;
6223+
6224+ sbinfo = inode->i_private;
6225+ sb = sbinfo->si_sb;
6226+ si_noflush_read_lock(sb);
6227+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
6228+ si_read_unlock(sb);
6229+ return err;
6230+}
6231+
6232+static const struct file_operations dbgaufs_xigen_fop = {
4a4d8108 6233+ .owner = THIS_MODULE,
1facf9fc 6234+ .open = dbgaufs_xigen_open,
6235+ .release = dbgaufs_xi_release,
6236+ .read = dbgaufs_xi_read
6237+};
6238+
6239+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6240+{
6241+ int err;
6242+
dece6358 6243+ /*
c1595e42 6244+ * This function is a dynamic '__init' function actually,
dece6358
AM
6245+ * so the tiny check for si_rwsem is unnecessary.
6246+ */
6247+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6248+
1facf9fc 6249+ err = -EIO;
6250+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
6251+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6252+ &dbgaufs_xigen_fop);
6253+ if (sbinfo->si_dbgaufs_xigen)
6254+ err = 0;
6255+
6256+ return err;
6257+}
6258+#else
6259+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6260+{
6261+ return 0;
6262+}
6263+#endif /* CONFIG_AUFS_EXPORT */
6264+
6265+/* ---------------------------------------------------------------------- */
6266+
6267+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
6268+{
dece6358 6269+ /*
7e9cd9fe 6270+ * This function is a dynamic '__fin' function actually,
dece6358
AM
6271+ * so the tiny check for si_rwsem is unnecessary.
6272+ */
6273+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6274+
1facf9fc 6275+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
6276+ sbinfo->si_dbgaufs = NULL;
6277+ kobject_put(&sbinfo->si_kobj);
6278+}
6279+
6280+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
6281+{
6282+ int err;
6283+ char name[SysaufsSiNameLen];
6284+
dece6358 6285+ /*
c1595e42 6286+ * This function is a dynamic '__init' function actually,
dece6358
AM
6287+ * so the tiny check for si_rwsem is unnecessary.
6288+ */
6289+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6290+
1facf9fc 6291+ err = -ENOENT;
6292+ if (!dbgaufs) {
6293+ AuErr1("/debug/aufs is uninitialized\n");
6294+ goto out;
6295+ }
6296+
6297+ err = -EIO;
6298+ sysaufs_name(sbinfo, name);
6299+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
6300+ if (unlikely(!sbinfo->si_dbgaufs))
6301+ goto out;
6302+ kobject_get(&sbinfo->si_kobj);
6303+
6304+ sbinfo->si_dbgaufs_xib = debugfs_create_file
6305+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6306+ &dbgaufs_xib_fop);
6307+ if (unlikely(!sbinfo->si_dbgaufs_xib))
6308+ goto out_dir;
6309+
86dc4139
AM
6310+ sbinfo->si_dbgaufs_plink = debugfs_create_file
6311+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6312+ &dbgaufs_plink_fop);
6313+ if (unlikely(!sbinfo->si_dbgaufs_plink))
6314+ goto out_dir;
6315+
1facf9fc 6316+ err = dbgaufs_xigen_init(sbinfo);
6317+ if (!err)
6318+ goto out; /* success */
6319+
4f0767ce 6320+out_dir:
1facf9fc 6321+ dbgaufs_si_fin(sbinfo);
4f0767ce 6322+out:
1facf9fc 6323+ return err;
6324+}
6325+
6326+/* ---------------------------------------------------------------------- */
6327+
6328+void dbgaufs_fin(void)
6329+{
6330+ debugfs_remove(dbgaufs);
6331+}
6332+
6333+int __init dbgaufs_init(void)
6334+{
6335+ int err;
6336+
6337+ err = -EIO;
6338+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
6339+ if (dbgaufs)
6340+ err = 0;
6341+ return err;
6342+}
7f207e10
AM
6343diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
6344--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 6345+++ linux/fs/aufs/dbgaufs.h 2015-09-24 10:47:58.248052907 +0200
523b37e3 6346@@ -0,0 +1,48 @@
1facf9fc 6347+/*
2000de60 6348+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 6349+ *
6350+ * This program, aufs is free software; you can redistribute it and/or modify
6351+ * it under the terms of the GNU General Public License as published by
6352+ * the Free Software Foundation; either version 2 of the License, or
6353+ * (at your option) any later version.
dece6358
AM
6354+ *
6355+ * This program is distributed in the hope that it will be useful,
6356+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6357+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6358+ * GNU General Public License for more details.
6359+ *
6360+ * You should have received a copy of the GNU General Public License
523b37e3 6361+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6362+ */
6363+
6364+/*
6365+ * debugfs interface
6366+ */
6367+
6368+#ifndef __DBGAUFS_H__
6369+#define __DBGAUFS_H__
6370+
6371+#ifdef __KERNEL__
6372+
dece6358 6373+struct super_block;
1facf9fc 6374+struct au_sbinfo;
dece6358 6375+
1facf9fc 6376+#ifdef CONFIG_DEBUG_FS
6377+/* dbgaufs.c */
6378+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
6379+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
6380+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
6381+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
6382+void dbgaufs_fin(void);
6383+int __init dbgaufs_init(void);
1facf9fc 6384+#else
4a4d8108
AM
6385+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
6386+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
6387+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
6388+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
6389+AuStubVoid(dbgaufs_fin, void)
6390+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 6391+#endif /* CONFIG_DEBUG_FS */
6392+
6393+#endif /* __KERNEL__ */
6394+#endif /* __DBGAUFS_H__ */
7f207e10
AM
6395diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
6396--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 6397+++ linux/fs/aufs/dcsub.c 2015-09-24 10:47:58.248052907 +0200
c1595e42 6398@@ -0,0 +1,224 @@
1facf9fc 6399+/*
2000de60 6400+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 6401+ *
6402+ * This program, aufs is free software; you can redistribute it and/or modify
6403+ * it under the terms of the GNU General Public License as published by
6404+ * the Free Software Foundation; either version 2 of the License, or
6405+ * (at your option) any later version.
dece6358
AM
6406+ *
6407+ * This program is distributed in the hope that it will be useful,
6408+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6409+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6410+ * GNU General Public License for more details.
6411+ *
6412+ * You should have received a copy of the GNU General Public License
523b37e3 6413+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6414+ */
6415+
6416+/*
6417+ * sub-routines for dentry cache
6418+ */
6419+
6420+#include "aufs.h"
6421+
6422+static void au_dpage_free(struct au_dpage *dpage)
6423+{
6424+ int i;
6425+ struct dentry **p;
6426+
6427+ p = dpage->dentries;
6428+ for (i = 0; i < dpage->ndentry; i++)
6429+ dput(*p++);
6430+ free_page((unsigned long)dpage->dentries);
6431+}
6432+
6433+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
6434+{
6435+ int err;
6436+ void *p;
6437+
6438+ err = -ENOMEM;
6439+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
6440+ if (unlikely(!dpages->dpages))
6441+ goto out;
6442+
6443+ p = (void *)__get_free_page(gfp);
6444+ if (unlikely(!p))
6445+ goto out_dpages;
6446+
6447+ dpages->dpages[0].ndentry = 0;
6448+ dpages->dpages[0].dentries = p;
6449+ dpages->ndpage = 1;
6450+ return 0; /* success */
6451+
4f0767ce 6452+out_dpages:
1facf9fc 6453+ kfree(dpages->dpages);
4f0767ce 6454+out:
1facf9fc 6455+ return err;
6456+}
6457+
6458+void au_dpages_free(struct au_dcsub_pages *dpages)
6459+{
6460+ int i;
6461+ struct au_dpage *p;
6462+
6463+ p = dpages->dpages;
6464+ for (i = 0; i < dpages->ndpage; i++)
6465+ au_dpage_free(p++);
6466+ kfree(dpages->dpages);
6467+}
6468+
6469+static int au_dpages_append(struct au_dcsub_pages *dpages,
6470+ struct dentry *dentry, gfp_t gfp)
6471+{
6472+ int err, sz;
6473+ struct au_dpage *dpage;
6474+ void *p;
6475+
6476+ dpage = dpages->dpages + dpages->ndpage - 1;
6477+ sz = PAGE_SIZE / sizeof(dentry);
6478+ if (unlikely(dpage->ndentry >= sz)) {
6479+ AuLabel(new dpage);
6480+ err = -ENOMEM;
6481+ sz = dpages->ndpage * sizeof(*dpages->dpages);
6482+ p = au_kzrealloc(dpages->dpages, sz,
6483+ sz + sizeof(*dpages->dpages), gfp);
6484+ if (unlikely(!p))
6485+ goto out;
6486+
6487+ dpages->dpages = p;
6488+ dpage = dpages->dpages + dpages->ndpage;
6489+ p = (void *)__get_free_page(gfp);
6490+ if (unlikely(!p))
6491+ goto out;
6492+
6493+ dpage->ndentry = 0;
6494+ dpage->dentries = p;
6495+ dpages->ndpage++;
6496+ }
6497+
c1595e42 6498+ AuDebugOn(au_dcount(dentry) <= 0);
027c5e7a 6499+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
1facf9fc 6500+ return 0; /* success */
6501+
4f0767ce 6502+out:
1facf9fc 6503+ return err;
6504+}
6505+
c1595e42
JR
6506+/* todo: BAD approach */
6507+/* copied from linux/fs/dcache.c */
6508+enum d_walk_ret {
6509+ D_WALK_CONTINUE,
6510+ D_WALK_QUIT,
6511+ D_WALK_NORETRY,
6512+ D_WALK_SKIP,
6513+};
6514+
6515+extern void d_walk(struct dentry *parent, void *data,
6516+ enum d_walk_ret (*enter)(void *, struct dentry *),
6517+ void (*finish)(void *));
6518+
6519+struct ac_dpages_arg {
1facf9fc 6520+ int err;
c1595e42
JR
6521+ struct au_dcsub_pages *dpages;
6522+ struct super_block *sb;
6523+ au_dpages_test test;
6524+ void *arg;
6525+};
1facf9fc 6526+
c1595e42
JR
6527+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
6528+{
6529+ enum d_walk_ret ret;
6530+ struct ac_dpages_arg *arg = _arg;
1facf9fc 6531+
c1595e42
JR
6532+ ret = D_WALK_CONTINUE;
6533+ if (dentry->d_sb == arg->sb
6534+ && !IS_ROOT(dentry)
6535+ && au_dcount(dentry) > 0
6536+ && au_di(dentry)
6537+ && (!arg->test || arg->test(dentry, arg->arg))) {
6538+ arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
6539+ if (unlikely(arg->err))
6540+ ret = D_WALK_QUIT;
1facf9fc 6541+ }
6542+
c1595e42
JR
6543+ return ret;
6544+}
027c5e7a 6545+
c1595e42
JR
6546+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6547+ au_dpages_test test, void *arg)
6548+{
6549+ struct ac_dpages_arg args = {
6550+ .err = 0,
6551+ .dpages = dpages,
6552+ .sb = root->d_sb,
6553+ .test = test,
6554+ .arg = arg
6555+ };
027c5e7a 6556+
c1595e42
JR
6557+ d_walk(root, &args, au_call_dpages_append, NULL);
6558+
6559+ return args.err;
1facf9fc 6560+}
6561+
6562+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6563+ int do_include, au_dpages_test test, void *arg)
6564+{
6565+ int err;
6566+
6567+ err = 0;
027c5e7a
AM
6568+ write_seqlock(&rename_lock);
6569+ spin_lock(&dentry->d_lock);
6570+ if (do_include
c1595e42 6571+ && au_dcount(dentry) > 0
027c5e7a 6572+ && (!test || test(dentry, arg)))
1facf9fc 6573+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6574+ spin_unlock(&dentry->d_lock);
6575+ if (unlikely(err))
6576+ goto out;
6577+
6578+ /*
523b37e3 6579+ * RCU for vfsmount is unnecessary since this is a traverse in a single
027c5e7a
AM
6580+ * mount
6581+ */
1facf9fc 6582+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
6583+ dentry = dentry->d_parent; /* rename_lock is locked */
6584+ spin_lock(&dentry->d_lock);
c1595e42 6585+ if (au_dcount(dentry) > 0
027c5e7a 6586+ && (!test || test(dentry, arg)))
1facf9fc 6587+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6588+ spin_unlock(&dentry->d_lock);
6589+ if (unlikely(err))
6590+ break;
1facf9fc 6591+ }
6592+
4f0767ce 6593+out:
027c5e7a 6594+ write_sequnlock(&rename_lock);
1facf9fc 6595+ return err;
6596+}
6597+
027c5e7a
AM
6598+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
6599+{
6600+ return au_di(dentry) && dentry->d_sb == arg;
6601+}
6602+
6603+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6604+ struct dentry *dentry, int do_include)
6605+{
6606+ return au_dcsub_pages_rev(dpages, dentry, do_include,
6607+ au_dcsub_dpages_aufs, dentry->d_sb);
6608+}
6609+
4a4d8108 6610+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 6611+{
4a4d8108
AM
6612+ struct path path[2] = {
6613+ {
6614+ .dentry = d1
6615+ },
6616+ {
6617+ .dentry = d2
6618+ }
6619+ };
1facf9fc 6620+
4a4d8108 6621+ return path_is_under(path + 0, path + 1);
1facf9fc 6622+}
7f207e10
AM
6623diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
6624--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 6625+++ linux/fs/aufs/dcsub.h 2015-09-24 10:47:58.251386326 +0200
5527c038 6626@@ -0,0 +1,136 @@
1facf9fc 6627+/*
2000de60 6628+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 6629+ *
6630+ * This program, aufs is free software; you can redistribute it and/or modify
6631+ * it under the terms of the GNU General Public License as published by
6632+ * the Free Software Foundation; either version 2 of the License, or
6633+ * (at your option) any later version.
dece6358
AM
6634+ *
6635+ * This program is distributed in the hope that it will be useful,
6636+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6637+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6638+ * GNU General Public License for more details.
6639+ *
6640+ * You should have received a copy of the GNU General Public License
523b37e3 6641+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6642+ */
6643+
6644+/*
6645+ * sub-routines for dentry cache
6646+ */
6647+
6648+#ifndef __AUFS_DCSUB_H__
6649+#define __AUFS_DCSUB_H__
6650+
6651+#ifdef __KERNEL__
6652+
7f207e10 6653+#include <linux/dcache.h>
027c5e7a 6654+#include <linux/fs.h>
dece6358 6655+
1facf9fc 6656+struct au_dpage {
6657+ int ndentry;
6658+ struct dentry **dentries;
6659+};
6660+
6661+struct au_dcsub_pages {
6662+ int ndpage;
6663+ struct au_dpage *dpages;
6664+};
6665+
6666+/* ---------------------------------------------------------------------- */
6667+
7f207e10 6668+/* dcsub.c */
1facf9fc 6669+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
6670+void au_dpages_free(struct au_dcsub_pages *dpages);
6671+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
6672+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6673+ au_dpages_test test, void *arg);
6674+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6675+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
6676+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6677+ struct dentry *dentry, int do_include);
4a4d8108 6678+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 6679+
7f207e10
AM
6680+/* ---------------------------------------------------------------------- */
6681+
523b37e3
AM
6682+/*
6683+ * todo: in linux-3.13, several similar (but faster) helpers are added to
6684+ * include/linux/dcache.h. Try them (in the future).
6685+ */
6686+
027c5e7a
AM
6687+static inline int au_d_hashed_positive(struct dentry *d)
6688+{
6689+ int err;
5527c038 6690+ struct inode *inode = d_inode(d);
076b876e 6691+
027c5e7a 6692+ err = 0;
5527c038
JR
6693+ if (unlikely(d_unhashed(d)
6694+ || d_is_negative(d)
6695+ || !inode->i_nlink))
027c5e7a
AM
6696+ err = -ENOENT;
6697+ return err;
6698+}
6699+
38d290e6
JR
6700+static inline int au_d_linkable(struct dentry *d)
6701+{
6702+ int err;
5527c038 6703+ struct inode *inode = d_inode(d);
076b876e 6704+
38d290e6
JR
6705+ err = au_d_hashed_positive(d);
6706+ if (err
5527c038 6707+ && d_is_positive(d)
38d290e6
JR
6708+ && (inode->i_state & I_LINKABLE))
6709+ err = 0;
6710+ return err;
6711+}
6712+
027c5e7a
AM
6713+static inline int au_d_alive(struct dentry *d)
6714+{
6715+ int err;
6716+ struct inode *inode;
076b876e 6717+
027c5e7a
AM
6718+ err = 0;
6719+ if (!IS_ROOT(d))
6720+ err = au_d_hashed_positive(d);
6721+ else {
5527c038
JR
6722+ inode = d_inode(d);
6723+ if (unlikely(d_unlinked(d)
6724+ || d_is_negative(d)
6725+ || !inode->i_nlink))
027c5e7a
AM
6726+ err = -ENOENT;
6727+ }
6728+ return err;
6729+}
6730+
6731+static inline int au_alive_dir(struct dentry *d)
7f207e10 6732+{
027c5e7a 6733+ int err;
076b876e 6734+
027c5e7a 6735+ err = au_d_alive(d);
5527c038 6736+ if (unlikely(err || IS_DEADDIR(d_inode(d))))
027c5e7a
AM
6737+ err = -ENOENT;
6738+ return err;
7f207e10
AM
6739+}
6740+
38d290e6
JR
6741+static inline int au_qstreq(struct qstr *a, struct qstr *b)
6742+{
6743+ return a->len == b->len
6744+ && !memcmp(a->name, b->name, a->len);
6745+}
6746+
7e9cd9fe
AM
6747+/*
6748+ * by the commit
6749+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
6750+ * taking d_lock
6751+ * the type of d_lockref.count became int, but the inlined function d_count()
6752+ * still returns unsigned int.
6753+ * I don't know why. Maybe it is for every d_count() users?
6754+ * Anyway au_dcount() lives on.
6755+ */
c1595e42
JR
6756+static inline int au_dcount(struct dentry *d)
6757+{
6758+ return (int)d_count(d);
6759+}
6760+
1facf9fc 6761+#endif /* __KERNEL__ */
6762+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
6763diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
6764--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 6765+++ linux/fs/aufs/debug.c 2015-09-24 10:47:58.251386326 +0200
5527c038 6766@@ -0,0 +1,440 @@
1facf9fc 6767+/*
2000de60 6768+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 6769+ *
6770+ * This program, aufs is free software; you can redistribute it and/or modify
6771+ * it under the terms of the GNU General Public License as published by
6772+ * the Free Software Foundation; either version 2 of the License, or
6773+ * (at your option) any later version.
dece6358
AM
6774+ *
6775+ * This program is distributed in the hope that it will be useful,
6776+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6777+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6778+ * GNU General Public License for more details.
6779+ *
6780+ * You should have received a copy of the GNU General Public License
523b37e3 6781+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6782+ */
6783+
6784+/*
6785+ * debug print functions
6786+ */
6787+
6788+#include "aufs.h"
6789+
392086de
AM
/*
 * Setter of the atomic_t module parameter.
 * @val is parsed by kstrtoint() with base 0.  A positive value calls
 * au_debug_on(), any other value calls au_debug_off().  @kp (whose ->arg
 * holds the parameter) is not touched here.
 * Returns 0, or the -errno from kstrtoint().
 */
static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
{
	int level;
	int err = kstrtoint(val, 0, &level);

	if (err)
		return err;

	if (level > 0)
		au_debug_on();
	else
		au_debug_off();
	return 0;
}
6804+
6805+/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
6806+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
6807+{
6808+ atomic_t *a;
6809+
6810+ a = kp->arg;
6811+ return sprintf(buffer, "%d", atomic_read(a));
6812+}
6813+
6814+static struct kernel_param_ops param_ops_atomic_t = {
6815+ .set = param_atomic_t_set,
6816+ .get = param_atomic_t_get
6817+ /* void (*free)(void *arg) */
6818+};
6819+
/*
 * Debug switch, initially 0 and exposed as the module parameter "debug"
 * (readable by everyone, writable by owner and group).
 */
6820+atomic_t aufs_debug = ATOMIC_INIT(0);
1facf9fc 6821+MODULE_PARM_DESC(debug, "debug print");
392086de 6822+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
1facf9fc 6823+
c1595e42 6824+DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
/* printk level string that dpri() puts in front of every message */
1facf9fc 6825+char *au_plevel = KERN_DEBUG;
e49829fe
JR
/*
 * dpri(): printk() the message prefixed with au_plevel.
 * The message is emitted when au_plevel is set to something other than
 * KERN_DEBUG, or when au_debug_test() reports that debugging is enabled.
 */
6826+#define dpri(fmt, ...) do { \
6827+ if ((au_plevel \
6828+ && strcmp(au_plevel, KERN_DEBUG)) \
6829+ || au_debug_test()) \
6830+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 6831+} while (0)
6832+
6833+/* ---------------------------------------------------------------------- */
6834+
6835+void au_dpri_whlist(struct au_nhash *whlist)
6836+{
6837+ unsigned long ul, n;
6838+ struct hlist_head *head;
c06a8ce3 6839+ struct au_vdir_wh *pos;
1facf9fc 6840+
6841+ n = whlist->nh_num;
6842+ head = whlist->nh_head;
6843+ for (ul = 0; ul < n; ul++) {
c06a8ce3 6844+ hlist_for_each_entry(pos, head, wh_hash)
1facf9fc 6845+ dpri("b%d, %.*s, %d\n",
c06a8ce3
AM
6846+ pos->wh_bindex,
6847+ pos->wh_str.len, pos->wh_str.name,
6848+ pos->wh_str.len);
1facf9fc 6849+ head++;
6850+ }
6851+}
6852+
6853+void au_dpri_vdir(struct au_vdir *vdir)
6854+{
6855+ unsigned long ul;
6856+ union au_vdir_deblk_p p;
6857+ unsigned char *o;
6858+
6859+ if (!vdir || IS_ERR(vdir)) {
6860+ dpri("err %ld\n", PTR_ERR(vdir));
6861+ return;
6862+ }
6863+
6864+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
6865+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
6866+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
6867+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
6868+ p.deblk = vdir->vd_deblk[ul];
6869+ o = p.deblk;
6870+ dpri("[%lu]: %p\n", ul, o);
6871+ }
6872+}
6873+
53392da6 6874+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 6875+ struct dentry *wh)
6876+{
6877+ char *n = NULL;
6878+ int l = 0;
6879+
6880+ if (!inode || IS_ERR(inode)) {
6881+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
6882+ return -1;
6883+ }
6884+
c2b27bf2 6885+ /* the type of i_blocks depends upon CONFIG_LBDAF */
1facf9fc 6886+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
6887+ && sizeof(inode->i_blocks) != sizeof(u64));
6888+ if (wh) {
6889+ n = (void *)wh->d_name.name;
6890+ l = wh->d_name.len;
6891+ }
6892+
53392da6
AM
6893+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
6894+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
6895+ bindex, inode,
1facf9fc 6896+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
6897+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
6898+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 6899+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
1facf9fc 6900+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
6901+ inode->i_state, inode->i_flags, inode->i_version,
6902+ inode->i_generation,
1facf9fc 6903+ l ? ", wh " : "", l, n);
6904+ return 0;
6905+}
6906+
6907+void au_dpri_inode(struct inode *inode)
6908+{
6909+ struct au_iinfo *iinfo;
6910+ aufs_bindex_t bindex;
53392da6 6911+ int err, hn;
1facf9fc 6912+
53392da6 6913+ err = do_pri_inode(-1, inode, -1, NULL);
1facf9fc 6914+ if (err || !au_test_aufs(inode->i_sb))
6915+ return;
6916+
6917+ iinfo = au_ii(inode);
6918+ if (!iinfo)
6919+ return;
6920+ dpri("i-1: bstart %d, bend %d, gen %d\n",
537831f9 6921+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode, NULL));
1facf9fc 6922+ if (iinfo->ii_bstart < 0)
6923+ return;
53392da6
AM
6924+ hn = 0;
6925+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
6926+ hn = !!au_hn(iinfo->ii_hinode + bindex);
6927+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
1facf9fc 6928+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
53392da6 6929+ }
1facf9fc 6930+}
6931+
2cbb1c4b
JR
6932+void au_dpri_dalias(struct inode *inode)
6933+{
6934+ struct dentry *d;
6935+
6936+ spin_lock(&inode->i_lock);
c1595e42 6937+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
2cbb1c4b
JR
6938+ au_dpri_dentry(d);
6939+ spin_unlock(&inode->i_lock);
6940+}
6941+
1facf9fc 6942+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
6943+{
6944+ struct dentry *wh = NULL;
53392da6 6945+ int hn;
076b876e 6946+ struct au_iinfo *iinfo;
1facf9fc 6947+
6948+ if (!dentry || IS_ERR(dentry)) {
6949+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
6950+ return -1;
6951+ }
6952+ /* do not call dget_parent() here */
027c5e7a 6953+ /* note: access d_xxx without d_lock */
523b37e3
AM
6954+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
6955+ bindex, dentry, dentry,
1facf9fc 6956+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
c1595e42 6957+ au_dcount(dentry), dentry->d_flags,
523b37e3 6958+ d_unhashed(dentry) ? "un" : "");
53392da6 6959+ hn = -1;
5527c038
JR
6960+ if (bindex >= 0
6961+ && d_is_positive(dentry)
6962+ && au_test_aufs(dentry->d_sb)) {
6963+ iinfo = au_ii(d_inode(dentry));
53392da6
AM
6964+ if (iinfo) {
6965+ hn = !!au_hn(iinfo->ii_hinode + bindex);
1facf9fc 6966+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
53392da6 6967+ }
1facf9fc 6968+ }
5527c038 6969+ do_pri_inode(bindex, d_inode(dentry), hn, wh);
1facf9fc 6970+ return 0;
6971+}
6972+
6973+void au_dpri_dentry(struct dentry *dentry)
6974+{
6975+ struct au_dinfo *dinfo;
6976+ aufs_bindex_t bindex;
6977+ int err;
4a4d8108 6978+ struct au_hdentry *hdp;
1facf9fc 6979+
6980+ err = do_pri_dentry(-1, dentry);
6981+ if (err || !au_test_aufs(dentry->d_sb))
6982+ return;
6983+
6984+ dinfo = au_di(dentry);
6985+ if (!dinfo)
6986+ return;
38d290e6 6987+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
1facf9fc 6988+ dinfo->di_bstart, dinfo->di_bend,
38d290e6
JR
6989+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
6990+ dinfo->di_tmpfile);
1facf9fc 6991+ if (dinfo->di_bstart < 0)
6992+ return;
4a4d8108 6993+ hdp = dinfo->di_hdentry;
1facf9fc 6994+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4a4d8108 6995+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
1facf9fc 6996+}
6997+
6998+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
6999+{
7000+ char a[32];
7001+
7002+ if (!file || IS_ERR(file)) {
7003+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
7004+ return -1;
7005+ }
7006+ a[0] = 0;
7007+ if (bindex < 0
b912730e 7008+ && !IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7009+ && au_test_aufs(file->f_path.dentry->d_sb)
1facf9fc 7010+ && au_fi(file))
e49829fe 7011+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 7012+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 7013+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 7014+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 7015+ file->f_version, file->f_pos, a);
b912730e 7016+ if (!IS_ERR_OR_NULL(file->f_path.dentry))
2000de60 7017+ do_pri_dentry(bindex, file->f_path.dentry);
1facf9fc 7018+ return 0;
7019+}
7020+
7021+void au_dpri_file(struct file *file)
7022+{
7023+ struct au_finfo *finfo;
4a4d8108
AM
7024+ struct au_fidir *fidir;
7025+ struct au_hfile *hfile;
1facf9fc 7026+ aufs_bindex_t bindex;
7027+ int err;
7028+
7029+ err = do_pri_file(-1, file);
2000de60 7030+ if (err
b912730e 7031+ || IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7032+ || !au_test_aufs(file->f_path.dentry->d_sb))
1facf9fc 7033+ return;
7034+
7035+ finfo = au_fi(file);
7036+ if (!finfo)
7037+ return;
4a4d8108 7038+ if (finfo->fi_btop < 0)
1facf9fc 7039+ return;
4a4d8108
AM
7040+ fidir = finfo->fi_hdir;
7041+ if (!fidir)
7042+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
7043+ else
e49829fe
JR
7044+ for (bindex = finfo->fi_btop;
7045+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
7046+ bindex++) {
7047+ hfile = fidir->fd_hfile + bindex;
7048+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
7049+ }
1facf9fc 7050+}
7051+
7052+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
7053+{
7054+ struct vfsmount *mnt;
7055+ struct super_block *sb;
7056+
7057+ if (!br || IS_ERR(br))
7058+ goto out;
86dc4139 7059+ mnt = au_br_mnt(br);
1facf9fc 7060+ if (!mnt || IS_ERR(mnt))
7061+ goto out;
7062+ sb = mnt->mnt_sb;
7063+ if (!sb || IS_ERR(sb))
7064+ goto out;
7065+
1e00d052 7066+ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, "
b752ccd1 7067+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 7068+ "xino %d\n",
1e00d052
AM
7069+ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count),
7070+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 7071+ sb->s_flags, sb->s_count,
1facf9fc 7072+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
7073+ return 0;
7074+
4f0767ce 7075+out:
1facf9fc 7076+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
7077+ return -1;
7078+}
7079+
7080+void au_dpri_sb(struct super_block *sb)
7081+{
7082+ struct au_sbinfo *sbinfo;
7083+ aufs_bindex_t bindex;
7084+ int err;
7085+ /* to reuduce stack size */
7086+ struct {
7087+ struct vfsmount mnt;
7088+ struct au_branch fake;
7089+ } *a;
7090+
7091+ /* this function can be called from magic sysrq */
7092+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
7093+ if (unlikely(!a)) {
7094+ dpri("no memory\n");
7095+ return;
7096+ }
7097+
7098+ a->mnt.mnt_sb = sb;
7099+ a->fake.br_perm = 0;
86dc4139 7100+ a->fake.br_path.mnt = &a->mnt;
1facf9fc 7101+ a->fake.br_xino.xi_file = NULL;
7102+ atomic_set(&a->fake.br_count, 0);
7103+ smp_mb(); /* atomic_set */
7104+ err = do_pri_br(-1, &a->fake);
7105+ kfree(a);
7106+ dpri("dev 0x%x\n", sb->s_dev);
7107+ if (err || !au_test_aufs(sb))
7108+ return;
7109+
7110+ sbinfo = au_sbi(sb);
7111+ if (!sbinfo)
7112+ return;
7113+ dpri("nw %d, gen %u, kobj %d\n",
7114+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
7115+ atomic_read(&sbinfo->si_kobj.kref.refcount));
7116+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
7117+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
7118+}
7119+
7120+/* ---------------------------------------------------------------------- */
7121+
027c5e7a
AM
7122+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
7123+{
5527c038 7124+ struct inode *h_inode, *inode = d_inode(dentry);
027c5e7a
AM
7125+ struct dentry *h_dentry;
7126+ aufs_bindex_t bindex, bend, bi;
7127+
7128+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
7129+ return;
7130+
7131+ bend = au_dbend(dentry);
7132+ bi = au_ibend(inode);
7133+ if (bi < bend)
7134+ bend = bi;
7135+ bindex = au_dbstart(dentry);
7136+ bi = au_ibstart(inode);
7137+ if (bi > bindex)
7138+ bindex = bi;
7139+
7140+ for (; bindex <= bend; bindex++) {
7141+ h_dentry = au_h_dptr(dentry, bindex);
7142+ if (!h_dentry)
7143+ continue;
7144+ h_inode = au_h_iptr(inode, bindex);
5527c038 7145+ if (unlikely(h_inode != d_inode(h_dentry))) {
392086de 7146+ au_debug_on();
027c5e7a
AM
7147+ AuDbg("b%d, %s:%d\n", bindex, func, line);
7148+ AuDbgDentry(dentry);
7149+ AuDbgInode(inode);
392086de 7150+ au_debug_off();
027c5e7a
AM
7151+ BUG();
7152+ }
7153+ }
7154+}
7155+
1facf9fc 7156+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
7157+{
7158+ int err, i, j;
7159+ struct au_dcsub_pages dpages;
7160+ struct au_dpage *dpage;
7161+ struct dentry **dentries;
7162+
7163+ err = au_dpages_init(&dpages, GFP_NOFS);
7164+ AuDebugOn(err);
027c5e7a 7165+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 7166+ AuDebugOn(err);
7167+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
7168+ dpage = dpages.dpages + i;
7169+ dentries = dpage->dentries;
7170+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 7171+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 7172+ }
7173+ au_dpages_free(&dpages);
7174+}
7175+
/*
 * Debug hook: when au_wkq_test() is true, call au_dbg_blocked().
 */
void au_dbg_verify_kthread(void)
{
	if (!au_wkq_test())
		return;

	au_dbg_blocked();
	/*
	 * It may be recursive, but udba=notify between two aufs mounts,
	 * where a single ro branch is shared, is not a problem.
	 * Hence no WARN_ON(1) here.
	 */
}
7187+
7188+/* ---------------------------------------------------------------------- */
7189+
1facf9fc 7190+int __init au_debug_init(void)
7191+{
7192+ aufs_bindex_t bindex;
7193+ struct au_vdir_destr destr;
7194+
7195+ bindex = -1;
7196+ AuDebugOn(bindex >= 0);
7197+
7198+ destr.len = -1;
7199+ AuDebugOn(destr.len < NAME_MAX);
7200+
7201+#ifdef CONFIG_4KSTACKS
0c3ec466 7202+ pr_warn("CONFIG_4KSTACKS is defined.\n");
1facf9fc 7203+#endif
7204+
1facf9fc 7205+ return 0;
7206+}
7f207e10
AM
7207diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
7208--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 7209+++ linux/fs/aufs/debug.h 2015-09-24 10:47:58.251386326 +0200
5527c038 7210@@ -0,0 +1,225 @@
1facf9fc 7211+/*
2000de60 7212+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 7213+ *
7214+ * This program, aufs is free software; you can redistribute it and/or modify
7215+ * it under the terms of the GNU General Public License as published by
7216+ * the Free Software Foundation; either version 2 of the License, or
7217+ * (at your option) any later version.
dece6358
AM
7218+ *
7219+ * This program is distributed in the hope that it will be useful,
7220+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7221+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7222+ * GNU General Public License for more details.
7223+ *
7224+ * You should have received a copy of the GNU General Public License
523b37e3 7225+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7226+ */
7227+
7228+/*
7229+ * debug print functions
7230+ */
7231+
7232+#ifndef __AUFS_DEBUG_H__
7233+#define __AUFS_DEBUG_H__
7234+
7235+#ifdef __KERNEL__
7236+
392086de 7237+#include <linux/atomic.h>
4a4d8108
AM
7238+#include <linux/module.h>
7239+#include <linux/kallsyms.h>
1facf9fc 7240+#include <linux/sysrq.h>
4a4d8108 7241+
1facf9fc 7242+#ifdef CONFIG_AUFS_DEBUG
7243+#define AuDebugOn(a) BUG_ON(a)
7244+
7245+/* module parameter */
392086de
AM
7246+extern atomic_t aufs_debug;
7247+static inline void au_debug_on(void)
1facf9fc 7248+{
392086de
AM
7249+ atomic_inc(&aufs_debug);
7250+}
7251+static inline void au_debug_off(void)
7252+{
7253+ atomic_dec_if_positive(&aufs_debug);
1facf9fc 7254+}
7255+
7256+static inline int au_debug_test(void)
7257+{
392086de 7258+ return atomic_read(&aufs_debug) > 0;
1facf9fc 7259+}
7260+#else
7261+#define AuDebugOn(a) do {} while (0)
392086de
AM
7262+AuStubVoid(au_debug_on, void)
7263+AuStubVoid(au_debug_off, void)
4a4d8108 7264+AuStubInt0(au_debug_test, void)
1facf9fc 7265+#endif /* CONFIG_AUFS_DEBUG */
7266+
392086de
AM
7267+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
7268+
1facf9fc 7269+/* ---------------------------------------------------------------------- */
7270+
7271+/* debug print */
7272+
4a4d8108 7273+#define AuDbg(fmt, ...) do { \
1facf9fc 7274+ if (au_debug_test()) \
4a4d8108 7275+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
1facf9fc 7276+} while (0)
4a4d8108
AM
7277+#define AuLabel(l) AuDbg(#l "\n")
7278+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
/*
 * pr_warn() the message only the first time this call site is reached.
 * The per-site flag is set explicitly instead of being incremented, so it
 * cannot wrap around and re-enable the message after 256 calls.
 */
#define AuWarn1(fmt, ...) do { \
	static unsigned char _c; \
	if (!_c) { \
		_c = 1; \
		pr_warn(fmt, ##__VA_ARGS__); \
	} \
} while (0)
7284+
/*
 * pr_err() the message only the first time this call site is reached.
 * The per-site flag is set explicitly instead of being incremented, so it
 * cannot wrap around and re-enable the message after 256 calls.
 */
#define AuErr1(fmt, ...) do { \
	static unsigned char _c; \
	if (!_c) { \
		_c = 1; \
		pr_err(fmt, ##__VA_ARGS__); \
	} \
} while (0)
7290+
/*
 * AuIOErr() the message only the first time this call site is reached.
 * The per-site flag is set explicitly instead of being incremented, so it
 * cannot wrap around and re-enable the message after 256 calls.
 */
#define AuIOErr1(fmt, ...) do { \
	static unsigned char _c; \
	if (!_c) { \
		_c = 1; \
		AuIOErr(fmt, ##__VA_ARGS__); \
	} \
} while (0)
7296+
7297+#define AuUnsupportMsg "This operation is not supported." \
7298+ " Please report this application to aufs-users ML."
4a4d8108
AM
7299+#define AuUnsupport(fmt, ...) do { \
7300+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
1facf9fc 7301+ dump_stack(); \
7302+} while (0)
7303+
7304+#define AuTraceErr(e) do { \
7305+ if (unlikely((e) < 0)) \
7306+ AuDbg("err %d\n", (int)(e)); \
7307+} while (0)
7308+
7309+#define AuTraceErrPtr(p) do { \
7310+ if (IS_ERR(p)) \
7311+ AuDbg("err %ld\n", PTR_ERR(p)); \
7312+} while (0)
7313+
7314+/* dirty macros for debug print, use with "%.*s" and caution */
7315+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
1facf9fc 7316+
7317+/* ---------------------------------------------------------------------- */
7318+
dece6358 7319+struct dentry;
1facf9fc 7320+#ifdef CONFIG_AUFS_DEBUG
c1595e42 7321+extern struct mutex au_dbg_mtx;
1facf9fc 7322+extern char *au_plevel;
7323+struct au_nhash;
7324+void au_dpri_whlist(struct au_nhash *whlist);
7325+struct au_vdir;
7326+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 7327+struct inode;
1facf9fc 7328+void au_dpri_inode(struct inode *inode);
2cbb1c4b 7329+void au_dpri_dalias(struct inode *inode);
1facf9fc 7330+void au_dpri_dentry(struct dentry *dentry);
dece6358 7331+struct file;
1facf9fc 7332+void au_dpri_file(struct file *filp);
dece6358 7333+struct super_block;
1facf9fc 7334+void au_dpri_sb(struct super_block *sb);
7335+
027c5e7a
AM
7336+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
7337+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 7338+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 7339+void au_dbg_verify_kthread(void);
7340+
7341+int __init au_debug_init(void);
7e9cd9fe 7342+
1facf9fc 7343+#define AuDbgWhlist(w) do { \
c1595e42 7344+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7345+ AuDbg(#w "\n"); \
7346+ au_dpri_whlist(w); \
c1595e42 7347+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7348+} while (0)
7349+
7350+#define AuDbgVdir(v) do { \
c1595e42 7351+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7352+ AuDbg(#v "\n"); \
7353+ au_dpri_vdir(v); \
c1595e42 7354+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7355+} while (0)
7356+
7357+#define AuDbgInode(i) do { \
c1595e42 7358+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7359+ AuDbg(#i "\n"); \
7360+ au_dpri_inode(i); \
c1595e42 7361+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7362+} while (0)
7363+
2cbb1c4b 7364+#define AuDbgDAlias(i) do { \
c1595e42 7365+ mutex_lock(&au_dbg_mtx); \
2cbb1c4b
JR
7366+ AuDbg(#i "\n"); \
7367+ au_dpri_dalias(i); \
c1595e42 7368+ mutex_unlock(&au_dbg_mtx); \
2cbb1c4b
JR
7369+} while (0)
7370+
1facf9fc 7371+#define AuDbgDentry(d) do { \
c1595e42 7372+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7373+ AuDbg(#d "\n"); \
7374+ au_dpri_dentry(d); \
c1595e42 7375+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7376+} while (0)
7377+
7378+#define AuDbgFile(f) do { \
c1595e42 7379+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7380+ AuDbg(#f "\n"); \
7381+ au_dpri_file(f); \
c1595e42 7382+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7383+} while (0)
7384+
7385+#define AuDbgSb(sb) do { \
c1595e42 7386+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7387+ AuDbg(#sb "\n"); \
7388+ au_dpri_sb(sb); \
c1595e42 7389+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7390+} while (0)
7391+
4a4d8108
AM
/*
 * Debug-print the symbol name that sprint_symbol() resolves for @addr.
 * @addr is parenthesised before the cast so that an expression argument
 * (e.g. "ptr + 1") is converted as a whole rather than only its first
 * operand.
 */
#define AuDbgSym(addr) do { \
	char sym[KSYM_SYMBOL_LEN]; \
	sprint_symbol(sym, (unsigned long)(addr)); \
	AuDbg("%s\n", sym); \
} while (0)
1facf9fc 7397+#else
027c5e7a 7398+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
7399+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
7400+AuStubVoid(au_dbg_verify_kthread, void)
7401+AuStubInt0(__init au_debug_init, void)
1facf9fc 7402+
1facf9fc 7403+#define AuDbgWhlist(w) do {} while (0)
7404+#define AuDbgVdir(v) do {} while (0)
7405+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 7406+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 7407+#define AuDbgDentry(d) do {} while (0)
7408+#define AuDbgFile(f) do {} while (0)
7409+#define AuDbgSb(sb) do {} while (0)
4a4d8108 7410+#define AuDbgSym(addr) do {} while (0)
1facf9fc 7411+#endif /* CONFIG_AUFS_DEBUG */
7412+
7413+/* ---------------------------------------------------------------------- */
7414+
7415+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
7416+int __init au_sysrq_init(void);
7417+void au_sysrq_fin(void);
7418+
7419+#ifdef CONFIG_HW_CONSOLE
7420+#define au_dbg_blocked() do { \
7421+ WARN_ON(1); \
0c5527e5 7422+ handle_sysrq('w'); \
1facf9fc 7423+} while (0)
7424+#else
4a4d8108 7425+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7426+#endif
7427+
7428+#else
4a4d8108
AM
7429+AuStubInt0(__init au_sysrq_init, void)
7430+AuStubVoid(au_sysrq_fin, void)
7431+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7432+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
7433+
7434+#endif /* __KERNEL__ */
7435+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
7436diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
7437--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
79b8bda9
AM
7438+++ linux/fs/aufs/dentry.c 2015-11-11 17:21:46.918863802 +0100
7439@@ -0,0 +1,1136 @@
1facf9fc 7440+/*
2000de60 7441+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 7442+ *
7443+ * This program, aufs is free software; you can redistribute it and/or modify
7444+ * it under the terms of the GNU General Public License as published by
7445+ * the Free Software Foundation; either version 2 of the License, or
7446+ * (at your option) any later version.
dece6358
AM
7447+ *
7448+ * This program is distributed in the hope that it will be useful,
7449+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7450+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7451+ * GNU General Public License for more details.
7452+ *
7453+ * You should have received a copy of the GNU General Public License
523b37e3 7454+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7455+ */
7456+
7457+/*
7458+ * lookup and dentry operations
7459+ */
7460+
dece6358 7461+#include <linux/namei.h>
1facf9fc 7462+#include "aufs.h"
7463+
1facf9fc 7464+#define AuLkup_ALLOW_NEG 1
076b876e 7465+#define AuLkup_IGNORE_PERM (1 << 1)
1facf9fc 7466+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
7f207e10
AM
7467+#define au_fset_lkup(flags, name) \
7468+ do { (flags) |= AuLkup_##name; } while (0)
7469+#define au_fclr_lkup(flags, name) \
7470+ do { (flags) &= ~AuLkup_##name; } while (0)
1facf9fc 7471+
/* arguments shared by the per-branch lookups of a single au_lkup_dentry() */
struct au_do_lookup_args {
	/* AuLkup_* bits, handled by au_ftest_lkup/au_fset_lkup/au_fclr_lkup */
	unsigned int		flags;
	/* expected file type (compared with i_mode & S_IFMT), 0 for any */
	mode_t			type;
};
7476+
7477+/*
7478+ * returns positive/negative dentry, NULL or an error.
7479+ * NULL means whiteout-ed or not-found.
7480+ */
7481+static struct dentry*
7482+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
7483+ aufs_bindex_t bindex, struct qstr *wh_name,
7484+ struct au_do_lookup_args *args)
7485+{
7486+ struct dentry *h_dentry;
2000de60 7487+ struct inode *h_inode;
1facf9fc 7488+ struct au_branch *br;
7489+ int wh_found, opq;
7490+ unsigned char wh_able;
7491+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
076b876e
AM
7492+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
7493+ IGNORE_PERM);
1facf9fc 7494+
1facf9fc 7495+ wh_found = 0;
7496+ br = au_sbr(dentry->d_sb, bindex);
7497+ wh_able = !!au_br_whable(br->br_perm);
7498+ if (wh_able)
076b876e 7499+ wh_found = au_wh_test(h_parent, wh_name, /*try_sio*/0);
1facf9fc 7500+ h_dentry = ERR_PTR(wh_found);
7501+ if (!wh_found)
7502+ goto real_lookup;
7503+ if (unlikely(wh_found < 0))
7504+ goto out;
7505+
7506+ /* We found a whiteout */
7507+ /* au_set_dbend(dentry, bindex); */
7508+ au_set_dbwh(dentry, bindex);
7509+ if (!allow_neg)
7510+ return NULL; /* success */
7511+
4f0767ce 7512+real_lookup:
076b876e
AM
7513+ if (!ignore_perm)
7514+ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
7515+ else
7516+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
2000de60
JR
7517+ if (IS_ERR(h_dentry)) {
7518+ if (PTR_ERR(h_dentry) == -ENAMETOOLONG
7519+ && !allow_neg)
7520+ h_dentry = NULL;
1facf9fc 7521+ goto out;
2000de60 7522+ }
1facf9fc 7523+
5527c038
JR
7524+ h_inode = d_inode(h_dentry);
7525+ if (d_is_negative(h_dentry)) {
1facf9fc 7526+ if (!allow_neg)
7527+ goto out_neg;
7528+ } else if (wh_found
7529+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
7530+ goto out_neg;
7531+
7532+ if (au_dbend(dentry) <= bindex)
7533+ au_set_dbend(dentry, bindex);
7534+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
7535+ au_set_dbstart(dentry, bindex);
7536+ au_set_h_dptr(dentry, bindex, h_dentry);
7537+
2000de60
JR
7538+ if (!d_is_dir(h_dentry)
7539+ || !wh_able
5527c038 7540+ || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
1facf9fc 7541+ goto out; /* success */
7542+
7543+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
076b876e 7544+ opq = au_diropq_test(h_dentry);
1facf9fc 7545+ mutex_unlock(&h_inode->i_mutex);
7546+ if (opq > 0)
7547+ au_set_dbdiropq(dentry, bindex);
7548+ else if (unlikely(opq < 0)) {
7549+ au_set_h_dptr(dentry, bindex, NULL);
7550+ h_dentry = ERR_PTR(opq);
7551+ }
7552+ goto out;
7553+
4f0767ce 7554+out_neg:
1facf9fc 7555+ dput(h_dentry);
7556+ h_dentry = NULL;
4f0767ce 7557+out:
1facf9fc 7558+ return h_dentry;
7559+}
7560+
dece6358
AM
7561+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
7562+{
7563+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
7564+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
7565+ return -EPERM;
7566+ return 0;
7567+}
7568+
1facf9fc 7569+/*
7570+ * returns the number of lower positive dentries,
7571+ * otherwise an error.
7572+ * can be called at unlinking with @type is zero.
7573+ */
537831f9 7574+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type)
1facf9fc 7575+{
7576+ int npositive, err;
7577+ aufs_bindex_t bindex, btail, bdiropq;
076b876e 7578+ unsigned char isdir, dirperm1;
1facf9fc 7579+ struct qstr whname;
7580+ struct au_do_lookup_args args = {
b4510431 7581+ .flags = 0,
537831f9 7582+ .type = type
1facf9fc 7583+ };
7584+ const struct qstr *name = &dentry->d_name;
7585+ struct dentry *parent;
076b876e 7586+ struct super_block *sb;
1facf9fc 7587+
076b876e
AM
7588+ sb = dentry->d_sb;
7589+ err = au_test_shwh(sb, name);
dece6358 7590+ if (unlikely(err))
1facf9fc 7591+ goto out;
7592+
7593+ err = au_wh_name_alloc(&whname, name);
7594+ if (unlikely(err))
7595+ goto out;
7596+
2000de60 7597+ isdir = !!d_is_dir(dentry);
1facf9fc 7598+ if (!type)
7599+ au_fset_lkup(args.flags, ALLOW_NEG);
076b876e 7600+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
1facf9fc 7601+
7602+ npositive = 0;
4a4d8108 7603+ parent = dget_parent(dentry);
1facf9fc 7604+ btail = au_dbtaildir(parent);
7605+ for (bindex = bstart; bindex <= btail; bindex++) {
7606+ struct dentry *h_parent, *h_dentry;
7607+ struct inode *h_inode, *h_dir;
7608+
7609+ h_dentry = au_h_dptr(dentry, bindex);
7610+ if (h_dentry) {
5527c038 7611+ if (d_is_positive(h_dentry))
1facf9fc 7612+ npositive++;
7613+ if (type != S_IFDIR)
7614+ break;
7615+ continue;
7616+ }
7617+ h_parent = au_h_dptr(parent, bindex);
2000de60 7618+ if (!h_parent || !d_is_dir(h_parent))
1facf9fc 7619+ continue;
7620+
5527c038 7621+ h_dir = d_inode(h_parent);
1facf9fc 7622+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
7623+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
7624+ &args);
7625+ mutex_unlock(&h_dir->i_mutex);
7626+ err = PTR_ERR(h_dentry);
7627+ if (IS_ERR(h_dentry))
4a4d8108 7628+ goto out_parent;
2000de60
JR
7629+ if (h_dentry)
7630+ au_fclr_lkup(args.flags, ALLOW_NEG);
076b876e
AM
7631+ if (dirperm1)
7632+ au_fset_lkup(args.flags, IGNORE_PERM);
1facf9fc 7633+
79b8bda9 7634+ if (au_dbwh(dentry) == bindex)
1facf9fc 7635+ break;
7636+ if (!h_dentry)
7637+ continue;
5527c038 7638+ if (d_is_negative(h_dentry))
1facf9fc 7639+ continue;
5527c038 7640+ h_inode = d_inode(h_dentry);
1facf9fc 7641+ npositive++;
7642+ if (!args.type)
7643+ args.type = h_inode->i_mode & S_IFMT;
7644+ if (args.type != S_IFDIR)
7645+ break;
7646+ else if (isdir) {
7647+ /* the type of lower may be different */
7648+ bdiropq = au_dbdiropq(dentry);
7649+ if (bdiropq >= 0 && bdiropq <= bindex)
7650+ break;
7651+ }
7652+ }
7653+
7654+ if (npositive) {
7655+ AuLabel(positive);
7656+ au_update_dbstart(dentry);
7657+ }
7658+ err = npositive;
076b876e 7659+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
027c5e7a 7660+ && au_dbstart(dentry) < 0)) {
1facf9fc 7661+ err = -EIO;
523b37e3
AM
7662+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
7663+ dentry, err);
027c5e7a 7664+ }
1facf9fc 7665+
4f0767ce 7666+out_parent:
4a4d8108 7667+ dput(parent);
1facf9fc 7668+ kfree(whname.name);
4f0767ce 7669+out:
1facf9fc 7670+ return err;
7671+}
7672+
076b876e 7673+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
1facf9fc 7674+{
7675+ struct dentry *dentry;
7676+ int wkq_err;
7677+
5527c038 7678+ if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
b4510431 7679+ dentry = vfsub_lkup_one(name, parent);
1facf9fc 7680+ else {
b4510431
AM
7681+ struct vfsub_lkup_one_args args = {
7682+ .errp = &dentry,
7683+ .name = name,
7684+ .parent = parent
1facf9fc 7685+ };
7686+
b4510431 7687+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
1facf9fc 7688+ if (unlikely(wkq_err))
7689+ dentry = ERR_PTR(wkq_err);
7690+ }
7691+
7692+ return dentry;
7693+}
7694+
7695+/*
7696+ * lookup @dentry on @bindex which should be negative.
7697+ */
86dc4139 7698+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
1facf9fc 7699+{
7700+ int err;
7701+ struct dentry *parent, *h_parent, *h_dentry;
86dc4139 7702+ struct au_branch *br;
1facf9fc 7703+
1facf9fc 7704+ parent = dget_parent(dentry);
7705+ h_parent = au_h_dptr(parent, bindex);
86dc4139
AM
7706+ br = au_sbr(dentry->d_sb, bindex);
7707+ if (wh)
7708+ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
7709+ else
076b876e 7710+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
1facf9fc 7711+ err = PTR_ERR(h_dentry);
7712+ if (IS_ERR(h_dentry))
7713+ goto out;
5527c038 7714+ if (unlikely(d_is_positive(h_dentry))) {
1facf9fc 7715+ err = -EIO;
523b37e3 7716+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
1facf9fc 7717+ dput(h_dentry);
7718+ goto out;
7719+ }
7720+
4a4d8108 7721+ err = 0;
1facf9fc 7722+ if (bindex < au_dbstart(dentry))
7723+ au_set_dbstart(dentry, bindex);
7724+ if (au_dbend(dentry) < bindex)
7725+ au_set_dbend(dentry, bindex);
7726+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 7727+
4f0767ce 7728+out:
1facf9fc 7729+ dput(parent);
7730+ return err;
7731+}
7732+
7733+/* ---------------------------------------------------------------------- */
7734+
7735+/* subset of struct inode */
7736+struct au_iattr {
7737+ unsigned long i_ino;
7738+ /* unsigned int i_nlink; */
0c3ec466
AM
7739+ kuid_t i_uid;
7740+ kgid_t i_gid;
1facf9fc 7741+ u64 i_version;
7742+/*
7743+ loff_t i_size;
7744+ blkcnt_t i_blocks;
7745+*/
7746+ umode_t i_mode;
7747+};
7748+
7749+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
7750+{
7751+ ia->i_ino = h_inode->i_ino;
7752+ /* ia->i_nlink = h_inode->i_nlink; */
7753+ ia->i_uid = h_inode->i_uid;
7754+ ia->i_gid = h_inode->i_gid;
7755+ ia->i_version = h_inode->i_version;
7756+/*
7757+ ia->i_size = h_inode->i_size;
7758+ ia->i_blocks = h_inode->i_blocks;
7759+*/
7760+ ia->i_mode = (h_inode->i_mode & S_IFMT);
7761+}
7762+
7763+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
7764+{
7765+ return ia->i_ino != h_inode->i_ino
7766+ /* || ia->i_nlink != h_inode->i_nlink */
0c3ec466 7767+ || !uid_eq(ia->i_uid, h_inode->i_uid)
2dfbb274 7768+ || !gid_eq(ia->i_gid, h_inode->i_gid)
1facf9fc 7769+ || ia->i_version != h_inode->i_version
7770+/*
7771+ || ia->i_size != h_inode->i_size
7772+ || ia->i_blocks != h_inode->i_blocks
7773+*/
7774+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
7775+}
7776+
7777+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
7778+ struct au_branch *br)
7779+{
7780+ int err;
7781+ struct au_iattr ia;
7782+ struct inode *h_inode;
7783+ struct dentry *h_d;
7784+ struct super_block *h_sb;
7785+
7786+ err = 0;
7787+ memset(&ia, -1, sizeof(ia));
7788+ h_sb = h_dentry->d_sb;
5527c038
JR
7789+ h_inode = NULL;
7790+ if (d_is_positive(h_dentry)) {
7791+ h_inode = d_inode(h_dentry);
1facf9fc 7792+ au_iattr_save(&ia, h_inode);
5527c038 7793+ } else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
1facf9fc 7794+ /* nfs d_revalidate may return 0 for negative dentry */
7795+ /* fuse d_revalidate always return 0 for negative dentry */
7796+ goto out;
7797+
7798+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
b4510431 7799+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
1facf9fc 7800+ err = PTR_ERR(h_d);
7801+ if (IS_ERR(h_d))
7802+ goto out;
7803+
7804+ err = 0;
7805+ if (unlikely(h_d != h_dentry
5527c038 7806+ || d_inode(h_d) != h_inode
1facf9fc 7807+ || (h_inode && au_iattr_test(&ia, h_inode))))
7808+ err = au_busy_or_stale();
7809+ dput(h_d);
7810+
4f0767ce 7811+out:
1facf9fc 7812+ AuTraceErr(err);
7813+ return err;
7814+}
7815+
7816+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
7817+ struct dentry *h_parent, struct au_branch *br)
7818+{
7819+ int err;
7820+
7821+ err = 0;
027c5e7a
AM
7822+ if (udba == AuOpt_UDBA_REVAL
7823+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 7824+ IMustLock(h_dir);
5527c038 7825+ err = (d_inode(h_dentry->d_parent) != h_dir);
027c5e7a 7826+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 7827+ err = au_h_verify_dentry(h_dentry, h_parent, br);
7828+
7829+ return err;
7830+}
7831+
7832+/* ---------------------------------------------------------------------- */
7833+
027c5e7a 7834+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 7835+{
027c5e7a 7836+ int err;
1facf9fc 7837+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
027c5e7a
AM
7838+ struct au_hdentry tmp, *p, *q;
7839+ struct au_dinfo *dinfo;
7840+ struct super_block *sb;
1facf9fc 7841+
027c5e7a 7842+ DiMustWriteLock(dentry);
1308ab2a 7843+
027c5e7a
AM
7844+ sb = dentry->d_sb;
7845+ dinfo = au_di(dentry);
1facf9fc 7846+ bend = dinfo->di_bend;
7847+ bwh = dinfo->di_bwh;
7848+ bdiropq = dinfo->di_bdiropq;
027c5e7a 7849+ p = dinfo->di_hdentry + dinfo->di_bstart;
1facf9fc 7850+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
027c5e7a 7851+ if (!p->hd_dentry)
1facf9fc 7852+ continue;
7853+
027c5e7a
AM
7854+ new_bindex = au_br_index(sb, p->hd_id);
7855+ if (new_bindex == bindex)
1facf9fc 7856+ continue;
1facf9fc 7857+
1facf9fc 7858+ if (dinfo->di_bwh == bindex)
7859+ bwh = new_bindex;
7860+ if (dinfo->di_bdiropq == bindex)
7861+ bdiropq = new_bindex;
7862+ if (new_bindex < 0) {
7863+ au_hdput(p);
7864+ p->hd_dentry = NULL;
7865+ continue;
7866+ }
7867+
7868+ /* swap two lower dentries, and loop again */
7869+ q = dinfo->di_hdentry + new_bindex;
7870+ tmp = *q;
7871+ *q = *p;
7872+ *p = tmp;
7873+ if (tmp.hd_dentry) {
7874+ bindex--;
7875+ p--;
7876+ }
7877+ }
7878+
1facf9fc 7879+ dinfo->di_bwh = -1;
7880+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
7881+ dinfo->di_bwh = bwh;
7882+
7883+ dinfo->di_bdiropq = -1;
7884+ if (bdiropq >= 0
7885+ && bdiropq <= au_sbend(sb)
7886+ && au_sbr_whable(sb, bdiropq))
7887+ dinfo->di_bdiropq = bdiropq;
7888+
027c5e7a
AM
7889+ err = -EIO;
7890+ dinfo->di_bstart = -1;
7891+ dinfo->di_bend = -1;
1facf9fc 7892+ bend = au_dbend(parent);
7893+ p = dinfo->di_hdentry;
7894+ for (bindex = 0; bindex <= bend; bindex++, p++)
7895+ if (p->hd_dentry) {
7896+ dinfo->di_bstart = bindex;
7897+ break;
7898+ }
7899+
027c5e7a
AM
7900+ if (dinfo->di_bstart >= 0) {
7901+ p = dinfo->di_hdentry + bend;
7902+ for (bindex = bend; bindex >= 0; bindex--, p--)
7903+ if (p->hd_dentry) {
7904+ dinfo->di_bend = bindex;
7905+ err = 0;
7906+ break;
7907+ }
7908+ }
7909+
7910+ return err;
1facf9fc 7911+}
7912+
027c5e7a 7913+static void au_do_hide(struct dentry *dentry)
1facf9fc 7914+{
027c5e7a 7915+ struct inode *inode;
1facf9fc 7916+
5527c038
JR
7917+ if (d_really_is_positive(dentry)) {
7918+ inode = d_inode(dentry);
7919+ if (!d_is_dir(dentry)) {
027c5e7a
AM
7920+ if (inode->i_nlink && !d_unhashed(dentry))
7921+ drop_nlink(inode);
7922+ } else {
7923+ clear_nlink(inode);
7924+ /* stop next lookup */
7925+ inode->i_flags |= S_DEAD;
7926+ }
7927+ smp_mb(); /* necessary? */
7928+ }
7929+ d_drop(dentry);
7930+}
1308ab2a 7931+
027c5e7a
AM
7932+static int au_hide_children(struct dentry *parent)
7933+{
7934+ int err, i, j, ndentry;
7935+ struct au_dcsub_pages dpages;
7936+ struct au_dpage *dpage;
7937+ struct dentry *dentry;
1facf9fc 7938+
027c5e7a 7939+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 7940+ if (unlikely(err))
7941+ goto out;
027c5e7a
AM
7942+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
7943+ if (unlikely(err))
7944+ goto out_dpages;
1facf9fc 7945+
027c5e7a
AM
7946+ /* in reverse order */
7947+ for (i = dpages.ndpage - 1; i >= 0; i--) {
7948+ dpage = dpages.dpages + i;
7949+ ndentry = dpage->ndentry;
7950+ for (j = ndentry - 1; j >= 0; j--) {
7951+ dentry = dpage->dentries[j];
7952+ if (dentry != parent)
7953+ au_do_hide(dentry);
7954+ }
7955+ }
1facf9fc 7956+
027c5e7a
AM
7957+out_dpages:
7958+ au_dpages_free(&dpages);
4f0767ce 7959+out:
027c5e7a 7960+ return err;
1facf9fc 7961+}
7962+
/*
 * hide @dentry, and for a directory its collected children first.
 * a failure in hiding the children is reported and then ignored.
 */
static void au_hide(struct dentry *dentry)
{
	int err;

	AuDbgDentry(dentry);
	if (!d_is_dir(dentry))
		goto hide_self;

	/* shrink_dcache_parent(dentry); */
	err = au_hide_children(dentry);
	if (unlikely(err))
		AuIOErr("%pd, failed hiding children, ignored %d\n",
			dentry, err);

hide_self:
	au_do_hide(dentry);
}
1facf9fc 7977+
027c5e7a
AM
7978+/*
7979+ * By adding a dirty branch, a cached dentry may be affected in various ways.
7980+ *
7981+ * a dirty branch is added
7982+ * - on the top of layers
7983+ * - in the middle of layers
7984+ * - to the bottom of layers
7985+ *
7986+ * on the added branch there exists
7987+ * - a whiteout
7988+ * - a diropq
7989+ * - a same named entry
7990+ * + exist
7991+ * * negative --> positive
7992+ * * positive --> positive
7993+ * - type is unchanged
7994+ * - type is changed
7995+ * + doesn't exist
7996+ * * negative --> negative
7997+ * * positive --> negative (rejected by au_br_del() for non-dir case)
7998+ * - none
7999+ */
8000+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
8001+ struct au_dinfo *tmp)
8002+{
8003+ int err;
8004+ aufs_bindex_t bindex, bend;
8005+ struct {
8006+ struct dentry *dentry;
8007+ struct inode *inode;
8008+ mode_t mode;
8009+ } orig_h, tmp_h;
8010+ struct au_hdentry *hd;
8011+ struct inode *inode, *h_inode;
8012+ struct dentry *h_dentry;
8013+
8014+ err = 0;
8015+ AuDebugOn(dinfo->di_bstart < 0);
027c5e7a 8016+ orig_h.mode = 0;
5527c038
JR
8017+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
8018+ orig_h.inode = NULL;
8019+ if (d_is_positive(orig_h.dentry)) {
8020+ orig_h.inode = d_inode(orig_h.dentry);
027c5e7a 8021+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
5527c038 8022+ }
027c5e7a
AM
8023+ memset(&tmp_h, 0, sizeof(tmp_h));
8024+ if (tmp->di_bstart >= 0) {
8025+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
5527c038
JR
8026+ tmp_h.inode = NULL;
8027+ if (d_is_positive(tmp_h.dentry)) {
8028+ tmp_h.inode = d_inode(tmp_h.dentry);
027c5e7a 8029+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
5527c038 8030+ }
027c5e7a
AM
8031+ }
8032+
5527c038
JR
8033+ inode = NULL;
8034+ if (d_really_is_positive(dentry))
8035+ inode = d_inode(dentry);
027c5e7a
AM
8036+ if (!orig_h.inode) {
8037+ AuDbg("nagative originally\n");
8038+ if (inode) {
8039+ au_hide(dentry);
8040+ goto out;
8041+ }
8042+ AuDebugOn(inode);
8043+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
8044+ AuDebugOn(dinfo->di_bdiropq != -1);
8045+
8046+ if (!tmp_h.inode) {
8047+ AuDbg("negative --> negative\n");
8048+ /* should have only one negative lower */
8049+ if (tmp->di_bstart >= 0
8050+ && tmp->di_bstart < dinfo->di_bstart) {
8051+ AuDebugOn(tmp->di_bstart != tmp->di_bend);
8052+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
8053+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
8054+ au_di_cp(dinfo, tmp);
8055+ hd = tmp->di_hdentry + tmp->di_bstart;
8056+ au_set_h_dptr(dentry, tmp->di_bstart,
8057+ dget(hd->hd_dentry));
8058+ }
8059+ au_dbg_verify_dinode(dentry);
8060+ } else {
8061+ AuDbg("negative --> positive\n");
8062+ /*
8063+ * similar to the behaviour of creating with bypassing
8064+ * aufs.
8065+ * unhash it in order to force an error in the
8066+ * succeeding create operation.
8067+ * we should not set S_DEAD here.
8068+ */
8069+ d_drop(dentry);
8070+ /* au_di_swap(tmp, dinfo); */
8071+ au_dbg_verify_dinode(dentry);
8072+ }
8073+ } else {
8074+ AuDbg("positive originally\n");
8075+ /* inode may be NULL */
8076+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
8077+ if (!tmp_h.inode) {
8078+ AuDbg("positive --> negative\n");
8079+ /* or bypassing aufs */
8080+ au_hide(dentry);
8081+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
8082+ dinfo->di_bwh = tmp->di_bwh;
8083+ if (inode)
8084+ err = au_refresh_hinode_self(inode);
8085+ au_dbg_verify_dinode(dentry);
8086+ } else if (orig_h.mode == tmp_h.mode) {
8087+ AuDbg("positive --> positive, same type\n");
8088+ if (!S_ISDIR(orig_h.mode)
8089+ && dinfo->di_bstart > tmp->di_bstart) {
8090+ /*
8091+ * similar to the behaviour of removing and
8092+ * creating.
8093+ */
8094+ au_hide(dentry);
8095+ if (inode)
8096+ err = au_refresh_hinode_self(inode);
8097+ au_dbg_verify_dinode(dentry);
8098+ } else {
8099+ /* fill empty slots */
8100+ if (dinfo->di_bstart > tmp->di_bstart)
8101+ dinfo->di_bstart = tmp->di_bstart;
8102+ if (dinfo->di_bend < tmp->di_bend)
8103+ dinfo->di_bend = tmp->di_bend;
8104+ dinfo->di_bwh = tmp->di_bwh;
8105+ dinfo->di_bdiropq = tmp->di_bdiropq;
8106+ hd = tmp->di_hdentry;
8107+ bend = dinfo->di_bend;
8108+ for (bindex = tmp->di_bstart; bindex <= bend;
8109+ bindex++) {
8110+ if (au_h_dptr(dentry, bindex))
8111+ continue;
8112+ h_dentry = hd[bindex].hd_dentry;
8113+ if (!h_dentry)
8114+ continue;
5527c038
JR
8115+ AuDebugOn(d_is_negative(h_dentry));
8116+ h_inode = d_inode(h_dentry);
027c5e7a
AM
8117+ AuDebugOn(orig_h.mode
8118+ != (h_inode->i_mode
8119+ & S_IFMT));
8120+ au_set_h_dptr(dentry, bindex,
8121+ dget(h_dentry));
8122+ }
8123+ err = au_refresh_hinode(inode, dentry);
8124+ au_dbg_verify_dinode(dentry);
8125+ }
8126+ } else {
8127+ AuDbg("positive --> positive, different type\n");
8128+ /* similar to the behaviour of removing and creating */
8129+ au_hide(dentry);
8130+ if (inode)
8131+ err = au_refresh_hinode_self(inode);
8132+ au_dbg_verify_dinode(dentry);
8133+ }
8134+ }
8135+
8136+out:
8137+ return err;
8138+}
8139+
79b8bda9
AM
8140+void au_refresh_dop(struct dentry *dentry, int force_reval)
8141+{
8142+ const struct dentry_operations *dop
8143+ = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
8144+ static const unsigned int mask
8145+ = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
8146+
8147+ BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
8148+
8149+ if (dentry->d_op == dop)
8150+ return;
8151+
8152+ AuDbg("%pd\n", dentry);
8153+ spin_lock(&dentry->d_lock);
8154+ if (dop == &aufs_dop)
8155+ dentry->d_flags |= mask;
8156+ else
8157+ dentry->d_flags &= ~mask;
8158+ dentry->d_op = dop;
8159+ spin_unlock(&dentry->d_lock);
8160+}
8161+
027c5e7a
AM
8162+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
8163+{
8164+ int err, ebrange;
8165+ unsigned int sigen;
8166+ struct au_dinfo *dinfo, *tmp;
8167+ struct super_block *sb;
8168+ struct inode *inode;
8169+
8170+ DiMustWriteLock(dentry);
8171+ AuDebugOn(IS_ROOT(dentry));
5527c038 8172+ AuDebugOn(d_really_is_negative(parent));
027c5e7a
AM
8173+
8174+ sb = dentry->d_sb;
027c5e7a
AM
8175+ sigen = au_sigen(sb);
8176+ err = au_digen_test(parent, sigen);
8177+ if (unlikely(err))
8178+ goto out;
8179+
8180+ dinfo = au_di(dentry);
8181+ err = au_di_realloc(dinfo, au_sbend(sb) + 1);
8182+ if (unlikely(err))
8183+ goto out;
8184+ ebrange = au_dbrange_test(dentry);
8185+ if (!ebrange)
8186+ ebrange = au_do_refresh_hdentry(dentry, parent);
8187+
38d290e6 8188+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
027c5e7a 8189+ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
5527c038
JR
8190+ if (d_really_is_positive(dentry)) {
8191+ inode = d_inode(dentry);
027c5e7a 8192+ err = au_refresh_hinode_self(inode);
5527c038 8193+ }
027c5e7a
AM
8194+ au_dbg_verify_dinode(dentry);
8195+ if (!err)
8196+ goto out_dgen; /* success */
8197+ goto out;
8198+ }
8199+
8200+ /* temporary dinfo */
8201+ AuDbgDentry(dentry);
8202+ err = -ENOMEM;
8203+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
8204+ if (unlikely(!tmp))
8205+ goto out;
8206+ au_di_swap(tmp, dinfo);
8207+ /* returns the number of positive dentries */
8208+ /*
8209+ * if current working dir is removed, it returns an error.
8210+ * but the dentry is legal.
8211+ */
537831f9 8212+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
027c5e7a
AM
8213+ AuDbgDentry(dentry);
8214+ au_di_swap(tmp, dinfo);
8215+ if (err == -ENOENT)
8216+ err = 0;
8217+ if (err >= 0) {
8218+ /* compare/refresh by dinfo */
8219+ AuDbgDentry(dentry);
8220+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
8221+ au_dbg_verify_dinode(dentry);
8222+ AuTraceErr(err);
8223+ }
8224+ au_rw_write_unlock(&tmp->di_rwsem);
8225+ au_di_free(tmp);
8226+ if (unlikely(err))
8227+ goto out;
8228+
8229+out_dgen:
8230+ au_update_digen(dentry);
8231+out:
8232+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
523b37e3 8233+ AuIOErr("failed refreshing %pd, %d\n", dentry, err);
027c5e7a
AM
8234+ AuDbgDentry(dentry);
8235+ }
8236+ AuTraceErr(err);
8237+ return err;
8238+}
8239+
b4510431
AM
8240+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
8241+ struct dentry *dentry, aufs_bindex_t bindex)
027c5e7a
AM
8242+{
8243+ int err, valid;
027c5e7a
AM
8244+
8245+ err = 0;
8246+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
8247+ goto out;
027c5e7a
AM
8248+
8249+ AuDbg("b%d\n", bindex);
b4510431
AM
8250+ /*
8251+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
8252+ * due to whiteout and branch permission.
8253+ */
8254+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
8255+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
8256+ /* it may return tri-state */
8257+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
1facf9fc 8258+
8259+ if (unlikely(valid < 0))
8260+ err = valid;
8261+ else if (!valid)
8262+ err = -EINVAL;
8263+
4f0767ce 8264+out:
1facf9fc 8265+ AuTraceErr(err);
8266+ return err;
8267+}
8268+
8269+/* todo: remove this */
8270+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
b4510431 8271+ unsigned int flags, int do_udba)
1facf9fc 8272+{
8273+ int err;
8274+ umode_t mode, h_mode;
8275+ aufs_bindex_t bindex, btail, bstart, ibs, ibe;
38d290e6 8276+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
4a4d8108 8277+ struct inode *h_inode, *h_cached_inode;
1facf9fc 8278+ struct dentry *h_dentry;
8279+ struct qstr *name, *h_name;
8280+
8281+ err = 0;
8282+ plus = 0;
8283+ mode = 0;
1facf9fc 8284+ ibs = -1;
8285+ ibe = -1;
8286+ unhashed = !!d_unhashed(dentry);
8287+ is_root = !!IS_ROOT(dentry);
8288+ name = &dentry->d_name;
38d290e6 8289+ tmpfile = au_di(dentry)->di_tmpfile;
1facf9fc 8290+
8291+ /*
7f207e10
AM
8292+ * Theoretically, REVAL test should be unnecessary in case of
8293+ * {FS,I}NOTIFY.
8294+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 8295+ * IN_ATTRIB for atime/nlink/pageio
1facf9fc 8296+ * Let's do REVAL test too.
8297+ */
8298+ if (do_udba && inode) {
8299+ mode = (inode->i_mode & S_IFMT);
8300+ plus = (inode->i_nlink > 0);
1facf9fc 8301+ ibs = au_ibstart(inode);
8302+ ibe = au_ibend(inode);
8303+ }
8304+
8305+ bstart = au_dbstart(dentry);
8306+ btail = bstart;
8307+ if (inode && S_ISDIR(inode->i_mode))
8308+ btail = au_dbtaildir(dentry);
8309+ for (bindex = bstart; bindex <= btail; bindex++) {
8310+ h_dentry = au_h_dptr(dentry, bindex);
8311+ if (!h_dentry)
8312+ continue;
8313+
523b37e3
AM
8314+ AuDbg("b%d, %pd\n", bindex, h_dentry);
8315+ h_nfs = !!au_test_nfs(h_dentry->d_sb);
027c5e7a 8316+ spin_lock(&h_dentry->d_lock);
1facf9fc 8317+ h_name = &h_dentry->d_name;
8318+ if (unlikely(do_udba
8319+ && !is_root
523b37e3
AM
8320+ && ((!h_nfs
8321+ && (unhashed != !!d_unhashed(h_dentry)
38d290e6
JR
8322+ || (!tmpfile
8323+ && !au_qstreq(name, h_name))
8324+ ))
523b37e3
AM
8325+ || (h_nfs
8326+ && !(flags & LOOKUP_OPEN)
8327+ && (h_dentry->d_flags
8328+ & DCACHE_NFSFS_RENAMED)))
1facf9fc 8329+ )) {
38d290e6
JR
8330+ int h_unhashed;
8331+
8332+ h_unhashed = d_unhashed(h_dentry);
027c5e7a 8333+ spin_unlock(&h_dentry->d_lock);
38d290e6
JR
8334+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
8335+ unhashed, h_unhashed, dentry, h_dentry);
1facf9fc 8336+ goto err;
8337+ }
027c5e7a 8338+ spin_unlock(&h_dentry->d_lock);
1facf9fc 8339+
b4510431 8340+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
1facf9fc 8341+ if (unlikely(err))
8342+ /* do not goto err, to keep the errno */
8343+ break;
8344+
8345+ /* todo: plink too? */
8346+ if (!do_udba)
8347+ continue;
8348+
8349+ /* UDBA tests */
5527c038 8350+ if (unlikely(!!inode != d_is_positive(h_dentry)))
1facf9fc 8351+ goto err;
8352+
5527c038
JR
8353+ h_inode = NULL;
8354+ if (d_is_positive(h_dentry))
8355+ h_inode = d_inode(h_dentry);
1facf9fc 8356+ h_plus = plus;
8357+ h_mode = mode;
8358+ h_cached_inode = h_inode;
8359+ if (h_inode) {
8360+ h_mode = (h_inode->i_mode & S_IFMT);
8361+ h_plus = (h_inode->i_nlink > 0);
8362+ }
8363+ if (inode && ibs <= bindex && bindex <= ibe)
8364+ h_cached_inode = au_h_iptr(inode, bindex);
8365+
523b37e3 8366+ if (!h_nfs) {
38d290e6 8367+ if (unlikely(plus != h_plus && !tmpfile))
523b37e3
AM
8368+ goto err;
8369+ } else {
8370+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
8371+ && !is_root
8372+ && !IS_ROOT(h_dentry)
8373+ && unhashed != d_unhashed(h_dentry)))
8374+ goto err;
8375+ }
8376+ if (unlikely(mode != h_mode
1facf9fc 8377+ || h_cached_inode != h_inode))
8378+ goto err;
8379+ continue;
8380+
f6b6e03d 8381+err:
1facf9fc 8382+ err = -EINVAL;
8383+ break;
8384+ }
8385+
523b37e3 8386+ AuTraceErr(err);
1facf9fc 8387+ return err;
8388+}
8389+
027c5e7a 8390+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 8391+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
8392+{
8393+ int err;
8394+ struct dentry *parent;
1facf9fc 8395+
027c5e7a 8396+ if (!au_digen_test(dentry, sigen))
1facf9fc 8397+ return 0;
8398+
8399+ parent = dget_parent(dentry);
8400+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8401+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 8402+ au_dbg_verify_gen(parent, sigen);
027c5e7a 8403+ err = au_refresh_dentry(dentry, parent);
1facf9fc 8404+ di_read_unlock(parent, AuLock_IR);
8405+ dput(parent);
027c5e7a 8406+ AuTraceErr(err);
1facf9fc 8407+ return err;
8408+}
8409+
8410+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
8411+{
8412+ int err;
8413+ struct dentry *d, *parent;
1facf9fc 8414+
027c5e7a 8415+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 8416+ return simple_reval_dpath(dentry, sigen);
8417+
8418+ /* slow loop, keep it simple and stupid */
8419+ /* cf: au_cpup_dirs() */
8420+ err = 0;
8421+ parent = NULL;
027c5e7a 8422+ while (au_digen_test(dentry, sigen)) {
1facf9fc 8423+ d = dentry;
8424+ while (1) {
8425+ dput(parent);
8426+ parent = dget_parent(d);
027c5e7a 8427+ if (!au_digen_test(parent, sigen))
1facf9fc 8428+ break;
8429+ d = parent;
8430+ }
8431+
1facf9fc 8432+ if (d != dentry)
027c5e7a 8433+ di_write_lock_child2(d);
1facf9fc 8434+
8435+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
8436+ if (au_digen_test(d, sigen)) {
8437+ /*
8438+ * todo: consolidate with simple_reval_dpath(),
8439+ * do_refresh() and au_reval_for_attr().
8440+ */
1facf9fc 8441+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8442+ err = au_refresh_dentry(d, parent);
1facf9fc 8443+ di_read_unlock(parent, AuLock_IR);
8444+ }
8445+
8446+ if (d != dentry)
8447+ di_write_unlock(d);
8448+ dput(parent);
8449+ if (unlikely(err))
8450+ break;
8451+ }
8452+
8453+ return err;
8454+}
8455+
8456+/*
8457+ * if valid returns 1, otherwise 0.
8458+ */
b4510431 8459+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
1facf9fc 8460+{
8461+ int valid, err;
8462+ unsigned int sigen;
8463+ unsigned char do_udba;
8464+ struct super_block *sb;
8465+ struct inode *inode;
8466+
027c5e7a 8467+ /* todo: support rcu-walk? */
b4510431 8468+ if (flags & LOOKUP_RCU)
027c5e7a
AM
8469+ return -ECHILD;
8470+
8471+ valid = 0;
8472+ if (unlikely(!au_di(dentry)))
8473+ goto out;
8474+
e49829fe 8475+ valid = 1;
1facf9fc 8476+ sb = dentry->d_sb;
e49829fe
JR
8477+ /*
8478+ * todo: very ugly
8479+ * i_mutex of parent dir may be held,
8480+ * but we should not return 'invalid' due to busy.
8481+ */
8482+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
8483+ if (unlikely(err)) {
8484+ valid = err;
027c5e7a 8485+ AuTraceErr(err);
e49829fe
JR
8486+ goto out;
8487+ }
5527c038
JR
8488+ inode = NULL;
8489+ if (d_really_is_positive(dentry))
8490+ inode = d_inode(dentry);
c1595e42
JR
8491+ if (unlikely(inode && is_bad_inode(inode))) {
8492+ err = -EINVAL;
8493+ AuTraceErr(err);
8494+ goto out_dgrade;
8495+ }
027c5e7a
AM
8496+ if (unlikely(au_dbrange_test(dentry))) {
8497+ err = -EINVAL;
8498+ AuTraceErr(err);
8499+ goto out_dgrade;
1facf9fc 8500+ }
027c5e7a
AM
8501+
8502+ sigen = au_sigen(sb);
8503+ if (au_digen_test(dentry, sigen)) {
1facf9fc 8504+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
8505+ err = au_reval_dpath(dentry, sigen);
8506+ if (unlikely(err)) {
8507+ AuTraceErr(err);
1facf9fc 8508+ goto out_dgrade;
027c5e7a 8509+ }
1facf9fc 8510+ }
8511+ di_downgrade_lock(dentry, AuLock_IR);
8512+
1facf9fc 8513+ err = -EINVAL;
c1595e42 8514+ if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
523b37e3 8515+ && inode
38d290e6 8516+ && !(inode->i_state && I_LINKABLE)
79b8bda9
AM
8517+ && (IS_DEADDIR(inode) || !inode->i_nlink)) {
8518+ AuTraceErr(err);
027c5e7a 8519+ goto out_inval;
79b8bda9 8520+ }
027c5e7a 8521+
1facf9fc 8522+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
8523+ if (do_udba && inode) {
8524+ aufs_bindex_t bstart = au_ibstart(inode);
027c5e7a 8525+ struct inode *h_inode;
1facf9fc 8526+
027c5e7a
AM
8527+ if (bstart >= 0) {
8528+ h_inode = au_h_iptr(inode, bstart);
79b8bda9
AM
8529+ if (h_inode && au_test_higen(inode, h_inode)) {
8530+ AuTraceErr(err);
027c5e7a 8531+ goto out_inval;
79b8bda9 8532+ }
027c5e7a 8533+ }
1facf9fc 8534+ }
8535+
b4510431 8536+ err = h_d_revalidate(dentry, inode, flags, do_udba);
027c5e7a 8537+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
1facf9fc 8538+ err = -EIO;
523b37e3
AM
8539+ AuDbg("both of real entry and whiteout found, %p, err %d\n",
8540+ dentry, err);
027c5e7a 8541+ }
e49829fe 8542+ goto out_inval;
1facf9fc 8543+
4f0767ce 8544+out_dgrade:
1facf9fc 8545+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 8546+out_inval:
1facf9fc 8547+ aufs_read_unlock(dentry, AuLock_IR);
8548+ AuTraceErr(err);
8549+ valid = !err;
e49829fe 8550+out:
027c5e7a 8551+ if (!valid) {
523b37e3 8552+ AuDbg("%pd invalid, %d\n", dentry, valid);
027c5e7a
AM
8553+ d_drop(dentry);
8554+ }
1facf9fc 8555+ return valid;
8556+}
8557+
/* ->d_release(): finalize the dinfo of @dentry when it has one */
static void aufs_d_release(struct dentry *dentry)
{
	if (!au_di(dentry))
		return;

	au_di_fin(dentry);
	au_hn_di_reinit(dentry);
}
8565+
4a4d8108 8566+const struct dentry_operations aufs_dop = {
c06a8ce3
AM
8567+ .d_revalidate = aufs_d_revalidate,
8568+ .d_weak_revalidate = aufs_d_revalidate,
8569+ .d_release = aufs_d_release
1facf9fc 8570+};
79b8bda9
AM
8571+
8572+/* aufs_dop without d_revalidate */
8573+const struct dentry_operations aufs_dop_noreval = {
8574+ .d_release = aufs_d_release
8575+};
7f207e10
AM
8576diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
8577--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
79b8bda9
AM
8578+++ linux/fs/aufs/dentry.h 2015-11-11 17:21:46.918863802 +0100
8579@@ -0,0 +1,234 @@
1facf9fc 8580+/*
2000de60 8581+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 8582+ *
8583+ * This program, aufs is free software; you can redistribute it and/or modify
8584+ * it under the terms of the GNU General Public License as published by
8585+ * the Free Software Foundation; either version 2 of the License, or
8586+ * (at your option) any later version.
dece6358
AM
8587+ *
8588+ * This program is distributed in the hope that it will be useful,
8589+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8590+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8591+ * GNU General Public License for more details.
8592+ *
8593+ * You should have received a copy of the GNU General Public License
523b37e3 8594+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8595+ */
8596+
8597+/*
8598+ * lookup and dentry operations
8599+ */
8600+
8601+#ifndef __AUFS_DENTRY_H__
8602+#define __AUFS_DENTRY_H__
8603+
8604+#ifdef __KERNEL__
8605+
dece6358 8606+#include <linux/dcache.h>
1facf9fc 8607+#include "rwsem.h"
8608+
1facf9fc 8609+struct au_hdentry {
8610+ struct dentry *hd_dentry;
027c5e7a 8611+ aufs_bindex_t hd_id;
1facf9fc 8612+};
8613+
8614+struct au_dinfo {
8615+ atomic_t di_generation;
8616+
dece6358 8617+ struct au_rwsem di_rwsem;
1facf9fc 8618+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
38d290e6 8619+ unsigned char di_tmpfile; /* to allow the different name */
1facf9fc 8620+ struct au_hdentry *di_hdentry;
4a4d8108 8621+} ____cacheline_aligned_in_smp;
1facf9fc 8622+
8623+/* ---------------------------------------------------------------------- */
8624+
8625+/* dentry.c */
79b8bda9 8626+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
1facf9fc 8627+struct au_branch;
076b876e 8628+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
1facf9fc 8629+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8630+ struct dentry *h_parent, struct au_branch *br);
8631+
537831f9 8632+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type);
86dc4139 8633+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
027c5e7a 8634+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 8635+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
79b8bda9 8636+void au_refresh_dop(struct dentry *dentry, int force_reval);
1facf9fc 8637+
8638+/* dinfo.c */
4a4d8108 8639+void au_di_init_once(void *_di);
027c5e7a
AM
8640+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
8641+void au_di_free(struct au_dinfo *dinfo);
8642+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
8643+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
8644+int au_di_init(struct dentry *dentry);
8645+void au_di_fin(struct dentry *dentry);
1facf9fc 8646+int au_di_realloc(struct au_dinfo *dinfo, int nbr);
8647+
8648+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
8649+void di_read_unlock(struct dentry *d, int flags);
8650+void di_downgrade_lock(struct dentry *d, int flags);
8651+void di_write_lock(struct dentry *d, unsigned int lsc);
8652+void di_write_unlock(struct dentry *d);
8653+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
8654+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
8655+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
8656+
8657+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 8658+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 8659+aufs_bindex_t au_dbtail(struct dentry *dentry);
8660+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
8661+
8662+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
8663+ struct dentry *h_dentry);
027c5e7a
AM
8664+int au_digen_test(struct dentry *dentry, unsigned int sigen);
8665+int au_dbrange_test(struct dentry *dentry);
1facf9fc 8666+void au_update_digen(struct dentry *dentry);
8667+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
8668+void au_update_dbstart(struct dentry *dentry);
8669+void au_update_dbend(struct dentry *dentry);
8670+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
8671+
8672+/* ---------------------------------------------------------------------- */
8673+
8674+static inline struct au_dinfo *au_di(struct dentry *dentry)
8675+{
8676+ return dentry->d_fsdata;
8677+}
8678+
8679+/* ---------------------------------------------------------------------- */
8680+
/*
 * lock subclass for dinfo.
 * passed as @lsc to di_read_lock(), di_write_lock() and au_di_alloc().
 */
enum {
	AuLsc_DI_CHILD,		/* child first */
	AuLsc_DI_CHILD2,	/* rename(2), link(2), and cpup at hnotify */
	AuLsc_DI_CHILD3,	/* copyup dirs */
	AuLsc_DI_PARENT,
	AuLsc_DI_PARENT2,
	AuLsc_DI_PARENT3,
	AuLsc_DI_TMP		/* temp for replacing dinfo */
};
8691+
8692+/*
8693+ * di_read_lock_child, di_write_lock_child,
8694+ * di_read_lock_child2, di_write_lock_child2,
8695+ * di_read_lock_child3, di_write_lock_child3,
8696+ * di_read_lock_parent, di_write_lock_parent,
8697+ * di_read_lock_parent2, di_write_lock_parent2,
8698+ * di_read_lock_parent3, di_write_lock_parent3,
8699+ */
8700+#define AuReadLockFunc(name, lsc) \
8701+static inline void di_read_lock_##name(struct dentry *d, int flags) \
8702+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
8703+
8704+#define AuWriteLockFunc(name, lsc) \
8705+static inline void di_write_lock_##name(struct dentry *d) \
8706+{ di_write_lock(d, AuLsc_DI_##lsc); }
8707+
8708+#define AuRWLockFuncs(name, lsc) \
8709+ AuReadLockFunc(name, lsc) \
8710+ AuWriteLockFunc(name, lsc)
8711+
8712+AuRWLockFuncs(child, CHILD);
8713+AuRWLockFuncs(child2, CHILD2);
8714+AuRWLockFuncs(child3, CHILD3);
8715+AuRWLockFuncs(parent, PARENT);
8716+AuRWLockFuncs(parent2, PARENT2);
8717+AuRWLockFuncs(parent3, PARENT3);
8718+
8719+#undef AuReadLockFunc
8720+#undef AuWriteLockFunc
8721+#undef AuRWLockFuncs
8722+
/* apply the AuRwMustXxx() checks to the di_rwsem of the dentry @d */
#define DiMustNoWaiters(d)	AuRwMustNoWaiters(&au_di(d)->di_rwsem)
#define DiMustAnyLock(d)	AuRwMustAnyLock(&au_di(d)->di_rwsem)
#define DiMustWriteLock(d)	AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 8726+
8727+/* ---------------------------------------------------------------------- */
8728+
8729+/* todo: memory barrier? */
8730+static inline unsigned int au_digen(struct dentry *d)
8731+{
8732+ return atomic_read(&au_di(d)->di_generation);
8733+}
8734+
8735+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
8736+{
8737+ hdentry->hd_dentry = NULL;
8738+}
8739+
8740+static inline void au_hdput(struct au_hdentry *hd)
8741+{
4a4d8108
AM
8742+ if (hd)
8743+ dput(hd->hd_dentry);
1facf9fc 8744+}
8745+
8746+static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
8747+{
1308ab2a 8748+ DiMustAnyLock(dentry);
1facf9fc 8749+ return au_di(dentry)->di_bstart;
8750+}
8751+
8752+static inline aufs_bindex_t au_dbend(struct dentry *dentry)
8753+{
1308ab2a 8754+ DiMustAnyLock(dentry);
1facf9fc 8755+ return au_di(dentry)->di_bend;
8756+}
8757+
8758+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
8759+{
1308ab2a 8760+ DiMustAnyLock(dentry);
1facf9fc 8761+ return au_di(dentry)->di_bwh;
8762+}
8763+
8764+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
8765+{
1308ab2a 8766+ DiMustAnyLock(dentry);
1facf9fc 8767+ return au_di(dentry)->di_bdiropq;
8768+}
8769+
8770+/* todo: hard/soft set? */
8771+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
8772+{
1308ab2a 8773+ DiMustWriteLock(dentry);
1facf9fc 8774+ au_di(dentry)->di_bstart = bindex;
8775+}
8776+
8777+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
8778+{
1308ab2a 8779+ DiMustWriteLock(dentry);
1facf9fc 8780+ au_di(dentry)->di_bend = bindex;
8781+}
8782+
8783+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
8784+{
1308ab2a 8785+ DiMustWriteLock(dentry);
1facf9fc 8786+ /* dbwh can be outside of bstart - bend range */
8787+ au_di(dentry)->di_bwh = bindex;
8788+}
8789+
8790+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
8791+{
1308ab2a 8792+ DiMustWriteLock(dentry);
1facf9fc 8793+ au_di(dentry)->di_bdiropq = bindex;
8794+}
8795+
8796+/* ---------------------------------------------------------------------- */
8797+
4a4d8108 8798+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 8799+static inline void au_digen_dec(struct dentry *d)
8800+{
e49829fe 8801+ atomic_dec(&au_di(d)->di_generation);
1facf9fc 8802+}
8803+
4a4d8108 8804+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 8805+{
8806+ dentry->d_fsdata = NULL;
8807+}
8808+#else
4a4d8108
AM
8809+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
8810+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 8811+
8812+#endif /* __KERNEL__ */
8813+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
8814diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
8815--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 8816+++ linux/fs/aufs/dinfo.c 2015-09-24 10:47:58.251386326 +0200
5527c038 8817@@ -0,0 +1,550 @@
1facf9fc 8818+/*
2000de60 8819+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 8820+ *
8821+ * This program, aufs is free software; you can redistribute it and/or modify
8822+ * it under the terms of the GNU General Public License as published by
8823+ * the Free Software Foundation; either version 2 of the License, or
8824+ * (at your option) any later version.
dece6358
AM
8825+ *
8826+ * This program is distributed in the hope that it will be useful,
8827+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8828+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8829+ * GNU General Public License for more details.
8830+ *
8831+ * You should have received a copy of the GNU General Public License
523b37e3 8832+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8833+ */
8834+
8835+/*
8836+ * dentry private data
8837+ */
8838+
8839+#include "aufs.h"
8840+
e49829fe 8841+void au_di_init_once(void *_dinfo)
4a4d8108 8842+{
e49829fe
JR
8843+ struct au_dinfo *dinfo = _dinfo;
8844+ static struct lock_class_key aufs_di;
4a4d8108 8845+
e49829fe
JR
8846+ au_rw_init(&dinfo->di_rwsem);
8847+ au_rw_class(&dinfo->di_rwsem, &aufs_di);
4a4d8108
AM
8848+}
8849+
027c5e7a 8850+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 8851+{
8852+ struct au_dinfo *dinfo;
027c5e7a 8853+ int nbr, i;
1facf9fc 8854+
8855+ dinfo = au_cache_alloc_dinfo();
8856+ if (unlikely(!dinfo))
8857+ goto out;
8858+
1facf9fc 8859+ nbr = au_sbend(sb) + 1;
8860+ if (nbr <= 0)
8861+ nbr = 1;
8862+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
8863+ if (dinfo->di_hdentry) {
8864+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
8865+ dinfo->di_bstart = -1;
8866+ dinfo->di_bend = -1;
8867+ dinfo->di_bwh = -1;
8868+ dinfo->di_bdiropq = -1;
38d290e6 8869+ dinfo->di_tmpfile = 0;
027c5e7a
AM
8870+ for (i = 0; i < nbr; i++)
8871+ dinfo->di_hdentry[i].hd_id = -1;
8872+ goto out;
8873+ }
1facf9fc 8874+
1facf9fc 8875+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
8876+ dinfo = NULL;
8877+
4f0767ce 8878+out:
027c5e7a 8879+ return dinfo;
1facf9fc 8880+}
8881+
027c5e7a 8882+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 8883+{
4a4d8108
AM
8884+ struct au_hdentry *p;
8885+ aufs_bindex_t bend, bindex;
8886+
8887+ /* dentry may not be revalidated */
027c5e7a 8888+ bindex = dinfo->di_bstart;
4a4d8108 8889+ if (bindex >= 0) {
027c5e7a
AM
8890+ bend = dinfo->di_bend;
8891+ p = dinfo->di_hdentry + bindex;
4a4d8108
AM
8892+ while (bindex++ <= bend)
8893+ au_hdput(p++);
8894+ }
027c5e7a
AM
8895+ kfree(dinfo->di_hdentry);
8896+ au_cache_free_dinfo(dinfo);
8897+}
8898+
8899+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
8900+{
8901+ struct au_hdentry *p;
8902+ aufs_bindex_t bi;
8903+
8904+ AuRwMustWriteLock(&a->di_rwsem);
8905+ AuRwMustWriteLock(&b->di_rwsem);
8906+
8907+#define DiSwap(v, name) \
8908+ do { \
8909+ v = a->di_##name; \
8910+ a->di_##name = b->di_##name; \
8911+ b->di_##name = v; \
8912+ } while (0)
8913+
8914+ DiSwap(p, hdentry);
8915+ DiSwap(bi, bstart);
8916+ DiSwap(bi, bend);
8917+ DiSwap(bi, bwh);
8918+ DiSwap(bi, bdiropq);
8919+ /* smp_mb(); */
8920+
8921+#undef DiSwap
8922+}
8923+
8924+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
8925+{
8926+ AuRwMustWriteLock(&dst->di_rwsem);
8927+ AuRwMustWriteLock(&src->di_rwsem);
8928+
8929+ dst->di_bstart = src->di_bstart;
8930+ dst->di_bend = src->di_bend;
8931+ dst->di_bwh = src->di_bwh;
8932+ dst->di_bdiropq = src->di_bdiropq;
8933+ /* smp_mb(); */
8934+}
8935+
8936+int au_di_init(struct dentry *dentry)
8937+{
8938+ int err;
8939+ struct super_block *sb;
8940+ struct au_dinfo *dinfo;
8941+
8942+ err = 0;
8943+ sb = dentry->d_sb;
8944+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
8945+ if (dinfo) {
8946+ atomic_set(&dinfo->di_generation, au_sigen(sb));
8947+ /* smp_mb(); */ /* atomic_set */
8948+ dentry->d_fsdata = dinfo;
8949+ } else
8950+ err = -ENOMEM;
8951+
8952+ return err;
8953+}
8954+
8955+void au_di_fin(struct dentry *dentry)
8956+{
8957+ struct au_dinfo *dinfo;
8958+
8959+ dinfo = au_di(dentry);
8960+ AuRwDestroy(&dinfo->di_rwsem);
8961+ au_di_free(dinfo);
4a4d8108
AM
8962+}
8963+
1facf9fc 8964+int au_di_realloc(struct au_dinfo *dinfo, int nbr)
8965+{
8966+ int err, sz;
8967+ struct au_hdentry *hdp;
8968+
1308ab2a 8969+ AuRwMustWriteLock(&dinfo->di_rwsem);
8970+
1facf9fc 8971+ err = -ENOMEM;
8972+ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
8973+ if (!sz)
8974+ sz = sizeof(*hdp);
8975+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
8976+ if (hdp) {
8977+ dinfo->di_hdentry = hdp;
8978+ err = 0;
8979+ }
8980+
8981+ return err;
8982+}
8983+
8984+/* ---------------------------------------------------------------------- */
8985+
8986+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
8987+{
8988+ switch (lsc) {
8989+ case AuLsc_DI_CHILD:
8990+ ii_write_lock_child(inode);
8991+ break;
8992+ case AuLsc_DI_CHILD2:
8993+ ii_write_lock_child2(inode);
8994+ break;
8995+ case AuLsc_DI_CHILD3:
8996+ ii_write_lock_child3(inode);
8997+ break;
8998+ case AuLsc_DI_PARENT:
8999+ ii_write_lock_parent(inode);
9000+ break;
9001+ case AuLsc_DI_PARENT2:
9002+ ii_write_lock_parent2(inode);
9003+ break;
9004+ case AuLsc_DI_PARENT3:
9005+ ii_write_lock_parent3(inode);
9006+ break;
9007+ default:
9008+ BUG();
9009+ }
9010+}
9011+
9012+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
9013+{
9014+ switch (lsc) {
9015+ case AuLsc_DI_CHILD:
9016+ ii_read_lock_child(inode);
9017+ break;
9018+ case AuLsc_DI_CHILD2:
9019+ ii_read_lock_child2(inode);
9020+ break;
9021+ case AuLsc_DI_CHILD3:
9022+ ii_read_lock_child3(inode);
9023+ break;
9024+ case AuLsc_DI_PARENT:
9025+ ii_read_lock_parent(inode);
9026+ break;
9027+ case AuLsc_DI_PARENT2:
9028+ ii_read_lock_parent2(inode);
9029+ break;
9030+ case AuLsc_DI_PARENT3:
9031+ ii_read_lock_parent3(inode);
9032+ break;
9033+ default:
9034+ BUG();
9035+ }
9036+}
9037+
9038+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
9039+{
5527c038
JR
9040+ struct inode *inode;
9041+
dece6358 9042+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9043+ if (d_really_is_positive(d)) {
9044+ inode = d_inode(d);
1facf9fc 9045+ if (au_ftest_lock(flags, IW))
5527c038 9046+ do_ii_write_lock(inode, lsc);
1facf9fc 9047+ else if (au_ftest_lock(flags, IR))
5527c038 9048+ do_ii_read_lock(inode, lsc);
1facf9fc 9049+ }
9050+}
9051+
9052+void di_read_unlock(struct dentry *d, int flags)
9053+{
5527c038
JR
9054+ struct inode *inode;
9055+
9056+ if (d_really_is_positive(d)) {
9057+ inode = d_inode(d);
027c5e7a
AM
9058+ if (au_ftest_lock(flags, IW)) {
9059+ au_dbg_verify_dinode(d);
5527c038 9060+ ii_write_unlock(inode);
027c5e7a
AM
9061+ } else if (au_ftest_lock(flags, IR)) {
9062+ au_dbg_verify_dinode(d);
5527c038 9063+ ii_read_unlock(inode);
027c5e7a 9064+ }
1facf9fc 9065+ }
dece6358 9066+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 9067+}
9068+
9069+void di_downgrade_lock(struct dentry *d, int flags)
9070+{
5527c038
JR
9071+ if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
9072+ ii_downgrade_lock(d_inode(d));
dece6358 9073+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 9074+}
9075+
9076+void di_write_lock(struct dentry *d, unsigned int lsc)
9077+{
dece6358 9078+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9079+ if (d_really_is_positive(d))
9080+ do_ii_write_lock(d_inode(d), lsc);
1facf9fc 9081+}
9082+
9083+void di_write_unlock(struct dentry *d)
9084+{
027c5e7a 9085+ au_dbg_verify_dinode(d);
5527c038
JR
9086+ if (d_really_is_positive(d))
9087+ ii_write_unlock(d_inode(d));
dece6358 9088+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 9089+}
9090+
9091+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
9092+{
9093+ AuDebugOn(d1 == d2
5527c038 9094+ || d_inode(d1) == d_inode(d2)
1facf9fc 9095+ || d1->d_sb != d2->d_sb);
9096+
9097+ if (isdir && au_test_subdir(d1, d2)) {
9098+ di_write_lock_child(d1);
9099+ di_write_lock_child2(d2);
9100+ } else {
9101+ /* there should be no races */
9102+ di_write_lock_child(d2);
9103+ di_write_lock_child2(d1);
9104+ }
9105+}
9106+
9107+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
9108+{
9109+ AuDebugOn(d1 == d2
5527c038 9110+ || d_inode(d1) == d_inode(d2)
1facf9fc 9111+ || d1->d_sb != d2->d_sb);
9112+
9113+ if (isdir && au_test_subdir(d1, d2)) {
9114+ di_write_lock_parent(d1);
9115+ di_write_lock_parent2(d2);
9116+ } else {
9117+ /* there should be no races */
9118+ di_write_lock_parent(d2);
9119+ di_write_lock_parent2(d1);
9120+ }
9121+}
9122+
9123+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
9124+{
9125+ di_write_unlock(d1);
5527c038 9126+ if (d_inode(d1) == d_inode(d2))
dece6358 9127+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 9128+ else
9129+ di_write_unlock(d2);
9130+}
9131+
9132+/* ---------------------------------------------------------------------- */
9133+
9134+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
9135+{
9136+ struct dentry *d;
9137+
1308ab2a 9138+ DiMustAnyLock(dentry);
9139+
1facf9fc 9140+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
9141+ return NULL;
9142+ AuDebugOn(bindex < 0);
9143+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
c1595e42 9144+ AuDebugOn(d && au_dcount(d) <= 0);
1facf9fc 9145+ return d;
9146+}
9147+
2cbb1c4b
JR
9148+/*
9149+ * extended version of au_h_dptr().
38d290e6
JR
9150+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
9151+ * error.
2cbb1c4b
JR
9152+ */
9153+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
9154+{
9155+ struct dentry *h_dentry;
9156+ struct inode *inode, *h_inode;
9157+
5527c038 9158+ AuDebugOn(d_really_is_negative(dentry));
2cbb1c4b
JR
9159+
9160+ h_dentry = NULL;
9161+ if (au_dbstart(dentry) <= bindex
9162+ && bindex <= au_dbend(dentry))
9163+ h_dentry = au_h_dptr(dentry, bindex);
38d290e6 9164+ if (h_dentry && !au_d_linkable(h_dentry)) {
2cbb1c4b
JR
9165+ dget(h_dentry);
9166+ goto out; /* success */
9167+ }
9168+
5527c038 9169+ inode = d_inode(dentry);
2cbb1c4b
JR
9170+ AuDebugOn(bindex < au_ibstart(inode));
9171+ AuDebugOn(au_ibend(inode) < bindex);
9172+ h_inode = au_h_iptr(inode, bindex);
9173+ h_dentry = d_find_alias(h_inode);
9174+ if (h_dentry) {
9175+ if (!IS_ERR(h_dentry)) {
38d290e6 9176+ if (!au_d_linkable(h_dentry))
2cbb1c4b
JR
9177+ goto out; /* success */
9178+ dput(h_dentry);
9179+ } else
9180+ goto out;
9181+ }
9182+
9183+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
9184+ h_dentry = au_plink_lkup(inode, bindex);
9185+ AuDebugOn(!h_dentry);
9186+ if (!IS_ERR(h_dentry)) {
9187+ if (!au_d_hashed_positive(h_dentry))
9188+ goto out; /* success */
9189+ dput(h_dentry);
9190+ h_dentry = NULL;
9191+ }
9192+ }
9193+
9194+out:
9195+ AuDbgDentry(h_dentry);
9196+ return h_dentry;
9197+}
9198+
1facf9fc 9199+aufs_bindex_t au_dbtail(struct dentry *dentry)
9200+{
9201+ aufs_bindex_t bend, bwh;
9202+
9203+ bend = au_dbend(dentry);
9204+ if (0 <= bend) {
9205+ bwh = au_dbwh(dentry);
9206+ if (!bwh)
9207+ return bwh;
9208+ if (0 < bwh && bwh < bend)
9209+ return bwh - 1;
9210+ }
9211+ return bend;
9212+}
9213+
9214+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
9215+{
9216+ aufs_bindex_t bend, bopq;
9217+
9218+ bend = au_dbtail(dentry);
9219+ if (0 <= bend) {
9220+ bopq = au_dbdiropq(dentry);
9221+ if (0 <= bopq && bopq < bend)
9222+ bend = bopq;
9223+ }
9224+ return bend;
9225+}
9226+
9227+/* ---------------------------------------------------------------------- */
9228+
9229+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9230+ struct dentry *h_dentry)
9231+{
9232+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
027c5e7a 9233+ struct au_branch *br;
1facf9fc 9234+
1308ab2a 9235+ DiMustWriteLock(dentry);
9236+
4a4d8108 9237+ au_hdput(hd);
1facf9fc 9238+ hd->hd_dentry = h_dentry;
027c5e7a
AM
9239+ if (h_dentry) {
9240+ br = au_sbr(dentry->d_sb, bindex);
9241+ hd->hd_id = br->br_id;
9242+ }
9243+}
9244+
9245+int au_dbrange_test(struct dentry *dentry)
9246+{
9247+ int err;
9248+ aufs_bindex_t bstart, bend;
9249+
9250+ err = 0;
9251+ bstart = au_dbstart(dentry);
9252+ bend = au_dbend(dentry);
9253+ if (bstart >= 0)
9254+ AuDebugOn(bend < 0 && bstart > bend);
9255+ else {
9256+ err = -EIO;
9257+ AuDebugOn(bend >= 0);
9258+ }
9259+
9260+ return err;
9261+}
9262+
9263+int au_digen_test(struct dentry *dentry, unsigned int sigen)
9264+{
9265+ int err;
9266+
9267+ err = 0;
9268+ if (unlikely(au_digen(dentry) != sigen
5527c038 9269+ || au_iigen_test(d_inode(dentry), sigen)))
027c5e7a
AM
9270+ err = -EIO;
9271+
9272+ return err;
1facf9fc 9273+}
9274+
9275+void au_update_digen(struct dentry *dentry)
9276+{
9277+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
9278+ /* smp_mb(); */ /* atomic_set */
9279+}
9280+
9281+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
9282+{
9283+ struct au_dinfo *dinfo;
9284+ struct dentry *h_d;
4a4d8108 9285+ struct au_hdentry *hdp;
1facf9fc 9286+
1308ab2a 9287+ DiMustWriteLock(dentry);
9288+
1facf9fc 9289+ dinfo = au_di(dentry);
9290+ if (!dinfo || dinfo->di_bstart < 0)
9291+ return;
9292+
4a4d8108 9293+ hdp = dinfo->di_hdentry;
1facf9fc 9294+ if (do_put_zero) {
9295+ aufs_bindex_t bindex, bend;
9296+
9297+ bend = dinfo->di_bend;
9298+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
4a4d8108 9299+ h_d = hdp[0 + bindex].hd_dentry;
5527c038 9300+ if (h_d && d_is_negative(h_d))
1facf9fc 9301+ au_set_h_dptr(dentry, bindex, NULL);
9302+ }
9303+ }
9304+
9305+ dinfo->di_bstart = -1;
9306+ while (++dinfo->di_bstart <= dinfo->di_bend)
4a4d8108 9307+ if (hdp[0 + dinfo->di_bstart].hd_dentry)
1facf9fc 9308+ break;
9309+ if (dinfo->di_bstart > dinfo->di_bend) {
9310+ dinfo->di_bstart = -1;
9311+ dinfo->di_bend = -1;
9312+ return;
9313+ }
9314+
9315+ dinfo->di_bend++;
9316+ while (0 <= --dinfo->di_bend)
4a4d8108 9317+ if (hdp[0 + dinfo->di_bend].hd_dentry)
1facf9fc 9318+ break;
9319+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
9320+}
9321+
9322+void au_update_dbstart(struct dentry *dentry)
9323+{
9324+ aufs_bindex_t bindex, bend;
9325+ struct dentry *h_dentry;
9326+
9327+ bend = au_dbend(dentry);
9328+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
9329+ h_dentry = au_h_dptr(dentry, bindex);
9330+ if (!h_dentry)
9331+ continue;
5527c038 9332+ if (d_is_positive(h_dentry)) {
1facf9fc 9333+ au_set_dbstart(dentry, bindex);
9334+ return;
9335+ }
9336+ au_set_h_dptr(dentry, bindex, NULL);
9337+ }
9338+}
9339+
9340+void au_update_dbend(struct dentry *dentry)
9341+{
9342+ aufs_bindex_t bindex, bstart;
9343+ struct dentry *h_dentry;
9344+
9345+ bstart = au_dbstart(dentry);
7f207e10 9346+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
1facf9fc 9347+ h_dentry = au_h_dptr(dentry, bindex);
9348+ if (!h_dentry)
9349+ continue;
5527c038 9350+ if (d_is_positive(h_dentry)) {
1facf9fc 9351+ au_set_dbend(dentry, bindex);
9352+ return;
9353+ }
9354+ au_set_h_dptr(dentry, bindex, NULL);
9355+ }
9356+}
9357+
9358+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
9359+{
9360+ aufs_bindex_t bindex, bend;
9361+
9362+ bend = au_dbend(dentry);
9363+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
9364+ if (au_h_dptr(dentry, bindex) == h_dentry)
9365+ return bindex;
9366+ return -1;
9367+}
7f207e10
AM
9368diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
9369--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
b95c5147 9370+++ linux/fs/aufs/dir.c 2015-12-10 17:59:16.836166410 +0100
5527c038 9371@@ -0,0 +1,753 @@
1facf9fc 9372+/*
2000de60 9373+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 9374+ *
9375+ * This program, aufs is free software; you can redistribute it and/or modify
9376+ * it under the terms of the GNU General Public License as published by
9377+ * the Free Software Foundation; either version 2 of the License, or
9378+ * (at your option) any later version.
dece6358
AM
9379+ *
9380+ * This program is distributed in the hope that it will be useful,
9381+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9382+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9383+ * GNU General Public License for more details.
9384+ *
9385+ * You should have received a copy of the GNU General Public License
523b37e3 9386+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 9387+ */
9388+
9389+/*
9390+ * directory operations
9391+ */
9392+
9393+#include <linux/fs_stack.h>
9394+#include "aufs.h"
9395+
9396+void au_add_nlink(struct inode *dir, struct inode *h_dir)
9397+{
9dbd164d
AM
9398+ unsigned int nlink;
9399+
1facf9fc 9400+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9401+
9dbd164d
AM
9402+ nlink = dir->i_nlink;
9403+ nlink += h_dir->i_nlink - 2;
1facf9fc 9404+ if (h_dir->i_nlink < 2)
9dbd164d 9405+ nlink += 2;
f6b6e03d 9406+ smp_mb(); /* for i_nlink */
7eafdf33 9407+ /* 0 can happen in revaliding */
92d182d2 9408+ set_nlink(dir, nlink);
1facf9fc 9409+}
9410+
9411+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
9412+{
9dbd164d
AM
9413+ unsigned int nlink;
9414+
1facf9fc 9415+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9416+
9dbd164d
AM
9417+ nlink = dir->i_nlink;
9418+ nlink -= h_dir->i_nlink - 2;
1facf9fc 9419+ if (h_dir->i_nlink < 2)
9dbd164d 9420+ nlink -= 2;
f6b6e03d 9421+ smp_mb(); /* for i_nlink */
92d182d2 9422+ /* nlink == 0 means the branch-fs is broken */
9dbd164d 9423+ set_nlink(dir, nlink);
1facf9fc 9424+}
9425+
1308ab2a 9426+loff_t au_dir_size(struct file *file, struct dentry *dentry)
9427+{
9428+ loff_t sz;
9429+ aufs_bindex_t bindex, bend;
9430+ struct file *h_file;
9431+ struct dentry *h_dentry;
9432+
9433+ sz = 0;
9434+ if (file) {
2000de60 9435+ AuDebugOn(!d_is_dir(file->f_path.dentry));
1308ab2a 9436+
4a4d8108 9437+ bend = au_fbend_dir(file);
1308ab2a 9438+ for (bindex = au_fbstart(file);
9439+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
9440+ bindex++) {
4a4d8108 9441+ h_file = au_hf_dir(file, bindex);
c06a8ce3
AM
9442+ if (h_file && file_inode(h_file))
9443+ sz += vfsub_f_size_read(h_file);
1308ab2a 9444+ }
9445+ } else {
9446+ AuDebugOn(!dentry);
2000de60 9447+ AuDebugOn(!d_is_dir(dentry));
1308ab2a 9448+
9449+ bend = au_dbtaildir(dentry);
9450+ for (bindex = au_dbstart(dentry);
9451+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
9452+ bindex++) {
9453+ h_dentry = au_h_dptr(dentry, bindex);
5527c038
JR
9454+ if (h_dentry && d_is_positive(h_dentry))
9455+ sz += i_size_read(d_inode(h_dentry));
1308ab2a 9456+ }
9457+ }
9458+ if (sz < KMALLOC_MAX_SIZE)
9459+ sz = roundup_pow_of_two(sz);
9460+ if (sz > KMALLOC_MAX_SIZE)
9461+ sz = KMALLOC_MAX_SIZE;
9462+ else if (sz < NAME_MAX) {
9463+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
9464+ sz = AUFS_RDBLK_DEF;
9465+ }
9466+ return sz;
9467+}
9468+
b912730e
AM
9469+struct au_dir_ts_arg {
9470+ struct dentry *dentry;
9471+ aufs_bindex_t brid;
9472+};
9473+
9474+static void au_do_dir_ts(void *arg)
9475+{
9476+ struct au_dir_ts_arg *a = arg;
9477+ struct au_dtime dt;
9478+ struct path h_path;
9479+ struct inode *dir, *h_dir;
9480+ struct super_block *sb;
9481+ struct au_branch *br;
9482+ struct au_hinode *hdir;
9483+ int err;
9484+ aufs_bindex_t bstart, bindex;
9485+
9486+ sb = a->dentry->d_sb;
5527c038 9487+ if (d_really_is_negative(a->dentry))
b912730e 9488+ goto out;
5527c038 9489+ /* no dir->i_mutex lock */
b95c5147
AM
9490+ aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
9491+
5527c038 9492+ dir = d_inode(a->dentry);
b912730e
AM
9493+ bstart = au_ibstart(dir);
9494+ bindex = au_br_index(sb, a->brid);
9495+ if (bindex < bstart)
9496+ goto out_unlock;
9497+
9498+ br = au_sbr(sb, bindex);
9499+ h_path.dentry = au_h_dptr(a->dentry, bindex);
9500+ if (!h_path.dentry)
9501+ goto out_unlock;
9502+ h_path.mnt = au_br_mnt(br);
9503+ au_dtime_store(&dt, a->dentry, &h_path);
9504+
9505+ br = au_sbr(sb, bstart);
9506+ if (!au_br_writable(br->br_perm))
9507+ goto out_unlock;
9508+ h_path.dentry = au_h_dptr(a->dentry, bstart);
9509+ h_path.mnt = au_br_mnt(br);
9510+ err = vfsub_mnt_want_write(h_path.mnt);
9511+ if (err)
9512+ goto out_unlock;
9513+ hdir = au_hi(dir, bstart);
9514+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
9515+ h_dir = au_h_iptr(dir, bstart);
9516+ if (h_dir->i_nlink
9517+ && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
9518+ dt.dt_h_path = h_path;
9519+ au_dtime_revert(&dt);
9520+ }
9521+ au_hn_imtx_unlock(hdir);
9522+ vfsub_mnt_drop_write(h_path.mnt);
9523+ au_cpup_attr_timesizes(dir);
9524+
9525+out_unlock:
9526+ aufs_read_unlock(a->dentry, AuLock_DW);
9527+out:
9528+ dput(a->dentry);
9529+ au_nwt_done(&au_sbi(sb)->si_nowait);
9530+ kfree(arg);
9531+}
9532+
9533+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
9534+{
9535+ int perm, wkq_err;
9536+ aufs_bindex_t bstart;
9537+ struct au_dir_ts_arg *arg;
9538+ struct dentry *dentry;
9539+ struct super_block *sb;
9540+
9541+ IMustLock(dir);
9542+
9543+ dentry = d_find_any_alias(dir);
9544+ AuDebugOn(!dentry);
9545+ sb = dentry->d_sb;
9546+ bstart = au_ibstart(dir);
9547+ if (bstart == bindex) {
9548+ au_cpup_attr_timesizes(dir);
9549+ goto out;
9550+ }
9551+
9552+ perm = au_sbr_perm(sb, bstart);
9553+ if (!au_br_writable(perm))
9554+ goto out;
9555+
9556+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
9557+ if (!arg)
9558+ goto out;
9559+
9560+ arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
9561+ arg->brid = au_sbr_id(sb, bindex);
9562+ wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
9563+ if (unlikely(wkq_err)) {
9564+ pr_err("wkq %d\n", wkq_err);
9565+ dput(dentry);
9566+ kfree(arg);
9567+ }
9568+
9569+out:
9570+ dput(dentry);
9571+}
9572+
1facf9fc 9573+/* ---------------------------------------------------------------------- */
9574+
9575+static int reopen_dir(struct file *file)
9576+{
9577+ int err;
9578+ unsigned int flags;
9579+ aufs_bindex_t bindex, btail, bstart;
9580+ struct dentry *dentry, *h_dentry;
9581+ struct file *h_file;
9582+
9583+ /* open all lower dirs */
2000de60 9584+ dentry = file->f_path.dentry;
1facf9fc 9585+ bstart = au_dbstart(dentry);
9586+ for (bindex = au_fbstart(file); bindex < bstart; bindex++)
9587+ au_set_h_fptr(file, bindex, NULL);
9588+ au_set_fbstart(file, bstart);
9589+
9590+ btail = au_dbtaildir(dentry);
4a4d8108 9591+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
1facf9fc 9592+ au_set_h_fptr(file, bindex, NULL);
4a4d8108 9593+ au_set_fbend_dir(file, btail);
1facf9fc 9594+
4a4d8108 9595+ flags = vfsub_file_flags(file);
1facf9fc 9596+ for (bindex = bstart; bindex <= btail; bindex++) {
9597+ h_dentry = au_h_dptr(dentry, bindex);
9598+ if (!h_dentry)
9599+ continue;
4a4d8108 9600+ h_file = au_hf_dir(file, bindex);
1facf9fc 9601+ if (h_file)
9602+ continue;
9603+
392086de 9604+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9605+ err = PTR_ERR(h_file);
9606+ if (IS_ERR(h_file))
9607+ goto out; /* close all? */
9608+ au_set_h_fptr(file, bindex, h_file);
9609+ }
9610+ au_update_figen(file);
9611+ /* todo: necessary? */
9612+ /* file->f_ra = h_file->f_ra; */
9613+ err = 0;
9614+
4f0767ce 9615+out:
1facf9fc 9616+ return err;
9617+}
9618+
b912730e 9619+static int do_open_dir(struct file *file, int flags, struct file *h_file)
1facf9fc 9620+{
9621+ int err;
9622+ aufs_bindex_t bindex, btail;
9623+ struct dentry *dentry, *h_dentry;
1facf9fc 9624+
1308ab2a 9625+ FiMustWriteLock(file);
b912730e 9626+ AuDebugOn(h_file);
1308ab2a 9627+
523b37e3 9628+ err = 0;
2000de60 9629+ dentry = file->f_path.dentry;
5527c038 9630+ file->f_version = d_inode(dentry)->i_version;
1facf9fc 9631+ bindex = au_dbstart(dentry);
9632+ au_set_fbstart(file, bindex);
9633+ btail = au_dbtaildir(dentry);
4a4d8108 9634+ au_set_fbend_dir(file, btail);
1facf9fc 9635+ for (; !err && bindex <= btail; bindex++) {
9636+ h_dentry = au_h_dptr(dentry, bindex);
9637+ if (!h_dentry)
9638+ continue;
9639+
392086de 9640+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9641+ if (IS_ERR(h_file)) {
9642+ err = PTR_ERR(h_file);
9643+ break;
9644+ }
9645+ au_set_h_fptr(file, bindex, h_file);
9646+ }
9647+ au_update_figen(file);
9648+ /* todo: necessary? */
9649+ /* file->f_ra = h_file->f_ra; */
9650+ if (!err)
9651+ return 0; /* success */
9652+
9653+ /* close all */
9654+ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
9655+ au_set_h_fptr(file, bindex, NULL);
9656+ au_set_fbstart(file, -1);
4a4d8108
AM
9657+ au_set_fbend_dir(file, -1);
9658+
1facf9fc 9659+ return err;
9660+}
9661+
9662+static int aufs_open_dir(struct inode *inode __maybe_unused,
9663+ struct file *file)
9664+{
4a4d8108
AM
9665+ int err;
9666+ struct super_block *sb;
9667+ struct au_fidir *fidir;
9668+
9669+ err = -ENOMEM;
2000de60 9670+ sb = file->f_path.dentry->d_sb;
4a4d8108 9671+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 9672+ fidir = au_fidir_alloc(sb);
4a4d8108 9673+ if (fidir) {
b912730e
AM
9674+ struct au_do_open_args args = {
9675+ .open = do_open_dir,
9676+ .fidir = fidir
9677+ };
9678+ err = au_do_open(file, &args);
4a4d8108
AM
9679+ if (unlikely(err))
9680+ kfree(fidir);
9681+ }
9682+ si_read_unlock(sb);
9683+ return err;
1facf9fc 9684+}
9685+
9686+static int aufs_release_dir(struct inode *inode __maybe_unused,
9687+ struct file *file)
9688+{
9689+ struct au_vdir *vdir_cache;
4a4d8108
AM
9690+ struct au_finfo *finfo;
9691+ struct au_fidir *fidir;
9692+ aufs_bindex_t bindex, bend;
1facf9fc 9693+
4a4d8108
AM
9694+ finfo = au_fi(file);
9695+ fidir = finfo->fi_hdir;
9696+ if (fidir) {
076b876e 9697+ au_sphl_del(&finfo->fi_hlist,
2000de60 9698+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108
AM
9699+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
9700+ if (vdir_cache)
9701+ au_vdir_free(vdir_cache);
9702+
9703+ bindex = finfo->fi_btop;
9704+ if (bindex >= 0) {
9705+ /*
9706+ * calls fput() instead of filp_close(),
9707+ * since no dnotify or lock for the lower file.
9708+ */
9709+ bend = fidir->fd_bbot;
9710+ for (; bindex <= bend; bindex++)
9711+ au_set_h_fptr(file, bindex, NULL);
9712+ }
9713+ kfree(fidir);
9714+ finfo->fi_hdir = NULL;
1facf9fc 9715+ }
1facf9fc 9716+ au_finfo_fin(file);
1facf9fc 9717+ return 0;
9718+}
9719+
9720+/* ---------------------------------------------------------------------- */
9721+
4a4d8108
AM
9722+static int au_do_flush_dir(struct file *file, fl_owner_t id)
9723+{
9724+ int err;
9725+ aufs_bindex_t bindex, bend;
9726+ struct file *h_file;
9727+
9728+ err = 0;
9729+ bend = au_fbend_dir(file);
9730+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
9731+ h_file = au_hf_dir(file, bindex);
9732+ if (h_file)
9733+ err = vfsub_flush(h_file, id);
9734+ }
9735+ return err;
9736+}
9737+
9738+static int aufs_flush_dir(struct file *file, fl_owner_t id)
9739+{
9740+ return au_do_flush(file, id, au_do_flush_dir);
9741+}
9742+
9743+/* ---------------------------------------------------------------------- */
9744+
1facf9fc 9745+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
9746+{
9747+ int err;
9748+ aufs_bindex_t bend, bindex;
9749+ struct inode *inode;
9750+ struct super_block *sb;
9751+
9752+ err = 0;
9753+ sb = dentry->d_sb;
5527c038 9754+ inode = d_inode(dentry);
1facf9fc 9755+ IMustLock(inode);
9756+ bend = au_dbend(dentry);
9757+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
9758+ struct path h_path;
1facf9fc 9759+
9760+ if (au_test_ro(sb, bindex, inode))
9761+ continue;
9762+ h_path.dentry = au_h_dptr(dentry, bindex);
9763+ if (!h_path.dentry)
9764+ continue;
1facf9fc 9765+
1facf9fc 9766+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 9767+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 9768+ }
9769+
9770+ return err;
9771+}
9772+
9773+static int au_do_fsync_dir(struct file *file, int datasync)
9774+{
9775+ int err;
9776+ aufs_bindex_t bend, bindex;
9777+ struct file *h_file;
9778+ struct super_block *sb;
9779+ struct inode *inode;
1facf9fc 9780+
9781+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
9782+ if (unlikely(err))
9783+ goto out;
9784+
c06a8ce3 9785+ inode = file_inode(file);
b912730e 9786+ sb = inode->i_sb;
4a4d8108 9787+ bend = au_fbend_dir(file);
1facf9fc 9788+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
4a4d8108 9789+ h_file = au_hf_dir(file, bindex);
1facf9fc 9790+ if (!h_file || au_test_ro(sb, bindex, inode))
9791+ continue;
9792+
53392da6 9793+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 9794+ }
9795+
4f0767ce 9796+out:
1facf9fc 9797+ return err;
9798+}
9799+
9800+/*
9801+ * @file may be NULL
9802+ */
1e00d052
AM
9803+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
9804+ int datasync)
1facf9fc 9805+{
9806+ int err;
b752ccd1 9807+ struct dentry *dentry;
5527c038 9808+ struct inode *inode;
1facf9fc 9809+ struct super_block *sb;
1e00d052 9810+ struct mutex *mtx;
1facf9fc 9811+
9812+ err = 0;
2000de60 9813+ dentry = file->f_path.dentry;
5527c038
JR
9814+ inode = d_inode(dentry);
9815+ mtx = &inode->i_mutex;
1e00d052 9816+ mutex_lock(mtx);
1facf9fc 9817+ sb = dentry->d_sb;
9818+ si_noflush_read_lock(sb);
9819+ if (file)
9820+ err = au_do_fsync_dir(file, datasync);
9821+ else {
9822+ di_write_lock_child(dentry);
9823+ err = au_do_fsync_dir_no_file(dentry, datasync);
9824+ }
5527c038 9825+ au_cpup_attr_timesizes(inode);
1facf9fc 9826+ di_write_unlock(dentry);
9827+ if (file)
9828+ fi_write_unlock(file);
9829+
9830+ si_read_unlock(sb);
1e00d052 9831+ mutex_unlock(mtx);
1facf9fc 9832+ return err;
9833+}
9834+
9835+/* ---------------------------------------------------------------------- */
9836+
392086de 9837+static int aufs_iterate(struct file *file, struct dir_context *ctx)
1facf9fc 9838+{
9839+ int err;
9840+ struct dentry *dentry;
9dbd164d 9841+ struct inode *inode, *h_inode;
1facf9fc 9842+ struct super_block *sb;
9843+
523b37e3 9844+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 9845+
2000de60 9846+ dentry = file->f_path.dentry;
5527c038 9847+ inode = d_inode(dentry);
1facf9fc 9848+ IMustLock(inode);
9849+
9850+ sb = dentry->d_sb;
9851+ si_read_lock(sb, AuLock_FLUSH);
9852+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
9853+ if (unlikely(err))
9854+ goto out;
027c5e7a
AM
9855+ err = au_alive_dir(dentry);
9856+ if (!err)
9857+ err = au_vdir_init(file);
1facf9fc 9858+ di_downgrade_lock(dentry, AuLock_IR);
9859+ if (unlikely(err))
9860+ goto out_unlock;
9861+
9dbd164d 9862+ h_inode = au_h_iptr(inode, au_ibstart(inode));
b752ccd1 9863+ if (!au_test_nfsd()) {
392086de 9864+ err = au_vdir_fill_de(file, ctx);
9dbd164d 9865+ fsstack_copy_attr_atime(inode, h_inode);
1facf9fc 9866+ } else {
9867+ /*
9868+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
9869+ * encode_fh() and others.
9870+ */
9dbd164d 9871+ atomic_inc(&h_inode->i_count);
1facf9fc 9872+ di_read_unlock(dentry, AuLock_IR);
9873+ si_read_unlock(sb);
392086de 9874+ err = au_vdir_fill_de(file, ctx);
1facf9fc 9875+ fsstack_copy_attr_atime(inode, h_inode);
9876+ fi_write_unlock(file);
9dbd164d 9877+ iput(h_inode);
1facf9fc 9878+
9879+ AuTraceErr(err);
9880+ return err;
9881+ }
9882+
4f0767ce 9883+out_unlock:
1facf9fc 9884+ di_read_unlock(dentry, AuLock_IR);
9885+ fi_write_unlock(file);
4f0767ce 9886+out:
1facf9fc 9887+ si_read_unlock(sb);
9888+ return err;
9889+}
9890+
9891+/* ---------------------------------------------------------------------- */
9892+
9893+#define AuTestEmpty_WHONLY 1
dece6358
AM
9894+#define AuTestEmpty_CALLED (1 << 1)
9895+#define AuTestEmpty_SHWH (1 << 2)
1facf9fc 9896+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
7f207e10
AM
9897+#define au_fset_testempty(flags, name) \
9898+ do { (flags) |= AuTestEmpty_##name; } while (0)
9899+#define au_fclr_testempty(flags, name) \
9900+ do { (flags) &= ~AuTestEmpty_##name; } while (0)
1facf9fc 9901+
dece6358
AM
9902+#ifndef CONFIG_AUFS_SHWH
9903+#undef AuTestEmpty_SHWH
9904+#define AuTestEmpty_SHWH 0
9905+#endif
9906+
1facf9fc 9907+struct test_empty_arg {
392086de 9908+ struct dir_context ctx;
1308ab2a 9909+ struct au_nhash *whlist;
1facf9fc 9910+ unsigned int flags;
9911+ int err;
9912+ aufs_bindex_t bindex;
9913+};
9914+
392086de
AM
9915+static int test_empty_cb(struct dir_context *ctx, const char *__name,
9916+ int namelen, loff_t offset __maybe_unused, u64 ino,
dece6358 9917+ unsigned int d_type)
1facf9fc 9918+{
392086de
AM
9919+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
9920+ ctx);
1facf9fc 9921+ char *name = (void *)__name;
9922+
9923+ arg->err = 0;
9924+ au_fset_testempty(arg->flags, CALLED);
9925+ /* smp_mb(); */
9926+ if (name[0] == '.'
9927+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
9928+ goto out; /* success */
9929+
9930+ if (namelen <= AUFS_WH_PFX_LEN
9931+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
9932+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 9933+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 9934+ arg->err = -ENOTEMPTY;
9935+ goto out;
9936+ }
9937+
9938+ name += AUFS_WH_PFX_LEN;
9939+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 9940+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 9941+ arg->err = au_nhash_append_wh
1308ab2a 9942+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 9943+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 9944+
4f0767ce 9945+out:
1facf9fc 9946+ /* smp_mb(); */
9947+ AuTraceErr(arg->err);
9948+ return arg->err;
9949+}
9950+
9951+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
9952+{
9953+ int err;
9954+ struct file *h_file;
9955+
9956+ h_file = au_h_open(dentry, arg->bindex,
9957+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
392086de 9958+ /*file*/NULL, /*force_wr*/0);
1facf9fc 9959+ err = PTR_ERR(h_file);
9960+ if (IS_ERR(h_file))
9961+ goto out;
9962+
9963+ err = 0;
9964+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
c06a8ce3 9965+ && !file_inode(h_file)->i_nlink)
1facf9fc 9966+ goto out_put;
9967+
9968+ do {
9969+ arg->err = 0;
9970+ au_fclr_testempty(arg->flags, CALLED);
9971+ /* smp_mb(); */
392086de 9972+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1facf9fc 9973+ if (err >= 0)
9974+ err = arg->err;
9975+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
9976+
4f0767ce 9977+out_put:
1facf9fc 9978+ fput(h_file);
9979+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 9980+out:
1facf9fc 9981+ return err;
9982+}
9983+
9984+struct do_test_empty_args {
9985+ int *errp;
9986+ struct dentry *dentry;
9987+ struct test_empty_arg *arg;
9988+};
9989+
9990+static void call_do_test_empty(void *args)
9991+{
9992+ struct do_test_empty_args *a = args;
9993+ *a->errp = do_test_empty(a->dentry, a->arg);
9994+}
9995+
9996+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
9997+{
9998+ int err, wkq_err;
9999+ struct dentry *h_dentry;
10000+ struct inode *h_inode;
10001+
10002+ h_dentry = au_h_dptr(dentry, arg->bindex);
5527c038 10003+ h_inode = d_inode(h_dentry);
53392da6 10004+ /* todo: i_mode changes anytime? */
1facf9fc 10005+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
10006+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
10007+ mutex_unlock(&h_inode->i_mutex);
10008+ if (!err)
10009+ err = do_test_empty(dentry, arg);
10010+ else {
10011+ struct do_test_empty_args args = {
10012+ .errp = &err,
10013+ .dentry = dentry,
10014+ .arg = arg
10015+ };
10016+ unsigned int flags = arg->flags;
10017+
10018+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
10019+ if (unlikely(wkq_err))
10020+ err = wkq_err;
10021+ arg->flags = flags;
10022+ }
10023+
10024+ return err;
10025+}
10026+
10027+int au_test_empty_lower(struct dentry *dentry)
10028+{
10029+ int err;
1308ab2a 10030+ unsigned int rdhash;
1facf9fc 10031+ aufs_bindex_t bindex, bstart, btail;
1308ab2a 10032+ struct au_nhash whlist;
392086de
AM
10033+ struct test_empty_arg arg = {
10034+ .ctx = {
2000de60 10035+ .actor = test_empty_cb
392086de
AM
10036+ }
10037+ };
076b876e 10038+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
1facf9fc 10039+
dece6358
AM
10040+ SiMustAnyLock(dentry->d_sb);
10041+
1308ab2a 10042+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
10043+ if (!rdhash)
10044+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
10045+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 10046+ if (unlikely(err))
1facf9fc 10047+ goto out;
10048+
1facf9fc 10049+ arg.flags = 0;
1308ab2a 10050+ arg.whlist = &whlist;
10051+ bstart = au_dbstart(dentry);
dece6358
AM
10052+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10053+ au_fset_testempty(arg.flags, SHWH);
076b876e
AM
10054+ test_empty = do_test_empty;
10055+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
10056+ test_empty = sio_test_empty;
1facf9fc 10057+ arg.bindex = bstart;
076b876e 10058+ err = test_empty(dentry, &arg);
1facf9fc 10059+ if (unlikely(err))
10060+ goto out_whlist;
10061+
10062+ au_fset_testempty(arg.flags, WHONLY);
10063+ btail = au_dbtaildir(dentry);
10064+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
10065+ struct dentry *h_dentry;
10066+
10067+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10068+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10069+ arg.bindex = bindex;
076b876e 10070+ err = test_empty(dentry, &arg);
1facf9fc 10071+ }
10072+ }
10073+
4f0767ce 10074+out_whlist:
1308ab2a 10075+ au_nhash_wh_free(&whlist);
4f0767ce 10076+out:
1facf9fc 10077+ return err;
10078+}
10079+
10080+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
10081+{
10082+ int err;
392086de
AM
10083+ struct test_empty_arg arg = {
10084+ .ctx = {
2000de60 10085+ .actor = test_empty_cb
392086de
AM
10086+ }
10087+ };
1facf9fc 10088+ aufs_bindex_t bindex, btail;
10089+
10090+ err = 0;
1308ab2a 10091+ arg.whlist = whlist;
1facf9fc 10092+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
10093+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10094+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 10095+ btail = au_dbtaildir(dentry);
10096+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
10097+ struct dentry *h_dentry;
10098+
10099+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10100+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10101+ arg.bindex = bindex;
10102+ err = sio_test_empty(dentry, &arg);
10103+ }
10104+ }
10105+
10106+ return err;
10107+}
10108+
10109+/* ---------------------------------------------------------------------- */
10110+
10111+const struct file_operations aufs_dir_fop = {
4a4d8108 10112+ .owner = THIS_MODULE,
027c5e7a 10113+ .llseek = default_llseek,
1facf9fc 10114+ .read = generic_read_dir,
392086de 10115+ .iterate = aufs_iterate,
1facf9fc 10116+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
10117+#ifdef CONFIG_COMPAT
10118+ .compat_ioctl = aufs_compat_ioctl_dir,
10119+#endif
1facf9fc 10120+ .open = aufs_open_dir,
10121+ .release = aufs_release_dir,
4a4d8108 10122+ .flush = aufs_flush_dir,
1facf9fc 10123+ .fsync = aufs_fsync_dir
10124+};
7f207e10
AM
10125diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
10126--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 10127+++ linux/fs/aufs/dir.h 2015-09-24 10:47:58.251386326 +0200
b912730e 10128@@ -0,0 +1,131 @@
1facf9fc 10129+/*
2000de60 10130+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 10131+ *
10132+ * This program, aufs is free software; you can redistribute it and/or modify
10133+ * it under the terms of the GNU General Public License as published by
10134+ * the Free Software Foundation; either version 2 of the License, or
10135+ * (at your option) any later version.
dece6358
AM
10136+ *
10137+ * This program is distributed in the hope that it will be useful,
10138+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10139+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10140+ * GNU General Public License for more details.
10141+ *
10142+ * You should have received a copy of the GNU General Public License
523b37e3 10143+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10144+ */
10145+
10146+/*
10147+ * directory operations
10148+ */
10149+
10150+#ifndef __AUFS_DIR_H__
10151+#define __AUFS_DIR_H__
10152+
10153+#ifdef __KERNEL__
10154+
10155+#include <linux/fs.h>
1facf9fc 10156+
10157+/* ---------------------------------------------------------------------- */
10158+
10159+/* need to be faster and smaller */
10160+
10161+struct au_nhash {
dece6358
AM
10162+ unsigned int nh_num;
10163+ struct hlist_head *nh_head;
1facf9fc 10164+};
10165+
10166+struct au_vdir_destr {
10167+ unsigned char len;
10168+ unsigned char name[0];
10169+} __packed;
10170+
10171+struct au_vdir_dehstr {
10172+ struct hlist_node hash;
10173+ struct au_vdir_destr *str;
4a4d8108 10174+} ____cacheline_aligned_in_smp;
1facf9fc 10175+
10176+struct au_vdir_de {
10177+ ino_t de_ino;
10178+ unsigned char de_type;
10179+ /* caution: packed */
10180+ struct au_vdir_destr de_str;
10181+} __packed;
10182+
10183+struct au_vdir_wh {
10184+ struct hlist_node wh_hash;
dece6358
AM
10185+#ifdef CONFIG_AUFS_SHWH
10186+ ino_t wh_ino;
1facf9fc 10187+ aufs_bindex_t wh_bindex;
dece6358
AM
10188+ unsigned char wh_type;
10189+#else
10190+ aufs_bindex_t wh_bindex;
10191+#endif
10192+ /* caution: packed */
1facf9fc 10193+ struct au_vdir_destr wh_str;
10194+} __packed;
10195+
10196+union au_vdir_deblk_p {
10197+ unsigned char *deblk;
10198+ struct au_vdir_de *de;
10199+};
10200+
10201+struct au_vdir {
10202+ unsigned char **vd_deblk;
10203+ unsigned long vd_nblk;
1facf9fc 10204+ struct {
10205+ unsigned long ul;
10206+ union au_vdir_deblk_p p;
10207+ } vd_last;
10208+
10209+ unsigned long vd_version;
dece6358 10210+ unsigned int vd_deblk_sz;
1facf9fc 10211+ unsigned long vd_jiffy;
4a4d8108 10212+} ____cacheline_aligned_in_smp;
1facf9fc 10213+
10214+/* ---------------------------------------------------------------------- */
10215+
10216+/* dir.c */
10217+extern const struct file_operations aufs_dir_fop;
10218+void au_add_nlink(struct inode *dir, struct inode *h_dir);
10219+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 10220+loff_t au_dir_size(struct file *file, struct dentry *dentry);
b912730e 10221+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
1facf9fc 10222+int au_test_empty_lower(struct dentry *dentry);
10223+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
10224+
10225+/* vdir.c */
1308ab2a 10226+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
10227+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
10228+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 10229+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
10230+ int limit);
dece6358
AM
10231+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
10232+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
10233+ unsigned int d_type, aufs_bindex_t bindex,
10234+ unsigned char shwh);
1facf9fc 10235+void au_vdir_free(struct au_vdir *vdir);
10236+int au_vdir_init(struct file *file);
392086de 10237+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
1facf9fc 10238+
10239+/* ioctl.c */
10240+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
10241+
1308ab2a 10242+#ifdef CONFIG_AUFS_RDU
10243+/* rdu.c */
10244+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10245+#ifdef CONFIG_COMPAT
10246+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
10247+ unsigned long arg);
10248+#endif
1308ab2a 10249+#else
c1595e42
JR
10250+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
10251+ unsigned int cmd, unsigned long arg)
b752ccd1 10252+#ifdef CONFIG_COMPAT
c1595e42
JR
10253+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
10254+ unsigned int cmd, unsigned long arg)
b752ccd1 10255+#endif
1308ab2a 10256+#endif
10257+
1facf9fc 10258+#endif /* __KERNEL__ */
10259+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
10260diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
10261--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 10262+++ linux/fs/aufs/dynop.c 2015-09-24 10:47:58.251386326 +0200
7e9cd9fe 10263@@ -0,0 +1,369 @@
1facf9fc 10264+/*
2000de60 10265+ * Copyright (C) 2010-2015 Junjiro R. Okajima
1facf9fc 10266+ *
10267+ * This program, aufs is free software; you can redistribute it and/or modify
10268+ * it under the terms of the GNU General Public License as published by
10269+ * the Free Software Foundation; either version 2 of the License, or
10270+ * (at your option) any later version.
dece6358
AM
10271+ *
10272+ * This program is distributed in the hope that it will be useful,
10273+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10274+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10275+ * GNU General Public License for more details.
10276+ *
10277+ * You should have received a copy of the GNU General Public License
523b37e3 10278+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10279+ */
10280+
10281+/*
4a4d8108 10282+ * dynamically customizable operations for regular files
1facf9fc 10283+ */
10284+
1facf9fc 10285+#include "aufs.h"
10286+
4a4d8108 10287+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 10288+
4a4d8108
AM
10289+/*
10290+ * How large will these lists be?
10291+ * Usually just a few elements, 20-30 at most for each, I guess.
10292+ */
10293+static struct au_splhead dynop[AuDyLast];
10294+
10295+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op)
1facf9fc 10296+{
4a4d8108
AM
10297+ struct au_dykey *key, *tmp;
10298+ struct list_head *head;
1facf9fc 10299+
4a4d8108
AM
10300+ key = NULL;
10301+ head = &spl->head;
10302+ rcu_read_lock();
10303+ list_for_each_entry_rcu(tmp, head, dk_list)
10304+ if (tmp->dk_op.dy_hop == h_op) {
10305+ key = tmp;
10306+ kref_get(&key->dk_kref);
10307+ break;
10308+ }
10309+ rcu_read_unlock();
10310+
10311+ return key;
1facf9fc 10312+}
10313+
4a4d8108 10314+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 10315+{
4a4d8108
AM
10316+ struct au_dykey **k, *found;
10317+ const void *h_op = key->dk_op.dy_hop;
10318+ int i;
1facf9fc 10319+
4a4d8108
AM
10320+ found = NULL;
10321+ k = br->br_dykey;
10322+ for (i = 0; i < AuBrDynOp; i++)
10323+ if (k[i]) {
10324+ if (k[i]->dk_op.dy_hop == h_op) {
10325+ found = k[i];
10326+ break;
10327+ }
10328+ } else
10329+ break;
10330+ if (!found) {
10331+ spin_lock(&br->br_dykey_lock);
10332+ for (; i < AuBrDynOp; i++)
10333+ if (k[i]) {
10334+ if (k[i]->dk_op.dy_hop == h_op) {
10335+ found = k[i];
10336+ break;
10337+ }
10338+ } else {
10339+ k[i] = key;
10340+ break;
10341+ }
10342+ spin_unlock(&br->br_dykey_lock);
10343+ BUG_ON(i == AuBrDynOp); /* expand the array */
10344+ }
10345+
10346+ return found;
1facf9fc 10347+}
10348+
4a4d8108
AM
10349+/* kref_get() if @key is already added */
10350+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key)
10351+{
10352+ struct au_dykey *tmp, *found;
10353+ struct list_head *head;
10354+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 10355+
4a4d8108
AM
10356+ found = NULL;
10357+ head = &spl->head;
10358+ spin_lock(&spl->spin);
10359+ list_for_each_entry(tmp, head, dk_list)
10360+ if (tmp->dk_op.dy_hop == h_op) {
10361+ kref_get(&tmp->dk_kref);
10362+ found = tmp;
10363+ break;
10364+ }
10365+ if (!found)
10366+ list_add_rcu(&key->dk_list, head);
10367+ spin_unlock(&spl->spin);
1facf9fc 10368+
4a4d8108
AM
10369+ if (!found)
10370+ DyPrSym(key);
10371+ return found;
10372+}
10373+
10374+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 10375+{
4a4d8108
AM
10376+ struct au_dykey *key;
10377+
10378+ key = container_of(rcu, struct au_dykey, dk_rcu);
10379+ DyPrSym(key);
10380+ kfree(key);
1facf9fc 10381+}
10382+
4a4d8108
AM
10383+static void dy_free(struct kref *kref)
10384+{
10385+ struct au_dykey *key;
10386+ struct au_splhead *spl;
1facf9fc 10387+
4a4d8108
AM
10388+ key = container_of(kref, struct au_dykey, dk_kref);
10389+ spl = dynop + key->dk_op.dy_type;
10390+ au_spl_del_rcu(&key->dk_list, spl);
10391+ call_rcu(&key->dk_rcu, dy_free_rcu);
10392+}
10393+
10394+void au_dy_put(struct au_dykey *key)
1facf9fc 10395+{
4a4d8108
AM
10396+ kref_put(&key->dk_kref, dy_free);
10397+}
1facf9fc 10398+
4a4d8108
AM
10399+/* ---------------------------------------------------------------------- */
10400+
10401+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
10402+
10403+#ifdef CONFIG_AUFS_DEBUG
10404+#define DyDbgDeclare(cnt) unsigned int cnt = 0
4f0767ce 10405+#define DyDbgInc(cnt) do { cnt++; } while (0)
4a4d8108
AM
10406+#else
10407+#define DyDbgDeclare(cnt) do {} while (0)
10408+#define DyDbgInc(cnt) do {} while (0)
10409+#endif
10410+
10411+#define DySet(func, dst, src, h_op, h_sb) do { \
10412+ DyDbgInc(cnt); \
10413+ if (h_op->func) { \
10414+ if (src.func) \
10415+ dst.func = src.func; \
10416+ else \
10417+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
10418+ } \
10419+} while (0)
10420+
10421+#define DySetForce(func, dst, src) do { \
10422+ AuDebugOn(!src.func); \
10423+ DyDbgInc(cnt); \
10424+ dst.func = src.func; \
10425+} while (0)
10426+
10427+#define DySetAop(func) \
10428+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
10429+#define DySetAopForce(func) \
10430+ DySetForce(func, dyaop->da_op, aufs_aop)
10431+
10432+static void dy_aop(struct au_dykey *key, const void *h_op,
10433+ struct super_block *h_sb __maybe_unused)
10434+{
10435+ struct au_dyaop *dyaop = (void *)key;
10436+ const struct address_space_operations *h_aop = h_op;
10437+ DyDbgDeclare(cnt);
10438+
10439+ AuDbg("%s\n", au_sbtype(h_sb));
10440+
10441+ DySetAop(writepage);
10442+ DySetAopForce(readpage); /* force */
4a4d8108
AM
10443+ DySetAop(writepages);
10444+ DySetAop(set_page_dirty);
10445+ DySetAop(readpages);
10446+ DySetAop(write_begin);
10447+ DySetAop(write_end);
10448+ DySetAop(bmap);
10449+ DySetAop(invalidatepage);
10450+ DySetAop(releasepage);
027c5e7a 10451+ DySetAop(freepage);
7e9cd9fe 10452+ /* this one will be changed according to an aufs mount option */
4a4d8108 10453+ DySetAop(direct_IO);
4a4d8108
AM
10454+ DySetAop(migratepage);
10455+ DySetAop(launder_page);
10456+ DySetAop(is_partially_uptodate);
392086de 10457+ DySetAop(is_dirty_writeback);
4a4d8108 10458+ DySetAop(error_remove_page);
b4510431
AM
10459+ DySetAop(swap_activate);
10460+ DySetAop(swap_deactivate);
4a4d8108
AM
10461+
10462+ DyDbgSize(cnt, *h_aop);
4a4d8108
AM
10463+}
10464+
4a4d8108
AM
10465+/* ---------------------------------------------------------------------- */
10466+
10467+static void dy_bug(struct kref *kref)
10468+{
10469+ BUG();
10470+}
10471+
10472+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
10473+{
10474+ struct au_dykey *key, *old;
10475+ struct au_splhead *spl;
b752ccd1 10476+ struct op {
4a4d8108 10477+ unsigned int sz;
b752ccd1
AM
10478+ void (*set)(struct au_dykey *key, const void *h_op,
10479+ struct super_block *h_sb __maybe_unused);
10480+ };
10481+ static const struct op a[] = {
4a4d8108
AM
10482+ [AuDy_AOP] = {
10483+ .sz = sizeof(struct au_dyaop),
b752ccd1 10484+ .set = dy_aop
4a4d8108 10485+ }
b752ccd1
AM
10486+ };
10487+ const struct op *p;
4a4d8108
AM
10488+
10489+ spl = dynop + op->dy_type;
10490+ key = dy_gfind_get(spl, op->dy_hop);
10491+ if (key)
10492+ goto out_add; /* success */
10493+
10494+ p = a + op->dy_type;
10495+ key = kzalloc(p->sz, GFP_NOFS);
10496+ if (unlikely(!key)) {
10497+ key = ERR_PTR(-ENOMEM);
10498+ goto out;
10499+ }
10500+
10501+ key->dk_op.dy_hop = op->dy_hop;
10502+ kref_init(&key->dk_kref);
86dc4139 10503+ p->set(key, op->dy_hop, au_br_sb(br));
4a4d8108
AM
10504+ old = dy_gadd(spl, key);
10505+ if (old) {
10506+ kfree(key);
10507+ key = old;
10508+ }
10509+
10510+out_add:
10511+ old = dy_bradd(br, key);
10512+ if (old)
10513+ /* its ref-count should never be zero here */
10514+ kref_put(&key->dk_kref, dy_bug);
10515+out:
10516+ return key;
10517+}
10518+
10519+/* ---------------------------------------------------------------------- */
10520+/*
10521+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
c1595e42 10522+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
4a4d8108
AM
10523+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
10524+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
10525+ * See the aufs manual for details.
4a4d8108
AM
10526+ */
10527+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
10528+{
7e9cd9fe 10529+ if (!do_dx)
4a4d8108 10530+ dyaop->da_op.direct_IO = NULL;
7e9cd9fe 10531+ else
4a4d8108 10532+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
4a4d8108
AM
10533+}
10534+
10535+static struct au_dyaop *dy_aget(struct au_branch *br,
10536+ const struct address_space_operations *h_aop,
10537+ int do_dx)
10538+{
10539+ struct au_dyaop *dyaop;
10540+ struct au_dynop op;
10541+
10542+ op.dy_type = AuDy_AOP;
10543+ op.dy_haop = h_aop;
10544+ dyaop = (void *)dy_get(&op, br);
10545+ if (IS_ERR(dyaop))
10546+ goto out;
10547+ dy_adx(dyaop, do_dx);
10548+
10549+out:
10550+ return dyaop;
10551+}
10552+
10553+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10554+ struct inode *h_inode)
10555+{
10556+ int err, do_dx;
10557+ struct super_block *sb;
10558+ struct au_branch *br;
10559+ struct au_dyaop *dyaop;
10560+
10561+ AuDebugOn(!S_ISREG(h_inode->i_mode));
10562+ IiMustWriteLock(inode);
10563+
10564+ sb = inode->i_sb;
10565+ br = au_sbr(sb, bindex);
10566+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
10567+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
10568+ err = PTR_ERR(dyaop);
10569+ if (IS_ERR(dyaop))
10570+ /* unnecessary to call dy_fput() */
10571+ goto out;
10572+
10573+ err = 0;
10574+ inode->i_mapping->a_ops = &dyaop->da_op;
10575+
10576+out:
10577+ return err;
10578+}
10579+
b752ccd1
AM
10580+/*
10581+ * Is it safe to replace a_ops while the inode/file is in use?
10582+ * Yes, I hope so.
10583+ */
10584+int au_dy_irefresh(struct inode *inode)
10585+{
10586+ int err;
10587+ aufs_bindex_t bstart;
10588+ struct inode *h_inode;
10589+
10590+ err = 0;
10591+ if (S_ISREG(inode->i_mode)) {
10592+ bstart = au_ibstart(inode);
10593+ h_inode = au_h_iptr(inode, bstart);
10594+ err = au_dy_iaop(inode, bstart, h_inode);
10595+ }
10596+ return err;
10597+}
10598+
4a4d8108
AM
10599+void au_dy_arefresh(int do_dx)
10600+{
10601+ struct au_splhead *spl;
10602+ struct list_head *head;
10603+ struct au_dykey *key;
10604+
10605+ spl = dynop + AuDy_AOP;
10606+ head = &spl->head;
10607+ spin_lock(&spl->spin);
10608+ list_for_each_entry(key, head, dk_list)
10609+ dy_adx((void *)key, do_dx);
10610+ spin_unlock(&spl->spin);
10611+}
10612+
4a4d8108
AM
10613+/* ---------------------------------------------------------------------- */
10614+
10615+void __init au_dy_init(void)
10616+{
10617+ int i;
10618+
10619+ /* make sure that 'struct au_dykey *' can be any type */
10620+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
10621+
10622+ for (i = 0; i < AuDyLast; i++)
10623+ au_spl_init(dynop + i);
10624+}
10625+
10626+void au_dy_fin(void)
10627+{
10628+ int i;
10629+
10630+ for (i = 0; i < AuDyLast; i++)
10631+ WARN_ON(!list_empty(&dynop[i].head));
10632+}
7f207e10
AM
10633diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
10634--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 10635+++ linux/fs/aufs/dynop.h 2015-09-24 10:47:58.251386326 +0200
7e9cd9fe 10636@@ -0,0 +1,74 @@
4a4d8108 10637+/*
2000de60 10638+ * Copyright (C) 2010-2015 Junjiro R. Okajima
4a4d8108
AM
10639+ *
10640+ * This program, aufs is free software; you can redistribute it and/or modify
10641+ * it under the terms of the GNU General Public License as published by
10642+ * the Free Software Foundation; either version 2 of the License, or
10643+ * (at your option) any later version.
10644+ *
10645+ * This program is distributed in the hope that it will be useful,
10646+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10647+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10648+ * GNU General Public License for more details.
10649+ *
10650+ * You should have received a copy of the GNU General Public License
523b37e3 10651+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10652+ */
10653+
10654+/*
10655+ * dynamically customizable operations (for regular files only)
10656+ */
10657+
10658+#ifndef __AUFS_DYNOP_H__
10659+#define __AUFS_DYNOP_H__
10660+
10661+#ifdef __KERNEL__
10662+
7e9cd9fe
AM
10663+#include <linux/fs.h>
10664+#include <linux/kref.h>
4a4d8108 10665+
2cbb1c4b 10666+enum {AuDy_AOP, AuDyLast};
4a4d8108
AM
10667+
10668+struct au_dynop {
10669+ int dy_type;
10670+ union {
10671+ const void *dy_hop;
10672+ const struct address_space_operations *dy_haop;
4a4d8108
AM
10673+ };
10674+};
10675+
10676+struct au_dykey {
10677+ union {
10678+ struct list_head dk_list;
10679+ struct rcu_head dk_rcu;
10680+ };
10681+ struct au_dynop dk_op;
10682+
10683+ /*
10684+ * while this key is held in a branch-local array, a kref is taken; when
10685+ * the branch is removed, that kref is put.
10686+ */
10687+ struct kref dk_kref;
10688+};
10689+
10690+/* stop unioning since their sizes are very different from each other */
10691+struct au_dyaop {
10692+ struct au_dykey da_key;
10693+ struct address_space_operations da_op; /* not const */
4a4d8108
AM
10694+};
10695+
4a4d8108
AM
10696+/* ---------------------------------------------------------------------- */
10697+
10698+/* dynop.c */
10699+struct au_branch;
10700+void au_dy_put(struct au_dykey *key);
10701+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10702+ struct inode *h_inode);
b752ccd1 10703+int au_dy_irefresh(struct inode *inode);
4a4d8108 10704+void au_dy_arefresh(int do_dio);
4a4d8108
AM
10705+
10706+void __init au_dy_init(void);
10707+void au_dy_fin(void);
10708+
4a4d8108
AM
10709+#endif /* __KERNEL__ */
10710+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
10711diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
10712--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 10713+++ linux/fs/aufs/export.c 2015-09-24 10:47:58.251386326 +0200
5527c038 10714@@ -0,0 +1,832 @@
4a4d8108 10715+/*
2000de60 10716+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
10717+ *
10718+ * This program, aufs is free software; you can redistribute it and/or modify
10719+ * it under the terms of the GNU General Public License as published by
10720+ * the Free Software Foundation; either version 2 of the License, or
10721+ * (at your option) any later version.
10722+ *
10723+ * This program is distributed in the hope that it will be useful,
10724+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10725+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10726+ * GNU General Public License for more details.
10727+ *
10728+ * You should have received a copy of the GNU General Public License
523b37e3 10729+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10730+ */
10731+
10732+/*
10733+ * export via nfs
10734+ */
10735+
10736+#include <linux/exportfs.h>
7eafdf33 10737+#include <linux/fs_struct.h>
4a4d8108
AM
10738+#include <linux/namei.h>
10739+#include <linux/nsproxy.h>
10740+#include <linux/random.h>
10741+#include <linux/writeback.h>
7eafdf33 10742+#include "../fs/mount.h"
4a4d8108
AM
10743+#include "aufs.h"
10744+
10745+union conv {
10746+#ifdef CONFIG_AUFS_INO_T_64
10747+ __u32 a[2];
10748+#else
10749+ __u32 a[1];
10750+#endif
10751+ ino_t ino;
10752+};
10753+
10754+static ino_t decode_ino(__u32 *a)
10755+{
10756+ union conv u;
10757+
10758+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
10759+ u.a[0] = a[0];
10760+#ifdef CONFIG_AUFS_INO_T_64
10761+ u.a[1] = a[1];
10762+#endif
10763+ return u.ino;
10764+}
10765+
10766+static void encode_ino(__u32 *a, ino_t ino)
10767+{
10768+ union conv u;
10769+
10770+ u.ino = ino;
10771+ a[0] = u.a[0];
10772+#ifdef CONFIG_AUFS_INO_T_64
10773+ a[1] = u.a[1];
10774+#endif
10775+}
10776+
10777+/* NFS file handle */
10778+enum {
10779+ Fh_br_id,
10780+ Fh_sigen,
10781+#ifdef CONFIG_AUFS_INO_T_64
10782+ /* support 64bit inode number */
10783+ Fh_ino1,
10784+ Fh_ino2,
10785+ Fh_dir_ino1,
10786+ Fh_dir_ino2,
10787+#else
10788+ Fh_ino1,
10789+ Fh_dir_ino1,
10790+#endif
10791+ Fh_igen,
10792+ Fh_h_type,
10793+ Fh_tail,
10794+
10795+ Fh_ino = Fh_ino1,
10796+ Fh_dir_ino = Fh_dir_ino1
10797+};
10798+
10799+static int au_test_anon(struct dentry *dentry)
10800+{
027c5e7a 10801+ /* note: read d_flags without d_lock */
4a4d8108
AM
10802+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
10803+}
10804+
a2a7ad62
AM
10805+int au_test_nfsd(void)
10806+{
10807+ int ret;
10808+ struct task_struct *tsk = current;
10809+ char comm[sizeof(tsk->comm)];
10810+
10811+ ret = 0;
10812+ if (tsk->flags & PF_KTHREAD) {
10813+ get_task_comm(comm, tsk);
10814+ ret = !strcmp(comm, "nfsd");
10815+ }
10816+
10817+ return ret;
10818+}
10819+
4a4d8108
AM
10820+/* ---------------------------------------------------------------------- */
10821+/* inode generation external table */
10822+
b752ccd1 10823+void au_xigen_inc(struct inode *inode)
4a4d8108 10824+{
4a4d8108
AM
10825+ loff_t pos;
10826+ ssize_t sz;
10827+ __u32 igen;
10828+ struct super_block *sb;
10829+ struct au_sbinfo *sbinfo;
10830+
4a4d8108 10831+ sb = inode->i_sb;
b752ccd1 10832+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 10833+
b752ccd1 10834+ sbinfo = au_sbi(sb);
1facf9fc 10835+ pos = inode->i_ino;
10836+ pos *= sizeof(igen);
10837+ igen = inode->i_generation + 1;
1facf9fc 10838+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
10839+ sizeof(igen), &pos);
10840+ if (sz == sizeof(igen))
b752ccd1 10841+ return; /* success */
1facf9fc 10842+
b752ccd1 10843+ if (unlikely(sz >= 0))
1facf9fc 10844+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 10845+}
10846+
10847+int au_xigen_new(struct inode *inode)
10848+{
10849+ int err;
10850+ loff_t pos;
10851+ ssize_t sz;
10852+ struct super_block *sb;
10853+ struct au_sbinfo *sbinfo;
10854+ struct file *file;
10855+
10856+ err = 0;
10857+ /* todo: dirty, at mount time */
10858+ if (inode->i_ino == AUFS_ROOT_INO)
10859+ goto out;
10860+ sb = inode->i_sb;
dece6358 10861+ SiMustAnyLock(sb);
1facf9fc 10862+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
10863+ goto out;
10864+
10865+ err = -EFBIG;
10866+ pos = inode->i_ino;
10867+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
10868+ AuIOErr1("too large i%lld\n", pos);
10869+ goto out;
10870+ }
10871+ pos *= sizeof(inode->i_generation);
10872+
10873+ err = 0;
10874+ sbinfo = au_sbi(sb);
10875+ file = sbinfo->si_xigen;
10876+ BUG_ON(!file);
10877+
c06a8ce3 10878+ if (vfsub_f_size_read(file)
1facf9fc 10879+ < pos + sizeof(inode->i_generation)) {
10880+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
10881+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
10882+ sizeof(inode->i_generation), &pos);
10883+ } else
10884+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
10885+ sizeof(inode->i_generation), &pos);
10886+ if (sz == sizeof(inode->i_generation))
10887+ goto out; /* success */
10888+
10889+ err = sz;
10890+ if (unlikely(sz >= 0)) {
10891+ err = -EIO;
10892+ AuIOErr("xigen error (%zd)\n", sz);
10893+ }
10894+
4f0767ce 10895+out:
1facf9fc 10896+ return err;
10897+}
10898+
10899+int au_xigen_set(struct super_block *sb, struct file *base)
10900+{
10901+ int err;
10902+ struct au_sbinfo *sbinfo;
10903+ struct file *file;
10904+
dece6358
AM
10905+ SiMustWriteLock(sb);
10906+
1facf9fc 10907+ sbinfo = au_sbi(sb);
10908+ file = au_xino_create2(base, sbinfo->si_xigen);
10909+ err = PTR_ERR(file);
10910+ if (IS_ERR(file))
10911+ goto out;
10912+ err = 0;
10913+ if (sbinfo->si_xigen)
10914+ fput(sbinfo->si_xigen);
10915+ sbinfo->si_xigen = file;
10916+
4f0767ce 10917+out:
1facf9fc 10918+ return err;
10919+}
10920+
10921+void au_xigen_clr(struct super_block *sb)
10922+{
10923+ struct au_sbinfo *sbinfo;
10924+
dece6358
AM
10925+ SiMustWriteLock(sb);
10926+
1facf9fc 10927+ sbinfo = au_sbi(sb);
10928+ if (sbinfo->si_xigen) {
10929+ fput(sbinfo->si_xigen);
10930+ sbinfo->si_xigen = NULL;
10931+ }
10932+}
10933+
10934+/* ---------------------------------------------------------------------- */
10935+
10936+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
10937+ ino_t dir_ino)
10938+{
10939+ struct dentry *dentry, *d;
10940+ struct inode *inode;
10941+ unsigned int sigen;
10942+
10943+ dentry = NULL;
10944+ inode = ilookup(sb, ino);
10945+ if (!inode)
10946+ goto out;
10947+
10948+ dentry = ERR_PTR(-ESTALE);
10949+ sigen = au_sigen(sb);
10950+ if (unlikely(is_bad_inode(inode)
10951+ || IS_DEADDIR(inode)
537831f9 10952+ || sigen != au_iigen(inode, NULL)))
1facf9fc 10953+ goto out_iput;
10954+
10955+ dentry = NULL;
10956+ if (!dir_ino || S_ISDIR(inode->i_mode))
10957+ dentry = d_find_alias(inode);
10958+ else {
027c5e7a 10959+ spin_lock(&inode->i_lock);
c1595e42 10960+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 10961+ spin_lock(&d->d_lock);
1facf9fc 10962+ if (!au_test_anon(d)
5527c038 10963+ && d_inode(d->d_parent)->i_ino == dir_ino) {
027c5e7a
AM
10964+ dentry = dget_dlock(d);
10965+ spin_unlock(&d->d_lock);
1facf9fc 10966+ break;
10967+ }
027c5e7a
AM
10968+ spin_unlock(&d->d_lock);
10969+ }
10970+ spin_unlock(&inode->i_lock);
1facf9fc 10971+ }
027c5e7a 10972+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 10973+ /* need to refresh */
1facf9fc 10974+ dput(dentry);
2cbb1c4b 10975+ dentry = NULL;
1facf9fc 10976+ }
10977+
4f0767ce 10978+out_iput:
1facf9fc 10979+ iput(inode);
4f0767ce 10980+out:
2cbb1c4b 10981+ AuTraceErrPtr(dentry);
1facf9fc 10982+ return dentry;
10983+}
10984+
10985+/* ---------------------------------------------------------------------- */
10986+
10987+/* todo: dirty? */
10988+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
10989+
10990+struct au_compare_mnt_args {
10991+ /* input */
10992+ struct super_block *sb;
10993+
10994+ /* output */
10995+ struct vfsmount *mnt;
10996+};
10997+
10998+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
10999+{
11000+ struct au_compare_mnt_args *a = arg;
11001+
11002+ if (mnt->mnt_sb != a->sb)
11003+ return 0;
11004+ a->mnt = mntget(mnt);
11005+ return 1;
11006+}
11007+
1facf9fc 11008+static struct vfsmount *au_mnt_get(struct super_block *sb)
11009+{
4a4d8108 11010+ int err;
7eafdf33 11011+ struct path root;
4a4d8108
AM
11012+ struct au_compare_mnt_args args = {
11013+ .sb = sb
11014+ };
1facf9fc 11015+
7eafdf33 11016+ get_fs_root(current->fs, &root);
523b37e3 11017+ rcu_read_lock();
7eafdf33 11018+ err = iterate_mounts(au_compare_mnt, &args, root.mnt);
523b37e3 11019+ rcu_read_unlock();
7eafdf33 11020+ path_put(&root);
4a4d8108
AM
11021+ AuDebugOn(!err);
11022+ AuDebugOn(!args.mnt);
11023+ return args.mnt;
1facf9fc 11024+}
11025+
11026+struct au_nfsd_si_lock {
4a4d8108 11027+ unsigned int sigen;
027c5e7a 11028+ aufs_bindex_t bindex, br_id;
1facf9fc 11029+ unsigned char force_lock;
11030+};
11031+
027c5e7a
AM
11032+static int si_nfsd_read_lock(struct super_block *sb,
11033+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11034+{
027c5e7a 11035+ int err;
1facf9fc 11036+ aufs_bindex_t bindex;
11037+
11038+ si_read_lock(sb, AuLock_FLUSH);
11039+
11040+ /* branch id may be wrapped around */
027c5e7a 11041+ err = 0;
1facf9fc 11042+ bindex = au_br_index(sb, nsi_lock->br_id);
11043+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
11044+ goto out; /* success */
11045+
027c5e7a
AM
11046+ err = -ESTALE;
11047+ bindex = -1;
1facf9fc 11048+ if (!nsi_lock->force_lock)
11049+ si_read_unlock(sb);
1facf9fc 11050+
4f0767ce 11051+out:
027c5e7a
AM
11052+ nsi_lock->bindex = bindex;
11053+ return err;
1facf9fc 11054+}
11055+
11056+struct find_name_by_ino {
392086de 11057+ struct dir_context ctx;
1facf9fc 11058+ int called, found;
11059+ ino_t ino;
11060+ char *name;
11061+ int namelen;
11062+};
11063+
11064+static int
392086de
AM
11065+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
11066+ loff_t offset, u64 ino, unsigned int d_type)
1facf9fc 11067+{
392086de
AM
11068+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
11069+ ctx);
1facf9fc 11070+
11071+ a->called++;
11072+ if (a->ino != ino)
11073+ return 0;
11074+
11075+ memcpy(a->name, name, namelen);
11076+ a->namelen = namelen;
11077+ a->found = 1;
11078+ return 1;
11079+}
11080+
11081+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
11082+ struct au_nfsd_si_lock *nsi_lock)
11083+{
11084+ struct dentry *dentry, *parent;
11085+ struct file *file;
11086+ struct inode *dir;
392086de
AM
11087+ struct find_name_by_ino arg = {
11088+ .ctx = {
2000de60 11089+ .actor = find_name_by_ino
392086de
AM
11090+ }
11091+ };
1facf9fc 11092+ int err;
11093+
11094+ parent = path->dentry;
11095+ if (nsi_lock)
11096+ si_read_unlock(parent->d_sb);
4a4d8108 11097+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 11098+ dentry = (void *)file;
11099+ if (IS_ERR(file))
11100+ goto out;
11101+
11102+ dentry = ERR_PTR(-ENOMEM);
537831f9 11103+ arg.name = (void *)__get_free_page(GFP_NOFS);
1facf9fc 11104+ if (unlikely(!arg.name))
11105+ goto out_file;
11106+ arg.ino = ino;
11107+ arg.found = 0;
11108+ do {
11109+ arg.called = 0;
11110+ /* smp_mb(); */
392086de 11111+ err = vfsub_iterate_dir(file, &arg.ctx);
1facf9fc 11112+ } while (!err && !arg.found && arg.called);
11113+ dentry = ERR_PTR(err);
11114+ if (unlikely(err))
11115+ goto out_name;
1716fcea
AM
11116+ /* instead of ENOENT */
11117+ dentry = ERR_PTR(-ESTALE);
1facf9fc 11118+ if (!arg.found)
11119+ goto out_name;
11120+
b4510431 11121+ /* do not call vfsub_lkup_one() */
5527c038 11122+ dir = d_inode(parent);
1facf9fc 11123+ mutex_lock(&dir->i_mutex);
11124+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
11125+ mutex_unlock(&dir->i_mutex);
11126+ AuTraceErrPtr(dentry);
11127+ if (IS_ERR(dentry))
11128+ goto out_name;
11129+ AuDebugOn(au_test_anon(dentry));
5527c038 11130+ if (unlikely(d_really_is_negative(dentry))) {
1facf9fc 11131+ dput(dentry);
11132+ dentry = ERR_PTR(-ENOENT);
11133+ }
11134+
4f0767ce 11135+out_name:
537831f9 11136+ free_page((unsigned long)arg.name);
4f0767ce 11137+out_file:
1facf9fc 11138+ fput(file);
4f0767ce 11139+out:
1facf9fc 11140+ if (unlikely(nsi_lock
11141+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
11142+ if (!IS_ERR(dentry)) {
11143+ dput(dentry);
11144+ dentry = ERR_PTR(-ESTALE);
11145+ }
11146+ AuTraceErrPtr(dentry);
11147+ return dentry;
11148+}
11149+
11150+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
11151+ ino_t dir_ino,
11152+ struct au_nfsd_si_lock *nsi_lock)
11153+{
11154+ struct dentry *dentry;
11155+ struct path path;
11156+
11157+ if (dir_ino != AUFS_ROOT_INO) {
11158+ path.dentry = decode_by_ino(sb, dir_ino, 0);
11159+ dentry = path.dentry;
11160+ if (!path.dentry || IS_ERR(path.dentry))
11161+ goto out;
11162+ AuDebugOn(au_test_anon(path.dentry));
11163+ } else
11164+ path.dentry = dget(sb->s_root);
11165+
11166+ path.mnt = au_mnt_get(sb);
11167+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
11168+ path_put(&path);
11169+
4f0767ce 11170+out:
1facf9fc 11171+ AuTraceErrPtr(dentry);
11172+ return dentry;
11173+}
11174+
11175+/* ---------------------------------------------------------------------- */
11176+
11177+static int h_acceptable(void *expv, struct dentry *dentry)
11178+{
11179+ return 1;
11180+}
11181+
11182+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
11183+ char *buf, int len, struct super_block *sb)
11184+{
11185+ char *p;
11186+ int n;
11187+ struct path path;
11188+
11189+ p = d_path(h_rootpath, buf, len);
11190+ if (IS_ERR(p))
11191+ goto out;
11192+ n = strlen(p);
11193+
11194+ path.mnt = h_rootpath->mnt;
11195+ path.dentry = h_parent;
11196+ p = d_path(&path, buf, len);
11197+ if (IS_ERR(p))
11198+ goto out;
11199+ if (n != 1)
11200+ p += n;
11201+
11202+ path.mnt = au_mnt_get(sb);
11203+ path.dentry = sb->s_root;
11204+ p = d_path(&path, buf, len - strlen(p));
11205+ mntput(path.mnt);
11206+ if (IS_ERR(p))
11207+ goto out;
11208+ if (n != 1)
11209+ p[strlen(p)] = '/';
11210+
4f0767ce 11211+out:
1facf9fc 11212+ AuTraceErrPtr(p);
11213+ return p;
11214+}
11215+
11216+static
027c5e7a
AM
11217+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
11218+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11219+{
11220+ struct dentry *dentry, *h_parent, *root;
11221+ struct super_block *h_sb;
11222+ char *pathname, *p;
11223+ struct vfsmount *h_mnt;
11224+ struct au_branch *br;
11225+ int err;
11226+ struct path path;
11227+
027c5e7a 11228+ br = au_sbr(sb, nsi_lock->bindex);
86dc4139 11229+ h_mnt = au_br_mnt(br);
1facf9fc 11230+ h_sb = h_mnt->mnt_sb;
11231+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
11232+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
11233+ fh_len - Fh_tail, fh[Fh_h_type],
11234+ h_acceptable, /*context*/NULL);
11235+ dentry = h_parent;
11236+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
11237+ AuWarn1("%s decode_fh failed, %ld\n",
11238+ au_sbtype(h_sb), PTR_ERR(h_parent));
11239+ goto out;
11240+ }
11241+ dentry = NULL;
11242+ if (unlikely(au_test_anon(h_parent))) {
11243+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
11244+ au_sbtype(h_sb));
11245+ goto out_h_parent;
11246+ }
11247+
11248+ dentry = ERR_PTR(-ENOMEM);
11249+ pathname = (void *)__get_free_page(GFP_NOFS);
11250+ if (unlikely(!pathname))
11251+ goto out_h_parent;
11252+
11253+ root = sb->s_root;
11254+ path.mnt = h_mnt;
11255+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 11256+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 11257+ di_read_unlock(root, !AuLock_IR);
11258+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
11259+ dentry = (void *)p;
11260+ if (IS_ERR(p))
11261+ goto out_pathname;
11262+
11263+ si_read_unlock(sb);
11264+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
11265+ dentry = ERR_PTR(err);
11266+ if (unlikely(err))
11267+ goto out_relock;
11268+
11269+ dentry = ERR_PTR(-ENOENT);
11270+ AuDebugOn(au_test_anon(path.dentry));
5527c038 11271+ if (unlikely(d_really_is_negative(path.dentry)))
1facf9fc 11272+ goto out_path;
11273+
5527c038 11274+ if (ino != d_inode(path.dentry)->i_ino)
1facf9fc 11275+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
11276+ else
11277+ dentry = dget(path.dentry);
11278+
4f0767ce 11279+out_path:
1facf9fc 11280+ path_put(&path);
4f0767ce 11281+out_relock:
1facf9fc 11282+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
11283+ if (!IS_ERR(dentry)) {
11284+ dput(dentry);
11285+ dentry = ERR_PTR(-ESTALE);
11286+ }
4f0767ce 11287+out_pathname:
1facf9fc 11288+ free_page((unsigned long)pathname);
4f0767ce 11289+out_h_parent:
1facf9fc 11290+ dput(h_parent);
4f0767ce 11291+out:
1facf9fc 11292+ AuTraceErrPtr(dentry);
11293+ return dentry;
11294+}
11295+
11296+/* ---------------------------------------------------------------------- */
11297+
11298+static struct dentry *
11299+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
11300+ int fh_type)
11301+{
11302+ struct dentry *dentry;
11303+ __u32 *fh = fid->raw;
027c5e7a 11304+ struct au_branch *br;
1facf9fc 11305+ ino_t ino, dir_ino;
1facf9fc 11306+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 11307+ .force_lock = 0
11308+ };
11309+
1facf9fc 11310+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
11311+ /* it should never happen, but the file handle is unreliable */
11312+ if (unlikely(fh_len < Fh_tail))
11313+ goto out;
11314+ nsi_lock.sigen = fh[Fh_sigen];
11315+ nsi_lock.br_id = fh[Fh_br_id];
11316+
1facf9fc 11317+ /* branch id may be wrapped around */
027c5e7a
AM
11318+ br = NULL;
11319+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 11320+ goto out;
11321+ nsi_lock.force_lock = 1;
11322+
11323+ /* is this inode still cached? */
11324+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
11325+ /* it should never happen */
11326+ if (unlikely(ino == AUFS_ROOT_INO))
11327+ goto out;
11328+
1facf9fc 11329+ dir_ino = decode_ino(fh + Fh_dir_ino);
11330+ dentry = decode_by_ino(sb, ino, dir_ino);
11331+ if (IS_ERR(dentry))
11332+ goto out_unlock;
11333+ if (dentry)
11334+ goto accept;
11335+
11336+ /* is the parent dir cached? */
027c5e7a
AM
11337+ br = au_sbr(sb, nsi_lock.bindex);
11338+ atomic_inc(&br->br_count);
1facf9fc 11339+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
11340+ if (IS_ERR(dentry))
11341+ goto out_unlock;
11342+ if (dentry)
11343+ goto accept;
11344+
11345+ /* lookup path */
027c5e7a 11346+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 11347+ if (IS_ERR(dentry))
11348+ goto out_unlock;
11349+ if (unlikely(!dentry))
11350+ /* todo?: make it ESTALE */
11351+ goto out_unlock;
11352+
4f0767ce 11353+accept:
027c5e7a 11354+ if (!au_digen_test(dentry, au_sigen(sb))
5527c038 11355+ && d_inode(dentry)->i_generation == fh[Fh_igen])
1facf9fc 11356+ goto out_unlock; /* success */
11357+
11358+ dput(dentry);
11359+ dentry = ERR_PTR(-ESTALE);
4f0767ce 11360+out_unlock:
027c5e7a
AM
11361+ if (br)
11362+ atomic_dec(&br->br_count);
1facf9fc 11363+ si_read_unlock(sb);
4f0767ce 11364+out:
1facf9fc 11365+ AuTraceErrPtr(dentry);
11366+ return dentry;
11367+}
11368+
11369+#if 0 /* reserved for future use */
11370+/* support subtreecheck option */
11371+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
11372+ int fh_len, int fh_type)
11373+{
11374+ struct dentry *parent;
11375+ __u32 *fh = fid->raw;
11376+ ino_t dir_ino;
11377+
11378+ dir_ino = decode_ino(fh + Fh_dir_ino);
11379+ parent = decode_by_ino(sb, dir_ino, 0);
11380+ if (IS_ERR(parent))
11381+ goto out;
11382+ if (!parent)
11383+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
11384+ dir_ino, fh, fh_len);
11385+
4f0767ce 11386+out:
1facf9fc 11387+ AuTraceErrPtr(parent);
11388+ return parent;
11389+}
11390+#endif
11391+
11392+/* ---------------------------------------------------------------------- */
11393+
0c3ec466
AM
11394+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
11395+ struct inode *dir)
1facf9fc 11396+{
11397+ int err;
0c3ec466 11398+ aufs_bindex_t bindex;
1facf9fc 11399+ struct super_block *sb, *h_sb;
0c3ec466
AM
11400+ struct dentry *dentry, *parent, *h_parent;
11401+ struct inode *h_dir;
1facf9fc 11402+ struct au_branch *br;
11403+
1facf9fc 11404+ err = -ENOSPC;
11405+ if (unlikely(*max_len <= Fh_tail)) {
11406+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
11407+ goto out;
11408+ }
11409+
11410+ err = FILEID_ROOT;
0c3ec466
AM
11411+ if (inode->i_ino == AUFS_ROOT_INO) {
11412+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
1facf9fc 11413+ goto out;
11414+ }
11415+
1facf9fc 11416+ h_parent = NULL;
0c3ec466
AM
11417+ sb = inode->i_sb;
11418+ err = si_read_lock(sb, AuLock_FLUSH);
027c5e7a
AM
11419+ if (unlikely(err))
11420+ goto out;
11421+
1facf9fc 11422+#ifdef CONFIG_AUFS_DEBUG
11423+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11424+ AuWarn1("NFS-exporting requires xino\n");
11425+#endif
027c5e7a 11426+ err = -EIO;
0c3ec466
AM
11427+ parent = NULL;
11428+ ii_read_lock_child(inode);
11429+ bindex = au_ibstart(inode);
11430+ if (!dir) {
c1595e42 11431+ dentry = d_find_any_alias(inode);
0c3ec466
AM
11432+ if (unlikely(!dentry))
11433+ goto out_unlock;
11434+ AuDebugOn(au_test_anon(dentry));
11435+ parent = dget_parent(dentry);
11436+ dput(dentry);
11437+ if (unlikely(!parent))
11438+ goto out_unlock;
5527c038
JR
11439+ if (d_really_is_positive(parent))
11440+ dir = d_inode(parent);
1facf9fc 11441+ }
0c3ec466
AM
11442+
11443+ ii_read_lock_parent(dir);
11444+ h_dir = au_h_iptr(dir, bindex);
11445+ ii_read_unlock(dir);
11446+ if (unlikely(!h_dir))
11447+ goto out_parent;
c1595e42 11448+ h_parent = d_find_any_alias(h_dir);
1facf9fc 11449+ if (unlikely(!h_parent))
0c3ec466 11450+ goto out_hparent;
1facf9fc 11451+
11452+ err = -EPERM;
11453+ br = au_sbr(sb, bindex);
86dc4139 11454+ h_sb = au_br_sb(br);
1facf9fc 11455+ if (unlikely(!h_sb->s_export_op)) {
11456+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
0c3ec466 11457+ goto out_hparent;
1facf9fc 11458+ }
11459+
11460+ fh[Fh_br_id] = br->br_id;
11461+ fh[Fh_sigen] = au_sigen(sb);
11462+ encode_ino(fh + Fh_ino, inode->i_ino);
0c3ec466 11463+ encode_ino(fh + Fh_dir_ino, dir->i_ino);
1facf9fc 11464+ fh[Fh_igen] = inode->i_generation;
11465+
11466+ *max_len -= Fh_tail;
11467+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
11468+ max_len,
11469+ /*connectable or subtreecheck*/0);
11470+ err = fh[Fh_h_type];
11471+ *max_len += Fh_tail;
11472+ /* todo: macros? */
1716fcea 11473+ if (err != FILEID_INVALID)
1facf9fc 11474+ err = 99;
11475+ else
11476+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
11477+
0c3ec466 11478+out_hparent:
1facf9fc 11479+ dput(h_parent);
0c3ec466 11480+out_parent:
1facf9fc 11481+ dput(parent);
0c3ec466
AM
11482+out_unlock:
11483+ ii_read_unlock(inode);
11484+ si_read_unlock(sb);
4f0767ce 11485+out:
1facf9fc 11486+ if (unlikely(err < 0))
1716fcea 11487+ err = FILEID_INVALID;
1facf9fc 11488+ return err;
11489+}
11490+
11491+/* ---------------------------------------------------------------------- */
11492+
4a4d8108
AM
11493+static int aufs_commit_metadata(struct inode *inode)
11494+{
11495+ int err;
11496+ aufs_bindex_t bindex;
11497+ struct super_block *sb;
11498+ struct inode *h_inode;
11499+ int (*f)(struct inode *inode);
11500+
11501+ sb = inode->i_sb;
e49829fe 11502+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11503+ ii_write_lock_child(inode);
11504+ bindex = au_ibstart(inode);
11505+ AuDebugOn(bindex < 0);
11506+ h_inode = au_h_iptr(inode, bindex);
11507+
11508+ f = h_inode->i_sb->s_export_op->commit_metadata;
11509+ if (f)
11510+ err = f(h_inode);
11511+ else {
11512+ struct writeback_control wbc = {
11513+ .sync_mode = WB_SYNC_ALL,
11514+ .nr_to_write = 0 /* metadata only */
11515+ };
11516+
11517+ err = sync_inode(h_inode, &wbc);
11518+ }
11519+
11520+ au_cpup_attr_timesizes(inode);
11521+ ii_write_unlock(inode);
11522+ si_read_unlock(sb);
11523+ return err;
11524+}
11525+
11526+/* ---------------------------------------------------------------------- */
11527+
1facf9fc 11528+static struct export_operations aufs_export_op = {
4a4d8108 11529+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 11530+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
11531+ .encode_fh = aufs_encode_fh,
11532+ .commit_metadata = aufs_commit_metadata
1facf9fc 11533+};
11534+
11535+void au_export_init(struct super_block *sb)
11536+{
11537+ struct au_sbinfo *sbinfo;
11538+ __u32 u;
11539+
11540+ sb->s_export_op = &aufs_export_op;
11541+ sbinfo = au_sbi(sb);
11542+ sbinfo->si_xigen = NULL;
11543+ get_random_bytes(&u, sizeof(u));
11544+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
11545+ atomic_set(&sbinfo->si_xigen_next, u);
11546+}
076b876e
AM
11547diff -urN /usr/share/empty/fs/aufs/fhsm.c linux/fs/aufs/fhsm.c
11548--- /usr/share/empty/fs/aufs/fhsm.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 11549+++ linux/fs/aufs/fhsm.c 2015-09-24 10:47:58.251386326 +0200
c1595e42 11550@@ -0,0 +1,426 @@
076b876e 11551+/*
2000de60 11552+ * Copyright (C) 2011-2015 Junjiro R. Okajima
076b876e
AM
11553+ *
11554+ * This program, aufs is free software; you can redistribute it and/or modify
11555+ * it under the terms of the GNU General Public License as published by
11556+ * the Free Software Foundation; either version 2 of the License, or
11557+ * (at your option) any later version.
11558+ *
11559+ * This program is distributed in the hope that it will be useful,
11560+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11561+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11562+ * GNU General Public License for more details.
11563+ *
11564+ * You should have received a copy of the GNU General Public License
11565+ * along with this program; if not, write to the Free Software
11566+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11567+ */
11568+
11569+/*
11570+ * File-based Hierarchy Storage Management
11571+ */
11572+
11573+#include <linux/anon_inodes.h>
11574+#include <linux/poll.h>
11575+#include <linux/seq_file.h>
11576+#include <linux/statfs.h>
11577+#include "aufs.h"
11578+
c1595e42
JR
11579+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
11580+{
11581+ struct au_sbinfo *sbinfo;
11582+ struct au_fhsm *fhsm;
11583+
11584+ SiMustAnyLock(sb);
11585+
11586+ sbinfo = au_sbi(sb);
11587+ fhsm = &sbinfo->si_fhsm;
11588+ AuDebugOn(!fhsm);
11589+ return fhsm->fhsm_bottom;
11590+}
11591+
11592+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
11593+{
11594+ struct au_sbinfo *sbinfo;
11595+ struct au_fhsm *fhsm;
11596+
11597+ SiMustWriteLock(sb);
11598+
11599+ sbinfo = au_sbi(sb);
11600+ fhsm = &sbinfo->si_fhsm;
11601+ AuDebugOn(!fhsm);
11602+ fhsm->fhsm_bottom = bindex;
11603+}
11604+
11605+/* ---------------------------------------------------------------------- */
11606+
076b876e
AM
11607+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
11608+{
11609+ struct au_br_fhsm *bf;
11610+
11611+ bf = br->br_fhsm;
11612+ MtxMustLock(&bf->bf_lock);
11613+
11614+ return !bf->bf_readable
11615+ || time_after(jiffies,
11616+ bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
11617+}
11618+
11619+/* ---------------------------------------------------------------------- */
11620+
11621+static void au_fhsm_notify(struct super_block *sb, int val)
11622+{
11623+ struct au_sbinfo *sbinfo;
11624+ struct au_fhsm *fhsm;
11625+
11626+ SiMustAnyLock(sb);
11627+
11628+ sbinfo = au_sbi(sb);
11629+ fhsm = &sbinfo->si_fhsm;
11630+ if (au_fhsm_pid(fhsm)
11631+ && atomic_read(&fhsm->fhsm_readable) != -1) {
11632+ atomic_set(&fhsm->fhsm_readable, val);
11633+ if (val)
11634+ wake_up(&fhsm->fhsm_wqh);
11635+ }
11636+}
11637+
11638+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
11639+ struct aufs_stfs *rstfs, int do_lock, int do_notify)
11640+{
11641+ int err;
11642+ struct au_branch *br;
11643+ struct au_br_fhsm *bf;
11644+
11645+ br = au_sbr(sb, bindex);
11646+ AuDebugOn(au_br_rdonly(br));
11647+ bf = br->br_fhsm;
11648+ AuDebugOn(!bf);
11649+
11650+ if (do_lock)
11651+ mutex_lock(&bf->bf_lock);
11652+ else
11653+ MtxMustLock(&bf->bf_lock);
11654+
11655+ /* sb->s_root for NFS is unreliable */
11656+ err = au_br_stfs(br, &bf->bf_stfs);
11657+ if (unlikely(err)) {
11658+ AuErr1("FHSM failed (%d), b%d, ignored.\n", bindex, err);
11659+ goto out;
11660+ }
11661+
11662+ bf->bf_jiffy = jiffies;
11663+ bf->bf_readable = 1;
11664+ if (do_notify)
11665+ au_fhsm_notify(sb, /*val*/1);
11666+ if (rstfs)
11667+ *rstfs = bf->bf_stfs;
11668+
11669+out:
11670+ if (do_lock)
11671+ mutex_unlock(&bf->bf_lock);
11672+ au_fhsm_notify(sb, /*val*/1);
11673+
11674+ return err;
11675+}
11676+
11677+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
11678+{
11679+ int err;
076b876e
AM
11680+ struct au_sbinfo *sbinfo;
11681+ struct au_fhsm *fhsm;
11682+ struct au_branch *br;
11683+ struct au_br_fhsm *bf;
11684+
11685+ AuDbg("b%d, force %d\n", bindex, force);
11686+ SiMustAnyLock(sb);
11687+
11688+ sbinfo = au_sbi(sb);
11689+ fhsm = &sbinfo->si_fhsm;
c1595e42
JR
11690+ if (!au_ftest_si(sbinfo, FHSM)
11691+ || fhsm->fhsm_bottom == bindex)
076b876e
AM
11692+ return;
11693+
11694+ br = au_sbr(sb, bindex);
11695+ bf = br->br_fhsm;
11696+ AuDebugOn(!bf);
11697+ mutex_lock(&bf->bf_lock);
11698+ if (force
11699+ || au_fhsm_pid(fhsm)
11700+ || au_fhsm_test_jiffy(sbinfo, br))
11701+ err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
11702+ /*do_notify*/1);
11703+ mutex_unlock(&bf->bf_lock);
11704+}
11705+
11706+void au_fhsm_wrote_all(struct super_block *sb, int force)
11707+{
11708+ aufs_bindex_t bindex, bend;
11709+ struct au_branch *br;
11710+
11711+ /* exclude the bottom */
c1595e42 11712+ bend = au_fhsm_bottom(sb);
076b876e
AM
11713+ for (bindex = 0; bindex < bend; bindex++) {
11714+ br = au_sbr(sb, bindex);
11715+ if (au_br_fhsm(br->br_perm))
11716+ au_fhsm_wrote(sb, bindex, force);
11717+ }
11718+}
11719+
11720+/* ---------------------------------------------------------------------- */
11721+
11722+static unsigned int au_fhsm_poll(struct file *file,
11723+ struct poll_table_struct *wait)
11724+{
11725+ unsigned int mask;
11726+ struct au_sbinfo *sbinfo;
11727+ struct au_fhsm *fhsm;
11728+
11729+ mask = 0;
11730+ sbinfo = file->private_data;
11731+ fhsm = &sbinfo->si_fhsm;
11732+ poll_wait(file, &fhsm->fhsm_wqh, wait);
11733+ if (atomic_read(&fhsm->fhsm_readable))
11734+ mask = POLLIN /* | POLLRDNORM */;
11735+
11736+ AuTraceErr((int)mask);
11737+ return mask;
11738+}
11739+
11740+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
11741+ struct aufs_stfs *stfs, __s16 brid)
11742+{
11743+ int err;
11744+
11745+ err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
11746+ if (!err)
11747+ err = __put_user(brid, &stbr->brid);
11748+ if (unlikely(err))
11749+ err = -EFAULT;
11750+
11751+ return err;
11752+}
11753+
11754+static ssize_t au_fhsm_do_read(struct super_block *sb,
11755+ struct aufs_stbr __user *stbr, size_t count)
11756+{
11757+ ssize_t err;
11758+ int nstbr;
11759+ aufs_bindex_t bindex, bend;
11760+ struct au_branch *br;
11761+ struct au_br_fhsm *bf;
11762+
11763+ /* except the bottom branch */
11764+ err = 0;
11765+ nstbr = 0;
c1595e42 11766+ bend = au_fhsm_bottom(sb);
076b876e
AM
11767+ for (bindex = 0; !err && bindex < bend; bindex++) {
11768+ br = au_sbr(sb, bindex);
11769+ if (!au_br_fhsm(br->br_perm))
11770+ continue;
11771+
11772+ bf = br->br_fhsm;
11773+ mutex_lock(&bf->bf_lock);
11774+ if (bf->bf_readable) {
11775+ err = -EFAULT;
11776+ if (count >= sizeof(*stbr))
11777+ err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
11778+ br->br_id);
11779+ if (!err) {
11780+ bf->bf_readable = 0;
11781+ count -= sizeof(*stbr);
11782+ nstbr++;
11783+ }
11784+ }
11785+ mutex_unlock(&bf->bf_lock);
11786+ }
11787+ if (!err)
11788+ err = sizeof(*stbr) * nstbr;
11789+
11790+ return err;
11791+}
11792+
11793+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
11794+ loff_t *pos)
11795+{
11796+ ssize_t err;
11797+ int readable;
11798+ aufs_bindex_t nfhsm, bindex, bend;
11799+ struct au_sbinfo *sbinfo;
11800+ struct au_fhsm *fhsm;
11801+ struct au_branch *br;
11802+ struct super_block *sb;
11803+
11804+ err = 0;
11805+ sbinfo = file->private_data;
11806+ fhsm = &sbinfo->si_fhsm;
11807+need_data:
11808+ spin_lock_irq(&fhsm->fhsm_wqh.lock);
11809+ if (!atomic_read(&fhsm->fhsm_readable)) {
11810+ if (vfsub_file_flags(file) & O_NONBLOCK)
11811+ err = -EAGAIN;
11812+ else
11813+ err = wait_event_interruptible_locked_irq
11814+ (fhsm->fhsm_wqh,
11815+ atomic_read(&fhsm->fhsm_readable));
11816+ }
11817+ spin_unlock_irq(&fhsm->fhsm_wqh.lock);
11818+ if (unlikely(err))
11819+ goto out;
11820+
11821+ /* sb may already be dead */
11822+ au_rw_read_lock(&sbinfo->si_rwsem);
11823+ readable = atomic_read(&fhsm->fhsm_readable);
11824+ if (readable > 0) {
11825+ sb = sbinfo->si_sb;
11826+ AuDebugOn(!sb);
11827+ /* exclude the bottom branch */
11828+ nfhsm = 0;
c1595e42 11829+ bend = au_fhsm_bottom(sb);
076b876e
AM
11830+ for (bindex = 0; bindex < bend; bindex++) {
11831+ br = au_sbr(sb, bindex);
11832+ if (au_br_fhsm(br->br_perm))
11833+ nfhsm++;
11834+ }
11835+ err = -EMSGSIZE;
11836+ if (nfhsm * sizeof(struct aufs_stbr) <= count) {
11837+ atomic_set(&fhsm->fhsm_readable, 0);
11838+ err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
11839+ count);
11840+ }
11841+ }
11842+ au_rw_read_unlock(&sbinfo->si_rwsem);
11843+ if (!readable)
11844+ goto need_data;
11845+
11846+out:
11847+ return err;
11848+}
11849+
11850+static int au_fhsm_release(struct inode *inode, struct file *file)
11851+{
11852+ struct au_sbinfo *sbinfo;
11853+ struct au_fhsm *fhsm;
11854+
11855+ /* sb may already be dead */
11856+ sbinfo = file->private_data;
11857+ fhsm = &sbinfo->si_fhsm;
11858+ spin_lock(&fhsm->fhsm_spin);
11859+ fhsm->fhsm_pid = 0;
11860+ spin_unlock(&fhsm->fhsm_spin);
11861+ kobject_put(&sbinfo->si_kobj);
11862+
11863+ return 0;
11864+}
11865+
11866+static const struct file_operations au_fhsm_fops = {
11867+ .owner = THIS_MODULE,
11868+ .llseek = noop_llseek,
11869+ .read = au_fhsm_read,
11870+ .poll = au_fhsm_poll,
11871+ .release = au_fhsm_release
11872+};
11873+
11874+int au_fhsm_fd(struct super_block *sb, int oflags)
11875+{
11876+ int err, fd;
11877+ struct au_sbinfo *sbinfo;
11878+ struct au_fhsm *fhsm;
11879+
11880+ err = -EPERM;
11881+ if (unlikely(!capable(CAP_SYS_ADMIN)))
11882+ goto out;
11883+
11884+ err = -EINVAL;
11885+ if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
11886+ goto out;
11887+
11888+ err = 0;
11889+ sbinfo = au_sbi(sb);
11890+ fhsm = &sbinfo->si_fhsm;
11891+ spin_lock(&fhsm->fhsm_spin);
11892+ if (!fhsm->fhsm_pid)
11893+ fhsm->fhsm_pid = current->pid;
11894+ else
11895+ err = -EBUSY;
11896+ spin_unlock(&fhsm->fhsm_spin);
11897+ if (unlikely(err))
11898+ goto out;
11899+
11900+ oflags |= O_RDONLY;
11901+ /* oflags |= FMODE_NONOTIFY; */
11902+ fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
11903+ err = fd;
11904+ if (unlikely(fd < 0))
11905+ goto out_pid;
11906+
11907+ /* succeed regardless of 'fhsm' status */
11908+ kobject_get(&sbinfo->si_kobj);
11909+ si_noflush_read_lock(sb);
11910+ if (au_ftest_si(sbinfo, FHSM))
11911+ au_fhsm_wrote_all(sb, /*force*/0);
11912+ si_read_unlock(sb);
11913+ goto out; /* success */
11914+
11915+out_pid:
11916+ spin_lock(&fhsm->fhsm_spin);
11917+ fhsm->fhsm_pid = 0;
11918+ spin_unlock(&fhsm->fhsm_spin);
11919+out:
11920+ AuTraceErr(err);
11921+ return err;
11922+}
11923+
11924+/* ---------------------------------------------------------------------- */
11925+
11926+int au_fhsm_br_alloc(struct au_branch *br)
11927+{
11928+ int err;
11929+
11930+ err = 0;
11931+ br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
11932+ if (br->br_fhsm)
11933+ au_br_fhsm_init(br->br_fhsm);
11934+ else
11935+ err = -ENOMEM;
11936+
11937+ return err;
11938+}
11939+
11940+/* ---------------------------------------------------------------------- */
11941+
11942+void au_fhsm_fin(struct super_block *sb)
11943+{
11944+ au_fhsm_notify(sb, /*val*/-1);
11945+}
11946+
11947+void au_fhsm_init(struct au_sbinfo *sbinfo)
11948+{
11949+ struct au_fhsm *fhsm;
11950+
11951+ fhsm = &sbinfo->si_fhsm;
11952+ spin_lock_init(&fhsm->fhsm_spin);
11953+ init_waitqueue_head(&fhsm->fhsm_wqh);
11954+ atomic_set(&fhsm->fhsm_readable, 0);
11955+ fhsm->fhsm_expire
11956+ = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
c1595e42 11957+ fhsm->fhsm_bottom = -1;
076b876e
AM
11958+}
11959+
11960+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
11961+{
11962+ sbinfo->si_fhsm.fhsm_expire
11963+ = msecs_to_jiffies(sec * MSEC_PER_SEC);
11964+}
11965+
11966+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
11967+{
11968+ unsigned int u;
11969+
11970+ if (!au_ftest_si(sbinfo, FHSM))
11971+ return;
11972+
11973+ u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
11974+ if (u != AUFS_FHSM_CACHE_DEF_SEC)
11975+ seq_printf(seq, ",fhsm_sec=%u", u);
11976+}
7f207e10
AM
11977diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
11978--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
79b8bda9
AM
11979+++ linux/fs/aufs/file.c 2015-11-11 17:21:46.918863802 +0100
11980@@ -0,0 +1,844 @@
1facf9fc 11981+/*
2000de60 11982+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 11983+ *
11984+ * This program, aufs is free software; you can redistribute it and/or modify
11985+ * it under the terms of the GNU General Public License as published by
11986+ * the Free Software Foundation; either version 2 of the License, or
11987+ * (at your option) any later version.
dece6358
AM
11988+ *
11989+ * This program is distributed in the hope that it will be useful,
11990+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11991+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11992+ * GNU General Public License for more details.
11993+ *
11994+ * You should have received a copy of the GNU General Public License
523b37e3 11995+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 11996+ */
11997+
11998+/*
4a4d8108 11999+ * handling file/dir, and address_space operations
1facf9fc 12000+ */
12001+
7eafdf33
AM
12002+#ifdef CONFIG_AUFS_DEBUG
12003+#include <linux/migrate.h>
12004+#endif
4a4d8108 12005+#include <linux/pagemap.h>
1facf9fc 12006+#include "aufs.h"
12007+
4a4d8108
AM
12008+/* drop flags for writing */
12009+unsigned int au_file_roflags(unsigned int flags)
12010+{
12011+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
12012+ flags |= O_RDONLY | O_NOATIME;
12013+ return flags;
12014+}
12015+
12016+/* common functions to regular file and dir */
12017+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 12018+ struct file *file, int force_wr)
1facf9fc 12019+{
1308ab2a 12020+ struct file *h_file;
4a4d8108
AM
12021+ struct dentry *h_dentry;
12022+ struct inode *h_inode;
12023+ struct super_block *sb;
12024+ struct au_branch *br;
12025+ struct path h_path;
b912730e 12026+ int err;
1facf9fc 12027+
4a4d8108
AM
12028+ /* a race condition can happen between open and unlink/rmdir */
12029+ h_file = ERR_PTR(-ENOENT);
12030+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 12031+ if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
4a4d8108 12032+ goto out;
5527c038 12033+ h_inode = d_inode(h_dentry);
027c5e7a
AM
12034+ spin_lock(&h_dentry->d_lock);
12035+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
5527c038 12036+ /* || !d_inode(dentry)->i_nlink */
027c5e7a
AM
12037+ ;
12038+ spin_unlock(&h_dentry->d_lock);
12039+ if (unlikely(err))
4a4d8108 12040+ goto out;
1facf9fc 12041+
4a4d8108
AM
12042+ sb = dentry->d_sb;
12043+ br = au_sbr(sb, bindex);
b912730e
AM
12044+ err = au_br_test_oflag(flags, br);
12045+ h_file = ERR_PTR(err);
12046+ if (unlikely(err))
027c5e7a 12047+ goto out;
1facf9fc 12048+
4a4d8108 12049+ /* drop flags for writing */
5527c038 12050+ if (au_test_ro(sb, bindex, d_inode(dentry))) {
392086de
AM
12051+ if (force_wr && !(flags & O_WRONLY))
12052+ force_wr = 0;
4a4d8108 12053+ flags = au_file_roflags(flags);
392086de
AM
12054+ if (force_wr) {
12055+ h_file = ERR_PTR(-EROFS);
12056+ flags = au_file_roflags(flags);
12057+ if (unlikely(vfsub_native_ro(h_inode)
12058+ || IS_APPEND(h_inode)))
12059+ goto out;
12060+ flags &= ~O_ACCMODE;
12061+ flags |= O_WRONLY;
12062+ }
12063+ }
4a4d8108
AM
12064+ flags &= ~O_CREAT;
12065+ atomic_inc(&br->br_count);
12066+ h_path.dentry = h_dentry;
86dc4139 12067+ h_path.mnt = au_br_mnt(br);
38d290e6 12068+ h_file = vfsub_dentry_open(&h_path, flags);
4a4d8108
AM
12069+ if (IS_ERR(h_file))
12070+ goto out_br;
dece6358 12071+
b912730e 12072+ if (flags & __FMODE_EXEC) {
4a4d8108
AM
12073+ err = deny_write_access(h_file);
12074+ if (unlikely(err)) {
12075+ fput(h_file);
12076+ h_file = ERR_PTR(err);
12077+ goto out_br;
12078+ }
12079+ }
953406b4 12080+ fsnotify_open(h_file);
4a4d8108 12081+ goto out; /* success */
1facf9fc 12082+
4f0767ce 12083+out_br:
4a4d8108 12084+ atomic_dec(&br->br_count);
4f0767ce 12085+out:
4a4d8108
AM
12086+ return h_file;
12087+}
1308ab2a 12088+
076b876e
AM
12089+static int au_cmoo(struct dentry *dentry)
12090+{
12091+ int err, cmoo;
12092+ unsigned int udba;
12093+ struct path h_path;
12094+ struct au_pin pin;
12095+ struct au_cp_generic cpg = {
12096+ .dentry = dentry,
12097+ .bdst = -1,
12098+ .bsrc = -1,
12099+ .len = -1,
12100+ .pin = &pin,
12101+ .flags = AuCpup_DTIME | AuCpup_HOPEN
12102+ };
7e9cd9fe 12103+ struct inode *delegated;
076b876e
AM
12104+ struct super_block *sb;
12105+ struct au_sbinfo *sbinfo;
12106+ struct au_fhsm *fhsm;
12107+ pid_t pid;
12108+ struct au_branch *br;
12109+ struct dentry *parent;
12110+ struct au_hinode *hdir;
12111+
12112+ DiMustWriteLock(dentry);
5527c038 12113+ IiMustWriteLock(d_inode(dentry));
076b876e
AM
12114+
12115+ err = 0;
12116+ if (IS_ROOT(dentry))
12117+ goto out;
12118+ cpg.bsrc = au_dbstart(dentry);
12119+ if (!cpg.bsrc)
12120+ goto out;
12121+
12122+ sb = dentry->d_sb;
12123+ sbinfo = au_sbi(sb);
12124+ fhsm = &sbinfo->si_fhsm;
12125+ pid = au_fhsm_pid(fhsm);
12126+ if (pid
12127+ && (current->pid == pid
12128+ || current->real_parent->pid == pid))
12129+ goto out;
12130+
12131+ br = au_sbr(sb, cpg.bsrc);
12132+ cmoo = au_br_cmoo(br->br_perm);
12133+ if (!cmoo)
12134+ goto out;
7e9cd9fe 12135+ if (!d_is_reg(dentry))
076b876e
AM
12136+ cmoo &= AuBrAttr_COO_ALL;
12137+ if (!cmoo)
12138+ goto out;
12139+
12140+ parent = dget_parent(dentry);
12141+ di_write_lock_parent(parent);
12142+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
12143+ cpg.bdst = err;
12144+ if (unlikely(err < 0)) {
12145+ err = 0; /* there is no upper writable branch */
12146+ goto out_dgrade;
12147+ }
12148+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
12149+
12150+ /* do not respect the coo attrib for the target branch */
12151+ err = au_cpup_dirs(dentry, cpg.bdst);
12152+ if (unlikely(err))
12153+ goto out_dgrade;
12154+
12155+ di_downgrade_lock(parent, AuLock_IR);
12156+ udba = au_opt_udba(sb);
12157+ err = au_pin(&pin, dentry, cpg.bdst, udba,
12158+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12159+ if (unlikely(err))
12160+ goto out_parent;
12161+
12162+ err = au_sio_cpup_simple(&cpg);
12163+ au_unpin(&pin);
12164+ if (unlikely(err))
12165+ goto out_parent;
12166+ if (!(cmoo & AuBrWAttr_MOO))
12167+ goto out_parent; /* success */
12168+
12169+ err = au_pin(&pin, dentry, cpg.bsrc, udba,
12170+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12171+ if (unlikely(err))
12172+ goto out_parent;
12173+
12174+ h_path.mnt = au_br_mnt(br);
12175+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
5527c038 12176+ hdir = au_hi(d_inode(parent), cpg.bsrc);
076b876e
AM
12177+ delegated = NULL;
12178+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
12179+ au_unpin(&pin);
12180+ /* todo: keep h_dentry or not? */
12181+ if (unlikely(err == -EWOULDBLOCK)) {
12182+ pr_warn("cannot retry for NFSv4 delegation"
12183+ " for an internal unlink\n");
12184+ iput(delegated);
12185+ }
12186+ if (unlikely(err)) {
12187+ pr_err("unlink %pd after coo failed (%d), ignored\n",
12188+ dentry, err);
12189+ err = 0;
12190+ }
12191+ goto out_parent; /* success */
12192+
12193+out_dgrade:
12194+ di_downgrade_lock(parent, AuLock_IR);
12195+out_parent:
12196+ di_read_unlock(parent, AuLock_IR);
12197+ dput(parent);
12198+out:
12199+ AuTraceErr(err);
12200+ return err;
12201+}
12202+
b912730e 12203+int au_do_open(struct file *file, struct au_do_open_args *args)
1facf9fc 12204+{
b912730e 12205+ int err, no_lock = args->no_lock;
1facf9fc 12206+ struct dentry *dentry;
076b876e 12207+ struct au_finfo *finfo;
1308ab2a 12208+
b912730e
AM
12209+ if (!no_lock)
12210+ err = au_finfo_init(file, args->fidir);
12211+ else {
12212+ lockdep_off();
12213+ err = au_finfo_init(file, args->fidir);
12214+ lockdep_on();
12215+ }
4a4d8108
AM
12216+ if (unlikely(err))
12217+ goto out;
1facf9fc 12218+
2000de60 12219+ dentry = file->f_path.dentry;
b912730e
AM
12220+ AuDebugOn(IS_ERR_OR_NULL(dentry));
12221+ if (!no_lock) {
12222+ di_write_lock_child(dentry);
12223+ err = au_cmoo(dentry);
12224+ di_downgrade_lock(dentry, AuLock_IR);
12225+ if (!err)
12226+ err = args->open(file, vfsub_file_flags(file), NULL);
12227+ di_read_unlock(dentry, AuLock_IR);
12228+ } else {
12229+ err = au_cmoo(dentry);
12230+ if (!err)
12231+ err = args->open(file, vfsub_file_flags(file),
12232+ args->h_file);
12233+ if (!err && au_fbstart(file) != au_dbstart(dentry))
12234+ /*
12235+ * cmoo happens after h_file was opened.
12236+ * need to refresh file later.
12237+ */
12238+ atomic_dec(&au_fi(file)->fi_generation);
12239+ }
1facf9fc 12240+
076b876e
AM
12241+ finfo = au_fi(file);
12242+ if (!err) {
12243+ finfo->fi_file = file;
12244+ au_sphl_add(&finfo->fi_hlist,
2000de60 12245+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
076b876e 12246+ }
b912730e
AM
12247+ if (!no_lock)
12248+ fi_write_unlock(file);
12249+ else {
12250+ lockdep_off();
12251+ fi_write_unlock(file);
12252+ lockdep_on();
12253+ }
4a4d8108 12254+ if (unlikely(err)) {
076b876e 12255+ finfo->fi_hdir = NULL;
4a4d8108 12256+ au_finfo_fin(file);
1308ab2a 12257+ }
4a4d8108 12258+
4f0767ce 12259+out:
1308ab2a 12260+ return err;
12261+}
dece6358 12262+
4a4d8108 12263+int au_reopen_nondir(struct file *file)
1308ab2a 12264+{
4a4d8108
AM
12265+ int err;
12266+ aufs_bindex_t bstart;
12267+ struct dentry *dentry;
12268+ struct file *h_file, *h_file_tmp;
1308ab2a 12269+
2000de60 12270+ dentry = file->f_path.dentry;
4a4d8108
AM
12271+ bstart = au_dbstart(dentry);
12272+ h_file_tmp = NULL;
12273+ if (au_fbstart(file) == bstart) {
12274+ h_file = au_hf_top(file);
12275+ if (file->f_mode == h_file->f_mode)
12276+ return 0; /* success */
12277+ h_file_tmp = h_file;
12278+ get_file(h_file_tmp);
12279+ au_set_h_fptr(file, bstart, NULL);
12280+ }
12281+ AuDebugOn(au_fi(file)->fi_hdir);
86dc4139
AM
12282+ /*
12283+ * it can happen
12284+ * file exists on both of rw and ro
12285+ * open --> dbstart and fbstart are both 0
12286+ * prepend a branch as rw, "rw" become ro
12287+ * remove rw/file
12288+ * delete the top branch, "rw" becomes rw again
12289+ * --> dbstart is 1, fbstart is still 0
12290+ * write --> fbstart is 0 but dbstart is 1
12291+ */
12292+ /* AuDebugOn(au_fbstart(file) < bstart); */
1308ab2a 12293+
4a4d8108 12294+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
392086de 12295+ file, /*force_wr*/0);
4a4d8108 12296+ err = PTR_ERR(h_file);
86dc4139
AM
12297+ if (IS_ERR(h_file)) {
12298+ if (h_file_tmp) {
12299+ atomic_inc(&au_sbr(dentry->d_sb, bstart)->br_count);
12300+ au_set_h_fptr(file, bstart, h_file_tmp);
12301+ h_file_tmp = NULL;
12302+ }
4a4d8108 12303+ goto out; /* todo: close all? */
86dc4139 12304+ }
4a4d8108
AM
12305+
12306+ err = 0;
12307+ au_set_fbstart(file, bstart);
12308+ au_set_h_fptr(file, bstart, h_file);
12309+ au_update_figen(file);
12310+ /* todo: necessary? */
12311+ /* file->f_ra = h_file->f_ra; */
12312+
4f0767ce 12313+out:
4a4d8108
AM
12314+ if (h_file_tmp)
12315+ fput(h_file_tmp);
12316+ return err;
1facf9fc 12317+}
12318+
1308ab2a 12319+/* ---------------------------------------------------------------------- */
12320+
4a4d8108
AM
12321+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
12322+ struct dentry *hi_wh)
1facf9fc 12323+{
4a4d8108
AM
12324+ int err;
12325+ aufs_bindex_t bstart;
12326+ struct au_dinfo *dinfo;
12327+ struct dentry *h_dentry;
12328+ struct au_hdentry *hdp;
1facf9fc 12329+
2000de60 12330+ dinfo = au_di(file->f_path.dentry);
4a4d8108 12331+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 12332+
4a4d8108
AM
12333+ bstart = dinfo->di_bstart;
12334+ dinfo->di_bstart = btgt;
12335+ hdp = dinfo->di_hdentry;
12336+ h_dentry = hdp[0 + btgt].hd_dentry;
12337+ hdp[0 + btgt].hd_dentry = hi_wh;
12338+ err = au_reopen_nondir(file);
12339+ hdp[0 + btgt].hd_dentry = h_dentry;
12340+ dinfo->di_bstart = bstart;
1facf9fc 12341+
1facf9fc 12342+ return err;
12343+}
12344+
4a4d8108 12345+static int au_ready_to_write_wh(struct file *file, loff_t len,
86dc4139 12346+ aufs_bindex_t bcpup, struct au_pin *pin)
1facf9fc 12347+{
4a4d8108 12348+ int err;
027c5e7a 12349+ struct inode *inode, *h_inode;
c2b27bf2
AM
12350+ struct dentry *h_dentry, *hi_wh;
12351+ struct au_cp_generic cpg = {
2000de60 12352+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12353+ .bdst = bcpup,
12354+ .bsrc = -1,
12355+ .len = len,
12356+ .pin = pin
12357+ };
1facf9fc 12358+
c2b27bf2 12359+ au_update_dbstart(cpg.dentry);
5527c038 12360+ inode = d_inode(cpg.dentry);
027c5e7a 12361+ h_inode = NULL;
c2b27bf2
AM
12362+ if (au_dbstart(cpg.dentry) <= bcpup
12363+ && au_dbend(cpg.dentry) >= bcpup) {
12364+ h_dentry = au_h_dptr(cpg.dentry, bcpup);
5527c038
JR
12365+ if (h_dentry && d_is_positive(h_dentry))
12366+ h_inode = d_inode(h_dentry);
027c5e7a 12367+ }
4a4d8108 12368+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 12369+ if (!hi_wh && !h_inode)
c2b27bf2 12370+ err = au_sio_cpup_wh(&cpg, file);
4a4d8108
AM
12371+ else
12372+ /* already copied-up after unlink */
12373+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 12374+
4a4d8108 12375+ if (!err
38d290e6
JR
12376+ && (inode->i_nlink > 1
12377+ || (inode->i_state & I_LINKABLE))
c2b27bf2
AM
12378+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
12379+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
1308ab2a 12380+
dece6358 12381+ return err;
1facf9fc 12382+}
12383+
4a4d8108
AM
12384+/*
12385+ * prepare the @file for writing.
12386+ */
12387+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 12388+{
4a4d8108 12389+ int err;
c2b27bf2 12390+ aufs_bindex_t dbstart;
c1595e42 12391+ struct dentry *parent;
86dc4139 12392+ struct inode *inode;
1facf9fc 12393+ struct super_block *sb;
4a4d8108 12394+ struct file *h_file;
c2b27bf2 12395+ struct au_cp_generic cpg = {
2000de60 12396+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12397+ .bdst = -1,
12398+ .bsrc = -1,
12399+ .len = len,
12400+ .pin = pin,
12401+ .flags = AuCpup_DTIME
12402+ };
1facf9fc 12403+
c2b27bf2 12404+ sb = cpg.dentry->d_sb;
5527c038 12405+ inode = d_inode(cpg.dentry);
c2b27bf2
AM
12406+ cpg.bsrc = au_fbstart(file);
12407+ err = au_test_ro(sb, cpg.bsrc, inode);
4a4d8108 12408+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
c2b27bf2
AM
12409+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
12410+ /*flags*/0);
1facf9fc 12411+ goto out;
4a4d8108 12412+ }
1facf9fc 12413+
027c5e7a 12414+ /* need to cpup or reopen */
c2b27bf2 12415+ parent = dget_parent(cpg.dentry);
4a4d8108 12416+ di_write_lock_parent(parent);
c2b27bf2
AM
12417+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12418+ cpg.bdst = err;
4a4d8108
AM
12419+ if (unlikely(err < 0))
12420+ goto out_dgrade;
12421+ err = 0;
12422+
c2b27bf2
AM
12423+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
12424+ err = au_cpup_dirs(cpg.dentry, cpg.bdst);
1facf9fc 12425+ if (unlikely(err))
4a4d8108
AM
12426+ goto out_dgrade;
12427+ }
12428+
c2b27bf2 12429+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108
AM
12430+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12431+ if (unlikely(err))
12432+ goto out_dgrade;
12433+
c2b27bf2 12434+ dbstart = au_dbstart(cpg.dentry);
c1595e42 12435+ if (dbstart <= cpg.bdst)
c2b27bf2 12436+ cpg.bsrc = cpg.bdst;
027c5e7a 12437+
c2b27bf2
AM
12438+ if (dbstart <= cpg.bdst /* just reopen */
12439+ || !d_unhashed(cpg.dentry) /* copyup and reopen */
027c5e7a 12440+ ) {
392086de 12441+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
86dc4139 12442+ if (IS_ERR(h_file))
027c5e7a 12443+ err = PTR_ERR(h_file);
86dc4139 12444+ else {
027c5e7a 12445+ di_downgrade_lock(parent, AuLock_IR);
c2b27bf2
AM
12446+ if (dbstart > cpg.bdst)
12447+ err = au_sio_cpup_simple(&cpg);
027c5e7a
AM
12448+ if (!err)
12449+ err = au_reopen_nondir(file);
c2b27bf2 12450+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
027c5e7a 12451+ }
027c5e7a
AM
12452+ } else { /* copyup as wh and reopen */
12453+ /*
12454+ * since writable hfsplus branch is not supported,
12455+ * h_open_pre/post() are unnecessary.
12456+ */
c2b27bf2 12457+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
4a4d8108 12458+ di_downgrade_lock(parent, AuLock_IR);
4a4d8108 12459+ }
4a4d8108
AM
12460+
12461+ if (!err) {
12462+ au_pin_set_parent_lflag(pin, /*lflag*/0);
12463+ goto out_dput; /* success */
12464+ }
12465+ au_unpin(pin);
12466+ goto out_unlock;
1facf9fc 12467+
4f0767ce 12468+out_dgrade:
4a4d8108 12469+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 12470+out_unlock:
4a4d8108 12471+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12472+out_dput:
4a4d8108 12473+ dput(parent);
4f0767ce 12474+out:
1facf9fc 12475+ return err;
12476+}
12477+
4a4d8108
AM
12478+/* ---------------------------------------------------------------------- */
12479+
12480+int au_do_flush(struct file *file, fl_owner_t id,
12481+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 12482+{
4a4d8108 12483+ int err;
1facf9fc 12484+ struct super_block *sb;
4a4d8108 12485+ struct inode *inode;
1facf9fc 12486+
c06a8ce3
AM
12487+ inode = file_inode(file);
12488+ sb = inode->i_sb;
4a4d8108
AM
12489+ si_noflush_read_lock(sb);
12490+ fi_read_lock(file);
b752ccd1 12491+ ii_read_lock_child(inode);
1facf9fc 12492+
4a4d8108
AM
12493+ err = flush(file, id);
12494+ au_cpup_attr_timesizes(inode);
1facf9fc 12495+
b752ccd1 12496+ ii_read_unlock(inode);
4a4d8108 12497+ fi_read_unlock(file);
1308ab2a 12498+ si_read_unlock(sb);
dece6358 12499+ return err;
1facf9fc 12500+}
12501+
4a4d8108
AM
12502+/* ---------------------------------------------------------------------- */
12503+
12504+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 12505+{
4a4d8108 12506+ int err;
4a4d8108
AM
12507+ struct au_pin pin;
12508+ struct au_finfo *finfo;
c2b27bf2 12509+ struct dentry *parent, *hi_wh;
4a4d8108 12510+ struct inode *inode;
1facf9fc 12511+ struct super_block *sb;
c2b27bf2 12512+ struct au_cp_generic cpg = {
2000de60 12513+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12514+ .bdst = -1,
12515+ .bsrc = -1,
12516+ .len = -1,
12517+ .pin = &pin,
12518+ .flags = AuCpup_DTIME
12519+ };
1facf9fc 12520+
4a4d8108
AM
12521+ FiMustWriteLock(file);
12522+
12523+ err = 0;
12524+ finfo = au_fi(file);
c2b27bf2 12525+ sb = cpg.dentry->d_sb;
5527c038 12526+ inode = d_inode(cpg.dentry);
c2b27bf2
AM
12527+ cpg.bdst = au_ibstart(inode);
12528+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
1308ab2a 12529+ goto out;
dece6358 12530+
c2b27bf2
AM
12531+ parent = dget_parent(cpg.dentry);
12532+ if (au_test_ro(sb, cpg.bdst, inode)) {
4a4d8108 12533+ di_read_lock_parent(parent, !AuLock_IR);
c2b27bf2
AM
12534+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12535+ cpg.bdst = err;
4a4d8108
AM
12536+ di_read_unlock(parent, !AuLock_IR);
12537+ if (unlikely(err < 0))
12538+ goto out_parent;
12539+ err = 0;
1facf9fc 12540+ }
1facf9fc 12541+
4a4d8108 12542+ di_read_lock_parent(parent, AuLock_IR);
c2b27bf2 12543+ hi_wh = au_hi_wh(inode, cpg.bdst);
7f207e10
AM
12544+ if (!S_ISDIR(inode->i_mode)
12545+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108 12546+ && au_plink_test(inode)
c2b27bf2
AM
12547+ && !d_unhashed(cpg.dentry)
12548+ && cpg.bdst < au_dbstart(cpg.dentry)) {
12549+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
4a4d8108
AM
12550+ if (unlikely(err))
12551+ goto out_unlock;
12552+
12553+ /* always superio. */
c2b27bf2 12554+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108 12555+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 12556+ if (!err) {
c2b27bf2 12557+ err = au_sio_cpup_simple(&cpg);
367653fa
AM
12558+ au_unpin(&pin);
12559+ }
4a4d8108
AM
12560+ } else if (hi_wh) {
12561+ /* already copied-up after unlink */
c2b27bf2 12562+ err = au_reopen_wh(file, cpg.bdst, hi_wh);
4a4d8108
AM
12563+ *need_reopen = 0;
12564+ }
1facf9fc 12565+
4f0767ce 12566+out_unlock:
4a4d8108 12567+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12568+out_parent:
4a4d8108 12569+ dput(parent);
4f0767ce 12570+out:
1308ab2a 12571+ return err;
dece6358 12572+}
1facf9fc 12573+
4a4d8108 12574+static void au_do_refresh_dir(struct file *file)
dece6358 12575+{
4a4d8108
AM
12576+ aufs_bindex_t bindex, bend, new_bindex, brid;
12577+ struct au_hfile *p, tmp, *q;
12578+ struct au_finfo *finfo;
1308ab2a 12579+ struct super_block *sb;
4a4d8108 12580+ struct au_fidir *fidir;
1facf9fc 12581+
4a4d8108 12582+ FiMustWriteLock(file);
1facf9fc 12583+
2000de60 12584+ sb = file->f_path.dentry->d_sb;
4a4d8108
AM
12585+ finfo = au_fi(file);
12586+ fidir = finfo->fi_hdir;
12587+ AuDebugOn(!fidir);
12588+ p = fidir->fd_hfile + finfo->fi_btop;
12589+ brid = p->hf_br->br_id;
12590+ bend = fidir->fd_bbot;
12591+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
12592+ if (!p->hf_file)
12593+ continue;
1308ab2a 12594+
4a4d8108
AM
12595+ new_bindex = au_br_index(sb, p->hf_br->br_id);
12596+ if (new_bindex == bindex)
12597+ continue;
12598+ if (new_bindex < 0) {
12599+ au_set_h_fptr(file, bindex, NULL);
12600+ continue;
12601+ }
1308ab2a 12602+
4a4d8108
AM
12603+ /* swap the two lower files, and loop again */
12604+ q = fidir->fd_hfile + new_bindex;
12605+ tmp = *q;
12606+ *q = *p;
12607+ *p = tmp;
12608+ if (tmp.hf_file) {
12609+ bindex--;
12610+ p--;
12611+ }
12612+ }
1308ab2a 12613+
4a4d8108 12614+ p = fidir->fd_hfile;
2000de60 12615+ if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
4a4d8108
AM
12616+ bend = au_sbend(sb);
12617+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
12618+ finfo->fi_btop++, p++)
12619+ if (p->hf_file) {
c06a8ce3 12620+ if (file_inode(p->hf_file))
4a4d8108 12621+ break;
c1595e42 12622+ au_hfput(p, file);
4a4d8108
AM
12623+ }
12624+ } else {
12625+ bend = au_br_index(sb, brid);
12626+ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
12627+ finfo->fi_btop++, p++)
12628+ if (p->hf_file)
12629+ au_hfput(p, file);
12630+ bend = au_sbend(sb);
12631+ }
1308ab2a 12632+
4a4d8108
AM
12633+ p = fidir->fd_hfile + bend;
12634+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
12635+ fidir->fd_bbot--, p--)
12636+ if (p->hf_file) {
c06a8ce3 12637+ if (file_inode(p->hf_file))
4a4d8108 12638+ break;
c1595e42 12639+ au_hfput(p, file);
4a4d8108
AM
12640+ }
12641+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 12642+}
12643+
4a4d8108
AM
12644+/*
12645+ * after branch manipulation, refresh the file.
12646+ */
12647+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 12648+{
4a4d8108
AM
12649+ int err, need_reopen;
12650+ aufs_bindex_t bend, bindex;
12651+ struct dentry *dentry;
1308ab2a 12652+ struct au_finfo *finfo;
4a4d8108 12653+ struct au_hfile *hfile;
1facf9fc 12654+
2000de60 12655+ dentry = file->f_path.dentry;
1308ab2a 12656+ finfo = au_fi(file);
4a4d8108
AM
12657+ if (!finfo->fi_hdir) {
12658+ hfile = &finfo->fi_htop;
12659+ AuDebugOn(!hfile->hf_file);
12660+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
12661+ AuDebugOn(bindex < 0);
12662+ if (bindex != finfo->fi_btop)
12663+ au_set_fbstart(file, bindex);
12664+ } else {
12665+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
12666+ if (unlikely(err))
12667+ goto out;
12668+ au_do_refresh_dir(file);
12669+ }
1facf9fc 12670+
4a4d8108
AM
12671+ err = 0;
12672+ need_reopen = 1;
12673+ if (!au_test_mmapped(file))
12674+ err = au_file_refresh_by_inode(file, &need_reopen);
027c5e7a 12675+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
12676+ err = reopen(file);
12677+ if (!err) {
12678+ au_update_figen(file);
12679+ goto out; /* success */
12680+ }
12681+
12682+ /* error, close all lower files */
12683+ if (finfo->fi_hdir) {
12684+ bend = au_fbend_dir(file);
12685+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
12686+ au_set_h_fptr(file, bindex, NULL);
12687+ }
1facf9fc 12688+
4f0767ce 12689+out:
1facf9fc 12690+ return err;
12691+}
12692+
4a4d8108
AM
12693+/* common function to regular file and dir */
12694+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12695+ int wlock)
dece6358 12696+{
1308ab2a 12697+ int err;
4a4d8108
AM
12698+ unsigned int sigen, figen;
12699+ aufs_bindex_t bstart;
12700+ unsigned char pseudo_link;
12701+ struct dentry *dentry;
12702+ struct inode *inode;
1facf9fc 12703+
4a4d8108 12704+ err = 0;
2000de60 12705+ dentry = file->f_path.dentry;
5527c038 12706+ inode = d_inode(dentry);
4a4d8108
AM
12707+ sigen = au_sigen(dentry->d_sb);
12708+ fi_write_lock(file);
12709+ figen = au_figen(file);
12710+ di_write_lock_child(dentry);
12711+ bstart = au_dbstart(dentry);
12712+ pseudo_link = (bstart != au_ibstart(inode));
12713+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
12714+ if (!wlock) {
12715+ di_downgrade_lock(dentry, AuLock_IR);
12716+ fi_downgrade_lock(file);
12717+ }
12718+ goto out; /* success */
12719+ }
dece6358 12720+
4a4d8108 12721+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 12722+ if (au_digen_test(dentry, sigen)) {
4a4d8108 12723+ err = au_reval_dpath(dentry, sigen);
027c5e7a 12724+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 12725+ }
dece6358 12726+
027c5e7a
AM
12727+ if (!err)
12728+ err = refresh_file(file, reopen);
4a4d8108
AM
12729+ if (!err) {
12730+ if (!wlock) {
12731+ di_downgrade_lock(dentry, AuLock_IR);
12732+ fi_downgrade_lock(file);
12733+ }
12734+ } else {
12735+ di_write_unlock(dentry);
12736+ fi_write_unlock(file);
12737+ }
1facf9fc 12738+
4f0767ce 12739+out:
1308ab2a 12740+ return err;
12741+}
1facf9fc 12742+
4a4d8108
AM
12743+/* ---------------------------------------------------------------------- */
12744+
12745+/* cf. aufs_nopage() */
12746+/* for madvise(2) */
12747+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 12748+{
4a4d8108
AM
12749+ unlock_page(page);
12750+ return 0;
12751+}
1facf9fc 12752+
4a4d8108 12753+/* it will never be called, but necessary to support O_DIRECT */
5527c038
JR
12754+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
12755+ loff_t offset)
4a4d8108 12756+{ BUG(); return 0; }
1facf9fc 12757+
4a4d8108
AM
12758+/* they will never be called. */
12759+#ifdef CONFIG_AUFS_DEBUG
12760+static int aufs_write_begin(struct file *file, struct address_space *mapping,
12761+ loff_t pos, unsigned len, unsigned flags,
12762+ struct page **pagep, void **fsdata)
12763+{ AuUnsupport(); return 0; }
12764+static int aufs_write_end(struct file *file, struct address_space *mapping,
12765+ loff_t pos, unsigned len, unsigned copied,
12766+ struct page *page, void *fsdata)
12767+{ AuUnsupport(); return 0; }
12768+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
12769+{ AuUnsupport(); return 0; }
1308ab2a 12770+
4a4d8108
AM
12771+static int aufs_set_page_dirty(struct page *page)
12772+{ AuUnsupport(); return 0; }
392086de
AM
12773+static void aufs_invalidatepage(struct page *page, unsigned int offset,
12774+ unsigned int length)
4a4d8108
AM
12775+{ AuUnsupport(); }
12776+static int aufs_releasepage(struct page *page, gfp_t gfp)
12777+{ AuUnsupport(); return 0; }
79b8bda9 12778+#if 0 /* called by memory compaction regardless file */
4a4d8108 12779+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
7eafdf33 12780+ struct page *page, enum migrate_mode mode)
4a4d8108 12781+{ AuUnsupport(); return 0; }
79b8bda9 12782+#endif
4a4d8108
AM
12783+static int aufs_launder_page(struct page *page)
12784+{ AuUnsupport(); return 0; }
12785+static int aufs_is_partially_uptodate(struct page *page,
38d290e6
JR
12786+ unsigned long from,
12787+ unsigned long count)
4a4d8108 12788+{ AuUnsupport(); return 0; }
392086de
AM
12789+static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
12790+ bool *writeback)
12791+{ AuUnsupport(); }
4a4d8108
AM
12792+static int aufs_error_remove_page(struct address_space *mapping,
12793+ struct page *page)
12794+{ AuUnsupport(); return 0; }
b4510431
AM
12795+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
12796+ sector_t *span)
12797+{ AuUnsupport(); return 0; }
12798+static void aufs_swap_deactivate(struct file *file)
12799+{ AuUnsupport(); }
4a4d8108
AM
12800+#endif /* CONFIG_AUFS_DEBUG */
12801+
12802+const struct address_space_operations aufs_aop = {
12803+ .readpage = aufs_readpage,
12804+ .direct_IO = aufs_direct_IO,
4a4d8108
AM
12805+#ifdef CONFIG_AUFS_DEBUG
12806+ .writepage = aufs_writepage,
4a4d8108
AM
12807+ /* no writepages, because of writepage */
12808+ .set_page_dirty = aufs_set_page_dirty,
12809+ /* no readpages, because of readpage */
12810+ .write_begin = aufs_write_begin,
12811+ .write_end = aufs_write_end,
12812+ /* no bmap, no block device */
12813+ .invalidatepage = aufs_invalidatepage,
12814+ .releasepage = aufs_releasepage,
79b8bda9
AM
12815+ /* is fallback_migrate_page ok? */
12816+ /* .migratepage = aufs_migratepage, */
4a4d8108
AM
12817+ .launder_page = aufs_launder_page,
12818+ .is_partially_uptodate = aufs_is_partially_uptodate,
392086de 12819+ .is_dirty_writeback = aufs_is_dirty_writeback,
b4510431
AM
12820+ .error_remove_page = aufs_error_remove_page,
12821+ .swap_activate = aufs_swap_activate,
12822+ .swap_deactivate = aufs_swap_deactivate
4a4d8108 12823+#endif /* CONFIG_AUFS_DEBUG */
dece6358 12824+};
7f207e10
AM
12825diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
12826--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 12827+++ linux/fs/aufs/file.h 2015-09-24 10:47:58.251386326 +0200
b912730e 12828@@ -0,0 +1,291 @@
4a4d8108 12829+/*
2000de60 12830+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
12831+ *
12832+ * This program, aufs is free software; you can redistribute it and/or modify
12833+ * it under the terms of the GNU General Public License as published by
12834+ * the Free Software Foundation; either version 2 of the License, or
12835+ * (at your option) any later version.
12836+ *
12837+ * This program is distributed in the hope that it will be useful,
12838+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12839+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12840+ * GNU General Public License for more details.
12841+ *
12842+ * You should have received a copy of the GNU General Public License
523b37e3 12843+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 12844+ */
1facf9fc 12845+
4a4d8108
AM
12846+/*
12847+ * file operations
12848+ */
1facf9fc 12849+
4a4d8108
AM
12850+#ifndef __AUFS_FILE_H__
12851+#define __AUFS_FILE_H__
1facf9fc 12852+
4a4d8108 12853+#ifdef __KERNEL__
1facf9fc 12854+
2cbb1c4b 12855+#include <linux/file.h>
4a4d8108
AM
12856+#include <linux/fs.h>
12857+#include <linux/poll.h>
4a4d8108 12858+#include "rwsem.h"
1facf9fc 12859+
4a4d8108
AM
12860+struct au_branch;
12861+struct au_hfile {
12862+ struct file *hf_file;
12863+ struct au_branch *hf_br;
12864+};
1facf9fc 12865+
4a4d8108
AM
12866+struct au_vdir;
12867+struct au_fidir {
12868+ aufs_bindex_t fd_bbot;
12869+ aufs_bindex_t fd_nent;
12870+ struct au_vdir *fd_vdir_cache;
12871+ struct au_hfile fd_hfile[];
12872+};
1facf9fc 12873+
4a4d8108 12874+static inline int au_fidir_sz(int nent)
dece6358 12875+{
4f0767ce
JR
12876+ AuDebugOn(nent < 0);
12877+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 12878+}
1facf9fc 12879+
4a4d8108
AM
12880+struct au_finfo {
12881+ atomic_t fi_generation;
dece6358 12882+
4a4d8108
AM
12883+ struct au_rwsem fi_rwsem;
12884+ aufs_bindex_t fi_btop;
12885+
12886+ /* do not union them */
12887+ struct { /* for non-dir */
12888+ struct au_hfile fi_htop;
2cbb1c4b 12889+ atomic_t fi_mmapped;
4a4d8108
AM
12890+ };
12891+ struct au_fidir *fi_hdir; /* for dir only */
523b37e3
AM
12892+
12893+ struct hlist_node fi_hlist;
12894+ struct file *fi_file; /* very ugly */
4a4d8108 12895+} ____cacheline_aligned_in_smp;
1facf9fc 12896+
4a4d8108 12897+/* ---------------------------------------------------------------------- */
1facf9fc 12898+
4a4d8108
AM
12899+/* file.c */
12900+extern const struct address_space_operations aufs_aop;
12901+unsigned int au_file_roflags(unsigned int flags);
12902+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 12903+ struct file *file, int force_wr);
b912730e
AM
12904+struct au_do_open_args {
12905+ int no_lock;
12906+ int (*open)(struct file *file, int flags,
12907+ struct file *h_file);
12908+ struct au_fidir *fidir;
12909+ struct file *h_file;
12910+};
12911+int au_do_open(struct file *file, struct au_do_open_args *args);
4a4d8108
AM
12912+int au_reopen_nondir(struct file *file);
12913+struct au_pin;
12914+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
12915+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12916+ int wlock);
12917+int au_do_flush(struct file *file, fl_owner_t id,
12918+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 12919+
4a4d8108
AM
12920+/* poll.c */
12921+#ifdef CONFIG_AUFS_POLL
12922+unsigned int aufs_poll(struct file *file, poll_table *wait);
12923+#endif
1facf9fc 12924+
4a4d8108
AM
12925+#ifdef CONFIG_AUFS_BR_HFSPLUS
12926+/* hfsplus.c */
392086de
AM
12927+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
12928+ int force_wr);
4a4d8108
AM
12929+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
12930+ struct file *h_file);
12931+#else
c1595e42
JR
12932+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
12933+ aufs_bindex_t bindex, int force_wr)
4a4d8108
AM
12934+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
12935+ struct file *h_file);
12936+#endif
1facf9fc 12937+
4a4d8108
AM
12938+/* f_op.c */
12939+extern const struct file_operations aufs_file_fop;
b912730e 12940+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
4a4d8108 12941+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
b912730e 12942+struct file *au_read_pre(struct file *file, int keep_fi);
4a4d8108 12943+
4a4d8108
AM
12944+/* finfo.c */
12945+void au_hfput(struct au_hfile *hf, struct file *file);
12946+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
12947+ struct file *h_file);
1facf9fc 12948+
4a4d8108 12949+void au_update_figen(struct file *file);
4a4d8108
AM
12950+struct au_fidir *au_fidir_alloc(struct super_block *sb);
12951+int au_fidir_realloc(struct au_finfo *finfo, int nbr);
1facf9fc 12952+
4a4d8108
AM
12953+void au_fi_init_once(void *_fi);
12954+void au_finfo_fin(struct file *file);
12955+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 12956+
4a4d8108
AM
12957+/* ioctl.c */
12958+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
12959+#ifdef CONFIG_COMPAT
12960+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
12961+ unsigned long arg);
c2b27bf2
AM
12962+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
12963+ unsigned long arg);
b752ccd1 12964+#endif
1facf9fc 12965+
4a4d8108 12966+/* ---------------------------------------------------------------------- */
1facf9fc 12967+
4a4d8108
AM
12968+static inline struct au_finfo *au_fi(struct file *file)
12969+{
38d290e6 12970+ return file->private_data;
4a4d8108 12971+}
1facf9fc 12972+
4a4d8108 12973+/* ---------------------------------------------------------------------- */
1facf9fc 12974+
4a4d8108
AM
12975+/*
12976+ * fi_read_lock, fi_write_lock,
12977+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
12978+ */
12979+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 12980+
4a4d8108
AM
12981+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
12982+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
12983+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 12984+
1308ab2a 12985+/* ---------------------------------------------------------------------- */
12986+
4a4d8108
AM
12987+/* todo: hard/soft set? */
12988+static inline aufs_bindex_t au_fbstart(struct file *file)
dece6358 12989+{
4a4d8108
AM
12990+ FiMustAnyLock(file);
12991+ return au_fi(file)->fi_btop;
12992+}
dece6358 12993+
4a4d8108
AM
12994+static inline aufs_bindex_t au_fbend_dir(struct file *file)
12995+{
12996+ FiMustAnyLock(file);
12997+ AuDebugOn(!au_fi(file)->fi_hdir);
12998+ return au_fi(file)->fi_hdir->fd_bbot;
12999+}
1facf9fc 13000+
4a4d8108
AM
13001+static inline struct au_vdir *au_fvdir_cache(struct file *file)
13002+{
13003+ FiMustAnyLock(file);
13004+ AuDebugOn(!au_fi(file)->fi_hdir);
13005+ return au_fi(file)->fi_hdir->fd_vdir_cache;
13006+}
1facf9fc 13007+
4a4d8108
AM
13008+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
13009+{
13010+ FiMustWriteLock(file);
13011+ au_fi(file)->fi_btop = bindex;
13012+}
1facf9fc 13013+
4a4d8108
AM
13014+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
13015+{
13016+ FiMustWriteLock(file);
13017+ AuDebugOn(!au_fi(file)->fi_hdir);
13018+ au_fi(file)->fi_hdir->fd_bbot = bindex;
13019+}
1308ab2a 13020+
4a4d8108
AM
13021+static inline void au_set_fvdir_cache(struct file *file,
13022+ struct au_vdir *vdir_cache)
13023+{
13024+ FiMustWriteLock(file);
13025+ AuDebugOn(!au_fi(file)->fi_hdir);
13026+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
13027+}
dece6358 13028+
4a4d8108
AM
13029+static inline struct file *au_hf_top(struct file *file)
13030+{
13031+ FiMustAnyLock(file);
13032+ AuDebugOn(au_fi(file)->fi_hdir);
13033+ return au_fi(file)->fi_htop.hf_file;
13034+}
1facf9fc 13035+
4a4d8108
AM
13036+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
13037+{
13038+ FiMustAnyLock(file);
13039+ AuDebugOn(!au_fi(file)->fi_hdir);
13040+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
13041+}
13042+
4a4d8108
AM
13043+/* todo: memory barrier? */
13044+static inline unsigned int au_figen(struct file *f)
dece6358 13045+{
4a4d8108
AM
13046+ return atomic_read(&au_fi(f)->fi_generation);
13047+}
dece6358 13048+
2cbb1c4b
JR
13049+static inline void au_set_mmapped(struct file *f)
13050+{
13051+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
13052+ return;
0c3ec466 13053+ pr_warn("fi_mmapped wrapped around\n");
2cbb1c4b
JR
13054+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
13055+ ;
13056+}
13057+
13058+static inline void au_unset_mmapped(struct file *f)
13059+{
13060+ atomic_dec(&au_fi(f)->fi_mmapped);
13061+}
13062+
4a4d8108
AM
13063+static inline int au_test_mmapped(struct file *f)
13064+{
2cbb1c4b
JR
13065+ return atomic_read(&au_fi(f)->fi_mmapped);
13066+}
13067+
13068+/* customize vma->vm_file */
13069+
13070+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
13071+ struct file *file)
13072+{
53392da6
AM
13073+ struct file *f;
13074+
13075+ f = vma->vm_file;
2cbb1c4b
JR
13076+ get_file(file);
13077+ vma->vm_file = file;
53392da6 13078+ fput(f);
2cbb1c4b
JR
13079+}
13080+
13081+#ifdef CONFIG_MMU
13082+#define AuDbgVmRegion(file, vma) do {} while (0)
13083+
13084+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13085+ struct file *file)
13086+{
13087+ au_do_vm_file_reset(vma, file);
13088+}
13089+#else
13090+#define AuDbgVmRegion(file, vma) \
13091+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
13092+
13093+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13094+ struct file *file)
13095+{
53392da6
AM
13096+ struct file *f;
13097+
2cbb1c4b 13098+ au_do_vm_file_reset(vma, file);
53392da6 13099+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
13100+ get_file(file);
13101+ vma->vm_region->vm_file = file;
53392da6 13102+ fput(f);
2cbb1c4b
JR
13103+}
13104+#endif /* CONFIG_MMU */
13105+
13106+/* handle vma->vm_prfile */
fb47a38f 13107+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
2cbb1c4b
JR
13108+ struct file *file)
13109+{
2cbb1c4b
JR
13110+ get_file(file);
13111+ vma->vm_prfile = file;
13112+#ifndef CONFIG_MMU
13113+ get_file(file);
13114+ vma->vm_region->vm_prfile = file;
13115+#endif
fb47a38f 13116+}
1308ab2a 13117+
4a4d8108
AM
13118+#endif /* __KERNEL__ */
13119+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
13120diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
13121--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 13122+++ linux/fs/aufs/finfo.c 2015-09-24 10:47:58.251386326 +0200
b912730e 13123@@ -0,0 +1,157 @@
4a4d8108 13124+/*
2000de60 13125+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
13126+ *
13127+ * This program, aufs is free software; you can redistribute it and/or modify
13128+ * it under the terms of the GNU General Public License as published by
13129+ * the Free Software Foundation; either version 2 of the License, or
13130+ * (at your option) any later version.
13131+ *
13132+ * This program is distributed in the hope that it will be useful,
13133+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13134+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13135+ * GNU General Public License for more details.
13136+ *
13137+ * You should have received a copy of the GNU General Public License
523b37e3 13138+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 13139+ */
1308ab2a 13140+
4a4d8108
AM
13141+/*
13142+ * file private data
13143+ */
1facf9fc 13144+
4a4d8108 13145+#include "aufs.h"
1facf9fc 13146+
4a4d8108
AM
13147+void au_hfput(struct au_hfile *hf, struct file *file)
13148+{
13149+ /* todo: direct access f_flags */
2cbb1c4b 13150+ if (vfsub_file_flags(file) & __FMODE_EXEC)
4a4d8108
AM
13151+ allow_write_access(hf->hf_file);
13152+ fput(hf->hf_file);
13153+ hf->hf_file = NULL;
e49829fe 13154+ atomic_dec(&hf->hf_br->br_count);
4a4d8108
AM
13155+ hf->hf_br = NULL;
13156+}
1facf9fc 13157+
4a4d8108
AM
13158+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
13159+{
13160+ struct au_finfo *finfo = au_fi(file);
13161+ struct au_hfile *hf;
13162+ struct au_fidir *fidir;
13163+
13164+ fidir = finfo->fi_hdir;
13165+ if (!fidir) {
13166+ AuDebugOn(finfo->fi_btop != bindex);
13167+ hf = &finfo->fi_htop;
13168+ } else
13169+ hf = fidir->fd_hfile + bindex;
13170+
13171+ if (hf && hf->hf_file)
13172+ au_hfput(hf, file);
13173+ if (val) {
13174+ FiMustWriteLock(file);
b912730e 13175+ AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
4a4d8108 13176+ hf->hf_file = val;
2000de60 13177+ hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
1308ab2a 13178+ }
4a4d8108 13179+}
1facf9fc 13180+
4a4d8108
AM
13181+void au_update_figen(struct file *file)
13182+{
2000de60 13183+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
4a4d8108 13184+ /* smp_mb(); */ /* atomic_set */
1facf9fc 13185+}
13186+
4a4d8108
AM
13187+/* ---------------------------------------------------------------------- */
13188+
4a4d8108
AM
13189+struct au_fidir *au_fidir_alloc(struct super_block *sb)
13190+{
13191+ struct au_fidir *fidir;
13192+ int nbr;
13193+
13194+ nbr = au_sbend(sb) + 1;
13195+ if (nbr < 2)
13196+ nbr = 2; /* initial allocate for 2 branches */
13197+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
13198+ if (fidir) {
13199+ fidir->fd_bbot = -1;
13200+ fidir->fd_nent = nbr;
13201+ fidir->fd_vdir_cache = NULL;
13202+ }
13203+
13204+ return fidir;
13205+}
13206+
13207+int au_fidir_realloc(struct au_finfo *finfo, int nbr)
13208+{
13209+ int err;
13210+ struct au_fidir *fidir, *p;
13211+
13212+ AuRwMustWriteLock(&finfo->fi_rwsem);
13213+ fidir = finfo->fi_hdir;
13214+ AuDebugOn(!fidir);
13215+
13216+ err = -ENOMEM;
13217+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
13218+ GFP_NOFS);
13219+ if (p) {
13220+ p->fd_nent = nbr;
13221+ finfo->fi_hdir = p;
13222+ err = 0;
13223+ }
1facf9fc 13224+
dece6358 13225+ return err;
1facf9fc 13226+}
1308ab2a 13227+
13228+/* ---------------------------------------------------------------------- */
13229+
4a4d8108 13230+void au_finfo_fin(struct file *file)
1308ab2a 13231+{
4a4d8108
AM
13232+ struct au_finfo *finfo;
13233+
2000de60 13234+ au_nfiles_dec(file->f_path.dentry->d_sb);
7f207e10 13235+
4a4d8108
AM
13236+ finfo = au_fi(file);
13237+ AuDebugOn(finfo->fi_hdir);
13238+ AuRwDestroy(&finfo->fi_rwsem);
13239+ au_cache_free_finfo(finfo);
1308ab2a 13240+}
1308ab2a 13241+
e49829fe 13242+void au_fi_init_once(void *_finfo)
4a4d8108 13243+{
e49829fe 13244+ struct au_finfo *finfo = _finfo;
2cbb1c4b 13245+ static struct lock_class_key aufs_fi;
1308ab2a 13246+
e49829fe
JR
13247+ au_rw_init(&finfo->fi_rwsem);
13248+ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
4a4d8108 13249+}
1308ab2a 13250+
4a4d8108
AM
13251+int au_finfo_init(struct file *file, struct au_fidir *fidir)
13252+{
1716fcea 13253+ int err;
4a4d8108
AM
13254+ struct au_finfo *finfo;
13255+ struct dentry *dentry;
13256+
13257+ err = -ENOMEM;
2000de60 13258+ dentry = file->f_path.dentry;
4a4d8108
AM
13259+ finfo = au_cache_alloc_finfo();
13260+ if (unlikely(!finfo))
13261+ goto out;
13262+
13263+ err = 0;
7f207e10 13264+ au_nfiles_inc(dentry->d_sb);
1716fcea
AM
13265+ /* verbose coding for lock class name */
13266+ if (!fidir)
13267+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcNonDir_FIINFO);
13268+ else
13269+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcDir_FIINFO);
4a4d8108
AM
13270+ au_rw_write_lock(&finfo->fi_rwsem);
13271+ finfo->fi_btop = -1;
13272+ finfo->fi_hdir = fidir;
13273+ atomic_set(&finfo->fi_generation, au_digen(dentry));
13274+ /* smp_mb(); */ /* atomic_set */
13275+
13276+ file->private_data = finfo;
13277+
13278+out:
13279+ return err;
13280+}
7f207e10
AM
13281diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
13282--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 13283+++ linux/fs/aufs/f_op.c 2015-09-24 10:47:58.251386326 +0200
5527c038 13284@@ -0,0 +1,738 @@
dece6358 13285+/*
2000de60 13286+ * Copyright (C) 2005-2015 Junjiro R. Okajima
dece6358
AM
13287+ *
13288+ * This program, aufs is free software; you can redistribute it and/or modify
13289+ * it under the terms of the GNU General Public License as published by
13290+ * the Free Software Foundation; either version 2 of the License, or
13291+ * (at your option) any later version.
13292+ *
13293+ * This program is distributed in the hope that it will be useful,
13294+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13295+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13296+ * GNU General Public License for more details.
13297+ *
13298+ * You should have received a copy of the GNU General Public License
523b37e3 13299+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 13300+ */
1facf9fc 13301+
13302+/*
4a4d8108 13303+ * file and vm operations
1facf9fc 13304+ */
dece6358 13305+
86dc4139 13306+#include <linux/aio.h>
4a4d8108
AM
13307+#include <linux/fs_stack.h>
13308+#include <linux/mman.h>
4a4d8108 13309+#include <linux/security.h>
dece6358
AM
13310+#include "aufs.h"
13311+
b912730e 13312+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
1facf9fc 13313+{
4a4d8108
AM
13314+ int err;
13315+ aufs_bindex_t bindex;
4a4d8108
AM
13316+ struct dentry *dentry;
13317+ struct au_finfo *finfo;
38d290e6 13318+ struct inode *h_inode;
4a4d8108
AM
13319+
13320+ FiMustWriteLock(file);
13321+
523b37e3 13322+ err = 0;
2000de60 13323+ dentry = file->f_path.dentry;
b912730e 13324+ AuDebugOn(IS_ERR_OR_NULL(dentry));
4a4d8108
AM
13325+ finfo = au_fi(file);
13326+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 13327+ atomic_set(&finfo->fi_mmapped, 0);
4a4d8108 13328+ bindex = au_dbstart(dentry);
b912730e
AM
13329+ if (!h_file)
13330+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
13331+ else
13332+ get_file(h_file);
4a4d8108
AM
13333+ if (IS_ERR(h_file))
13334+ err = PTR_ERR(h_file);
13335+ else {
38d290e6
JR
13336+ if ((flags & __O_TMPFILE)
13337+ && !(flags & O_EXCL)) {
13338+ h_inode = file_inode(h_file);
13339+ spin_lock(&h_inode->i_lock);
13340+ h_inode->i_state |= I_LINKABLE;
13341+ spin_unlock(&h_inode->i_lock);
13342+ }
4a4d8108
AM
13343+ au_set_fbstart(file, bindex);
13344+ au_set_h_fptr(file, bindex, h_file);
13345+ au_update_figen(file);
13346+ /* todo: necessary? */
13347+ /* file->f_ra = h_file->f_ra; */
13348+ }
027c5e7a 13349+
4a4d8108 13350+ return err;
1facf9fc 13351+}
13352+
4a4d8108
AM
13353+static int aufs_open_nondir(struct inode *inode __maybe_unused,
13354+ struct file *file)
1facf9fc 13355+{
4a4d8108 13356+ int err;
1308ab2a 13357+ struct super_block *sb;
b912730e
AM
13358+ struct au_do_open_args args = {
13359+ .open = au_do_open_nondir
13360+ };
1facf9fc 13361+
523b37e3
AM
13362+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
13363+ file, vfsub_file_flags(file), file->f_mode);
1facf9fc 13364+
2000de60 13365+ sb = file->f_path.dentry->d_sb;
4a4d8108 13366+ si_read_lock(sb, AuLock_FLUSH);
b912730e 13367+ err = au_do_open(file, &args);
4a4d8108
AM
13368+ si_read_unlock(sb);
13369+ return err;
13370+}
1facf9fc 13371+
4a4d8108
AM
13372+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
13373+{
13374+ struct au_finfo *finfo;
13375+ aufs_bindex_t bindex;
1facf9fc 13376+
4a4d8108 13377+ finfo = au_fi(file);
2000de60
JR
13378+ au_sphl_del(&finfo->fi_hlist,
13379+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108 13380+ bindex = finfo->fi_btop;
b4510431 13381+ if (bindex >= 0)
4a4d8108 13382+ au_set_h_fptr(file, bindex, NULL);
7f207e10 13383+
4a4d8108
AM
13384+ au_finfo_fin(file);
13385+ return 0;
1facf9fc 13386+}
13387+
4a4d8108
AM
13388+/* ---------------------------------------------------------------------- */
13389+
13390+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 13391+{
1308ab2a 13392+ int err;
4a4d8108
AM
13393+ struct file *h_file;
13394+
13395+ err = 0;
13396+ h_file = au_hf_top(file);
13397+ if (h_file)
13398+ err = vfsub_flush(h_file, id);
13399+ return err;
13400+}
13401+
13402+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
13403+{
13404+ return au_do_flush(file, id, au_do_flush_nondir);
13405+}
13406+
13407+/* ---------------------------------------------------------------------- */
9dbd164d
AM
13408+/*
13409+ * read and write functions acquire [fdi]_rwsem once, but release before
13410+ * mmap_sem. This is to prevent a race condition with mmap(2).
13411+ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping
13412+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
13413+ * read functions after [fdi]_rwsem are released, but it should be harmless.
13414+ */
4a4d8108 13415+
b912730e
AM
13416+/* Callers should call au_read_post() or fput() in the end */
13417+struct file *au_read_pre(struct file *file, int keep_fi)
4a4d8108 13418+{
4a4d8108 13419+ struct file *h_file;
b912730e 13420+ int err;
1facf9fc 13421+
4a4d8108 13422+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
b912730e
AM
13423+ if (!err) {
13424+ di_read_unlock(file->f_path.dentry, AuLock_IR);
13425+ h_file = au_hf_top(file);
13426+ get_file(h_file);
13427+ if (!keep_fi)
13428+ fi_read_unlock(file);
13429+ } else
13430+ h_file = ERR_PTR(err);
13431+
13432+ return h_file;
13433+}
13434+
13435+static void au_read_post(struct inode *inode, struct file *h_file)
13436+{
13437+ /* update without lock, I don't think it is a problem */
13438+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13439+ fput(h_file);
13440+}
13441+
13442+struct au_write_pre {
13443+ blkcnt_t blks;
13444+ aufs_bindex_t bstart;
13445+};
13446+
13447+/*
13448+ * returns with iinfo write-locked
13449+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
13450+ * end
13451+ */
13452+static struct file *au_write_pre(struct file *file, int do_ready,
13453+ struct au_write_pre *wpre)
13454+{
13455+ struct file *h_file;
13456+ struct dentry *dentry;
13457+ int err;
13458+ struct au_pin pin;
13459+
13460+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
13461+ h_file = ERR_PTR(err);
dece6358
AM
13462+ if (unlikely(err))
13463+ goto out;
1facf9fc 13464+
b912730e
AM
13465+ dentry = file->f_path.dentry;
13466+ if (do_ready) {
13467+ err = au_ready_to_write(file, -1, &pin);
13468+ if (unlikely(err)) {
13469+ h_file = ERR_PTR(err);
13470+ di_write_unlock(dentry);
13471+ goto out_fi;
13472+ }
13473+ }
13474+
13475+ di_downgrade_lock(dentry, /*flags*/0);
13476+ if (wpre)
13477+ wpre->bstart = au_fbstart(file);
4a4d8108 13478+ h_file = au_hf_top(file);
9dbd164d 13479+ get_file(h_file);
b912730e
AM
13480+ if (wpre)
13481+ wpre->blks = file_inode(h_file)->i_blocks;
13482+ if (do_ready)
13483+ au_unpin(&pin);
13484+ di_read_unlock(dentry, /*flags*/0);
13485+
13486+out_fi:
13487+ fi_write_unlock(file);
13488+out:
13489+ return h_file;
13490+}
13491+
13492+static void au_write_post(struct inode *inode, struct file *h_file,
13493+ struct au_write_pre *wpre, ssize_t written)
13494+{
13495+ struct inode *h_inode;
13496+
13497+ au_cpup_attr_timesizes(inode);
13498+ AuDebugOn(au_ibstart(inode) != wpre->bstart);
13499+ h_inode = file_inode(h_file);
13500+ inode->i_mode = h_inode->i_mode;
13501+ ii_write_unlock(inode);
13502+ fput(h_file);
13503+
13504+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
13505+ if (written > 0)
13506+ au_fhsm_wrote(inode->i_sb, wpre->bstart,
13507+ /*force*/h_inode->i_blocks > wpre->blks);
13508+}
13509+
13510+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
13511+ loff_t *ppos)
13512+{
13513+ ssize_t err;
13514+ struct inode *inode;
13515+ struct file *h_file;
13516+ struct super_block *sb;
13517+
13518+ inode = file_inode(file);
13519+ sb = inode->i_sb;
13520+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13521+
13522+ h_file = au_read_pre(file, /*keep_fi*/0);
13523+ err = PTR_ERR(h_file);
13524+ if (IS_ERR(h_file))
13525+ goto out;
9dbd164d
AM
13526+
13527+ /* filedata may be obsoleted by concurrent copyup, but no problem */
4a4d8108
AM
13528+ err = vfsub_read_u(h_file, buf, count, ppos);
13529+ /* todo: necessary? */
13530+ /* file->f_ra = h_file->f_ra; */
b912730e 13531+ au_read_post(inode, h_file);
1308ab2a 13532+
4f0767ce 13533+out:
dece6358
AM
13534+ si_read_unlock(sb);
13535+ return err;
13536+}
1facf9fc 13537+
e49829fe
JR
13538+/*
13539+ * todo: very ugly
13540+ * it safely locks both i_mutex and si_rwsem (for read).
13541+ * if the plink maintenance mode continues forever (that is the problem),
13542+ * may loop forever.
13543+ */
13544+static void au_mtx_and_read_lock(struct inode *inode)
13545+{
13546+ int err;
13547+ struct super_block *sb = inode->i_sb;
13548+
13549+ while (1) {
13550+ mutex_lock(&inode->i_mutex);
13551+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
13552+ if (!err)
13553+ break;
13554+ mutex_unlock(&inode->i_mutex);
13555+ si_read_lock(sb, AuLock_NOPLMW);
13556+ si_read_unlock(sb);
13557+ }
13558+}
13559+
4a4d8108
AM
13560+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
13561+ size_t count, loff_t *ppos)
dece6358 13562+{
4a4d8108 13563+ ssize_t err;
b912730e
AM
13564+ struct au_write_pre wpre;
13565+ struct inode *inode;
4a4d8108
AM
13566+ struct file *h_file;
13567+ char __user *buf = (char __user *)ubuf;
1facf9fc 13568+
b912730e 13569+ inode = file_inode(file);
e49829fe 13570+ au_mtx_and_read_lock(inode);
1facf9fc 13571+
b912730e
AM
13572+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13573+ err = PTR_ERR(h_file);
13574+ if (IS_ERR(h_file))
9dbd164d 13575+ goto out;
9dbd164d 13576+
4a4d8108 13577+ err = vfsub_write_u(h_file, buf, count, ppos);
b912730e 13578+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13579+
4f0767ce 13580+out:
b912730e 13581+ si_read_unlock(inode->i_sb);
4a4d8108 13582+ mutex_unlock(&inode->i_mutex);
dece6358
AM
13583+ return err;
13584+}
1facf9fc 13585+
076b876e
AM
13586+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
13587+ struct iov_iter *iov_iter)
dece6358 13588+{
4a4d8108
AM
13589+ ssize_t err;
13590+ struct file *file;
076b876e 13591+ ssize_t (*iter)(struct kiocb *, struct iov_iter *);
1facf9fc 13592+
4a4d8108
AM
13593+ err = security_file_permission(h_file, rw);
13594+ if (unlikely(err))
13595+ goto out;
1facf9fc 13596+
4a4d8108 13597+ err = -ENOSYS;
076b876e 13598+ iter = NULL;
5527c038 13599+ if (rw == MAY_READ)
076b876e 13600+ iter = h_file->f_op->read_iter;
5527c038 13601+ else if (rw == MAY_WRITE)
076b876e 13602+ iter = h_file->f_op->write_iter;
076b876e
AM
13603+
13604+ file = kio->ki_filp;
13605+ kio->ki_filp = h_file;
13606+ if (iter) {
2cbb1c4b 13607+ lockdep_off();
076b876e
AM
13608+ err = iter(kio, iov_iter);
13609+ lockdep_on();
4a4d8108
AM
13610+ } else
13611+ /* currently there is no such fs */
13612+ WARN_ON_ONCE(1);
076b876e 13613+ kio->ki_filp = file;
1facf9fc 13614+
4f0767ce 13615+out:
dece6358
AM
13616+ return err;
13617+}
1facf9fc 13618+
076b876e 13619+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1facf9fc 13620+{
4a4d8108
AM
13621+ ssize_t err;
13622+ struct file *file, *h_file;
b912730e 13623+ struct inode *inode;
dece6358 13624+ struct super_block *sb;
1facf9fc 13625+
4a4d8108 13626+ file = kio->ki_filp;
b912730e
AM
13627+ inode = file_inode(file);
13628+ sb = inode->i_sb;
e49829fe 13629+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 13630+
b912730e
AM
13631+ h_file = au_read_pre(file, /*keep_fi*/0);
13632+ err = PTR_ERR(h_file);
13633+ if (IS_ERR(h_file))
13634+ goto out;
9dbd164d 13635+
076b876e 13636+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
4a4d8108
AM
13637+ /* todo: necessary? */
13638+ /* file->f_ra = h_file->f_ra; */
b912730e 13639+ au_read_post(inode, h_file);
1facf9fc 13640+
4f0767ce 13641+out:
4a4d8108 13642+ si_read_unlock(sb);
1308ab2a 13643+ return err;
13644+}
1facf9fc 13645+
076b876e 13646+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1308ab2a 13647+{
4a4d8108 13648+ ssize_t err;
b912730e
AM
13649+ struct au_write_pre wpre;
13650+ struct inode *inode;
4a4d8108 13651+ struct file *file, *h_file;
1308ab2a 13652+
4a4d8108 13653+ file = kio->ki_filp;
b912730e 13654+ inode = file_inode(file);
e49829fe
JR
13655+ au_mtx_and_read_lock(inode);
13656+
b912730e
AM
13657+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13658+ err = PTR_ERR(h_file);
13659+ if (IS_ERR(h_file))
9dbd164d 13660+ goto out;
9dbd164d 13661+
076b876e 13662+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
b912730e 13663+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13664+
4f0767ce 13665+out:
b912730e 13666+ si_read_unlock(inode->i_sb);
4a4d8108 13667+ mutex_unlock(&inode->i_mutex);
dece6358 13668+ return err;
1facf9fc 13669+}
13670+
4a4d8108
AM
13671+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
13672+ struct pipe_inode_info *pipe, size_t len,
13673+ unsigned int flags)
1facf9fc 13674+{
4a4d8108
AM
13675+ ssize_t err;
13676+ struct file *h_file;
b912730e 13677+ struct inode *inode;
dece6358 13678+ struct super_block *sb;
1facf9fc 13679+
b912730e
AM
13680+ inode = file_inode(file);
13681+ sb = inode->i_sb;
e49829fe 13682+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
13683+
13684+ h_file = au_read_pre(file, /*keep_fi*/1);
13685+ err = PTR_ERR(h_file);
13686+ if (IS_ERR(h_file))
dece6358 13687+ goto out;
1facf9fc 13688+
4a4d8108 13689+ if (au_test_loopback_kthread()) {
2000de60 13690+ au_warn_loopback(h_file->f_path.dentry->d_sb);
87a755f4
AM
13691+ if (file->f_mapping != h_file->f_mapping) {
13692+ file->f_mapping = h_file->f_mapping;
13693+ smp_mb(); /* unnecessary? */
13694+ }
1308ab2a 13695+ }
9dbd164d
AM
13696+ fi_read_unlock(file);
13697+
4a4d8108
AM
13698+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
13699+ /* todo: necessary? */
13700+ /* file->f_ra = h_file->f_ra; */
b912730e 13701+ au_read_post(inode, h_file);
1facf9fc 13702+
4f0767ce 13703+out:
4a4d8108 13704+ si_read_unlock(sb);
dece6358 13705+ return err;
1facf9fc 13706+}
13707+
4a4d8108
AM
13708+static ssize_t
13709+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
13710+ size_t len, unsigned int flags)
1facf9fc 13711+{
4a4d8108 13712+ ssize_t err;
b912730e
AM
13713+ struct au_write_pre wpre;
13714+ struct inode *inode;
076b876e 13715+ struct file *h_file;
1facf9fc 13716+
b912730e 13717+ inode = file_inode(file);
e49829fe 13718+ au_mtx_and_read_lock(inode);
9dbd164d 13719+
b912730e
AM
13720+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13721+ err = PTR_ERR(h_file);
13722+ if (IS_ERR(h_file))
9dbd164d 13723+ goto out;
9dbd164d 13724+
4a4d8108 13725+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
b912730e 13726+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13727+
4f0767ce 13728+out:
b912730e 13729+ si_read_unlock(inode->i_sb);
4a4d8108
AM
13730+ mutex_unlock(&inode->i_mutex);
13731+ return err;
13732+}
1facf9fc 13733+
38d290e6
JR
13734+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
13735+ loff_t len)
13736+{
13737+ long err;
b912730e 13738+ struct au_write_pre wpre;
38d290e6
JR
13739+ struct inode *inode;
13740+ struct file *h_file;
13741+
b912730e 13742+ inode = file_inode(file);
38d290e6
JR
13743+ au_mtx_and_read_lock(inode);
13744+
b912730e
AM
13745+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13746+ err = PTR_ERR(h_file);
13747+ if (IS_ERR(h_file))
38d290e6 13748+ goto out;
38d290e6
JR
13749+
13750+ lockdep_off();
03673fb0 13751+ err = vfs_fallocate(h_file, mode, offset, len);
38d290e6 13752+ lockdep_on();
b912730e 13753+ au_write_post(inode, h_file, &wpre, /*written*/1);
38d290e6
JR
13754+
13755+out:
b912730e 13756+ si_read_unlock(inode->i_sb);
38d290e6
JR
13757+ mutex_unlock(&inode->i_mutex);
13758+ return err;
13759+}
13760+
4a4d8108
AM
13761+/* ---------------------------------------------------------------------- */
13762+
9dbd164d
AM
13763+/*
13764+ * The locking order around current->mmap_sem.
13765+ * - in most and regular cases
13766+ * file I/O syscall -- aufs_read() or something
13767+ * -- si_rwsem for read -- mmap_sem
13768+ * (Note that [fdi]i_rwsem are released before mmap_sem).
13769+ * - in mmap case
13770+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
13771+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
13772+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in
13773+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
13774+ * It means that when aufs acquires si_rwsem for write, the process should never
13775+ * acquire mmap_sem.
13776+ *
392086de 13777+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
9dbd164d
AM
13778+ * problem either since any directory is not able to be mmap-ed.
13779+ * The similar scenario is applied to aufs_readlink() too.
13780+ */
13781+
38d290e6 13782+#if 0 /* stop calling security_file_mmap() */
2dfbb274
AM
13783+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
13784+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
13785+
13786+static unsigned long au_arch_prot_conv(unsigned long flags)
13787+{
13788+ /* currently ppc64 only */
13789+#ifdef CONFIG_PPC64
13790+ /* cf. linux/arch/powerpc/include/asm/mman.h */
13791+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
13792+ return AuConv_VM_PROT(flags, SAO);
13793+#else
13794+ AuDebugOn(arch_calc_vm_prot_bits(-1));
13795+ return 0;
13796+#endif
13797+}
13798+
13799+static unsigned long au_prot_conv(unsigned long flags)
13800+{
13801+ return AuConv_VM_PROT(flags, READ)
13802+ | AuConv_VM_PROT(flags, WRITE)
13803+ | AuConv_VM_PROT(flags, EXEC)
13804+ | au_arch_prot_conv(flags);
13805+}
13806+
13807+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
13808+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
13809+
13810+static unsigned long au_flag_conv(unsigned long flags)
13811+{
13812+ return AuConv_VM_MAP(flags, GROWSDOWN)
13813+ | AuConv_VM_MAP(flags, DENYWRITE)
2dfbb274
AM
13814+ | AuConv_VM_MAP(flags, LOCKED);
13815+}
38d290e6 13816+#endif
2dfbb274 13817+
9dbd164d 13818+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 13819+{
4a4d8108 13820+ int err;
4a4d8108 13821+ const unsigned char wlock
9dbd164d 13822+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
4a4d8108 13823+ struct super_block *sb;
9dbd164d 13824+ struct file *h_file;
b912730e 13825+ struct inode *inode;
9dbd164d
AM
13826+
13827+ AuDbgVmRegion(file, vma);
1308ab2a 13828+
b912730e
AM
13829+ inode = file_inode(file);
13830+ sb = inode->i_sb;
9dbd164d 13831+ lockdep_off();
e49829fe 13832+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108 13833+
b912730e 13834+ h_file = au_write_pre(file, wlock, /*wpre*/NULL);
9dbd164d 13835+ lockdep_on();
b912730e
AM
13836+ err = PTR_ERR(h_file);
13837+ if (IS_ERR(h_file))
13838+ goto out;
1308ab2a 13839+
b912730e
AM
13840+ err = 0;
13841+ au_set_mmapped(file);
9dbd164d 13842+ au_vm_file_reset(vma, h_file);
38d290e6
JR
13843+ /*
13844+ * we cannot call security_mmap_file() here since it may acquire
13845+ * mmap_sem or i_mutex.
13846+ *
13847+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
13848+ * au_flag_conv(vma->vm_flags));
13849+ */
9dbd164d
AM
13850+ if (!err)
13851+ err = h_file->f_op->mmap(h_file, vma);
b912730e
AM
13852+ if (!err) {
13853+ au_vm_prfile_set(vma, file);
13854+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13855+ goto out_fput; /* success */
13856+ }
2cbb1c4b
JR
13857+ au_unset_mmapped(file);
13858+ au_vm_file_reset(vma, file);
b912730e 13859+
2cbb1c4b 13860+out_fput:
9dbd164d 13861+ lockdep_off();
b912730e
AM
13862+ ii_write_unlock(inode);
13863+ lockdep_on();
13864+ fput(h_file);
4f0767ce 13865+out:
b912730e 13866+ lockdep_off();
9dbd164d
AM
13867+ si_read_unlock(sb);
13868+ lockdep_on();
13869+ AuTraceErr(err);
4a4d8108
AM
13870+ return err;
13871+}
13872+
13873+/* ---------------------------------------------------------------------- */
13874+
1e00d052
AM
13875+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
13876+ int datasync)
4a4d8108
AM
13877+{
13878+ int err;
b912730e 13879+ struct au_write_pre wpre;
4a4d8108
AM
13880+ struct inode *inode;
13881+ struct file *h_file;
4a4d8108
AM
13882+
13883+ err = 0; /* -EBADF; */ /* posix? */
13884+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
b912730e 13885+ goto out;
4a4d8108 13886+
b912730e
AM
13887+ inode = file_inode(file);
13888+ au_mtx_and_read_lock(inode);
13889+
13890+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13891+ err = PTR_ERR(h_file);
13892+ if (IS_ERR(h_file))
4a4d8108 13893+ goto out_unlock;
4a4d8108 13894+
53392da6 13895+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
b912730e 13896+ au_write_post(inode, h_file, &wpre, /*written*/0);
4a4d8108 13897+
4f0767ce 13898+out_unlock:
b912730e 13899+ si_read_unlock(inode->i_sb);
1e00d052 13900+ mutex_unlock(&inode->i_mutex);
b912730e 13901+out:
4a4d8108 13902+ return err;
dece6358
AM
13903+}
13904+
4a4d8108
AM
13905+/* no one supports this operation, currently */
13906+#if 0
13907+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
dece6358 13908+{
4a4d8108 13909+ int err;
b912730e 13910+ struct au_write_pre wpre;
4a4d8108
AM
13911+ struct inode *inode;
13912+ struct file *file, *h_file;
1308ab2a 13913+
4a4d8108
AM
13914+ err = 0; /* -EBADF; */ /* posix? */
13915+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
13916+ goto out;
1308ab2a 13917+
b912730e
AM
13918+ file = kio->ki_filp;
13919+ inode = file_inode(file);
13920+ au_mtx_and_read_lock(inode);
13921+
13922+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13923+ err = PTR_ERR(h_file);
13924+ if (IS_ERR(h_file))
4a4d8108 13925+ goto out_unlock;
1308ab2a 13926+
4a4d8108
AM
13927+ err = -ENOSYS;
13928+ h_file = au_hf_top(file);
523b37e3 13929+ if (h_file->f_op->aio_fsync) {
4a4d8108 13930+ struct mutex *h_mtx;
1308ab2a 13931+
c06a8ce3 13932+ h_mtx = &file_inode(h_file)->i_mutex;
4a4d8108
AM
13933+ if (!is_sync_kiocb(kio)) {
13934+ get_file(h_file);
13935+ fput(file);
13936+ }
13937+ kio->ki_filp = h_file;
13938+ err = h_file->f_op->aio_fsync(kio, datasync);
13939+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
13940+ if (!err)
13941+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
13942+ /*ignore*/
4a4d8108
AM
13943+ mutex_unlock(h_mtx);
13944+ }
b912730e 13945+ au_write_post(inode, h_file, &wpre, /*written*/0);
1308ab2a 13946+
4f0767ce 13947+out_unlock:
e49829fe 13948+ si_read_unlock(inode->sb);
4a4d8108 13949+ mutex_unlock(&inode->i_mutex);
b912730e 13950+out:
4a4d8108 13951+ return err;
dece6358 13952+}
4a4d8108 13953+#endif
dece6358 13954+
4a4d8108 13955+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 13956+{
4a4d8108
AM
13957+ int err;
13958+ struct file *h_file;
4a4d8108 13959+ struct super_block *sb;
1308ab2a 13960+
b912730e 13961+ sb = file->f_path.dentry->d_sb;
e49829fe 13962+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
13963+
13964+ h_file = au_read_pre(file, /*keep_fi*/0);
13965+ err = PTR_ERR(h_file);
13966+ if (IS_ERR(h_file))
4a4d8108
AM
13967+ goto out;
13968+
523b37e3 13969+ if (h_file->f_op->fasync)
4a4d8108 13970+ err = h_file->f_op->fasync(fd, h_file, flag);
b912730e 13971+ fput(h_file); /* instead of au_read_post() */
1308ab2a 13972+
4f0767ce 13973+out:
4a4d8108 13974+ si_read_unlock(sb);
1308ab2a 13975+ return err;
dece6358 13976+}
4a4d8108
AM
13977+
13978+/* ---------------------------------------------------------------------- */
13979+
13980+/* no one supports this operation, currently */
13981+#if 0
13982+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
2000de60 13983+ size_t len, loff_t *pos, int more)
4a4d8108
AM
13984+{
13985+}
13986+#endif
13987+
13988+/* ---------------------------------------------------------------------- */
13989+
13990+const struct file_operations aufs_file_fop = {
13991+ .owner = THIS_MODULE,
2cbb1c4b 13992+
027c5e7a 13993+ .llseek = default_llseek,
4a4d8108
AM
13994+
13995+ .read = aufs_read,
13996+ .write = aufs_write,
076b876e
AM
13997+ .read_iter = aufs_read_iter,
13998+ .write_iter = aufs_write_iter,
13999+
4a4d8108
AM
14000+#ifdef CONFIG_AUFS_POLL
14001+ .poll = aufs_poll,
14002+#endif
14003+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1 14004+#ifdef CONFIG_COMPAT
c2b27bf2 14005+ .compat_ioctl = aufs_compat_ioctl_nondir,
b752ccd1 14006+#endif
4a4d8108
AM
14007+ .mmap = aufs_mmap,
14008+ .open = aufs_open_nondir,
14009+ .flush = aufs_flush_nondir,
14010+ .release = aufs_release_nondir,
14011+ .fsync = aufs_fsync_nondir,
14012+ /* .aio_fsync = aufs_aio_fsync_nondir, */
14013+ .fasync = aufs_fasync,
14014+ /* .sendpage = aufs_sendpage, */
14015+ .splice_write = aufs_splice_write,
14016+ .splice_read = aufs_splice_read,
14017+#if 0
14018+ .aio_splice_write = aufs_aio_splice_write,
38d290e6 14019+ .aio_splice_read = aufs_aio_splice_read,
4a4d8108 14020+#endif
38d290e6 14021+ .fallocate = aufs_fallocate
4a4d8108 14022+};
7f207e10
AM
14023diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
14024--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
b95c5147 14025+++ linux/fs/aufs/fstype.h 2015-12-10 17:59:16.836166410 +0100
b912730e 14026@@ -0,0 +1,400 @@
4a4d8108 14027+/*
2000de60 14028+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
14029+ *
14030+ * This program, aufs is free software; you can redistribute it and/or modify
14031+ * it under the terms of the GNU General Public License as published by
14032+ * the Free Software Foundation; either version 2 of the License, or
14033+ * (at your option) any later version.
14034+ *
14035+ * This program is distributed in the hope that it will be useful,
14036+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14037+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14038+ * GNU General Public License for more details.
14039+ *
14040+ * You should have received a copy of the GNU General Public License
523b37e3 14041+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
14042+ */
14043+
14044+/*
14045+ * judging filesystem type
14046+ */
14047+
14048+#ifndef __AUFS_FSTYPE_H__
14049+#define __AUFS_FSTYPE_H__
14050+
14051+#ifdef __KERNEL__
14052+
14053+#include <linux/fs.h>
14054+#include <linux/magic.h>
b912730e 14055+#include <linux/nfs_fs.h>
b95c5147 14056+#include <linux/romfs_fs.h>
4a4d8108
AM
14057+
14058+static inline int au_test_aufs(struct super_block *sb)
14059+{
14060+ return sb->s_magic == AUFS_SUPER_MAGIC;
14061+}
14062+
14063+static inline const char *au_sbtype(struct super_block *sb)
14064+{
14065+ return sb->s_type->name;
14066+}
1308ab2a 14067+
14068+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
14069+{
2000de60
JR
14070+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
14071+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
14072+#else
14073+ return 0;
14074+#endif
14075+}
14076+
1308ab2a 14077+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 14078+{
2000de60
JR
14079+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
14080+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
14081+#else
14082+ return 0;
14083+#endif
14084+}
14085+
1308ab2a 14086+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 14087+{
1308ab2a 14088+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
14089+ return sb->s_magic == CRAMFS_MAGIC;
14090+#endif
14091+ return 0;
14092+}
14093+
14094+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
14095+{
14096+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
14097+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
14098+#else
14099+ return 0;
14100+#endif
14101+}
14102+
1308ab2a 14103+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 14104+{
1308ab2a 14105+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
14106+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
14107+#else
14108+ return 0;
14109+#endif
14110+}
14111+
1308ab2a 14112+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 14113+{
1308ab2a 14114+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
14115+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
14116+#else
14117+ return 0;
14118+#endif
14119+}
14120+
1308ab2a 14121+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 14122+{
1308ab2a 14123+#ifdef CONFIG_TMPFS
14124+ return sb->s_magic == TMPFS_MAGIC;
14125+#else
14126+ return 0;
dece6358 14127+#endif
dece6358
AM
14128+}
14129+
1308ab2a 14130+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 14131+{
1308ab2a 14132+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
14133+ return !strcmp(au_sbtype(sb), "ecryptfs");
14134+#else
14135+ return 0;
14136+#endif
1facf9fc 14137+}
14138+
1308ab2a 14139+static inline int au_test_ramfs(struct super_block *sb)
14140+{
14141+ return sb->s_magic == RAMFS_MAGIC;
14142+}
14143+
14144+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
14145+{
14146+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
14147+ return sb->s_magic == UBIFS_SUPER_MAGIC;
14148+#else
14149+ return 0;
14150+#endif
14151+}
14152+
14153+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
14154+{
14155+#ifdef CONFIG_PROC_FS
14156+ return sb->s_magic == PROC_SUPER_MAGIC;
14157+#else
14158+ return 0;
14159+#endif
14160+}
14161+
14162+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
14163+{
14164+#ifdef CONFIG_SYSFS
14165+ return sb->s_magic == SYSFS_MAGIC;
14166+#else
14167+ return 0;
14168+#endif
14169+}
14170+
14171+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
14172+{
14173+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
14174+ return sb->s_magic == CONFIGFS_MAGIC;
14175+#else
14176+ return 0;
14177+#endif
14178+}
14179+
14180+static inline int au_test_minix(struct super_block *sb __maybe_unused)
14181+{
14182+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
14183+ return sb->s_magic == MINIX3_SUPER_MAGIC
14184+ || sb->s_magic == MINIX2_SUPER_MAGIC
14185+ || sb->s_magic == MINIX2_SUPER_MAGIC2
14186+ || sb->s_magic == MINIX_SUPER_MAGIC
14187+ || sb->s_magic == MINIX_SUPER_MAGIC2;
14188+#else
14189+ return 0;
14190+#endif
14191+}
14192+
1308ab2a 14193+static inline int au_test_fat(struct super_block *sb __maybe_unused)
14194+{
14195+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
14196+ return sb->s_magic == MSDOS_SUPER_MAGIC;
14197+#else
14198+ return 0;
14199+#endif
14200+}
14201+
14202+static inline int au_test_msdos(struct super_block *sb)
14203+{
14204+ return au_test_fat(sb);
14205+}
14206+
14207+static inline int au_test_vfat(struct super_block *sb)
14208+{
14209+ return au_test_fat(sb);
14210+}
14211+
14212+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
14213+{
14214+#ifdef CONFIG_SECURITYFS
14215+ return sb->s_magic == SECURITYFS_MAGIC;
14216+#else
14217+ return 0;
14218+#endif
14219+}
14220+
14221+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
14222+{
14223+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
14224+ return sb->s_magic == SQUASHFS_MAGIC;
14225+#else
14226+ return 0;
14227+#endif
14228+}
14229+
14230+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
14231+{
14232+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
14233+ return sb->s_magic == BTRFS_SUPER_MAGIC;
14234+#else
14235+ return 0;
14236+#endif
14237+}
14238+
14239+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
14240+{
14241+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
14242+ return sb->s_magic == XENFS_SUPER_MAGIC;
14243+#else
14244+ return 0;
14245+#endif
14246+}
14247+
14248+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
14249+{
14250+#ifdef CONFIG_DEBUG_FS
14251+ return sb->s_magic == DEBUGFS_MAGIC;
14252+#else
14253+ return 0;
14254+#endif
14255+}
14256+
14257+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
14258+{
14259+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
14260+ return sb->s_magic == NILFS_SUPER_MAGIC;
14261+#else
14262+ return 0;
14263+#endif
14264+}
14265+
4a4d8108
AM
14266+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
14267+{
14268+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE)
14269+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
14270+#else
14271+ return 0;
14272+#endif
14273+}
14274+
1308ab2a 14275+/* ---------------------------------------------------------------------- */
14276+/*
14277+ * they can't be an aufs branch.
14278+ */
14279+static inline int au_test_fs_unsuppoted(struct super_block *sb)
14280+{
14281+ return
14282+#ifndef CONFIG_AUFS_BR_RAMFS
14283+ au_test_ramfs(sb) ||
14284+#endif
14285+ au_test_procfs(sb)
14286+ || au_test_sysfs(sb)
14287+ || au_test_configfs(sb)
14288+ || au_test_debugfs(sb)
14289+ || au_test_securityfs(sb)
14290+ || au_test_xenfs(sb)
14291+ || au_test_ecryptfs(sb)
14292+ /* || !strcmp(au_sbtype(sb), "unionfs") */
14293+ || au_test_aufs(sb); /* will be supported in next version */
14294+}
14295+
1308ab2a 14296+static inline int au_test_fs_remote(struct super_block *sb)
14297+{
14298+ return !au_test_tmpfs(sb)
14299+#ifdef CONFIG_AUFS_BR_RAMFS
14300+ && !au_test_ramfs(sb)
14301+#endif
14302+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
14303+}
14304+
14305+/* ---------------------------------------------------------------------- */
14306+
14307+/*
14308+ * Note: these functions (below) are created after reading ->getattr() in all
14309+ * filesystems under linux/fs. it means we have to do so in every update...
14310+ */
14311+
14312+/*
14313+ * some filesystems require getattr to refresh the inode attributes before
14314+ * referencing.
14315+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
14316+ * and leave the work for d_revalidate()
14317+ */
14318+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
14319+{
14320+ return au_test_nfs(sb)
14321+ || au_test_fuse(sb)
1308ab2a 14322+ /* || au_test_btrfs(sb) */ /* untested */
1308ab2a 14323+ ;
14324+}
14325+
14326+/*
14327+ * filesystems which don't maintain i_size or i_blocks.
14328+ */
14329+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
14330+{
14331+ return au_test_xfs(sb)
4a4d8108
AM
14332+ || au_test_btrfs(sb)
14333+ || au_test_ubifs(sb)
14334+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 14335+ /* || au_test_minix(sb) */ /* untested */
14336+ ;
14337+}
14338+
14339+/*
14340+ * filesystems which don't store the correct value in some of their inode
14341+ * attributes.
14342+ */
14343+static inline int au_test_fs_bad_iattr(struct super_block *sb)
14344+{
14345+ return au_test_fs_bad_iattr_size(sb)
1308ab2a 14346+ || au_test_fat(sb)
14347+ || au_test_msdos(sb)
14348+ || au_test_vfat(sb);
1facf9fc 14349+}
14350+
14351+/* they don't check i_nlink in link(2) */
14352+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
14353+{
14354+ return au_test_tmpfs(sb)
14355+#ifdef CONFIG_AUFS_BR_RAMFS
14356+ || au_test_ramfs(sb)
14357+#endif
4a4d8108 14358+ || au_test_ubifs(sb)
4a4d8108 14359+ || au_test_hfsplus(sb);
1facf9fc 14360+}
14361+
14362+/*
14363+ * filesystems which set S_NOATIME and S_NOCMTIME.
14364+ */
14365+static inline int au_test_fs_notime(struct super_block *sb)
14366+{
14367+ return au_test_nfs(sb)
14368+ || au_test_fuse(sb)
dece6358 14369+ || au_test_ubifs(sb)
1facf9fc 14370+ ;
14371+}
14372+
1facf9fc 14373+/* temporary support for i#1 in cramfs */
14374+static inline int au_test_fs_unique_ino(struct inode *inode)
14375+{
14376+ if (au_test_cramfs(inode->i_sb))
14377+ return inode->i_ino != 1;
14378+ return 1;
14379+}
14380+
14381+/* ---------------------------------------------------------------------- */
14382+
14383+/*
14384+ * the filesystem where the xino files placed must support i/o after unlink and
14385+ * maintain i_size and i_blocks.
14386+ */
14387+static inline int au_test_fs_bad_xino(struct super_block *sb)
14388+{
14389+ return au_test_fs_remote(sb)
14390+ || au_test_fs_bad_iattr_size(sb)
1facf9fc 14391+ /* don't want unnecessary work for xino */
14392+ || au_test_aufs(sb)
1308ab2a 14393+ || au_test_ecryptfs(sb)
14394+ || au_test_nilfs(sb);
1facf9fc 14395+}
14396+
14397+static inline int au_test_fs_trunc_xino(struct super_block *sb)
14398+{
14399+ return au_test_tmpfs(sb)
14400+ || au_test_ramfs(sb);
14401+}
14402+
14403+/*
14404+ * test if the @sb is real-readonly.
14405+ */
14406+static inline int au_test_fs_rr(struct super_block *sb)
14407+{
14408+ return au_test_squashfs(sb)
14409+ || au_test_iso9660(sb)
14410+ || au_test_cramfs(sb)
14411+ || au_test_romfs(sb);
14412+}
14413+
b912730e
AM
14414+/*
14415+ * test if the @inode is nfs with 'noacl' option
14416+ * NFS always sets MS_POSIXACL regardless of its mount option 'noacl.'
14417+ */
14418+static inline int au_test_nfs_noacl(struct inode *inode)
14419+{
14420+ return au_test_nfs(inode->i_sb)
14421+ /* && IS_POSIXACL(inode) */
14422+ && !nfs_server_capable(inode, NFS_CAP_ACLS);
14423+}
14424+
1facf9fc 14425+#endif /* __KERNEL__ */
14426+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
14427diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
14428--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 14429+++ linux/fs/aufs/hfsnotify.c 2015-09-24 10:47:58.254719746 +0200
c1595e42 14430@@ -0,0 +1,288 @@
1facf9fc 14431+/*
2000de60 14432+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 14433+ *
14434+ * This program, aufs is free software; you can redistribute it and/or modify
14435+ * it under the terms of the GNU General Public License as published by
14436+ * the Free Software Foundation; either version 2 of the License, or
14437+ * (at your option) any later version.
dece6358
AM
14438+ *
14439+ * This program is distributed in the hope that it will be useful,
14440+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14441+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14442+ * GNU General Public License for more details.
14443+ *
14444+ * You should have received a copy of the GNU General Public License
523b37e3 14445+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 14446+ */
14447+
14448+/*
4a4d8108 14449+ * fsnotify for the lower directories
1facf9fc 14450+ */
14451+
14452+#include "aufs.h"
14453+
4a4d8108
AM
14454+/* FS_IN_IGNORED is unnecessary */
14455+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
14456+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 14457+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
7eafdf33 14458+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
1facf9fc 14459+
0c5527e5 14460+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 14461+{
0c5527e5
AM
14462+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
14463+ hn_mark);
4a4d8108 14464+ AuDbg("here\n");
7eafdf33 14465+ au_cache_free_hnotify(hn);
076b876e 14466+ smp_mb__before_atomic();
1716fcea
AM
14467+ if (atomic64_dec_and_test(&au_hfsn_ifree))
14468+ wake_up(&au_hfsn_wq);
4a4d8108 14469+}
1facf9fc 14470+
027c5e7a 14471+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 14472+{
1716fcea 14473+ int err;
027c5e7a
AM
14474+ struct au_hnotify *hn;
14475+ struct super_block *sb;
14476+ struct au_branch *br;
0c5527e5 14477+ struct fsnotify_mark *mark;
027c5e7a 14478+ aufs_bindex_t bindex;
1facf9fc 14479+
027c5e7a
AM
14480+ hn = hinode->hi_notify;
14481+ sb = hn->hn_aufs_inode->i_sb;
14482+ bindex = au_br_index(sb, hinode->hi_id);
14483+ br = au_sbr(sb, bindex);
1716fcea
AM
14484+ AuDebugOn(!br->br_hfsn);
14485+
0c5527e5
AM
14486+ mark = &hn->hn_mark;
14487+ fsnotify_init_mark(mark, au_hfsn_free_mark);
14488+ mark->mask = AuHfsnMask;
7f207e10
AM
14489+ /*
14490+ * by udba rename or rmdir, aufs assign a new inode to the known
14491+ * h_inode, so specify 1 to allow dups.
14492+ */
c1595e42 14493+ lockdep_off();
1716fcea 14494+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
027c5e7a 14495+ /*mnt*/NULL, /*allow_dups*/1);
1716fcea
AM
14496+ /* even if err */
14497+ fsnotify_put_mark(mark);
c1595e42 14498+ lockdep_on();
1716fcea
AM
14499+
14500+ return err;
1facf9fc 14501+}
14502+
7eafdf33 14503+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
1facf9fc 14504+{
0c5527e5 14505+ struct fsnotify_mark *mark;
7eafdf33 14506+ unsigned long long ull;
1716fcea 14507+ struct fsnotify_group *group;
7eafdf33
AM
14508+
14509+ ull = atomic64_inc_return(&au_hfsn_ifree);
14510+ BUG_ON(!ull);
953406b4 14511+
0c5527e5 14512+ mark = &hn->hn_mark;
1716fcea
AM
14513+ spin_lock(&mark->lock);
14514+ group = mark->group;
14515+ fsnotify_get_group(group);
14516+ spin_unlock(&mark->lock);
c1595e42 14517+ lockdep_off();
1716fcea
AM
14518+ fsnotify_destroy_mark(mark, group);
14519+ fsnotify_put_group(group);
c1595e42 14520+ lockdep_on();
7f207e10 14521+
7eafdf33
AM
14522+ /* free hn by myself */
14523+ return 0;
1facf9fc 14524+}
14525+
14526+/* ---------------------------------------------------------------------- */
14527+
4a4d8108 14528+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 14529+{
0c5527e5 14530+ struct fsnotify_mark *mark;
1facf9fc 14531+
0c5527e5
AM
14532+ mark = &hinode->hi_notify->hn_mark;
14533+ spin_lock(&mark->lock);
1facf9fc 14534+ if (do_set) {
0c5527e5
AM
14535+ AuDebugOn(mark->mask & AuHfsnMask);
14536+ mark->mask |= AuHfsnMask;
1facf9fc 14537+ } else {
0c5527e5
AM
14538+ AuDebugOn(!(mark->mask & AuHfsnMask));
14539+ mark->mask &= ~AuHfsnMask;
1facf9fc 14540+ }
0c5527e5 14541+ spin_unlock(&mark->lock);
4a4d8108 14542+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 14543+}
14544+
4a4d8108 14545+/* ---------------------------------------------------------------------- */
1facf9fc 14546+
4a4d8108
AM
14547+/* #define AuDbgHnotify */
14548+#ifdef AuDbgHnotify
14549+static char *au_hfsn_name(u32 mask)
14550+{
14551+#ifdef CONFIG_AUFS_DEBUG
c06a8ce3
AM
14552+#define test_ret(flag) \
14553+ do { \
14554+ if (mask & flag) \
14555+ return #flag; \
14556+ } while (0)
4a4d8108
AM
14557+ test_ret(FS_ACCESS);
14558+ test_ret(FS_MODIFY);
14559+ test_ret(FS_ATTRIB);
14560+ test_ret(FS_CLOSE_WRITE);
14561+ test_ret(FS_CLOSE_NOWRITE);
14562+ test_ret(FS_OPEN);
14563+ test_ret(FS_MOVED_FROM);
14564+ test_ret(FS_MOVED_TO);
14565+ test_ret(FS_CREATE);
14566+ test_ret(FS_DELETE);
14567+ test_ret(FS_DELETE_SELF);
14568+ test_ret(FS_MOVE_SELF);
14569+ test_ret(FS_UNMOUNT);
14570+ test_ret(FS_Q_OVERFLOW);
14571+ test_ret(FS_IN_IGNORED);
b912730e 14572+ test_ret(FS_ISDIR);
4a4d8108
AM
14573+ test_ret(FS_IN_ONESHOT);
14574+ test_ret(FS_EVENT_ON_CHILD);
14575+ return "";
14576+#undef test_ret
14577+#else
14578+ return "??";
14579+#endif
1facf9fc 14580+}
4a4d8108 14581+#endif
1facf9fc 14582+
14583+/* ---------------------------------------------------------------------- */
14584+
1716fcea
AM
14585+static void au_hfsn_free_group(struct fsnotify_group *group)
14586+{
14587+ struct au_br_hfsnotify *hfsn = group->private;
14588+
14589+ AuDbg("here\n");
14590+ kfree(hfsn);
14591+}
14592+
4a4d8108 14593+static int au_hfsn_handle_event(struct fsnotify_group *group,
fb47a38f 14594+ struct inode *inode,
0c5527e5
AM
14595+ struct fsnotify_mark *inode_mark,
14596+ struct fsnotify_mark *vfsmount_mark,
fb47a38f
JR
14597+ u32 mask, void *data, int data_type,
14598+ const unsigned char *file_name, u32 cookie)
1facf9fc 14599+{
14600+ int err;
4a4d8108
AM
14601+ struct au_hnotify *hnotify;
14602+ struct inode *h_dir, *h_inode;
fb47a38f 14603+ struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
4a4d8108 14604+
fb47a38f 14605+ AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 14606+
14607+ err = 0;
0c5527e5 14608+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108 14609+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 14610+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 14611+ goto out;
1facf9fc 14612+
fb47a38f
JR
14613+ h_dir = inode;
14614+ h_inode = NULL;
4a4d8108 14615+#ifdef AuDbgHnotify
392086de 14616+ au_debug_on();
4a4d8108
AM
14617+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
14618+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
14619+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
14620+ h_dir->i_ino, mask, au_hfsn_name(mask),
14621+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
14622+ /* WARN_ON(1); */
1facf9fc 14623+ }
392086de 14624+ au_debug_off();
1facf9fc 14625+#endif
4a4d8108 14626+
0c5527e5
AM
14627+ AuDebugOn(!inode_mark);
14628+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
14629+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 14630+
4a4d8108
AM
14631+out:
14632+ return err;
14633+}
1facf9fc 14634+
4a4d8108 14635+static struct fsnotify_ops au_hfsn_ops = {
1716fcea
AM
14636+ .handle_event = au_hfsn_handle_event,
14637+ .free_group_priv = au_hfsn_free_group
4a4d8108
AM
14638+};
14639+
14640+/* ---------------------------------------------------------------------- */
14641+
027c5e7a
AM
14642+static void au_hfsn_fin_br(struct au_branch *br)
14643+{
1716fcea 14644+ struct au_br_hfsnotify *hfsn;
027c5e7a 14645+
1716fcea 14646+ hfsn = br->br_hfsn;
c1595e42
JR
14647+ if (hfsn) {
14648+ lockdep_off();
1716fcea 14649+ fsnotify_put_group(hfsn->hfsn_group);
c1595e42
JR
14650+ lockdep_on();
14651+ }
027c5e7a
AM
14652+}
14653+
1716fcea 14654+static int au_hfsn_init_br(struct au_branch *br, int perm)
4a4d8108
AM
14655+{
14656+ int err;
1716fcea
AM
14657+ struct fsnotify_group *group;
14658+ struct au_br_hfsnotify *hfsn;
1facf9fc 14659+
4a4d8108 14660+ err = 0;
1716fcea
AM
14661+ br->br_hfsn = NULL;
14662+ if (!au_br_hnotifyable(perm))
027c5e7a 14663+ goto out;
027c5e7a 14664+
1716fcea
AM
14665+ err = -ENOMEM;
14666+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
14667+ if (unlikely(!hfsn))
027c5e7a
AM
14668+ goto out;
14669+
1716fcea
AM
14670+ err = 0;
14671+ group = fsnotify_alloc_group(&au_hfsn_ops);
14672+ if (IS_ERR(group)) {
14673+ err = PTR_ERR(group);
0c5527e5 14674+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
1716fcea 14675+ goto out_hfsn;
4a4d8108 14676+ }
1facf9fc 14677+
1716fcea
AM
14678+ group->private = hfsn;
14679+ hfsn->hfsn_group = group;
14680+ br->br_hfsn = hfsn;
14681+ goto out; /* success */
14682+
14683+out_hfsn:
14684+ kfree(hfsn);
027c5e7a 14685+out:
1716fcea
AM
14686+ return err;
14687+}
14688+
14689+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
14690+{
14691+ int err;
14692+
14693+ err = 0;
14694+ if (!br->br_hfsn)
14695+ err = au_hfsn_init_br(br, perm);
14696+
1facf9fc 14697+ return err;
14698+}
14699+
7eafdf33
AM
14700+/* ---------------------------------------------------------------------- */
14701+
14702+static void au_hfsn_fin(void)
14703+{
14704+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
14705+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
14706+}
14707+
4a4d8108
AM
14708+const struct au_hnotify_op au_hnotify_op = {
14709+ .ctl = au_hfsn_ctl,
14710+ .alloc = au_hfsn_alloc,
14711+ .free = au_hfsn_free,
1facf9fc 14712+
7eafdf33
AM
14713+ .fin = au_hfsn_fin,
14714+
027c5e7a
AM
14715+ .reset_br = au_hfsn_reset_br,
14716+ .fin_br = au_hfsn_fin_br,
14717+ .init_br = au_hfsn_init_br
4a4d8108 14718+};
7f207e10
AM
14719diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
14720--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 14721+++ linux/fs/aufs/hfsplus.c 2015-09-24 10:47:58.254719746 +0200
523b37e3 14722@@ -0,0 +1,56 @@
4a4d8108 14723+/*
2000de60 14724+ * Copyright (C) 2010-2015 Junjiro R. Okajima
4a4d8108
AM
14725+ *
14726+ * This program, aufs is free software; you can redistribute it and/or modify
14727+ * it under the terms of the GNU General Public License as published by
14728+ * the Free Software Foundation; either version 2 of the License, or
14729+ * (at your option) any later version.
14730+ *
14731+ * This program is distributed in the hope that it will be useful,
14732+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14733+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14734+ * GNU General Public License for more details.
14735+ *
14736+ * You should have received a copy of the GNU General Public License
523b37e3 14737+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 14738+ */
1facf9fc 14739+
4a4d8108
AM
14740+/*
14741+ * special support for filesystems which acquire an inode mutex
14742+ * at the final close of a file, e.g. hfsplus.
14743+ *
14744+ * This trick is very simple and stupid: just open the file before the really
14745+ * necessary open, to tell hfsplus that this is not the final close.
14746+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
14747+ * and au_h_open_post() after releasing it.
14748+ */
1facf9fc 14749+
4a4d8108 14750+#include "aufs.h"
1facf9fc 14751+
392086de
AM
14752+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
14753+ int force_wr)
4a4d8108
AM
14754+{
14755+ struct file *h_file;
14756+ struct dentry *h_dentry;
1facf9fc 14757+
4a4d8108
AM
14758+ h_dentry = au_h_dptr(dentry, bindex);
14759+ AuDebugOn(!h_dentry);
5527c038 14760+ AuDebugOn(d_is_negative(h_dentry));
4a4d8108
AM
14761+
14762+ h_file = NULL;
14763+ if (au_test_hfsplus(h_dentry->d_sb)
7e9cd9fe 14764+ && d_is_reg(h_dentry))
4a4d8108
AM
14765+ h_file = au_h_open(dentry, bindex,
14766+ O_RDONLY | O_NOATIME | O_LARGEFILE,
392086de 14767+ /*file*/NULL, force_wr);
4a4d8108 14768+ return h_file;
1facf9fc 14769+}
14770+
4a4d8108
AM
14771+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
14772+ struct file *h_file)
14773+{
14774+ if (h_file) {
14775+ fput(h_file);
14776+ au_sbr_put(dentry->d_sb, bindex);
14777+ }
14778+}
7f207e10
AM
14779diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
14780--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 14781+++ linux/fs/aufs/hnotify.c 2015-09-24 10:47:58.254719746 +0200
5527c038 14782@@ -0,0 +1,710 @@
e49829fe 14783+/*
2000de60 14784+ * Copyright (C) 2005-2015 Junjiro R. Okajima
e49829fe
JR
14785+ *
14786+ * This program, aufs is free software; you can redistribute it and/or modify
14787+ * it under the terms of the GNU General Public License as published by
14788+ * the Free Software Foundation; either version 2 of the License, or
14789+ * (at your option) any later version.
14790+ *
14791+ * This program is distributed in the hope that it will be useful,
14792+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14793+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14794+ * GNU General Public License for more details.
14795+ *
14796+ * You should have received a copy of the GNU General Public License
523b37e3 14797+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
14798+ */
14799+
14800+/*
7f207e10 14801+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
14802+ */
14803+
14804+#include "aufs.h"
14805+
027c5e7a 14806+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
14807+{
14808+ int err;
7f207e10 14809+ struct au_hnotify *hn;
1facf9fc 14810+
4a4d8108
AM
14811+ err = -ENOMEM;
14812+ hn = au_cache_alloc_hnotify();
14813+ if (hn) {
14814+ hn->hn_aufs_inode = inode;
027c5e7a
AM
14815+ hinode->hi_notify = hn;
14816+ err = au_hnotify_op.alloc(hinode);
14817+ AuTraceErr(err);
14818+ if (unlikely(err)) {
14819+ hinode->hi_notify = NULL;
4a4d8108
AM
14820+ au_cache_free_hnotify(hn);
14821+ /*
14822+ * The upper dir was removed by udba, but the same named
14823+ * dir left. In this case, aufs assigns a new inode
14824+ * number and sets the monitor again.
14825+ * For the lower dir, the old monitor is still left.
14826+ */
14827+ if (err == -EEXIST)
14828+ err = 0;
14829+ }
1308ab2a 14830+ }
1308ab2a 14831+
027c5e7a 14832+ AuTraceErr(err);
1308ab2a 14833+ return err;
dece6358 14834+}
1facf9fc 14835+
4a4d8108 14836+void au_hn_free(struct au_hinode *hinode)
dece6358 14837+{
4a4d8108 14838+ struct au_hnotify *hn;
1facf9fc 14839+
4a4d8108
AM
14840+ hn = hinode->hi_notify;
14841+ if (hn) {
4a4d8108 14842+ hinode->hi_notify = NULL;
7eafdf33
AM
14843+ if (au_hnotify_op.free(hinode, hn))
14844+ au_cache_free_hnotify(hn);
4a4d8108
AM
14845+ }
14846+}
dece6358 14847+
4a4d8108 14848+/* ---------------------------------------------------------------------- */
dece6358 14849+
4a4d8108
AM
14850+void au_hn_ctl(struct au_hinode *hinode, int do_set)
14851+{
14852+ if (hinode->hi_notify)
14853+ au_hnotify_op.ctl(hinode, do_set);
14854+}
14855+
14856+void au_hn_reset(struct inode *inode, unsigned int flags)
14857+{
14858+ aufs_bindex_t bindex, bend;
14859+ struct inode *hi;
14860+ struct dentry *iwhdentry;
1facf9fc 14861+
1308ab2a 14862+ bend = au_ibend(inode);
4a4d8108
AM
14863+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14864+ hi = au_h_iptr(inode, bindex);
14865+ if (!hi)
14866+ continue;
1308ab2a 14867+
4a4d8108
AM
14868+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
14869+ iwhdentry = au_hi_wh(inode, bindex);
14870+ if (iwhdentry)
14871+ dget(iwhdentry);
14872+ au_igrab(hi);
14873+ au_set_h_iptr(inode, bindex, NULL, 0);
14874+ au_set_h_iptr(inode, bindex, au_igrab(hi),
14875+ flags & ~AuHi_XINO);
14876+ iput(hi);
14877+ dput(iwhdentry);
14878+ /* mutex_unlock(&hi->i_mutex); */
1facf9fc 14879+ }
1facf9fc 14880+}
14881+
1308ab2a 14882+/* ---------------------------------------------------------------------- */
1facf9fc 14883+
4a4d8108 14884+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 14885+{
4a4d8108
AM
14886+ int err;
14887+ aufs_bindex_t bindex, bend, bfound, bstart;
14888+ struct inode *h_i;
1facf9fc 14889+
4a4d8108
AM
14890+ err = 0;
14891+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 14892+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
14893+ goto out;
14894+ }
1facf9fc 14895+
4a4d8108
AM
14896+ bfound = -1;
14897+ bend = au_ibend(inode);
14898+ bstart = au_ibstart(inode);
14899+#if 0 /* reserved for future use */
14900+ if (bindex == bend) {
14901+ /* keep this ino in rename case */
14902+ goto out;
14903+ }
14904+#endif
14905+ for (bindex = bstart; bindex <= bend; bindex++)
14906+ if (au_h_iptr(inode, bindex) == h_inode) {
14907+ bfound = bindex;
14908+ break;
14909+ }
14910+ if (bfound < 0)
1308ab2a 14911+ goto out;
1facf9fc 14912+
4a4d8108
AM
14913+ for (bindex = bstart; bindex <= bend; bindex++) {
14914+ h_i = au_h_iptr(inode, bindex);
14915+ if (!h_i)
14916+ continue;
1facf9fc 14917+
4a4d8108
AM
14918+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
14919+ /* ignore this error */
14920+ /* bad action? */
1facf9fc 14921+ }
1facf9fc 14922+
4a4d8108 14923+ /* children inode number will be broken */
1facf9fc 14924+
4f0767ce 14925+out:
4a4d8108
AM
14926+ AuTraceErr(err);
14927+ return err;
1facf9fc 14928+}
14929+
4a4d8108 14930+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 14931+{
4a4d8108
AM
14932+ int err, i, j, ndentry;
14933+ struct au_dcsub_pages dpages;
14934+ struct au_dpage *dpage;
14935+ struct dentry **dentries;
1facf9fc 14936+
4a4d8108
AM
14937+ err = au_dpages_init(&dpages, GFP_NOFS);
14938+ if (unlikely(err))
14939+ goto out;
14940+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
14941+ if (unlikely(err))
14942+ goto out_dpages;
1facf9fc 14943+
4a4d8108
AM
14944+ for (i = 0; i < dpages.ndpage; i++) {
14945+ dpage = dpages.dpages + i;
14946+ dentries = dpage->dentries;
14947+ ndentry = dpage->ndentry;
14948+ for (j = 0; j < ndentry; j++) {
14949+ struct dentry *d;
14950+
14951+ d = dentries[j];
14952+ if (IS_ROOT(d))
14953+ continue;
14954+
4a4d8108 14955+ au_digen_dec(d);
5527c038 14956+ if (d_really_is_positive(d))
4a4d8108
AM
14957+ /* todo: reset children xino?
14958+ cached children only? */
5527c038 14959+ au_iigen_dec(d_inode(d));
1308ab2a 14960+ }
dece6358 14961+ }
1facf9fc 14962+
4f0767ce 14963+out_dpages:
4a4d8108 14964+ au_dpages_free(&dpages);
dece6358 14965+
027c5e7a 14966+#if 0
4a4d8108
AM
14967+ /* discard children */
14968+ dentry_unhash(dentry);
14969+ dput(dentry);
027c5e7a 14970+#endif
4f0767ce 14971+out:
dece6358
AM
14972+ return err;
14973+}
14974+
1308ab2a 14975+/*
4a4d8108 14976+ * return 0 if processed (or nothing to do for the root dir), non-zero otherwise.
1308ab2a 14977+ */
4a4d8108
AM
14978+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
14979+ const unsigned int isdir)
dece6358 14980+{
1308ab2a 14981+ int err;
4a4d8108
AM
14982+ struct dentry *d;
14983+ struct qstr *dname;
1facf9fc 14984+
4a4d8108
AM
14985+ err = 1;
14986+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 14987+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
14988+ err = 0;
14989+ goto out;
14990+ }
dece6358 14991+
4a4d8108
AM
14992+ if (!isdir) {
14993+ AuDebugOn(!name);
14994+ au_iigen_dec(inode);
027c5e7a 14995+ spin_lock(&inode->i_lock);
c1595e42 14996+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 14997+ spin_lock(&d->d_lock);
4a4d8108
AM
14998+ dname = &d->d_name;
14999+ if (dname->len != nlen
027c5e7a
AM
15000+ || memcmp(dname->name, name, nlen)) {
15001+ spin_unlock(&d->d_lock);
4a4d8108 15002+ continue;
027c5e7a 15003+ }
4a4d8108 15004+ err = 0;
4a4d8108
AM
15005+ au_digen_dec(d);
15006+ spin_unlock(&d->d_lock);
15007+ break;
1facf9fc 15008+ }
027c5e7a 15009+ spin_unlock(&inode->i_lock);
1308ab2a 15010+ } else {
027c5e7a 15011+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
c1595e42 15012+ d = d_find_any_alias(inode);
4a4d8108
AM
15013+ if (!d) {
15014+ au_iigen_dec(inode);
15015+ goto out;
15016+ }
1facf9fc 15017+
027c5e7a 15018+ spin_lock(&d->d_lock);
4a4d8108 15019+ dname = &d->d_name;
027c5e7a
AM
15020+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
15021+ spin_unlock(&d->d_lock);
4a4d8108 15022+ err = hn_gen_tree(d);
027c5e7a
AM
15023+ spin_lock(&d->d_lock);
15024+ }
15025+ spin_unlock(&d->d_lock);
4a4d8108
AM
15026+ dput(d);
15027+ }
1facf9fc 15028+
4f0767ce 15029+out:
4a4d8108 15030+ AuTraceErr(err);
1308ab2a 15031+ return err;
15032+}
dece6358 15033+
4a4d8108 15034+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 15035+{
4a4d8108 15036+ int err;
1facf9fc 15037+
5527c038 15038+ if (IS_ROOT(dentry)) {
0c3ec466 15039+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15040+ return 0;
15041+ }
1308ab2a 15042+
4a4d8108
AM
15043+ err = 0;
15044+ if (!isdir) {
4a4d8108 15045+ au_digen_dec(dentry);
5527c038
JR
15046+ if (d_really_is_positive(dentry))
15047+ au_iigen_dec(d_inode(dentry));
4a4d8108 15048+ } else {
027c5e7a 15049+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
5527c038 15050+ if (d_really_is_positive(dentry))
4a4d8108
AM
15051+ err = hn_gen_tree(dentry);
15052+ }
15053+
15054+ AuTraceErr(err);
15055+ return err;
1facf9fc 15056+}
15057+
4a4d8108 15058+/* ---------------------------------------------------------------------- */
1facf9fc 15059+
4a4d8108
AM
15060+/* hnotify job flags */
15061+#define AuHnJob_XINO0 1
15062+#define AuHnJob_GEN (1 << 1)
15063+#define AuHnJob_DIRENT (1 << 2)
15064+#define AuHnJob_ISDIR (1 << 3)
15065+#define AuHnJob_TRYXINO0 (1 << 4)
15066+#define AuHnJob_MNTPNT (1 << 5)
15067+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
15068+#define au_fset_hnjob(flags, name) \
15069+ do { (flags) |= AuHnJob_##name; } while (0)
15070+#define au_fclr_hnjob(flags, name) \
15071+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 15072+
4a4d8108
AM
15073+enum {
15074+ AuHn_CHILD,
15075+ AuHn_PARENT,
15076+ AuHnLast
15077+};
1facf9fc 15078+
4a4d8108
AM
15079+struct au_hnotify_args {
15080+ struct inode *h_dir, *dir, *h_child_inode;
15081+ u32 mask;
15082+ unsigned int flags[AuHnLast];
15083+ unsigned int h_child_nlen;
15084+ char h_child_name[];
15085+};
1facf9fc 15086+
4a4d8108
AM
15087+struct hn_job_args {
15088+ unsigned int flags;
15089+ struct inode *inode, *h_inode, *dir, *h_dir;
15090+ struct dentry *dentry;
15091+ char *h_name;
15092+ int h_nlen;
15093+};
1308ab2a 15094+
4a4d8108
AM
15095+static int hn_job(struct hn_job_args *a)
15096+{
15097+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
076b876e 15098+ int e;
1308ab2a 15099+
4a4d8108
AM
15100+ /* reset xino */
15101+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
15102+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 15103+
4a4d8108
AM
15104+ if (au_ftest_hnjob(a->flags, TRYXINO0)
15105+ && a->inode
15106+ && a->h_inode) {
15107+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
38d290e6
JR
15108+ if (!a->h_inode->i_nlink
15109+ && !(a->h_inode->i_state & I_LINKABLE))
4a4d8108
AM
15110+ hn_xino(a->inode, a->h_inode); /* ignore this error */
15111+ mutex_unlock(&a->h_inode->i_mutex);
1308ab2a 15112+ }
1facf9fc 15113+
4a4d8108
AM
15114+ /* make the generation obsolete */
15115+ if (au_ftest_hnjob(a->flags, GEN)) {
076b876e 15116+ e = -1;
4a4d8108 15117+ if (a->inode)
076b876e 15118+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
4a4d8108 15119+ isdir);
076b876e 15120+ if (e && a->dentry)
4a4d8108
AM
15121+ hn_gen_by_name(a->dentry, isdir);
15122+ /* ignore this error */
1facf9fc 15123+ }
1facf9fc 15124+
4a4d8108
AM
15125+ /* make dir entries obsolete */
15126+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
15127+ struct au_vdir *vdir;
1facf9fc 15128+
4a4d8108
AM
15129+ vdir = au_ivdir(a->inode);
15130+ if (vdir)
15131+ vdir->vd_jiffy = 0;
15132+ /* IMustLock(a->inode); */
15133+ /* a->inode->i_version++; */
15134+ }
1facf9fc 15135+
4a4d8108
AM
15136+ /* can do nothing but warn */
15137+ if (au_ftest_hnjob(a->flags, MNTPNT)
15138+ && a->dentry
15139+ && d_mountpoint(a->dentry))
523b37e3 15140+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
1facf9fc 15141+
4a4d8108 15142+ return 0;
1308ab2a 15143+}
1facf9fc 15144+
1308ab2a 15145+/* ---------------------------------------------------------------------- */
1facf9fc 15146+
4a4d8108
AM
15147+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
15148+ struct inode *dir)
1308ab2a 15149+{
4a4d8108
AM
15150+ struct dentry *dentry, *d, *parent;
15151+ struct qstr *dname;
1308ab2a 15152+
c1595e42 15153+ parent = d_find_any_alias(dir);
4a4d8108
AM
15154+ if (!parent)
15155+ return NULL;
1308ab2a 15156+
4a4d8108 15157+ dentry = NULL;
027c5e7a 15158+ spin_lock(&parent->d_lock);
c1595e42 15159+ list_for_each_entry(d, &parent->d_subdirs, d_child) {
523b37e3 15160+ /* AuDbg("%pd\n", d); */
027c5e7a 15161+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
15162+ dname = &d->d_name;
15163+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
15164+ goto cont_unlock;
15165+ if (au_di(d))
15166+ au_digen_dec(d);
15167+ else
15168+ goto cont_unlock;
c1595e42 15169+ if (au_dcount(d) > 0) {
027c5e7a 15170+ dentry = dget_dlock(d);
4a4d8108 15171+ spin_unlock(&d->d_lock);
027c5e7a 15172+ break;
dece6358 15173+ }
1facf9fc 15174+
f6b6e03d 15175+cont_unlock:
027c5e7a 15176+ spin_unlock(&d->d_lock);
1308ab2a 15177+ }
027c5e7a 15178+ spin_unlock(&parent->d_lock);
4a4d8108 15179+ dput(parent);
1facf9fc 15180+
4a4d8108
AM
15181+ if (dentry)
15182+ di_write_lock_child(dentry);
1308ab2a 15183+
4a4d8108
AM
15184+ return dentry;
15185+}
dece6358 15186+
4a4d8108
AM
15187+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
15188+ aufs_bindex_t bindex, ino_t h_ino)
15189+{
15190+ struct inode *inode;
15191+ ino_t ino;
15192+ int err;
15193+
15194+ inode = NULL;
15195+ err = au_xino_read(sb, bindex, h_ino, &ino);
15196+ if (!err && ino)
15197+ inode = ilookup(sb, ino);
15198+ if (!inode)
15199+ goto out;
15200+
15201+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15202+ pr_warn("wrong root branch\n");
4a4d8108
AM
15203+ iput(inode);
15204+ inode = NULL;
15205+ goto out;
1308ab2a 15206+ }
15207+
4a4d8108 15208+ ii_write_lock_child(inode);
1308ab2a 15209+
4f0767ce 15210+out:
4a4d8108 15211+ return inode;
dece6358
AM
15212+}
15213+
4a4d8108 15214+static void au_hn_bh(void *_args)
1facf9fc 15215+{
4a4d8108
AM
15216+ struct au_hnotify_args *a = _args;
15217+ struct super_block *sb;
15218+ aufs_bindex_t bindex, bend, bfound;
15219+ unsigned char xino, try_iput;
1facf9fc 15220+ int err;
1308ab2a 15221+ struct inode *inode;
4a4d8108
AM
15222+ ino_t h_ino;
15223+ struct hn_job_args args;
15224+ struct dentry *dentry;
15225+ struct au_sbinfo *sbinfo;
1facf9fc 15226+
4a4d8108
AM
15227+ AuDebugOn(!_args);
15228+ AuDebugOn(!a->h_dir);
15229+ AuDebugOn(!a->dir);
15230+ AuDebugOn(!a->mask);
15231+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
15232+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
15233+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 15234+
4a4d8108
AM
15235+ inode = NULL;
15236+ dentry = NULL;
15237+ /*
15238+ * do not lock a->dir->i_mutex here
15239+ * because of d_revalidate() may cause a deadlock.
15240+ */
15241+ sb = a->dir->i_sb;
15242+ AuDebugOn(!sb);
15243+ sbinfo = au_sbi(sb);
15244+ AuDebugOn(!sbinfo);
7f207e10 15245+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 15246+
4a4d8108
AM
15247+ ii_read_lock_parent(a->dir);
15248+ bfound = -1;
15249+ bend = au_ibend(a->dir);
15250+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
15251+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
15252+ bfound = bindex;
15253+ break;
15254+ }
15255+ ii_read_unlock(a->dir);
15256+ if (unlikely(bfound < 0))
15257+ goto out;
1facf9fc 15258+
4a4d8108
AM
15259+ xino = !!au_opt_test(au_mntflags(sb), XINO);
15260+ h_ino = 0;
15261+ if (a->h_child_inode)
15262+ h_ino = a->h_child_inode->i_ino;
1facf9fc 15263+
4a4d8108
AM
15264+ if (a->h_child_nlen
15265+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
15266+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
15267+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
15268+ a->dir);
15269+ try_iput = 0;
5527c038
JR
15270+ if (dentry && d_really_is_positive(dentry))
15271+ inode = d_inode(dentry);
4a4d8108
AM
15272+ if (xino && !inode && h_ino
15273+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
15274+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
15275+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
15276+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
15277+ try_iput = 1;
15278+ }
1facf9fc 15279+
4a4d8108
AM
15280+ args.flags = a->flags[AuHn_CHILD];
15281+ args.dentry = dentry;
15282+ args.inode = inode;
15283+ args.h_inode = a->h_child_inode;
15284+ args.dir = a->dir;
15285+ args.h_dir = a->h_dir;
15286+ args.h_name = a->h_child_name;
15287+ args.h_nlen = a->h_child_nlen;
15288+ err = hn_job(&args);
15289+ if (dentry) {
027c5e7a 15290+ if (au_di(dentry))
4a4d8108
AM
15291+ di_write_unlock(dentry);
15292+ dput(dentry);
15293+ }
15294+ if (inode && try_iput) {
15295+ ii_write_unlock(inode);
15296+ iput(inode);
15297+ }
1facf9fc 15298+
4a4d8108
AM
15299+ ii_write_lock_parent(a->dir);
15300+ args.flags = a->flags[AuHn_PARENT];
15301+ args.dentry = NULL;
15302+ args.inode = a->dir;
15303+ args.h_inode = a->h_dir;
15304+ args.dir = NULL;
15305+ args.h_dir = NULL;
15306+ args.h_name = NULL;
15307+ args.h_nlen = 0;
15308+ err = hn_job(&args);
15309+ ii_write_unlock(a->dir);
1facf9fc 15310+
4f0767ce 15311+out:
4a4d8108
AM
15312+ iput(a->h_child_inode);
15313+ iput(a->h_dir);
15314+ iput(a->dir);
027c5e7a
AM
15315+ si_write_unlock(sb);
15316+ au_nwt_done(&sbinfo->si_nowait);
1308ab2a 15317+ kfree(a);
dece6358 15318+}
1facf9fc 15319+
4a4d8108
AM
15320+/* ---------------------------------------------------------------------- */
15321+
15322+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
15323+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 15324+{
4a4d8108 15325+ int err, len;
53392da6 15326+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
15327+ unsigned char isdir, isroot, wh;
15328+ struct inode *dir;
15329+ struct au_hnotify_args *args;
15330+ char *p, *h_child_name;
dece6358 15331+
1308ab2a 15332+ err = 0;
4a4d8108
AM
15333+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
15334+ dir = igrab(hnotify->hn_aufs_inode);
15335+ if (!dir)
15336+ goto out;
1facf9fc 15337+
4a4d8108
AM
15338+ isroot = (dir->i_ino == AUFS_ROOT_INO);
15339+ wh = 0;
15340+ h_child_name = (void *)h_child_qstr->name;
15341+ len = h_child_qstr->len;
15342+ if (h_child_name) {
15343+ if (len > AUFS_WH_PFX_LEN
15344+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
15345+ h_child_name += AUFS_WH_PFX_LEN;
15346+ len -= AUFS_WH_PFX_LEN;
15347+ wh = 1;
15348+ }
1facf9fc 15349+ }
dece6358 15350+
4a4d8108
AM
15351+ isdir = 0;
15352+ if (h_child_inode)
15353+ isdir = !!S_ISDIR(h_child_inode->i_mode);
15354+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
15355+ flags[AuHn_CHILD] = 0;
15356+ if (isdir)
15357+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
15358+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
15359+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
15360+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
15361+ case FS_MOVED_FROM:
15362+ case FS_MOVED_TO:
15363+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
15364+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15365+ /*FALLTHROUGH*/
15366+ case FS_CREATE:
fb47a38f 15367+ AuDebugOn(!h_child_name);
4a4d8108 15368+ break;
1facf9fc 15369+
4a4d8108
AM
15370+ case FS_DELETE:
15371+ /*
15372+ * aufs will never be able to get this child inode.
15373+ * revalidation should be in d_revalidate()
15374+ * by checking i_nlink, i_generation or d_unhashed().
15375+ */
15376+ AuDebugOn(!h_child_name);
15377+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
15378+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15379+ break;
dece6358 15380+
4a4d8108
AM
15381+ default:
15382+ AuDebugOn(1);
15383+ }
1308ab2a 15384+
4a4d8108
AM
15385+ if (wh)
15386+ h_child_inode = NULL;
1308ab2a 15387+
4a4d8108
AM
15388+ err = -ENOMEM;
15389+ /* iput() and kfree() will be called in au_hn_bh() */
4a4d8108 15390+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
15391+ if (unlikely(!args)) {
15392+ AuErr1("no memory\n");
15393+ iput(dir);
15394+ goto out;
15395+ }
15396+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
15397+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
15398+ args->mask = mask;
15399+ args->dir = dir;
15400+ args->h_dir = igrab(h_dir);
15401+ if (h_child_inode)
15402+ h_child_inode = igrab(h_child_inode); /* can be NULL */
15403+ args->h_child_inode = h_child_inode;
15404+ args->h_child_nlen = len;
15405+ if (len) {
15406+ p = (void *)args;
15407+ p += sizeof(*args);
15408+ memcpy(p, h_child_name, len);
15409+ p[len] = 0;
1308ab2a 15410+ }
1308ab2a 15411+
38d290e6 15412+ /* NFS fires the event for silly-renamed one from kworker */
53392da6 15413+ f = 0;
38d290e6
JR
15414+ if (!dir->i_nlink
15415+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
53392da6
AM
15416+ f = AuWkq_NEST;
15417+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
15418+ if (unlikely(err)) {
15419+ pr_err("wkq %d\n", err);
15420+ iput(args->h_child_inode);
15421+ iput(args->h_dir);
15422+ iput(args->dir);
15423+ kfree(args);
1facf9fc 15424+ }
1facf9fc 15425+
4a4d8108 15426+out:
1facf9fc 15427+ return err;
15428+}
15429+
027c5e7a
AM
15430+/* ---------------------------------------------------------------------- */
15431+
15432+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
15433+{
15434+ int err;
15435+
15436+ AuDebugOn(!(udba & AuOptMask_UDBA));
15437+
15438+ err = 0;
15439+ if (au_hnotify_op.reset_br)
15440+ err = au_hnotify_op.reset_br(udba, br, perm);
15441+
15442+ return err;
15443+}
15444+
15445+int au_hnotify_init_br(struct au_branch *br, int perm)
15446+{
15447+ int err;
15448+
15449+ err = 0;
15450+ if (au_hnotify_op.init_br)
15451+ err = au_hnotify_op.init_br(br, perm);
15452+
15453+ return err;
15454+}
15455+
15456+void au_hnotify_fin_br(struct au_branch *br)
15457+{
15458+ if (au_hnotify_op.fin_br)
15459+ au_hnotify_op.fin_br(br);
15460+}
15461+
4a4d8108
AM
15462+static void au_hn_destroy_cache(void)
15463+{
15464+ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]);
15465+ au_cachep[AuCache_HNOTIFY] = NULL;
15466+}
1308ab2a 15467+
4a4d8108 15468+int __init au_hnotify_init(void)
1facf9fc 15469+{
1308ab2a 15470+ int err;
1308ab2a 15471+
4a4d8108
AM
15472+ err = -ENOMEM;
15473+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify);
15474+ if (au_cachep[AuCache_HNOTIFY]) {
027c5e7a
AM
15475+ err = 0;
15476+ if (au_hnotify_op.init)
15477+ err = au_hnotify_op.init();
4a4d8108
AM
15478+ if (unlikely(err))
15479+ au_hn_destroy_cache();
1308ab2a 15480+ }
1308ab2a 15481+ AuTraceErr(err);
4a4d8108 15482+ return err;
1308ab2a 15483+}
15484+
4a4d8108 15485+void au_hnotify_fin(void)
1308ab2a 15486+{
027c5e7a
AM
15487+ if (au_hnotify_op.fin)
15488+ au_hnotify_op.fin();
4a4d8108
AM
15489+ /* cf. au_cache_fin() */
15490+ if (au_cachep[AuCache_HNOTIFY])
15491+ au_hn_destroy_cache();
dece6358 15492+}
7f207e10
AM
15493diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
15494--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 15495+++ linux/fs/aufs/iinfo.c 2015-09-24 10:47:58.254719746 +0200
38d290e6 15496@@ -0,0 +1,277 @@
dece6358 15497+/*
2000de60 15498+ * Copyright (C) 2005-2015 Junjiro R. Okajima
dece6358
AM
15499+ *
15500+ * This program, aufs is free software; you can redistribute it and/or modify
15501+ * it under the terms of the GNU General Public License as published by
15502+ * the Free Software Foundation; either version 2 of the License, or
15503+ * (at your option) any later version.
15504+ *
15505+ * This program is distributed in the hope that it will be useful,
15506+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15507+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15508+ * GNU General Public License for more details.
15509+ *
15510+ * You should have received a copy of the GNU General Public License
523b37e3 15511+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 15512+ */
1facf9fc 15513+
dece6358 15514+/*
4a4d8108 15515+ * inode private data
dece6358 15516+ */
1facf9fc 15517+
1308ab2a 15518+#include "aufs.h"
1facf9fc 15519+
4a4d8108 15520+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 15521+{
4a4d8108 15522+ struct inode *h_inode;
1facf9fc 15523+
4a4d8108 15524+ IiMustAnyLock(inode);
1facf9fc 15525+
4a4d8108
AM
15526+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
15527+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15528+ return h_inode;
15529+}
1facf9fc 15530+
4a4d8108
AM
15531+/* todo: hard/soft set? */
15532+void au_hiput(struct au_hinode *hinode)
15533+{
15534+ au_hn_free(hinode);
15535+ dput(hinode->hi_whdentry);
15536+ iput(hinode->hi_inode);
15537+}
1facf9fc 15538+
4a4d8108
AM
15539+unsigned int au_hi_flags(struct inode *inode, int isdir)
15540+{
15541+ unsigned int flags;
15542+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 15543+
4a4d8108
AM
15544+ flags = 0;
15545+ if (au_opt_test(mnt_flags, XINO))
15546+ au_fset_hi(flags, XINO);
15547+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
15548+ au_fset_hi(flags, HNOTIFY);
15549+ return flags;
1facf9fc 15550+}
15551+
4a4d8108
AM
15552+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15553+ struct inode *h_inode, unsigned int flags)
1308ab2a 15554+{
4a4d8108
AM
15555+ struct au_hinode *hinode;
15556+ struct inode *hi;
15557+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 15558+
4a4d8108 15559+ IiMustWriteLock(inode);
dece6358 15560+
4a4d8108
AM
15561+ hinode = iinfo->ii_hinode + bindex;
15562+ hi = hinode->hi_inode;
15563+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15564+
15565+ if (hi)
15566+ au_hiput(hinode);
15567+ hinode->hi_inode = h_inode;
15568+ if (h_inode) {
15569+ int err;
15570+ struct super_block *sb = inode->i_sb;
15571+ struct au_branch *br;
15572+
027c5e7a
AM
15573+ AuDebugOn(inode->i_mode
15574+ && (h_inode->i_mode & S_IFMT)
15575+ != (inode->i_mode & S_IFMT));
4a4d8108
AM
15576+ if (bindex == iinfo->ii_bstart)
15577+ au_cpup_igen(inode, h_inode);
15578+ br = au_sbr(sb, bindex);
15579+ hinode->hi_id = br->br_id;
15580+ if (au_ftest_hi(flags, XINO)) {
15581+ err = au_xino_write(sb, bindex, h_inode->i_ino,
15582+ inode->i_ino);
15583+ if (unlikely(err))
15584+ AuIOErr1("failed au_xino_write() %d\n", err);
15585+ }
15586+
15587+ if (au_ftest_hi(flags, HNOTIFY)
15588+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 15589+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
15590+ if (unlikely(err))
15591+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 15592+ }
15593+ }
4a4d8108 15594+}
dece6358 15595+
4a4d8108
AM
15596+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15597+ struct dentry *h_wh)
15598+{
15599+ struct au_hinode *hinode;
dece6358 15600+
4a4d8108
AM
15601+ IiMustWriteLock(inode);
15602+
15603+ hinode = au_ii(inode)->ii_hinode + bindex;
15604+ AuDebugOn(hinode->hi_whdentry);
15605+ hinode->hi_whdentry = h_wh;
1facf9fc 15606+}
15607+
537831f9 15608+void au_update_iigen(struct inode *inode, int half)
1308ab2a 15609+{
537831f9
AM
15610+ struct au_iinfo *iinfo;
15611+ struct au_iigen *iigen;
15612+ unsigned int sigen;
15613+
15614+ sigen = au_sigen(inode->i_sb);
15615+ iinfo = au_ii(inode);
15616+ iigen = &iinfo->ii_generation;
15617+ spin_lock(&iinfo->ii_genspin);
15618+ iigen->ig_generation = sigen;
15619+ if (half)
15620+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
15621+ else
15622+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
15623+ spin_unlock(&iinfo->ii_genspin);
4a4d8108 15624+}
1facf9fc 15625+
4a4d8108
AM
15626+/* it may be called at remount time, too */
15627+void au_update_ibrange(struct inode *inode, int do_put_zero)
15628+{
15629+ struct au_iinfo *iinfo;
027c5e7a 15630+ aufs_bindex_t bindex, bend;
1facf9fc 15631+
4a4d8108 15632+ iinfo = au_ii(inode);
027c5e7a 15633+ if (!iinfo)
4a4d8108 15634+ return;
1facf9fc 15635+
4a4d8108 15636+ IiMustWriteLock(inode);
1facf9fc 15637+
027c5e7a 15638+ if (do_put_zero && iinfo->ii_bstart >= 0) {
4a4d8108
AM
15639+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
15640+ bindex++) {
15641+ struct inode *h_i;
1facf9fc 15642+
4a4d8108 15643+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
38d290e6
JR
15644+ if (h_i
15645+ && !h_i->i_nlink
15646+ && !(h_i->i_state & I_LINKABLE))
027c5e7a
AM
15647+ au_set_h_iptr(inode, bindex, NULL, 0);
15648+ }
4a4d8108
AM
15649+ }
15650+
027c5e7a
AM
15651+ iinfo->ii_bstart = -1;
15652+ iinfo->ii_bend = -1;
15653+ bend = au_sbend(inode->i_sb);
15654+ for (bindex = 0; bindex <= bend; bindex++)
15655+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
15656+ iinfo->ii_bstart = bindex;
4a4d8108 15657+ break;
027c5e7a
AM
15658+ }
15659+ if (iinfo->ii_bstart >= 0)
15660+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
15661+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
15662+ iinfo->ii_bend = bindex;
15663+ break;
15664+ }
15665+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
1308ab2a 15666+}
1facf9fc 15667+
dece6358 15668+/* ---------------------------------------------------------------------- */
1facf9fc 15669+
4a4d8108 15670+void au_icntnr_init_once(void *_c)
dece6358 15671+{
4a4d8108
AM
15672+ struct au_icntnr *c = _c;
15673+ struct au_iinfo *iinfo = &c->iinfo;
e49829fe 15674+ static struct lock_class_key aufs_ii;
1facf9fc 15675+
537831f9 15676+ spin_lock_init(&iinfo->ii_genspin);
4a4d8108 15677+ au_rw_init(&iinfo->ii_rwsem);
e49829fe 15678+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
4a4d8108
AM
15679+ inode_init_once(&c->vfs_inode);
15680+}
1facf9fc 15681+
4a4d8108
AM
15682+int au_iinfo_init(struct inode *inode)
15683+{
15684+ struct au_iinfo *iinfo;
15685+ struct super_block *sb;
15686+ int nbr, i;
1facf9fc 15687+
4a4d8108
AM
15688+ sb = inode->i_sb;
15689+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
15690+ nbr = au_sbend(sb) + 1;
15691+ if (unlikely(nbr <= 0))
15692+ nbr = 1;
15693+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
15694+ if (iinfo->ii_hinode) {
7f207e10 15695+ au_ninodes_inc(sb);
4a4d8108
AM
15696+ for (i = 0; i < nbr; i++)
15697+ iinfo->ii_hinode[i].hi_id = -1;
1facf9fc 15698+
537831f9 15699+ iinfo->ii_generation.ig_generation = au_sigen(sb);
4a4d8108
AM
15700+ iinfo->ii_bstart = -1;
15701+ iinfo->ii_bend = -1;
15702+ iinfo->ii_vdir = NULL;
15703+ return 0;
1308ab2a 15704+ }
4a4d8108
AM
15705+ return -ENOMEM;
15706+}
1facf9fc 15707+
4a4d8108
AM
15708+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
15709+{
15710+ int err, sz;
15711+ struct au_hinode *hip;
1facf9fc 15712+
4a4d8108
AM
15713+ AuRwMustWriteLock(&iinfo->ii_rwsem);
15714+
15715+ err = -ENOMEM;
15716+ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
15717+ if (!sz)
15718+ sz = sizeof(*hip);
15719+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
15720+ if (hip) {
15721+ iinfo->ii_hinode = hip;
15722+ err = 0;
1308ab2a 15723+ }
4a4d8108 15724+
1308ab2a 15725+ return err;
1facf9fc 15726+}
15727+
4a4d8108 15728+void au_iinfo_fin(struct inode *inode)
1facf9fc 15729+{
4a4d8108
AM
15730+ struct au_iinfo *iinfo;
15731+ struct au_hinode *hi;
15732+ struct super_block *sb;
b752ccd1
AM
15733+ aufs_bindex_t bindex, bend;
15734+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 15735+
4a4d8108
AM
15736+ iinfo = au_ii(inode);
15737+ /* bad_inode case */
15738+ if (!iinfo)
15739+ return;
1308ab2a 15740+
b752ccd1 15741+ sb = inode->i_sb;
7f207e10 15742+ au_ninodes_dec(sb);
b752ccd1
AM
15743+ if (si_pid_test(sb))
15744+ au_xino_delete_inode(inode, unlinked);
15745+ else {
15746+ /*
15747+ * it is safe to hide the dependency between sbinfo and
15748+ * sb->s_umount.
15749+ */
15750+ lockdep_off();
15751+ si_noflush_read_lock(sb);
15752+ au_xino_delete_inode(inode, unlinked);
15753+ si_read_unlock(sb);
15754+ lockdep_on();
15755+ }
15756+
4a4d8108
AM
15757+ if (iinfo->ii_vdir)
15758+ au_vdir_free(iinfo->ii_vdir);
1308ab2a 15759+
b752ccd1
AM
15760+ bindex = iinfo->ii_bstart;
15761+ if (bindex >= 0) {
15762+ hi = iinfo->ii_hinode + bindex;
4a4d8108 15763+ bend = iinfo->ii_bend;
b752ccd1
AM
15764+ while (bindex++ <= bend) {
15765+ if (hi->hi_inode)
4a4d8108 15766+ au_hiput(hi);
4a4d8108
AM
15767+ hi++;
15768+ }
15769+ }
4a4d8108 15770+ kfree(iinfo->ii_hinode);
027c5e7a 15771+ iinfo->ii_hinode = NULL;
4a4d8108 15772+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 15773+}
7f207e10
AM
15774diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
15775--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
b95c5147
AM
15776+++ linux/fs/aufs/inode.c 2015-12-10 17:59:16.836166410 +0100
15777@@ -0,0 +1,528 @@
4a4d8108 15778+/*
2000de60 15779+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
15780+ *
15781+ * This program, aufs is free software; you can redistribute it and/or modify
15782+ * it under the terms of the GNU General Public License as published by
15783+ * the Free Software Foundation; either version 2 of the License, or
15784+ * (at your option) any later version.
15785+ *
15786+ * This program is distributed in the hope that it will be useful,
15787+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15788+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15789+ * GNU General Public License for more details.
15790+ *
15791+ * You should have received a copy of the GNU General Public License
523b37e3 15792+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 15793+ */
1facf9fc 15794+
4a4d8108
AM
15795+/*
15796+ * inode functions
15797+ */
1facf9fc 15798+
4a4d8108 15799+#include "aufs.h"
1308ab2a 15800+
4a4d8108
AM
15801+struct inode *au_igrab(struct inode *inode)
15802+{
15803+ if (inode) {
15804+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 15805+ ihold(inode);
1facf9fc 15806+ }
4a4d8108
AM
15807+ return inode;
15808+}
1facf9fc 15809+
4a4d8108
AM
15810+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
15811+{
15812+ au_cpup_attr_all(inode, /*force*/0);
537831f9 15813+ au_update_iigen(inode, /*half*/1);
4a4d8108
AM
15814+ if (do_version)
15815+ inode->i_version++;
dece6358 15816+}
1facf9fc 15817+
027c5e7a 15818+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 15819+{
4a4d8108 15820+ int err, e;
027c5e7a 15821+ umode_t type;
4a4d8108 15822+ aufs_bindex_t bindex, new_bindex;
1308ab2a 15823+ struct super_block *sb;
4a4d8108 15824+ struct au_iinfo *iinfo;
027c5e7a 15825+ struct au_hinode *p, *q, tmp;
1facf9fc 15826+
4a4d8108 15827+ IiMustWriteLock(inode);
1facf9fc 15828+
027c5e7a 15829+ *update = 0;
4a4d8108 15830+ sb = inode->i_sb;
027c5e7a 15831+ type = inode->i_mode & S_IFMT;
4a4d8108
AM
15832+ iinfo = au_ii(inode);
15833+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
15834+ if (unlikely(err))
1308ab2a 15835+ goto out;
1facf9fc 15836+
027c5e7a 15837+ AuDebugOn(iinfo->ii_bstart < 0);
4a4d8108 15838+ p = iinfo->ii_hinode + iinfo->ii_bstart;
4a4d8108
AM
15839+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
15840+ bindex++, p++) {
15841+ if (!p->hi_inode)
15842+ continue;
1facf9fc 15843+
027c5e7a 15844+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
15845+ new_bindex = au_br_index(sb, p->hi_id);
15846+ if (new_bindex == bindex)
15847+ continue;
1facf9fc 15848+
4a4d8108 15849+ if (new_bindex < 0) {
027c5e7a 15850+ *update = 1;
4a4d8108
AM
15851+ au_hiput(p);
15852+ p->hi_inode = NULL;
15853+ continue;
1308ab2a 15854+ }
4a4d8108
AM
15855+
15856+ if (new_bindex < iinfo->ii_bstart)
15857+ iinfo->ii_bstart = new_bindex;
15858+ if (iinfo->ii_bend < new_bindex)
15859+ iinfo->ii_bend = new_bindex;
15860+ /* swap two lower inode, and loop again */
15861+ q = iinfo->ii_hinode + new_bindex;
15862+ tmp = *q;
15863+ *q = *p;
15864+ *p = tmp;
15865+ if (tmp.hi_inode) {
15866+ bindex--;
15867+ p--;
1308ab2a 15868+ }
15869+ }
4a4d8108
AM
15870+ au_update_ibrange(inode, /*do_put_zero*/0);
15871+ e = au_dy_irefresh(inode);
15872+ if (unlikely(e && !err))
15873+ err = e;
1facf9fc 15874+
4f0767ce 15875+out:
027c5e7a
AM
15876+ AuTraceErr(err);
15877+ return err;
15878+}
15879+
b95c5147
AM
15880+void au_refresh_iop(struct inode *inode, int force_getattr)
15881+{
15882+ int type;
15883+ struct au_sbinfo *sbi = au_sbi(inode->i_sb);
15884+ const struct inode_operations *iop
15885+ = force_getattr ? aufs_iop : sbi->si_iop_array;
15886+
15887+ if (inode->i_op == iop)
15888+ return;
15889+
15890+ switch (inode->i_mode & S_IFMT) {
15891+ case S_IFDIR:
15892+ type = AuIop_DIR;
15893+ break;
15894+ case S_IFLNK:
15895+ type = AuIop_SYMLINK;
15896+ break;
15897+ default:
15898+ type = AuIop_OTHER;
15899+ break;
15900+ }
15901+
15902+ inode->i_op = iop + type;
15903+ /* unnecessary smp_wmb() */
15904+}
15905+
027c5e7a
AM
15906+int au_refresh_hinode_self(struct inode *inode)
15907+{
15908+ int err, update;
15909+
15910+ err = au_ii_refresh(inode, &update);
15911+ if (!err)
15912+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
15913+
15914+ AuTraceErr(err);
4a4d8108
AM
15915+ return err;
15916+}
1facf9fc 15917+
4a4d8108
AM
15918+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
15919+{
027c5e7a 15920+ int err, e, update;
4a4d8108 15921+ unsigned int flags;
027c5e7a 15922+ umode_t mode;
4a4d8108 15923+ aufs_bindex_t bindex, bend;
027c5e7a 15924+ unsigned char isdir;
4a4d8108
AM
15925+ struct au_hinode *p;
15926+ struct au_iinfo *iinfo;
1facf9fc 15927+
027c5e7a 15928+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
15929+ if (unlikely(err))
15930+ goto out;
15931+
15932+ update = 0;
15933+ iinfo = au_ii(inode);
15934+ p = iinfo->ii_hinode + iinfo->ii_bstart;
027c5e7a
AM
15935+ mode = (inode->i_mode & S_IFMT);
15936+ isdir = S_ISDIR(mode);
4a4d8108
AM
15937+ flags = au_hi_flags(inode, isdir);
15938+ bend = au_dbend(dentry);
15939+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
5527c038 15940+ struct inode *h_i, *h_inode;
4a4d8108
AM
15941+ struct dentry *h_d;
15942+
15943+ h_d = au_h_dptr(dentry, bindex);
5527c038 15944+ if (!h_d || d_is_negative(h_d))
4a4d8108
AM
15945+ continue;
15946+
5527c038
JR
15947+ h_inode = d_inode(h_d);
15948+ AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
4a4d8108
AM
15949+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
15950+ h_i = au_h_iptr(inode, bindex);
15951+ if (h_i) {
5527c038 15952+ if (h_i == h_inode)
4a4d8108
AM
15953+ continue;
15954+ err = -EIO;
15955+ break;
15956+ }
15957+ }
15958+ if (bindex < iinfo->ii_bstart)
15959+ iinfo->ii_bstart = bindex;
15960+ if (iinfo->ii_bend < bindex)
15961+ iinfo->ii_bend = bindex;
5527c038 15962+ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
4a4d8108 15963+ update = 1;
1308ab2a 15964+ }
4a4d8108
AM
15965+ au_update_ibrange(inode, /*do_put_zero*/0);
15966+ e = au_dy_irefresh(inode);
15967+ if (unlikely(e && !err))
15968+ err = e;
027c5e7a
AM
15969+ if (!err)
15970+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 15971+
4f0767ce 15972+out:
4a4d8108 15973+ AuTraceErr(err);
1308ab2a 15974+ return err;
dece6358
AM
15975+}
15976+
4a4d8108 15977+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 15978+{
4a4d8108
AM
15979+ int err;
15980+ unsigned int flags;
15981+ umode_t mode;
15982+ aufs_bindex_t bindex, bstart, btail;
15983+ unsigned char isdir;
15984+ struct dentry *h_dentry;
15985+ struct inode *h_inode;
15986+ struct au_iinfo *iinfo;
b95c5147 15987+ struct inode_operations *iop;
dece6358 15988+
4a4d8108 15989+ IiMustWriteLock(inode);
dece6358 15990+
4a4d8108
AM
15991+ err = 0;
15992+ isdir = 0;
b95c5147 15993+ iop = au_sbi(inode->i_sb)->si_iop_array;
4a4d8108 15994+ bstart = au_dbstart(dentry);
5527c038
JR
15995+ h_dentry = au_h_dptr(dentry, bstart);
15996+ h_inode = d_inode(h_dentry);
4a4d8108
AM
15997+ mode = h_inode->i_mode;
15998+ switch (mode & S_IFMT) {
15999+ case S_IFREG:
16000+ btail = au_dbtail(dentry);
b95c5147 16001+ inode->i_op = iop + AuIop_OTHER;
4a4d8108
AM
16002+ inode->i_fop = &aufs_file_fop;
16003+ err = au_dy_iaop(inode, bstart, h_inode);
16004+ if (unlikely(err))
16005+ goto out;
16006+ break;
16007+ case S_IFDIR:
16008+ isdir = 1;
16009+ btail = au_dbtaildir(dentry);
b95c5147 16010+ inode->i_op = iop + AuIop_DIR;
4a4d8108
AM
16011+ inode->i_fop = &aufs_dir_fop;
16012+ break;
16013+ case S_IFLNK:
16014+ btail = au_dbtail(dentry);
b95c5147 16015+ inode->i_op = iop + AuIop_SYMLINK;
4a4d8108
AM
16016+ break;
16017+ case S_IFBLK:
16018+ case S_IFCHR:
16019+ case S_IFIFO:
16020+ case S_IFSOCK:
16021+ btail = au_dbtail(dentry);
b95c5147 16022+ inode->i_op = iop + AuIop_OTHER;
38d290e6 16023+ init_special_inode(inode, mode, h_inode->i_rdev);
4a4d8108
AM
16024+ break;
16025+ default:
16026+ AuIOErr("Unknown file type 0%o\n", mode);
16027+ err = -EIO;
1308ab2a 16028+ goto out;
4a4d8108 16029+ }
dece6358 16030+
4a4d8108
AM
16031+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
16032+ flags = au_hi_flags(inode, isdir);
16033+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
16034+ && au_ftest_hi(flags, HNOTIFY)
16035+ && dentry->d_name.len > AUFS_WH_PFX_LEN
16036+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
16037+ au_fclr_hi(flags, HNOTIFY);
16038+ iinfo = au_ii(inode);
16039+ iinfo->ii_bstart = bstart;
16040+ iinfo->ii_bend = btail;
16041+ for (bindex = bstart; bindex <= btail; bindex++) {
16042+ h_dentry = au_h_dptr(dentry, bindex);
16043+ if (h_dentry)
16044+ au_set_h_iptr(inode, bindex,
5527c038 16045+ au_igrab(d_inode(h_dentry)), flags);
4a4d8108
AM
16046+ }
16047+ au_cpup_attr_all(inode, /*force*/1);
c1595e42
JR
16048+ /*
16049+ * to force calling aufs_get_acl() every time,
16050+ * do not call cache_no_acl() for aufs inode.
16051+ */
dece6358 16052+
4f0767ce 16053+out:
4a4d8108
AM
16054+ return err;
16055+}
dece6358 16056+
027c5e7a
AM
16057+/*
16058+ * successful returns with iinfo write_locked
16059+ * minus: errno
16060+ * zero: success, matched
16061+ * plus: no error, but unmatched
16062+ */
16063+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
16064+{
16065+ int err;
537831f9
AM
16066+ unsigned int gen;
16067+ struct au_iigen iigen;
4a4d8108
AM
16068+ aufs_bindex_t bindex, bend;
16069+ struct inode *h_inode, *h_dinode;
5527c038 16070+ struct dentry *h_dentry;
dece6358 16071+
4a4d8108
AM
16072+ /*
16073+ * before this function, if aufs got any iinfo lock, it must be only
16074+ * one, the parent dir.
16075+ * it can happen by UDBA and the obsoleted inode number.
16076+ */
16077+ err = -EIO;
16078+ if (unlikely(inode->i_ino == parent_ino(dentry)))
16079+ goto out;
16080+
027c5e7a 16081+ err = 1;
4a4d8108 16082+ ii_write_lock_new_child(inode);
5527c038
JR
16083+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
16084+ h_dinode = d_inode(h_dentry);
4a4d8108
AM
16085+ bend = au_ibend(inode);
16086+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
16087+ h_inode = au_h_iptr(inode, bindex);
537831f9
AM
16088+ if (!h_inode || h_inode != h_dinode)
16089+ continue;
16090+
16091+ err = 0;
16092+ gen = au_iigen(inode, &iigen);
16093+ if (gen == au_digen(dentry)
16094+ && !au_ig_ftest(iigen.ig_flags, HALF_REFRESHED))
4a4d8108 16095+ break;
537831f9
AM
16096+
16097+ /* fully refresh inode using dentry */
16098+ err = au_refresh_hinode(inode, dentry);
16099+ if (!err)
16100+ au_update_iigen(inode, /*half*/0);
16101+ break;
1facf9fc 16102+ }
dece6358 16103+
4a4d8108
AM
16104+ if (unlikely(err))
16105+ ii_write_unlock(inode);
4f0767ce 16106+out:
1facf9fc 16107+ return err;
16108+}
1facf9fc 16109+
4a4d8108
AM
16110+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16111+ unsigned int d_type, ino_t *ino)
1facf9fc 16112+{
4a4d8108
AM
16113+ int err;
16114+ struct mutex *mtx;
1facf9fc 16115+
b752ccd1 16116+ /* prevent hardlinked inode number from race condition */
4a4d8108 16117+ mtx = NULL;
b752ccd1 16118+ if (d_type != DT_DIR) {
4a4d8108
AM
16119+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
16120+ mutex_lock(mtx);
16121+ }
16122+ err = au_xino_read(sb, bindex, h_ino, ino);
16123+ if (unlikely(err))
16124+ goto out;
1308ab2a 16125+
4a4d8108
AM
16126+ if (!*ino) {
16127+ err = -EIO;
16128+ *ino = au_xino_new_ino(sb);
16129+ if (unlikely(!*ino))
1facf9fc 16130+ goto out;
4a4d8108
AM
16131+ err = au_xino_write(sb, bindex, h_ino, *ino);
16132+ if (unlikely(err))
1308ab2a 16133+ goto out;
1308ab2a 16134+ }
1facf9fc 16135+
4f0767ce 16136+out:
b752ccd1 16137+ if (mtx)
4a4d8108 16138+ mutex_unlock(mtx);
1facf9fc 16139+ return err;
16140+}
16141+
4a4d8108
AM
16142+/* successful returns with iinfo write_locked */
16143+/* todo: return with unlocked? */
16144+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 16145+{
5527c038 16146+ struct inode *inode, *h_inode;
4a4d8108
AM
16147+ struct dentry *h_dentry;
16148+ struct super_block *sb;
b752ccd1 16149+ struct mutex *mtx;
4a4d8108 16150+ ino_t h_ino, ino;
1716fcea 16151+ int err;
4a4d8108 16152+ aufs_bindex_t bstart;
1facf9fc 16153+
4a4d8108
AM
16154+ sb = dentry->d_sb;
16155+ bstart = au_dbstart(dentry);
16156+ h_dentry = au_h_dptr(dentry, bstart);
5527c038
JR
16157+ h_inode = d_inode(h_dentry);
16158+ h_ino = h_inode->i_ino;
b752ccd1
AM
16159+
16160+ /*
16161+ * stop 'race'-ing between hardlinks under different
16162+ * parents.
16163+ */
16164+ mtx = NULL;
2000de60 16165+ if (!d_is_dir(h_dentry))
b752ccd1
AM
16166+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
16167+
4f0767ce 16168+new_ino:
b752ccd1
AM
16169+ if (mtx)
16170+ mutex_lock(mtx);
4a4d8108
AM
16171+ err = au_xino_read(sb, bstart, h_ino, &ino);
16172+ inode = ERR_PTR(err);
16173+ if (unlikely(err))
16174+ goto out;
b752ccd1 16175+
4a4d8108
AM
16176+ if (!ino) {
16177+ ino = au_xino_new_ino(sb);
16178+ if (unlikely(!ino)) {
16179+ inode = ERR_PTR(-EIO);
dece6358
AM
16180+ goto out;
16181+ }
16182+ }
1facf9fc 16183+
4a4d8108
AM
16184+ AuDbg("i%lu\n", (unsigned long)ino);
16185+ inode = au_iget_locked(sb, ino);
16186+ err = PTR_ERR(inode);
16187+ if (IS_ERR(inode))
1facf9fc 16188+ goto out;
1facf9fc 16189+
4a4d8108
AM
16190+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
16191+ if (inode->i_state & I_NEW) {
1716fcea 16192+ /* verbose coding for lock class name */
2000de60 16193+ if (unlikely(d_is_symlink(h_dentry)))
1716fcea
AM
16194+ au_rw_class(&au_ii(inode)->ii_rwsem,
16195+ au_lc_key + AuLcSymlink_IIINFO);
2000de60 16196+ else if (unlikely(d_is_dir(h_dentry)))
1716fcea
AM
16197+ au_rw_class(&au_ii(inode)->ii_rwsem,
16198+ au_lc_key + AuLcDir_IIINFO);
16199+ else /* likely */
16200+ au_rw_class(&au_ii(inode)->ii_rwsem,
16201+ au_lc_key + AuLcNonDir_IIINFO);
2dfbb274 16202+
4a4d8108
AM
16203+ ii_write_lock_new_child(inode);
16204+ err = set_inode(inode, dentry);
16205+ if (!err) {
16206+ unlock_new_inode(inode);
16207+ goto out; /* success */
16208+ }
1308ab2a 16209+
027c5e7a
AM
16210+ /*
16211+ * iget_failed() calls iput(), but we need to call
16212+ * ii_write_unlock() after iget_failed(). so dirty hack for
16213+ * i_count.
16214+ */
16215+ atomic_inc(&inode->i_count);
4a4d8108 16216+ iget_failed(inode);
027c5e7a
AM
16217+ ii_write_unlock(inode);
16218+ au_xino_write(sb, bstart, h_ino, /*ino*/0);
16219+ /* ignore this error */
16220+ goto out_iput;
16221+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
b752ccd1
AM
16222+ /*
16223+ * horrible race condition between lookup, readdir and copyup
16224+ * (or something).
16225+ */
16226+ if (mtx)
16227+ mutex_unlock(mtx);
027c5e7a
AM
16228+ err = reval_inode(inode, dentry);
16229+ if (unlikely(err < 0)) {
16230+ mtx = NULL;
16231+ goto out_iput;
16232+ }
16233+
b752ccd1
AM
16234+ if (!err) {
16235+ mtx = NULL;
4a4d8108 16236+ goto out; /* success */
b752ccd1
AM
16237+ } else if (mtx)
16238+ mutex_lock(mtx);
4a4d8108
AM
16239+ }
16240+
5527c038 16241+ if (unlikely(au_test_fs_unique_ino(h_inode)))
4a4d8108 16242+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
523b37e3
AM
16243+ " b%d, %s, %pd, hi%lu, i%lu.\n",
16244+ bstart, au_sbtype(h_dentry->d_sb), dentry,
4a4d8108
AM
16245+ (unsigned long)h_ino, (unsigned long)ino);
16246+ ino = 0;
16247+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
16248+ if (!err) {
16249+ iput(inode);
b752ccd1
AM
16250+ if (mtx)
16251+ mutex_unlock(mtx);
4a4d8108
AM
16252+ goto new_ino;
16253+ }
1308ab2a 16254+
4f0767ce 16255+out_iput:
4a4d8108 16256+ iput(inode);
4a4d8108 16257+ inode = ERR_PTR(err);
4f0767ce 16258+out:
b752ccd1
AM
16259+ if (mtx)
16260+ mutex_unlock(mtx);
4a4d8108 16261+ return inode;
1facf9fc 16262+}
16263+
4a4d8108 16264+/* ---------------------------------------------------------------------- */
1facf9fc 16265+
4a4d8108
AM
16266+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16267+ struct inode *inode)
16268+{
16269+ int err;
076b876e 16270+ struct inode *hi;
1facf9fc 16271+
4a4d8108 16272+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 16273+
4a4d8108
AM
16274+ /* pseudo-link after flushed may happen out of bounds */
16275+ if (!err
16276+ && inode
16277+ && au_ibstart(inode) <= bindex
16278+ && bindex <= au_ibend(inode)) {
16279+ /*
16280+ * permission check is unnecessary since vfsub routine
16281+ * will be called later
16282+ */
076b876e 16283+ hi = au_h_iptr(inode, bindex);
4a4d8108
AM
16284+ if (hi)
16285+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 16286+ }
16287+
4a4d8108
AM
16288+ return err;
16289+}
dece6358 16290+
4a4d8108
AM
16291+int au_test_h_perm(struct inode *h_inode, int mask)
16292+{
2dfbb274 16293+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
4a4d8108
AM
16294+ return 0;
16295+ return inode_permission(h_inode, mask);
16296+}
1facf9fc 16297+
4a4d8108
AM
16298+int au_test_h_perm_sio(struct inode *h_inode, int mask)
16299+{
16300+ if (au_test_nfs(h_inode->i_sb)
16301+ && (mask & MAY_WRITE)
16302+ && S_ISDIR(h_inode->i_mode))
16303+ mask |= MAY_READ; /* force permission check */
16304+ return au_test_h_perm(h_inode, mask);
1facf9fc 16305+}
7f207e10
AM
16306diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
16307--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
b95c5147
AM
16308+++ linux/fs/aufs/inode.h 2015-12-10 17:59:16.836166410 +0100
16309@@ -0,0 +1,681 @@
4a4d8108 16310+/*
2000de60 16311+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
16312+ *
16313+ * This program, aufs is free software; you can redistribute it and/or modify
16314+ * it under the terms of the GNU General Public License as published by
16315+ * the Free Software Foundation; either version 2 of the License, or
16316+ * (at your option) any later version.
16317+ *
16318+ * This program is distributed in the hope that it will be useful,
16319+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16320+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16321+ * GNU General Public License for more details.
16322+ *
16323+ * You should have received a copy of the GNU General Public License
523b37e3 16324+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 16325+ */
1facf9fc 16326+
1308ab2a 16327+/*
4a4d8108 16328+ * inode operations
1308ab2a 16329+ */
dece6358 16330+
4a4d8108
AM
16331+#ifndef __AUFS_INODE_H__
16332+#define __AUFS_INODE_H__
dece6358 16333+
4a4d8108 16334+#ifdef __KERNEL__
1308ab2a 16335+
4a4d8108 16336+#include <linux/fsnotify.h>
4a4d8108 16337+#include "rwsem.h"
1308ab2a 16338+
4a4d8108 16339+struct vfsmount;
1facf9fc 16340+
4a4d8108
AM
16341+struct au_hnotify {
16342+#ifdef CONFIG_AUFS_HNOTIFY
16343+#ifdef CONFIG_AUFS_HFSNOTIFY
7f207e10 16344+ /* never use fsnotify_add_vfsmount_mark() */
0c5527e5 16345+ struct fsnotify_mark hn_mark;
4a4d8108 16346+#endif
7f207e10 16347+ struct inode *hn_aufs_inode; /* no get/put */
4a4d8108
AM
16348+#endif
16349+} ____cacheline_aligned_in_smp;
1facf9fc 16350+
4a4d8108
AM
16351+struct au_hinode {
16352+ struct inode *hi_inode;
16353+ aufs_bindex_t hi_id;
16354+#ifdef CONFIG_AUFS_HNOTIFY
16355+ struct au_hnotify *hi_notify;
16356+#endif
dece6358 16357+
4a4d8108
AM
16358+ /* reference to the copied-up whiteout with get/put */
16359+ struct dentry *hi_whdentry;
16360+};
dece6358 16361+
537831f9
AM
16362+/* ig_flags */
16363+#define AuIG_HALF_REFRESHED 1
16364+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
16365+#define au_ig_fset(flags, name) \
16366+ do { (flags) |= AuIG_##name; } while (0)
16367+#define au_ig_fclr(flags, name) \
16368+ do { (flags) &= ~AuIG_##name; } while (0)
16369+
16370+struct au_iigen {
16371+ __u32 ig_generation, ig_flags;
16372+};
16373+
4a4d8108
AM
16374+struct au_vdir;
16375+struct au_iinfo {
537831f9 16376+ spinlock_t ii_genspin;
7a9e40b8 16377+ struct au_iigen ii_generation;
4a4d8108 16378+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 16379+
4a4d8108
AM
16380+ struct au_rwsem ii_rwsem;
16381+ aufs_bindex_t ii_bstart, ii_bend;
16382+ __u32 ii_higen;
16383+ struct au_hinode *ii_hinode;
16384+ struct au_vdir *ii_vdir;
16385+};
1facf9fc 16386+
4a4d8108
AM
16387+struct au_icntnr {
16388+ struct au_iinfo iinfo;
16389+ struct inode vfs_inode;
16390+} ____cacheline_aligned_in_smp;
1308ab2a 16391+
4a4d8108
AM
16392+/* au_pin flags */
16393+#define AuPin_DI_LOCKED 1
16394+#define AuPin_MNT_WRITE (1 << 1)
16395+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
7f207e10
AM
16396+#define au_fset_pin(flags, name) \
16397+ do { (flags) |= AuPin_##name; } while (0)
16398+#define au_fclr_pin(flags, name) \
16399+ do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
16400+
16401+struct au_pin {
16402+ /* input */
16403+ struct dentry *dentry;
16404+ unsigned int udba;
16405+ unsigned char lsc_di, lsc_hi, flags;
16406+ aufs_bindex_t bindex;
16407+
16408+ /* output */
16409+ struct dentry *parent;
16410+ struct au_hinode *hdir;
16411+ struct vfsmount *h_mnt;
86dc4139
AM
16412+
16413+ /* temporary unlock/relock for copyup */
16414+ struct dentry *h_dentry, *h_parent;
16415+ struct au_branch *br;
16416+ struct task_struct *task;
4a4d8108 16417+};
1facf9fc 16418+
86dc4139 16419+void au_pin_hdir_unlock(struct au_pin *p);
c1595e42 16420+int au_pin_hdir_lock(struct au_pin *p);
86dc4139
AM
16421+int au_pin_hdir_relock(struct au_pin *p);
16422+void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task);
16423+void au_pin_hdir_acquire_nest(struct au_pin *p);
16424+void au_pin_hdir_release(struct au_pin *p);
16425+
1308ab2a 16426+/* ---------------------------------------------------------------------- */
16427+
4a4d8108 16428+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 16429+{
4a4d8108 16430+ struct au_iinfo *iinfo;
1facf9fc 16431+
4a4d8108
AM
16432+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
16433+ if (iinfo->ii_hinode)
16434+ return iinfo;
16435+ return NULL; /* debugging bad_inode case */
16436+}
1facf9fc 16437+
4a4d8108 16438+/* ---------------------------------------------------------------------- */
1facf9fc 16439+
4a4d8108
AM
16440+/* inode.c */
16441+struct inode *au_igrab(struct inode *inode);
b95c5147 16442+void au_refresh_iop(struct inode *inode, int force_getattr);
027c5e7a 16443+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
16444+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
16445+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16446+ unsigned int d_type, ino_t *ino);
16447+struct inode *au_new_inode(struct dentry *dentry, int must_new);
16448+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16449+ struct inode *inode);
16450+int au_test_h_perm(struct inode *h_inode, int mask);
16451+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 16452+
4a4d8108
AM
16453+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
16454+ ino_t h_ino, unsigned int d_type, ino_t *ino)
16455+{
16456+#ifdef CONFIG_AUFS_SHWH
16457+ return au_ino(sb, bindex, h_ino, d_type, ino);
16458+#else
16459+ return 0;
16460+#endif
16461+}
1facf9fc 16462+
4a4d8108 16463+/* i_op.c */
b95c5147
AM
16464+enum {
16465+ AuIop_SYMLINK,
16466+ AuIop_DIR,
16467+ AuIop_OTHER,
16468+ AuIop_Last
16469+};
16470+extern struct inode_operations aufs_iop[AuIop_Last],
16471+ aufs_iop_nogetattr[AuIop_Last];
1308ab2a 16472+
4a4d8108
AM
16473+/* au_wr_dir flags */
16474+#define AuWrDir_ADD_ENTRY 1
7e9cd9fe
AM
16475+#define AuWrDir_ISDIR (1 << 1)
16476+#define AuWrDir_TMPFILE (1 << 2)
4a4d8108 16477+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
7f207e10
AM
16478+#define au_fset_wrdir(flags, name) \
16479+ do { (flags) |= AuWrDir_##name; } while (0)
16480+#define au_fclr_wrdir(flags, name) \
16481+ do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 16482+
4a4d8108
AM
16483+struct au_wr_dir_args {
16484+ aufs_bindex_t force_btgt;
16485+ unsigned char flags;
16486+};
16487+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16488+ struct au_wr_dir_args *args);
dece6358 16489+
4a4d8108
AM
16490+struct dentry *au_pinned_h_parent(struct au_pin *pin);
16491+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
16492+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16493+ unsigned int udba, unsigned char flags);
16494+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16495+ unsigned int udba, unsigned char flags) __must_check;
16496+int au_do_pin(struct au_pin *pin) __must_check;
16497+void au_unpin(struct au_pin *pin);
c1595e42
JR
16498+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
16499+
16500+#define AuIcpup_DID_CPUP 1
16501+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
16502+#define au_fset_icpup(flags, name) \
16503+ do { (flags) |= AuIcpup_##name; } while (0)
16504+#define au_fclr_icpup(flags, name) \
16505+ do { (flags) &= ~AuIcpup_##name; } while (0)
16506+
16507+struct au_icpup_args {
16508+ unsigned char flags;
16509+ unsigned char pin_flags;
16510+ aufs_bindex_t btgt;
16511+ unsigned int udba;
16512+ struct au_pin pin;
16513+ struct path h_path;
16514+ struct inode *h_inode;
16515+};
16516+
16517+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16518+ struct au_icpup_args *a);
16519+
16520+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path);
1facf9fc 16521+
4a4d8108
AM
16522+/* i_op_add.c */
16523+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
16524+ struct dentry *h_parent, int isdir);
7eafdf33
AM
16525+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
16526+ dev_t dev);
4a4d8108 16527+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
7eafdf33 16528+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 16529+ bool want_excl);
b912730e
AM
16530+struct vfsub_aopen_args;
16531+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
16532+ struct vfsub_aopen_args *args);
38d290e6 16533+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
4a4d8108
AM
16534+int aufs_link(struct dentry *src_dentry, struct inode *dir,
16535+ struct dentry *dentry);
7eafdf33 16536+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
1facf9fc 16537+
4a4d8108
AM
16538+/* i_op_del.c */
16539+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
16540+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16541+ struct dentry *h_parent, int isdir);
16542+int aufs_unlink(struct inode *dir, struct dentry *dentry);
16543+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 16544+
4a4d8108
AM
16545+/* i_op_ren.c */
16546+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
16547+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
16548+ struct inode *dir, struct dentry *dentry);
1facf9fc 16549+
4a4d8108
AM
16550+/* iinfo.c */
16551+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
16552+void au_hiput(struct au_hinode *hinode);
16553+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
16554+ struct dentry *h_wh);
16555+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 16556+
4a4d8108
AM
16557+/* hinode flags */
16558+#define AuHi_XINO 1
16559+#define AuHi_HNOTIFY (1 << 1)
16560+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
16561+#define au_fset_hi(flags, name) \
16562+ do { (flags) |= AuHi_##name; } while (0)
16563+#define au_fclr_hi(flags, name) \
16564+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 16565+
4a4d8108
AM
16566+#ifndef CONFIG_AUFS_HNOTIFY
16567+#undef AuHi_HNOTIFY
16568+#define AuHi_HNOTIFY 0
16569+#endif
1facf9fc 16570+
4a4d8108
AM
16571+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
16572+ struct inode *h_inode, unsigned int flags);
1facf9fc 16573+
537831f9 16574+void au_update_iigen(struct inode *inode, int half);
4a4d8108 16575+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 16576+
4a4d8108
AM
16577+void au_icntnr_init_once(void *_c);
16578+int au_iinfo_init(struct inode *inode);
16579+void au_iinfo_fin(struct inode *inode);
16580+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
1308ab2a 16581+
e49829fe 16582+#ifdef CONFIG_PROC_FS
4a4d8108 16583+/* plink.c */
e49829fe 16584+int au_plink_maint(struct super_block *sb, int flags);
7e9cd9fe 16585+struct au_sbinfo;
e49829fe
JR
16586+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
16587+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
16588+#ifdef CONFIG_AUFS_DEBUG
16589+void au_plink_list(struct super_block *sb);
16590+#else
16591+AuStubVoid(au_plink_list, struct super_block *sb)
16592+#endif
16593+int au_plink_test(struct inode *inode);
16594+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
16595+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
16596+ struct dentry *h_dentry);
e49829fe
JR
16597+void au_plink_put(struct super_block *sb, int verbose);
16598+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 16599+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
16600+#else
16601+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
16602+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
16603+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
16604+AuStubVoid(au_plink_list, struct super_block *sb);
16605+AuStubInt0(au_plink_test, struct inode *inode);
16606+AuStub(struct dentry *, au_plink_lkup, return NULL,
16607+ struct inode *inode, aufs_bindex_t bindex);
16608+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
16609+ struct dentry *h_dentry);
16610+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
16611+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
16612+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
16613+#endif /* CONFIG_PROC_FS */
1facf9fc 16614+
c1595e42
JR
16615+#ifdef CONFIG_AUFS_XATTR
16616+/* xattr.c */
7e9cd9fe
AM
16617+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
16618+ unsigned int verbose);
c1595e42
JR
16619+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
16620+ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
16621+ size_t size);
16622+int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
16623+ size_t size, int flags);
16624+int aufs_removexattr(struct dentry *dentry, const char *name);
16625+
16626+/* void au_xattr_init(struct super_block *sb); */
16627+#else
16628+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe 16629+ int ignore_flags, unsigned int verbose);
c1595e42
JR
16630+/* AuStubVoid(au_xattr_init, struct super_block *sb); */
16631+#endif
16632+
16633+#ifdef CONFIG_FS_POSIX_ACL
16634+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
16635+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
16636+#endif
16637+
16638+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
16639+enum {
16640+ AU_XATTR_SET,
16641+ AU_XATTR_REMOVE,
16642+ AU_ACL_SET
16643+};
16644+
16645+struct au_srxattr {
16646+ int type;
16647+ union {
16648+ struct {
16649+ const char *name;
16650+ const void *value;
16651+ size_t size;
16652+ int flags;
16653+ } set;
16654+ struct {
16655+ const char *name;
16656+ } remove;
16657+ struct {
16658+ struct posix_acl *acl;
16659+ int type;
16660+ } acl_set;
16661+ } u;
16662+};
16663+ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg);
16664+#endif
16665+
4a4d8108 16666+/* ---------------------------------------------------------------------- */
1308ab2a 16667+
4a4d8108
AM
16668+/* lock subclass for iinfo */
16669+enum {
16670+ AuLsc_II_CHILD, /* child first */
16671+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
16672+ AuLsc_II_CHILD3, /* copyup dirs */
16673+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
16674+ AuLsc_II_PARENT2,
16675+ AuLsc_II_PARENT3, /* copyup dirs */
16676+ AuLsc_II_NEW_CHILD
16677+};
1308ab2a 16678+
1facf9fc 16679+/*
4a4d8108
AM
16680+ * ii_read_lock_child, ii_write_lock_child,
16681+ * ii_read_lock_child2, ii_write_lock_child2,
16682+ * ii_read_lock_child3, ii_write_lock_child3,
16683+ * ii_read_lock_parent, ii_write_lock_parent,
16684+ * ii_read_lock_parent2, ii_write_lock_parent2,
16685+ * ii_read_lock_parent3, ii_write_lock_parent3,
16686+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 16687+ */
4a4d8108
AM
16688+#define AuReadLockFunc(name, lsc) \
16689+static inline void ii_read_lock_##name(struct inode *i) \
16690+{ \
16691+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16692+}
16693+
16694+#define AuWriteLockFunc(name, lsc) \
16695+static inline void ii_write_lock_##name(struct inode *i) \
16696+{ \
16697+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16698+}
16699+
16700+#define AuRWLockFuncs(name, lsc) \
16701+ AuReadLockFunc(name, lsc) \
16702+ AuWriteLockFunc(name, lsc)
16703+
16704+AuRWLockFuncs(child, CHILD);
16705+AuRWLockFuncs(child2, CHILD2);
16706+AuRWLockFuncs(child3, CHILD3);
16707+AuRWLockFuncs(parent, PARENT);
16708+AuRWLockFuncs(parent2, PARENT2);
16709+AuRWLockFuncs(parent3, PARENT3);
16710+AuRWLockFuncs(new_child, NEW_CHILD);
16711+
16712+#undef AuReadLockFunc
16713+#undef AuWriteLockFunc
16714+#undef AuRWLockFuncs
1facf9fc 16715+
16716+/*
4a4d8108 16717+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 16718+ */
4a4d8108 16719+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 16720+
4a4d8108
AM
16721+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
16722+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
16723+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 16724+
4a4d8108 16725+/* ---------------------------------------------------------------------- */
1308ab2a 16726+
027c5e7a
AM
16727+static inline void au_icntnr_init(struct au_icntnr *c)
16728+{
16729+#ifdef CONFIG_AUFS_DEBUG
16730+ c->vfs_inode.i_mode = 0;
16731+#endif
16732+}
16733+
537831f9 16734+static inline unsigned int au_iigen(struct inode *inode, struct au_iigen *iigen)
4a4d8108 16735+{
537831f9
AM
16736+ unsigned int gen;
16737+ struct au_iinfo *iinfo;
16738+
16739+ iinfo = au_ii(inode);
16740+ spin_lock(&iinfo->ii_genspin);
16741+ if (iigen)
16742+ *iigen = iinfo->ii_generation;
16743+ gen = iinfo->ii_generation.ig_generation;
16744+ spin_unlock(&iinfo->ii_genspin);
16745+
16746+ return gen;
4a4d8108 16747+}
1308ab2a 16748+
4a4d8108
AM
16749+/* tiny test for inode number */
16750+/* tmpfs generation is too rough */
16751+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
16752+{
16753+ struct au_iinfo *iinfo;
1308ab2a 16754+
4a4d8108
AM
16755+ iinfo = au_ii(inode);
16756+ AuRwMustAnyLock(&iinfo->ii_rwsem);
16757+ return !(iinfo->ii_hsb1 == h_inode->i_sb
16758+ && iinfo->ii_higen == h_inode->i_generation);
16759+}
1308ab2a 16760+
4a4d8108
AM
16761+static inline void au_iigen_dec(struct inode *inode)
16762+{
537831f9
AM
16763+ struct au_iinfo *iinfo;
16764+
16765+ iinfo = au_ii(inode);
16766+ spin_lock(&iinfo->ii_genspin);
16767+ iinfo->ii_generation.ig_generation--;
16768+ spin_unlock(&iinfo->ii_genspin);
027c5e7a
AM
16769+}
16770+
16771+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
16772+{
16773+ int err;
16774+
16775+ err = 0;
537831f9 16776+ if (unlikely(inode && au_iigen(inode, NULL) != sigen))
027c5e7a
AM
16777+ err = -EIO;
16778+
16779+ return err;
4a4d8108 16780+}
1308ab2a 16781+
4a4d8108 16782+/* ---------------------------------------------------------------------- */
1308ab2a 16783+
4a4d8108
AM
16784+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
16785+ aufs_bindex_t bindex)
16786+{
16787+ IiMustAnyLock(inode);
16788+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
16789+}
1308ab2a 16790+
4a4d8108
AM
16791+static inline aufs_bindex_t au_ibstart(struct inode *inode)
16792+{
16793+ IiMustAnyLock(inode);
16794+ return au_ii(inode)->ii_bstart;
16795+}
1308ab2a 16796+
4a4d8108
AM
16797+static inline aufs_bindex_t au_ibend(struct inode *inode)
16798+{
16799+ IiMustAnyLock(inode);
16800+ return au_ii(inode)->ii_bend;
16801+}
1308ab2a 16802+
4a4d8108
AM
16803+static inline struct au_vdir *au_ivdir(struct inode *inode)
16804+{
16805+ IiMustAnyLock(inode);
16806+ return au_ii(inode)->ii_vdir;
16807+}
1308ab2a 16808+
4a4d8108
AM
16809+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
16810+{
16811+ IiMustAnyLock(inode);
16812+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
16813+}
1308ab2a 16814+
4a4d8108 16815+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 16816+{
4a4d8108
AM
16817+ IiMustWriteLock(inode);
16818+ au_ii(inode)->ii_bstart = bindex;
16819+}
1308ab2a 16820+
4a4d8108
AM
16821+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
16822+{
16823+ IiMustWriteLock(inode);
16824+ au_ii(inode)->ii_bend = bindex;
1308ab2a 16825+}
16826+
4a4d8108
AM
16827+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
16828+{
16829+ IiMustWriteLock(inode);
16830+ au_ii(inode)->ii_vdir = vdir;
16831+}
1facf9fc 16832+
4a4d8108 16833+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 16834+{
4a4d8108
AM
16835+ IiMustAnyLock(inode);
16836+ return au_ii(inode)->ii_hinode + bindex;
16837+}
dece6358 16838+
4a4d8108 16839+/* ---------------------------------------------------------------------- */
1facf9fc 16840+
4a4d8108
AM
16841+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
16842+{
16843+ if (pin)
16844+ return pin->parent;
16845+ return NULL;
1facf9fc 16846+}
16847+
4a4d8108 16848+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 16849+{
4a4d8108
AM
16850+ if (pin && pin->hdir)
16851+ return pin->hdir->hi_inode;
16852+ return NULL;
1308ab2a 16853+}
1facf9fc 16854+
4a4d8108
AM
16855+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
16856+{
16857+ if (pin)
16858+ return pin->hdir;
16859+ return NULL;
16860+}
1facf9fc 16861+
4a4d8108 16862+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 16863+{
4a4d8108
AM
16864+ if (pin)
16865+ pin->dentry = dentry;
16866+}
1308ab2a 16867+
4a4d8108
AM
16868+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
16869+ unsigned char lflag)
16870+{
16871+ if (pin) {
7f207e10 16872+ if (lflag)
4a4d8108 16873+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 16874+ else
4a4d8108 16875+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 16876+ }
4a4d8108
AM
16877+}
16878+
7e9cd9fe 16879+#if 0 /* reserved */
4a4d8108
AM
16880+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
16881+{
16882+ if (pin) {
16883+ dput(pin->parent);
16884+ pin->parent = dget(parent);
1facf9fc 16885+ }
4a4d8108 16886+}
7e9cd9fe 16887+#endif
1facf9fc 16888+
4a4d8108
AM
16889+/* ---------------------------------------------------------------------- */
16890+
027c5e7a 16891+struct au_branch;
4a4d8108
AM
16892+#ifdef CONFIG_AUFS_HNOTIFY
16893+struct au_hnotify_op {
16894+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a 16895+ int (*alloc)(struct au_hinode *hinode);
7eafdf33
AM
16896+
16897+ /*
16898+ * if it returns true, then the caller should free hinode->hi_notify,
16899+ * otherwise ->free() frees it.
16900+ */
16901+ int (*free)(struct au_hinode *hinode,
16902+ struct au_hnotify *hn) __must_check;
4a4d8108
AM
16903+
16904+ void (*fin)(void);
16905+ int (*init)(void);
027c5e7a
AM
16906+
16907+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
16908+ void (*fin_br)(struct au_branch *br);
16909+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
16910+};
16911+
16912+/* hnotify.c */
027c5e7a 16913+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
16914+void au_hn_free(struct au_hinode *hinode);
16915+void au_hn_ctl(struct au_hinode *hinode, int do_set);
16916+void au_hn_reset(struct inode *inode, unsigned int flags);
16917+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
16918+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
16919+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
16920+int au_hnotify_init_br(struct au_branch *br, int perm);
16921+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
16922+int __init au_hnotify_init(void);
16923+void au_hnotify_fin(void);
16924+
7f207e10 16925+/* hfsnotify.c */
4a4d8108
AM
16926+extern const struct au_hnotify_op au_hnotify_op;
16927+
16928+static inline
16929+void au_hn_init(struct au_hinode *hinode)
16930+{
16931+ hinode->hi_notify = NULL;
1308ab2a 16932+}
16933+
53392da6
AM
16934+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
16935+{
16936+ return hinode->hi_notify;
16937+}
16938+
4a4d8108 16939+#else
c1595e42
JR
16940+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
16941+ struct au_hinode *hinode __maybe_unused,
16942+ struct inode *inode __maybe_unused)
16943+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
4a4d8108
AM
16944+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
16945+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
16946+ int do_set __maybe_unused)
16947+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
16948+ unsigned int flags __maybe_unused)
027c5e7a
AM
16949+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
16950+ struct au_branch *br __maybe_unused,
16951+ int perm __maybe_unused)
16952+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
16953+ int perm __maybe_unused)
16954+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
16955+AuStubInt0(__init au_hnotify_init, void)
16956+AuStubVoid(au_hnotify_fin, void)
16957+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
16958+#endif /* CONFIG_AUFS_HNOTIFY */
16959+
16960+static inline void au_hn_suspend(struct au_hinode *hdir)
16961+{
16962+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 16963+}
16964+
4a4d8108 16965+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 16966+{
4a4d8108
AM
16967+ au_hn_ctl(hdir, /*do_set*/1);
16968+}
1308ab2a 16969+
4a4d8108
AM
16970+static inline void au_hn_imtx_lock(struct au_hinode *hdir)
16971+{
16972+ mutex_lock(&hdir->hi_inode->i_mutex);
16973+ au_hn_suspend(hdir);
16974+}
dece6358 16975+
4a4d8108
AM
16976+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
16977+ unsigned int sc __maybe_unused)
16978+{
16979+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
16980+ au_hn_suspend(hdir);
1facf9fc 16981+}
1facf9fc 16982+
4a4d8108
AM
16983+static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
16984+{
16985+ au_hn_resume(hdir);
16986+ mutex_unlock(&hdir->hi_inode->i_mutex);
16987+}
16988+
16989+#endif /* __KERNEL__ */
16990+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
16991diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
16992--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 16993+++ linux/fs/aufs/ioctl.c 2015-09-24 10:47:58.254719746 +0200
c1595e42 16994@@ -0,0 +1,219 @@
4a4d8108 16995+/*
2000de60 16996+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
16997+ *
16998+ * This program, aufs is free software; you can redistribute it and/or modify
16999+ * it under the terms of the GNU General Public License as published by
17000+ * the Free Software Foundation; either version 2 of the License, or
17001+ * (at your option) any later version.
17002+ *
17003+ * This program is distributed in the hope that it will be useful,
17004+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17005+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17006+ * GNU General Public License for more details.
17007+ *
17008+ * You should have received a copy of the GNU General Public License
523b37e3 17009+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17010+ */
17011+
17012+/*
17013+ * ioctl
17014+ * plink-management and readdir in userspace.
17015+ * assist the pathconf(3) wrapper library.
c2b27bf2 17016+ * move-down
076b876e 17017+ * File-based Hierarchical Storage Management.
4a4d8108
AM
17018+ */
17019+
c2b27bf2
AM
17020+#include <linux/compat.h>
17021+#include <linux/file.h>
4a4d8108
AM
17022+#include "aufs.h"
17023+
1e00d052 17024+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
4a4d8108
AM
17025+{
17026+ int err, fd;
17027+ aufs_bindex_t wbi, bindex, bend;
17028+ struct file *h_file;
17029+ struct super_block *sb;
17030+ struct dentry *root;
1e00d052
AM
17031+ struct au_branch *br;
17032+ struct aufs_wbr_fd wbrfd = {
17033+ .oflags = au_dir_roflags,
17034+ .brid = -1
17035+ };
17036+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
17037+ | O_NOATIME | O_CLOEXEC;
4a4d8108 17038+
1e00d052
AM
17039+ AuDebugOn(wbrfd.oflags & ~valid);
17040+
17041+ if (arg) {
17042+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
17043+ if (unlikely(err)) {
17044+ err = -EFAULT;
17045+ goto out;
17046+ }
17047+
17048+ err = -EINVAL;
17049+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
17050+ wbrfd.oflags |= au_dir_roflags;
17051+ AuDbg("0%o\n", wbrfd.oflags);
17052+ if (unlikely(wbrfd.oflags & ~valid))
17053+ goto out;
17054+ }
17055+
2000de60 17056+ fd = get_unused_fd_flags(0);
1e00d052
AM
17057+ err = fd;
17058+ if (unlikely(fd < 0))
4a4d8108 17059+ goto out;
4a4d8108 17060+
1e00d052 17061+ h_file = ERR_PTR(-EINVAL);
4a4d8108 17062+ wbi = 0;
1e00d052 17063+ br = NULL;
4a4d8108
AM
17064+ sb = path->dentry->d_sb;
17065+ root = sb->s_root;
17066+ aufs_read_lock(root, AuLock_IR);
1e00d052
AM
17067+ bend = au_sbend(sb);
17068+ if (wbrfd.brid >= 0) {
17069+ wbi = au_br_index(sb, wbrfd.brid);
17070+ if (unlikely(wbi < 0 || wbi > bend))
17071+ goto out_unlock;
17072+ }
17073+
17074+ h_file = ERR_PTR(-ENOENT);
17075+ br = au_sbr(sb, wbi);
17076+ if (!au_br_writable(br->br_perm)) {
17077+ if (arg)
17078+ goto out_unlock;
17079+
17080+ bindex = wbi + 1;
17081+ wbi = -1;
17082+ for (; bindex <= bend; bindex++) {
17083+ br = au_sbr(sb, bindex);
17084+ if (au_br_writable(br->br_perm)) {
4a4d8108 17085+ wbi = bindex;
1e00d052 17086+ br = au_sbr(sb, wbi);
4a4d8108
AM
17087+ break;
17088+ }
17089+ }
4a4d8108
AM
17090+ }
17091+ AuDbg("wbi %d\n", wbi);
1e00d052 17092+ if (wbi >= 0)
392086de
AM
17093+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
17094+ /*force_wr*/0);
1e00d052
AM
17095+
17096+out_unlock:
4a4d8108
AM
17097+ aufs_read_unlock(root, AuLock_IR);
17098+ err = PTR_ERR(h_file);
17099+ if (IS_ERR(h_file))
17100+ goto out_fd;
17101+
1e00d052 17102+ atomic_dec(&br->br_count); /* cf. au_h_open() */
4a4d8108
AM
17103+ fd_install(fd, h_file);
17104+ err = fd;
17105+ goto out; /* success */
17106+
4f0767ce 17107+out_fd:
4a4d8108 17108+ put_unused_fd(fd);
4f0767ce 17109+out:
1e00d052 17110+ AuTraceErr(err);
4a4d8108
AM
17111+ return err;
17112+}
17113+
17114+/* ---------------------------------------------------------------------- */
17115+
17116+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
17117+{
17118+ long err;
c1595e42 17119+ struct dentry *dentry;
4a4d8108
AM
17120+
17121+ switch (cmd) {
4a4d8108
AM
17122+ case AUFS_CTL_RDU:
17123+ case AUFS_CTL_RDU_INO:
17124+ err = au_rdu_ioctl(file, cmd, arg);
17125+ break;
17126+
17127+ case AUFS_CTL_WBR_FD:
1e00d052 17128+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17129+ break;
17130+
027c5e7a
AM
17131+ case AUFS_CTL_IBUSY:
17132+ err = au_ibusy_ioctl(file, arg);
17133+ break;
17134+
076b876e
AM
17135+ case AUFS_CTL_BRINFO:
17136+ err = au_brinfo_ioctl(file, arg);
17137+ break;
17138+
17139+ case AUFS_CTL_FHSM_FD:
2000de60 17140+ dentry = file->f_path.dentry;
c1595e42
JR
17141+ if (IS_ROOT(dentry))
17142+ err = au_fhsm_fd(dentry->d_sb, arg);
17143+ else
17144+ err = -ENOTTY;
076b876e
AM
17145+ break;
17146+
4a4d8108
AM
17147+ default:
17148+ /* do not call the lower */
17149+ AuDbg("0x%x\n", cmd);
17150+ err = -ENOTTY;
17151+ }
17152+
17153+ AuTraceErr(err);
17154+ return err;
17155+}
17156+
17157+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
17158+{
17159+ long err;
17160+
17161+ switch (cmd) {
c2b27bf2 17162+ case AUFS_CTL_MVDOWN:
2000de60 17163+ err = au_mvdown(file->f_path.dentry, (void __user *)arg);
c2b27bf2
AM
17164+ break;
17165+
4a4d8108 17166+ case AUFS_CTL_WBR_FD:
1e00d052 17167+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17168+ break;
17169+
17170+ default:
17171+ /* do not call the lower */
17172+ AuDbg("0x%x\n", cmd);
17173+ err = -ENOTTY;
17174+ }
17175+
17176+ AuTraceErr(err);
17177+ return err;
17178+}
b752ccd1
AM
17179+
17180+#ifdef CONFIG_COMPAT
17181+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
17182+ unsigned long arg)
17183+{
17184+ long err;
17185+
17186+ switch (cmd) {
17187+ case AUFS_CTL_RDU:
17188+ case AUFS_CTL_RDU_INO:
17189+ err = au_rdu_compat_ioctl(file, cmd, arg);
17190+ break;
17191+
027c5e7a
AM
17192+ case AUFS_CTL_IBUSY:
17193+ err = au_ibusy_compat_ioctl(file, arg);
17194+ break;
17195+
076b876e
AM
17196+ case AUFS_CTL_BRINFO:
17197+ err = au_brinfo_compat_ioctl(file, arg);
17198+ break;
17199+
b752ccd1
AM
17200+ default:
17201+ err = aufs_ioctl_dir(file, cmd, arg);
17202+ }
17203+
17204+ AuTraceErr(err);
17205+ return err;
17206+}
17207+
b752ccd1
AM
17208+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
17209+ unsigned long arg)
17210+{
17211+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
17212+}
17213+#endif
7f207e10
AM
17214diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
17215--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 17216+++ linux/fs/aufs/i_op_add.c 2015-09-24 10:47:58.254719746 +0200
5527c038 17217@@ -0,0 +1,932 @@
4a4d8108 17218+/*
2000de60 17219+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
17220+ *
17221+ * This program, aufs is free software; you can redistribute it and/or modify
17222+ * it under the terms of the GNU General Public License as published by
17223+ * the Free Software Foundation; either version 2 of the License, or
17224+ * (at your option) any later version.
17225+ *
17226+ * This program is distributed in the hope that it will be useful,
17227+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17228+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17229+ * GNU General Public License for more details.
17230+ *
17231+ * You should have received a copy of the GNU General Public License
523b37e3 17232+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17233+ */
17234+
17235+/*
17236+ * inode operations (add entry)
17237+ */
17238+
17239+#include "aufs.h"
17240+
17241+/*
17242+ * final procedure of adding a new entry, except link(2).
17243+ * remove whiteout, instantiate, copyup the parent dir's times and size
17244+ * and update version.
17245+ * if it failed, re-create the removed whiteout.
17246+ */
17247+static int epilog(struct inode *dir, aufs_bindex_t bindex,
17248+ struct dentry *wh_dentry, struct dentry *dentry)
17249+{
17250+ int err, rerr;
17251+ aufs_bindex_t bwh;
17252+ struct path h_path;
076b876e 17253+ struct super_block *sb;
4a4d8108
AM
17254+ struct inode *inode, *h_dir;
17255+ struct dentry *wh;
17256+
17257+ bwh = -1;
076b876e 17258+ sb = dir->i_sb;
4a4d8108 17259+ if (wh_dentry) {
5527c038 17260+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
4a4d8108
AM
17261+ IMustLock(h_dir);
17262+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
17263+ bwh = au_dbwh(dentry);
17264+ h_path.dentry = wh_dentry;
076b876e 17265+ h_path.mnt = au_sbr_mnt(sb, bindex);
4a4d8108
AM
17266+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
17267+ dentry);
17268+ if (unlikely(err))
17269+ goto out;
17270+ }
17271+
17272+ inode = au_new_inode(dentry, /*must_new*/1);
17273+ if (!IS_ERR(inode)) {
17274+ d_instantiate(dentry, inode);
5527c038 17275+ dir = d_inode(dentry->d_parent); /* dir inode is locked */
4a4d8108 17276+ IMustLock(dir);
b912730e 17277+ au_dir_ts(dir, bindex);
4a4d8108 17278+ dir->i_version++;
076b876e 17279+ au_fhsm_wrote(sb, bindex, /*force*/0);
4a4d8108
AM
17280+ return 0; /* success */
17281+ }
17282+
17283+ err = PTR_ERR(inode);
17284+ if (!wh_dentry)
17285+ goto out;
17286+
17287+ /* revert */
17288+ /* dir inode is locked */
17289+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
17290+ rerr = PTR_ERR(wh);
17291+ if (IS_ERR(wh)) {
523b37e3
AM
17292+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
17293+ dentry, err, rerr);
4a4d8108
AM
17294+ err = -EIO;
17295+ } else
17296+ dput(wh);
17297+
4f0767ce 17298+out:
4a4d8108
AM
17299+ return err;
17300+}
17301+
027c5e7a
AM
17302+static int au_d_may_add(struct dentry *dentry)
17303+{
17304+ int err;
17305+
17306+ err = 0;
17307+ if (unlikely(d_unhashed(dentry)))
17308+ err = -ENOENT;
5527c038 17309+ if (unlikely(d_really_is_positive(dentry)))
027c5e7a
AM
17310+ err = -EEXIST;
17311+ return err;
17312+}
17313+
4a4d8108
AM
17314+/*
17315+ * simple tests for the adding inode operations.
17316+ * following the checks in vfs, plus the parent-child relationship.
17317+ */
17318+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
17319+ struct dentry *h_parent, int isdir)
17320+{
17321+ int err;
17322+ umode_t h_mode;
17323+ struct dentry *h_dentry;
17324+ struct inode *h_inode;
17325+
17326+ err = -ENAMETOOLONG;
17327+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17328+ goto out;
17329+
17330+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 17331+ if (d_really_is_negative(dentry)) {
4a4d8108 17332+ err = -EEXIST;
5527c038 17333+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
17334+ goto out;
17335+ } else {
17336+ /* rename(2) case */
17337+ err = -EIO;
5527c038
JR
17338+ if (unlikely(d_is_negative(h_dentry)))
17339+ goto out;
17340+ h_inode = d_inode(h_dentry);
17341+ if (unlikely(!h_inode->i_nlink))
4a4d8108
AM
17342+ goto out;
17343+
17344+ h_mode = h_inode->i_mode;
17345+ if (!isdir) {
17346+ err = -EISDIR;
17347+ if (unlikely(S_ISDIR(h_mode)))
17348+ goto out;
17349+ } else if (unlikely(!S_ISDIR(h_mode))) {
17350+ err = -ENOTDIR;
17351+ goto out;
17352+ }
17353+ }
17354+
17355+ err = 0;
17356+ /* expected parent dir is locked */
17357+ if (unlikely(h_parent != h_dentry->d_parent))
17358+ err = -EIO;
17359+
4f0767ce 17360+out:
4a4d8108
AM
17361+ AuTraceErr(err);
17362+ return err;
17363+}
17364+
17365+/*
17366+ * initial procedure of adding a new entry.
17367+ * prepare writable branch and the parent dir, lock it,
17368+ * and lookup whiteout for the new entry.
17369+ */
17370+static struct dentry*
17371+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
17372+ struct dentry *src_dentry, struct au_pin *pin,
17373+ struct au_wr_dir_args *wr_dir_args)
17374+{
17375+ struct dentry *wh_dentry, *h_parent;
17376+ struct super_block *sb;
17377+ struct au_branch *br;
17378+ int err;
17379+ unsigned int udba;
17380+ aufs_bindex_t bcpup;
17381+
523b37e3 17382+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17383+
17384+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
17385+ bcpup = err;
17386+ wh_dentry = ERR_PTR(err);
17387+ if (unlikely(err < 0))
17388+ goto out;
17389+
17390+ sb = dentry->d_sb;
17391+ udba = au_opt_udba(sb);
17392+ err = au_pin(pin, dentry, bcpup, udba,
17393+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17394+ wh_dentry = ERR_PTR(err);
17395+ if (unlikely(err))
17396+ goto out;
17397+
17398+ h_parent = au_pinned_h_parent(pin);
17399+ if (udba != AuOpt_UDBA_NONE
17400+ && au_dbstart(dentry) == bcpup)
17401+ err = au_may_add(dentry, bcpup, h_parent,
17402+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
17403+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17404+ err = -ENAMETOOLONG;
17405+ wh_dentry = ERR_PTR(err);
17406+ if (unlikely(err))
17407+ goto out_unpin;
17408+
17409+ br = au_sbr(sb, bcpup);
17410+ if (dt) {
17411+ struct path tmp = {
17412+ .dentry = h_parent,
86dc4139 17413+ .mnt = au_br_mnt(br)
4a4d8108
AM
17414+ };
17415+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
17416+ }
17417+
17418+ wh_dentry = NULL;
17419+ if (bcpup != au_dbwh(dentry))
17420+ goto out; /* success */
17421+
2000de60
JR
17422+ /*
17423+ * ENAMETOOLONG here means that if we allowed creating such a name, then
17424+ * it could not be removed in the future. So we don't allow such a
17425+ * name here and we don't handle ENAMETOOLONG differently here.
17426+ */
4a4d8108
AM
17427+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
17428+
4f0767ce 17429+out_unpin:
4a4d8108
AM
17430+ if (IS_ERR(wh_dentry))
17431+ au_unpin(pin);
4f0767ce 17432+out:
4a4d8108
AM
17433+ return wh_dentry;
17434+}
17435+
17436+/* ---------------------------------------------------------------------- */
17437+
17438+enum { Mknod, Symlink, Creat };
17439+struct simple_arg {
17440+ int type;
17441+ union {
17442+ struct {
b912730e
AM
17443+ umode_t mode;
17444+ bool want_excl;
17445+ bool try_aopen;
17446+ struct vfsub_aopen_args *aopen;
4a4d8108
AM
17447+ } c;
17448+ struct {
17449+ const char *symname;
17450+ } s;
17451+ struct {
7eafdf33 17452+ umode_t mode;
4a4d8108
AM
17453+ dev_t dev;
17454+ } m;
17455+ } u;
17456+};
17457+
17458+static int add_simple(struct inode *dir, struct dentry *dentry,
17459+ struct simple_arg *arg)
17460+{
076b876e 17461+ int err, rerr;
4a4d8108
AM
17462+ aufs_bindex_t bstart;
17463+ unsigned char created;
b912730e
AM
17464+ const unsigned char try_aopen
17465+ = (arg->type == Creat && arg->u.c.try_aopen);
4a4d8108
AM
17466+ struct dentry *wh_dentry, *parent;
17467+ struct inode *h_dir;
b912730e
AM
17468+ struct super_block *sb;
17469+ struct au_branch *br;
c2b27bf2
AM
17470+ /* to reduce stack size */
17471+ struct {
17472+ struct au_dtime dt;
17473+ struct au_pin pin;
17474+ struct path h_path;
17475+ struct au_wr_dir_args wr_dir_args;
17476+ } *a;
4a4d8108 17477+
523b37e3 17478+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17479+ IMustLock(dir);
17480+
c2b27bf2
AM
17481+ err = -ENOMEM;
17482+ a = kmalloc(sizeof(*a), GFP_NOFS);
17483+ if (unlikely(!a))
17484+ goto out;
17485+ a->wr_dir_args.force_btgt = -1;
17486+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
17487+
4a4d8108 17488+ parent = dentry->d_parent; /* dir inode is locked */
b912730e
AM
17489+ if (!try_aopen) {
17490+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17491+ if (unlikely(err))
17492+ goto out_free;
17493+ }
027c5e7a
AM
17494+ err = au_d_may_add(dentry);
17495+ if (unlikely(err))
17496+ goto out_unlock;
b912730e
AM
17497+ if (!try_aopen)
17498+ di_write_lock_parent(parent);
c2b27bf2
AM
17499+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
17500+ &a->pin, &a->wr_dir_args);
4a4d8108
AM
17501+ err = PTR_ERR(wh_dentry);
17502+ if (IS_ERR(wh_dentry))
027c5e7a 17503+ goto out_parent;
4a4d8108
AM
17504+
17505+ bstart = au_dbstart(dentry);
b912730e
AM
17506+ sb = dentry->d_sb;
17507+ br = au_sbr(sb, bstart);
c2b27bf2 17508+ a->h_path.dentry = au_h_dptr(dentry, bstart);
b912730e 17509+ a->h_path.mnt = au_br_mnt(br);
c2b27bf2 17510+ h_dir = au_pinned_h_dir(&a->pin);
4a4d8108
AM
17511+ switch (arg->type) {
17512+ case Creat:
b912730e
AM
17513+ err = 0;
17514+ if (!try_aopen || !h_dir->i_op->atomic_open)
17515+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
17516+ arg->u.c.want_excl);
17517+ else
17518+ err = vfsub_atomic_open(h_dir, a->h_path.dentry,
17519+ arg->u.c.aopen, br);
4a4d8108
AM
17520+ break;
17521+ case Symlink:
c2b27bf2 17522+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
4a4d8108
AM
17523+ break;
17524+ case Mknod:
c2b27bf2
AM
17525+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
17526+ arg->u.m.dev);
4a4d8108
AM
17527+ break;
17528+ default:
17529+ BUG();
17530+ }
17531+ created = !err;
17532+ if (!err)
17533+ err = epilog(dir, bstart, wh_dentry, dentry);
17534+
17535+ /* revert */
5527c038 17536+ if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
523b37e3
AM
17537+ /* no delegation since it is just created */
17538+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
17539+ /*force*/0);
4a4d8108 17540+ if (rerr) {
523b37e3
AM
17541+ AuIOErr("%pd revert failure(%d, %d)\n",
17542+ dentry, err, rerr);
4a4d8108
AM
17543+ err = -EIO;
17544+ }
c2b27bf2 17545+ au_dtime_revert(&a->dt);
4a4d8108
AM
17546+ }
17547+
b912730e
AM
17548+ if (!err && try_aopen && !h_dir->i_op->atomic_open)
17549+ *arg->u.c.aopen->opened |= FILE_CREATED;
17550+
c2b27bf2 17551+ au_unpin(&a->pin);
4a4d8108
AM
17552+ dput(wh_dentry);
17553+
027c5e7a 17554+out_parent:
b912730e
AM
17555+ if (!try_aopen)
17556+ di_write_unlock(parent);
027c5e7a 17557+out_unlock:
4a4d8108
AM
17558+ if (unlikely(err)) {
17559+ au_update_dbstart(dentry);
17560+ d_drop(dentry);
17561+ }
b912730e
AM
17562+ if (!try_aopen)
17563+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
17564+out_free:
17565+ kfree(a);
027c5e7a 17566+out:
4a4d8108
AM
17567+ return err;
17568+}
17569+
7eafdf33
AM
17570+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
17571+ dev_t dev)
4a4d8108
AM
17572+{
17573+ struct simple_arg arg = {
17574+ .type = Mknod,
17575+ .u.m = {
17576+ .mode = mode,
17577+ .dev = dev
17578+ }
17579+ };
17580+ return add_simple(dir, dentry, &arg);
17581+}
17582+
17583+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
17584+{
17585+ struct simple_arg arg = {
17586+ .type = Symlink,
17587+ .u.s.symname = symname
17588+ };
17589+ return add_simple(dir, dentry, &arg);
17590+}
17591+
7eafdf33 17592+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 17593+ bool want_excl)
4a4d8108
AM
17594+{
17595+ struct simple_arg arg = {
17596+ .type = Creat,
17597+ .u.c = {
b4510431
AM
17598+ .mode = mode,
17599+ .want_excl = want_excl
4a4d8108
AM
17600+ }
17601+ };
17602+ return add_simple(dir, dentry, &arg);
17603+}
17604+
b912730e
AM
17605+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
17606+ struct vfsub_aopen_args *aopen_args)
17607+{
17608+ struct simple_arg arg = {
17609+ .type = Creat,
17610+ .u.c = {
17611+ .mode = aopen_args->create_mode,
17612+ .want_excl = aopen_args->open_flag & O_EXCL,
17613+ .try_aopen = true,
17614+ .aopen = aopen_args
17615+ }
17616+ };
17617+ return add_simple(dir, dentry, &arg);
17618+}
17619+
38d290e6
JR
17620+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
17621+{
17622+ int err;
17623+ aufs_bindex_t bindex;
17624+ struct super_block *sb;
17625+ struct dentry *parent, *h_parent, *h_dentry;
17626+ struct inode *h_dir, *inode;
17627+ struct vfsmount *h_mnt;
17628+ struct au_wr_dir_args wr_dir_args = {
17629+ .force_btgt = -1,
17630+ .flags = AuWrDir_TMPFILE
17631+ };
17632+
17633+ /* copy-up may happen */
17634+ mutex_lock(&dir->i_mutex);
17635+
17636+ sb = dir->i_sb;
17637+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
17638+ if (unlikely(err))
17639+ goto out;
17640+
17641+ err = au_di_init(dentry);
17642+ if (unlikely(err))
17643+ goto out_si;
17644+
17645+ err = -EBUSY;
17646+ parent = d_find_any_alias(dir);
17647+ AuDebugOn(!parent);
17648+ di_write_lock_parent(parent);
5527c038 17649+ if (unlikely(d_inode(parent) != dir))
38d290e6
JR
17650+ goto out_parent;
17651+
17652+ err = au_digen_test(parent, au_sigen(sb));
17653+ if (unlikely(err))
17654+ goto out_parent;
17655+
17656+ bindex = au_dbstart(parent);
17657+ au_set_dbstart(dentry, bindex);
17658+ au_set_dbend(dentry, bindex);
17659+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
17660+ bindex = err;
17661+ if (unlikely(err < 0))
17662+ goto out_parent;
17663+
17664+ err = -EOPNOTSUPP;
17665+ h_dir = au_h_iptr(dir, bindex);
17666+ if (unlikely(!h_dir->i_op->tmpfile))
17667+ goto out_parent;
17668+
17669+ h_mnt = au_sbr_mnt(sb, bindex);
17670+ err = vfsub_mnt_want_write(h_mnt);
17671+ if (unlikely(err))
17672+ goto out_parent;
17673+
17674+ h_parent = au_h_dptr(parent, bindex);
5527c038 17675+ err = inode_permission(d_inode(h_parent), MAY_WRITE | MAY_EXEC);
38d290e6
JR
17676+ if (unlikely(err))
17677+ goto out_mnt;
17678+
17679+ err = -ENOMEM;
17680+ h_dentry = d_alloc(h_parent, &dentry->d_name);
17681+ if (unlikely(!h_dentry))
17682+ goto out_mnt;
17683+
17684+ err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode);
17685+ if (unlikely(err))
17686+ goto out_dentry;
17687+
17688+ au_set_dbstart(dentry, bindex);
17689+ au_set_dbend(dentry, bindex);
17690+ au_set_h_dptr(dentry, bindex, dget(h_dentry));
17691+ inode = au_new_inode(dentry, /*must_new*/1);
17692+ if (IS_ERR(inode)) {
17693+ err = PTR_ERR(inode);
17694+ au_set_h_dptr(dentry, bindex, NULL);
17695+ au_set_dbstart(dentry, -1);
17696+ au_set_dbend(dentry, -1);
17697+ } else {
17698+ if (!inode->i_nlink)
17699+ set_nlink(inode, 1);
17700+ d_tmpfile(dentry, inode);
17701+ au_di(dentry)->di_tmpfile = 1;
17702+
17703+ /* update without i_mutex */
17704+ if (au_ibstart(dir) == au_dbstart(dentry))
17705+ au_cpup_attr_timesizes(dir);
17706+ }
17707+
17708+out_dentry:
17709+ dput(h_dentry);
17710+out_mnt:
17711+ vfsub_mnt_drop_write(h_mnt);
17712+out_parent:
17713+ di_write_unlock(parent);
17714+ dput(parent);
17715+ di_write_unlock(dentry);
17716+ if (!err)
17717+#if 0
17718+ /* verbose coding for lock class name */
17719+ au_rw_class(&au_di(dentry)->di_rwsem,
17720+ au_lc_key + AuLcNonDir_DIINFO);
17721+#else
17722+ ;
17723+#endif
17724+ else {
17725+ au_di_fin(dentry);
17726+ dentry->d_fsdata = NULL;
17727+ }
17728+out_si:
17729+ si_read_unlock(sb);
17730+out:
17731+ mutex_unlock(&dir->i_mutex);
17732+ return err;
17733+}
17734+
4a4d8108
AM
17735+/* ---------------------------------------------------------------------- */
17736+
17737+struct au_link_args {
17738+ aufs_bindex_t bdst, bsrc;
17739+ struct au_pin pin;
17740+ struct path h_path;
17741+ struct dentry *src_parent, *parent;
17742+};
17743+
17744+static int au_cpup_before_link(struct dentry *src_dentry,
17745+ struct au_link_args *a)
17746+{
17747+ int err;
17748+ struct dentry *h_src_dentry;
c2b27bf2
AM
17749+ struct au_cp_generic cpg = {
17750+ .dentry = src_dentry,
17751+ .bdst = a->bdst,
17752+ .bsrc = a->bsrc,
17753+ .len = -1,
17754+ .pin = &a->pin,
17755+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
17756+ };
4a4d8108
AM
17757+
17758+ di_read_lock_parent(a->src_parent, AuLock_IR);
17759+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
17760+ if (unlikely(err))
17761+ goto out;
17762+
17763+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
4a4d8108
AM
17764+ err = au_pin(&a->pin, src_dentry, a->bdst,
17765+ au_opt_udba(src_dentry->d_sb),
17766+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17767+ if (unlikely(err))
17768+ goto out;
367653fa 17769+
c2b27bf2 17770+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
17771+ au_unpin(&a->pin);
17772+
4f0767ce 17773+out:
4a4d8108
AM
17774+ di_read_unlock(a->src_parent, AuLock_IR);
17775+ return err;
17776+}
17777+
86dc4139
AM
17778+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
17779+ struct au_link_args *a)
4a4d8108
AM
17780+{
17781+ int err;
17782+ unsigned char plink;
86dc4139 17783+ aufs_bindex_t bend;
4a4d8108 17784+ struct dentry *h_src_dentry;
523b37e3 17785+ struct inode *h_inode, *inode, *delegated;
4a4d8108
AM
17786+ struct super_block *sb;
17787+ struct file *h_file;
17788+
17789+ plink = 0;
17790+ h_inode = NULL;
17791+ sb = src_dentry->d_sb;
5527c038 17792+ inode = d_inode(src_dentry);
4a4d8108
AM
17793+ if (au_ibstart(inode) <= a->bdst)
17794+ h_inode = au_h_iptr(inode, a->bdst);
17795+ if (!h_inode || !h_inode->i_nlink) {
17796+ /* copy up src_dentry under the name of dentry. */
86dc4139
AM
17797+ bend = au_dbend(dentry);
17798+ if (bend < a->bsrc)
17799+ au_set_dbend(dentry, a->bsrc);
17800+ au_set_h_dptr(dentry, a->bsrc,
17801+ dget(au_h_dptr(src_dentry, a->bsrc)));
17802+ dget(a->h_path.dentry);
17803+ au_set_h_dptr(dentry, a->bdst, NULL);
c1595e42
JR
17804+ AuDbg("temporary d_inode...\n");
17805+ spin_lock(&dentry->d_lock);
5527c038 17806+ dentry->d_inode = d_inode(src_dentry); /* tmp */
c1595e42 17807+ spin_unlock(&dentry->d_lock);
392086de 17808+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
86dc4139 17809+ if (IS_ERR(h_file))
4a4d8108 17810+ err = PTR_ERR(h_file);
86dc4139 17811+ else {
c2b27bf2
AM
17812+ struct au_cp_generic cpg = {
17813+ .dentry = dentry,
17814+ .bdst = a->bdst,
17815+ .bsrc = -1,
17816+ .len = -1,
17817+ .pin = &a->pin,
17818+ .flags = AuCpup_KEEPLINO
17819+ };
17820+ err = au_sio_cpup_simple(&cpg);
86dc4139
AM
17821+ au_h_open_post(dentry, a->bsrc, h_file);
17822+ if (!err) {
17823+ dput(a->h_path.dentry);
17824+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
17825+ } else
17826+ au_set_h_dptr(dentry, a->bdst,
17827+ a->h_path.dentry);
17828+ }
c1595e42 17829+ spin_lock(&dentry->d_lock);
86dc4139 17830+ dentry->d_inode = NULL; /* restore */
c1595e42
JR
17831+ spin_unlock(&dentry->d_lock);
17832+ AuDbg("temporary d_inode...done\n");
86dc4139
AM
17833+ au_set_h_dptr(dentry, a->bsrc, NULL);
17834+ au_set_dbend(dentry, bend);
4a4d8108
AM
17835+ } else {
17836+ /* the inode of src_dentry already exists on the a->bdst branch */
17837+ h_src_dentry = d_find_alias(h_inode);
17838+ if (!h_src_dentry && au_plink_test(inode)) {
17839+ plink = 1;
17840+ h_src_dentry = au_plink_lkup(inode, a->bdst);
17841+ err = PTR_ERR(h_src_dentry);
17842+ if (IS_ERR(h_src_dentry))
17843+ goto out;
17844+
5527c038 17845+ if (unlikely(d_is_negative(h_src_dentry))) {
4a4d8108
AM
17846+ dput(h_src_dentry);
17847+ h_src_dentry = NULL;
17848+ }
17849+
17850+ }
17851+ if (h_src_dentry) {
523b37e3 17852+ delegated = NULL;
4a4d8108 17853+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
17854+ &a->h_path, &delegated);
17855+ if (unlikely(err == -EWOULDBLOCK)) {
17856+ pr_warn("cannot retry for NFSv4 delegation"
17857+ " for an internal link\n");
17858+ iput(delegated);
17859+ }
4a4d8108
AM
17860+ dput(h_src_dentry);
17861+ } else {
17862+ AuIOErr("no dentry found for hi%lu on b%d\n",
17863+ h_inode->i_ino, a->bdst);
17864+ err = -EIO;
17865+ }
17866+ }
17867+
17868+ if (!err && !plink)
17869+ au_plink_append(inode, a->bdst, a->h_path.dentry);
17870+
17871+out:
2cbb1c4b 17872+ AuTraceErr(err);
4a4d8108
AM
17873+ return err;
17874+}
17875+
17876+int aufs_link(struct dentry *src_dentry, struct inode *dir,
17877+ struct dentry *dentry)
17878+{
17879+ int err, rerr;
17880+ struct au_dtime dt;
17881+ struct au_link_args *a;
17882+ struct dentry *wh_dentry, *h_src_dentry;
523b37e3 17883+ struct inode *inode, *delegated;
4a4d8108
AM
17884+ struct super_block *sb;
17885+ struct au_wr_dir_args wr_dir_args = {
17886+ /* .force_btgt = -1, */
17887+ .flags = AuWrDir_ADD_ENTRY
17888+ };
17889+
17890+ IMustLock(dir);
5527c038 17891+ inode = d_inode(src_dentry);
4a4d8108
AM
17892+ IMustLock(inode);
17893+
4a4d8108
AM
17894+ err = -ENOMEM;
17895+ a = kzalloc(sizeof(*a), GFP_NOFS);
17896+ if (unlikely(!a))
17897+ goto out;
17898+
17899+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
17900+ err = aufs_read_and_write_lock2(dentry, src_dentry,
17901+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
17902+ if (unlikely(err))
17903+ goto out_kfree;
38d290e6 17904+ err = au_d_linkable(src_dentry);
027c5e7a
AM
17905+ if (unlikely(err))
17906+ goto out_unlock;
17907+ err = au_d_may_add(dentry);
17908+ if (unlikely(err))
17909+ goto out_unlock;
e49829fe 17910+
4a4d8108 17911+ a->src_parent = dget_parent(src_dentry);
2cbb1c4b 17912+ wr_dir_args.force_btgt = au_ibstart(inode);
4a4d8108
AM
17913+
17914+ di_write_lock_parent(a->parent);
17915+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
17916+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
17917+ &wr_dir_args);
17918+ err = PTR_ERR(wh_dentry);
17919+ if (IS_ERR(wh_dentry))
027c5e7a 17920+ goto out_parent;
4a4d8108
AM
17921+
17922+ err = 0;
17923+ sb = dentry->d_sb;
17924+ a->bdst = au_dbstart(dentry);
17925+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
17926+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
2cbb1c4b
JR
17927+ a->bsrc = au_ibstart(inode);
17928+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
38d290e6
JR
17929+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
17930+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
2cbb1c4b
JR
17931+ if (!h_src_dentry) {
17932+ a->bsrc = au_dbstart(src_dentry);
17933+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
17934+ AuDebugOn(!h_src_dentry);
38d290e6
JR
17935+ } else if (IS_ERR(h_src_dentry)) {
17936+ err = PTR_ERR(h_src_dentry);
2cbb1c4b 17937+ goto out_parent;
38d290e6 17938+ }
2cbb1c4b 17939+
4a4d8108
AM
17940+ if (au_opt_test(au_mntflags(sb), PLINK)) {
17941+ if (a->bdst < a->bsrc
17942+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
86dc4139 17943+ err = au_cpup_or_link(src_dentry, dentry, a);
523b37e3
AM
17944+ else {
17945+ delegated = NULL;
4a4d8108 17946+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
17947+ &a->h_path, &delegated);
17948+ if (unlikely(err == -EWOULDBLOCK)) {
17949+ pr_warn("cannot retry for NFSv4 delegation"
17950+ " for an internal link\n");
17951+ iput(delegated);
17952+ }
17953+ }
2cbb1c4b 17954+ dput(h_src_dentry);
4a4d8108
AM
17955+ } else {
17956+ /*
17957+ * copyup src_dentry to the branch we process,
17958+ * and then link(2) to it.
17959+ */
2cbb1c4b 17960+ dput(h_src_dentry);
4a4d8108
AM
17961+ if (a->bdst < a->bsrc
17962+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
17963+ au_unpin(&a->pin);
17964+ di_write_unlock(a->parent);
17965+ err = au_cpup_before_link(src_dentry, a);
17966+ di_write_lock_parent(a->parent);
17967+ if (!err)
17968+ err = au_pin(&a->pin, dentry, a->bdst,
17969+ au_opt_udba(sb),
17970+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17971+ if (unlikely(err))
17972+ goto out_wh;
17973+ }
17974+ if (!err) {
17975+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
17976+ err = -ENOENT;
5527c038 17977+ if (h_src_dentry && d_is_positive(h_src_dentry)) {
523b37e3 17978+ delegated = NULL;
4a4d8108
AM
17979+ err = vfsub_link(h_src_dentry,
17980+ au_pinned_h_dir(&a->pin),
523b37e3
AM
17981+ &a->h_path, &delegated);
17982+ if (unlikely(err == -EWOULDBLOCK)) {
17983+ pr_warn("cannot retry"
17984+ " for NFSv4 delegation"
17985+ " for an internal link\n");
17986+ iput(delegated);
17987+ }
17988+ }
4a4d8108
AM
17989+ }
17990+ }
17991+ if (unlikely(err))
17992+ goto out_unpin;
17993+
17994+ if (wh_dentry) {
17995+ a->h_path.dentry = wh_dentry;
17996+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
17997+ dentry);
17998+ if (unlikely(err))
17999+ goto out_revert;
18000+ }
18001+
b912730e 18002+ au_dir_ts(dir, a->bdst);
4a4d8108 18003+ dir->i_version++;
4a4d8108
AM
18004+ inc_nlink(inode);
18005+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
18006+ d_instantiate(dentry, au_igrab(inode));
18007+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
18008+ /* some filesystem calls d_drop() */
18009+ d_drop(dentry);
076b876e
AM
18010+ /* some filesystems consume an inode even for a hardlink */
18011+ au_fhsm_wrote(sb, a->bdst, /*force*/0);
4a4d8108
AM
18012+ goto out_unpin; /* success */
18013+
4f0767ce 18014+out_revert:
523b37e3
AM
18015+ /* no delegation since it is just created */
18016+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
18017+ /*delegated*/NULL, /*force*/0);
027c5e7a 18018+ if (unlikely(rerr)) {
523b37e3 18019+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
027c5e7a
AM
18020+ err = -EIO;
18021+ }
4a4d8108 18022+ au_dtime_revert(&dt);
4f0767ce 18023+out_unpin:
4a4d8108 18024+ au_unpin(&a->pin);
4f0767ce 18025+out_wh:
4a4d8108 18026+ dput(wh_dentry);
027c5e7a
AM
18027+out_parent:
18028+ di_write_unlock(a->parent);
18029+ dput(a->src_parent);
4f0767ce 18030+out_unlock:
4a4d8108
AM
18031+ if (unlikely(err)) {
18032+ au_update_dbstart(dentry);
18033+ d_drop(dentry);
18034+ }
4a4d8108 18035+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 18036+out_kfree:
4a4d8108 18037+ kfree(a);
4f0767ce 18038+out:
86dc4139 18039+ AuTraceErr(err);
4a4d8108
AM
18040+ return err;
18041+}
18042+
7eafdf33 18043+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4a4d8108
AM
18044+{
18045+ int err, rerr;
18046+ aufs_bindex_t bindex;
18047+ unsigned char diropq;
18048+ struct path h_path;
18049+ struct dentry *wh_dentry, *parent, *opq_dentry;
18050+ struct mutex *h_mtx;
18051+ struct super_block *sb;
18052+ struct {
18053+ struct au_pin pin;
18054+ struct au_dtime dt;
18055+ } *a; /* reduce the stack usage */
18056+ struct au_wr_dir_args wr_dir_args = {
18057+ .force_btgt = -1,
18058+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
18059+ };
18060+
18061+ IMustLock(dir);
18062+
18063+ err = -ENOMEM;
18064+ a = kmalloc(sizeof(*a), GFP_NOFS);
18065+ if (unlikely(!a))
18066+ goto out;
18067+
027c5e7a
AM
18068+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
18069+ if (unlikely(err))
18070+ goto out_free;
18071+ err = au_d_may_add(dentry);
18072+ if (unlikely(err))
18073+ goto out_unlock;
18074+
4a4d8108
AM
18075+ parent = dentry->d_parent; /* dir inode is locked */
18076+ di_write_lock_parent(parent);
18077+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
18078+ &a->pin, &wr_dir_args);
18079+ err = PTR_ERR(wh_dentry);
18080+ if (IS_ERR(wh_dentry))
027c5e7a 18081+ goto out_parent;
4a4d8108
AM
18082+
18083+ sb = dentry->d_sb;
18084+ bindex = au_dbstart(dentry);
18085+ h_path.dentry = au_h_dptr(dentry, bindex);
18086+ h_path.mnt = au_sbr_mnt(sb, bindex);
18087+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
18088+ if (unlikely(err))
027c5e7a 18089+ goto out_unpin;
4a4d8108
AM
18090+
18091+ /* make the dir opaque */
18092+ diropq = 0;
5527c038 18093+ h_mtx = &d_inode(h_path.dentry)->i_mutex;
4a4d8108
AM
18094+ if (wh_dentry
18095+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
18096+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18097+ opq_dentry = au_diropq_create(dentry, bindex);
18098+ mutex_unlock(h_mtx);
18099+ err = PTR_ERR(opq_dentry);
18100+ if (IS_ERR(opq_dentry))
18101+ goto out_dir;
18102+ dput(opq_dentry);
18103+ diropq = 1;
18104+ }
18105+
18106+ err = epilog(dir, bindex, wh_dentry, dentry);
18107+ if (!err) {
18108+ inc_nlink(dir);
027c5e7a 18109+ goto out_unpin; /* success */
4a4d8108
AM
18110+ }
18111+
18112+ /* revert */
18113+ if (diropq) {
18114+ AuLabel(revert opq);
18115+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18116+ rerr = au_diropq_remove(dentry, bindex);
18117+ mutex_unlock(h_mtx);
18118+ if (rerr) {
523b37e3
AM
18119+ AuIOErr("%pd reverting diropq failed(%d, %d)\n",
18120+ dentry, err, rerr);
4a4d8108
AM
18121+ err = -EIO;
18122+ }
18123+ }
18124+
4f0767ce 18125+out_dir:
4a4d8108
AM
18126+ AuLabel(revert dir);
18127+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
18128+ if (rerr) {
523b37e3
AM
18129+ AuIOErr("%pd reverting dir failed(%d, %d)\n",
18130+ dentry, err, rerr);
4a4d8108
AM
18131+ err = -EIO;
18132+ }
4a4d8108 18133+ au_dtime_revert(&a->dt);
027c5e7a 18134+out_unpin:
4a4d8108
AM
18135+ au_unpin(&a->pin);
18136+ dput(wh_dentry);
027c5e7a
AM
18137+out_parent:
18138+ di_write_unlock(parent);
18139+out_unlock:
4a4d8108
AM
18140+ if (unlikely(err)) {
18141+ au_update_dbstart(dentry);
18142+ d_drop(dentry);
18143+ }
4a4d8108 18144+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 18145+out_free:
4a4d8108 18146+ kfree(a);
4f0767ce 18147+out:
4a4d8108
AM
18148+ return err;
18149+}
7f207e10
AM
18150diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
18151--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
b95c5147
AM
18152+++ linux/fs/aufs/i_op.c 2015-12-10 17:59:16.836166410 +0100
18153@@ -0,0 +1,1484 @@
4a4d8108 18154+/*
2000de60 18155+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
18156+ *
18157+ * This program, aufs is free software; you can redistribute it and/or modify
18158+ * it under the terms of the GNU General Public License as published by
18159+ * the Free Software Foundation; either version 2 of the License, or
18160+ * (at your option) any later version.
18161+ *
18162+ * This program is distributed in the hope that it will be useful,
18163+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18164+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18165+ * GNU General Public License for more details.
18166+ *
18167+ * You should have received a copy of the GNU General Public License
523b37e3 18168+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 18169+ */
1facf9fc 18170+
1308ab2a 18171+/*
4a4d8108 18172+ * inode operations (except add/del/rename)
1308ab2a 18173+ */
4a4d8108
AM
18174+
18175+#include <linux/device_cgroup.h>
18176+#include <linux/fs_stack.h>
4a4d8108
AM
18177+#include <linux/namei.h>
18178+#include <linux/security.h>
4a4d8108
AM
18179+#include "aufs.h"
18180+
1e00d052 18181+static int h_permission(struct inode *h_inode, int mask,
79b8bda9 18182+ struct path *h_path, int brperm)
1facf9fc 18183+{
1308ab2a 18184+ int err;
4a4d8108 18185+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 18186+
4a4d8108
AM
18187+ err = -EACCES;
18188+ if ((write_mask && IS_IMMUTABLE(h_inode))
18189+ || ((mask & MAY_EXEC)
18190+ && S_ISREG(h_inode->i_mode)
79b8bda9 18191+ && (path_noexec(h_path)
4a4d8108
AM
18192+ || !(h_inode->i_mode & S_IXUGO))))
18193+ goto out;
18194+
18195+ /*
18196+ * - skip the lower fs test in the case of write to ro branch.
18197+ * - nfs dir permission write check is optimized, but a policy for
18198+ * link/rename requires a real check.
b912730e
AM
18199+ * - nfs always sets MS_POSIXACL regardless of its mount option 'noacl'.
18200+ * in this case, generic_permission() returns -EOPNOTSUPP.
4a4d8108
AM
18201+ */
18202+ if ((write_mask && !au_br_writable(brperm))
18203+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
18204+ && write_mask && !(mask & MAY_READ))
18205+ || !h_inode->i_op->permission) {
18206+ /* AuLabel(generic_permission); */
b912730e 18207+ /* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
1e00d052 18208+ err = generic_permission(h_inode, mask);
b912730e
AM
18209+ if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
18210+ err = h_inode->i_op->permission(h_inode, mask);
18211+ AuTraceErr(err);
1308ab2a 18212+ } else {
4a4d8108 18213+ /* AuLabel(h_inode->permission); */
1e00d052 18214+ err = h_inode->i_op->permission(h_inode, mask);
4a4d8108
AM
18215+ AuTraceErr(err);
18216+ }
1facf9fc 18217+
4a4d8108
AM
18218+ if (!err)
18219+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 18220+ if (!err)
4a4d8108 18221+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
18222+
18223+#if 0
18224+ if (!err) {
18225+ /* todo: do we need to call ima_path_check()? */
18226+ struct path h_path = {
18227+ .dentry =
18228+ .mnt = h_mnt
18229+ };
18230+ err = ima_path_check(&h_path,
18231+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
18232+ IMA_COUNT_LEAVE);
1308ab2a 18233+ }
4a4d8108 18234+#endif
dece6358 18235+
4f0767ce 18236+out:
1308ab2a 18237+ return err;
18238+}
dece6358 18239+
1e00d052 18240+static int aufs_permission(struct inode *inode, int mask)
1308ab2a 18241+{
18242+ int err;
4a4d8108
AM
18243+ aufs_bindex_t bindex, bend;
18244+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
18245+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
18246+ struct inode *h_inode;
18247+ struct super_block *sb;
18248+ struct au_branch *br;
1facf9fc 18249+
027c5e7a 18250+ /* todo: support rcu-walk? */
1e00d052 18251+ if (mask & MAY_NOT_BLOCK)
027c5e7a
AM
18252+ return -ECHILD;
18253+
4a4d8108
AM
18254+ sb = inode->i_sb;
18255+ si_read_lock(sb, AuLock_FLUSH);
18256+ ii_read_lock_child(inode);
027c5e7a
AM
18257+#if 0
18258+ err = au_iigen_test(inode, au_sigen(sb));
18259+ if (unlikely(err))
18260+ goto out;
18261+#endif
dece6358 18262+
076b876e
AM
18263+ if (!isdir
18264+ || write_mask
18265+ || au_opt_test(au_mntflags(sb), DIRPERM1)) {
4a4d8108
AM
18266+ err = au_busy_or_stale();
18267+ h_inode = au_h_iptr(inode, au_ibstart(inode));
18268+ if (unlikely(!h_inode
18269+ || (h_inode->i_mode & S_IFMT)
18270+ != (inode->i_mode & S_IFMT)))
18271+ goto out;
1facf9fc 18272+
4a4d8108
AM
18273+ err = 0;
18274+ bindex = au_ibstart(inode);
18275+ br = au_sbr(sb, bindex);
79b8bda9 18276+ err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
4a4d8108
AM
18277+ if (write_mask
18278+ && !err
18279+ && !special_file(h_inode->i_mode)) {
18280+ /* test whether the upper writable branch exists */
18281+ err = -EROFS;
18282+ for (; bindex >= 0; bindex--)
18283+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
18284+ err = 0;
18285+ break;
18286+ }
18287+ }
18288+ goto out;
18289+ }
dece6358 18290+
4a4d8108 18291+ /* non-write to dir */
1308ab2a 18292+ err = 0;
4a4d8108
AM
18293+ bend = au_ibend(inode);
18294+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
18295+ h_inode = au_h_iptr(inode, bindex);
18296+ if (h_inode) {
18297+ err = au_busy_or_stale();
18298+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
18299+ break;
18300+
18301+ br = au_sbr(sb, bindex);
79b8bda9 18302+ err = h_permission(h_inode, mask, &br->br_path,
4a4d8108
AM
18303+ br->br_perm);
18304+ }
18305+ }
1308ab2a 18306+
4f0767ce 18307+out:
4a4d8108
AM
18308+ ii_read_unlock(inode);
18309+ si_read_unlock(sb);
1308ab2a 18310+ return err;
18311+}
18312+
4a4d8108 18313+/* ---------------------------------------------------------------------- */
1facf9fc 18314+
4a4d8108 18315+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
b4510431 18316+ unsigned int flags)
4a4d8108
AM
18317+{
18318+ struct dentry *ret, *parent;
b752ccd1 18319+ struct inode *inode;
4a4d8108 18320+ struct super_block *sb;
1716fcea 18321+ int err, npositive;
dece6358 18322+
4a4d8108 18323+ IMustLock(dir);
1308ab2a 18324+
537831f9
AM
18325+ /* todo: support rcu-walk? */
18326+ ret = ERR_PTR(-ECHILD);
18327+ if (flags & LOOKUP_RCU)
18328+ goto out;
18329+
18330+ ret = ERR_PTR(-ENAMETOOLONG);
18331+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
18332+ goto out;
18333+
4a4d8108 18334+ sb = dir->i_sb;
7f207e10
AM
18335+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18336+ ret = ERR_PTR(err);
18337+ if (unlikely(err))
18338+ goto out;
18339+
4a4d8108
AM
18340+ err = au_di_init(dentry);
18341+ ret = ERR_PTR(err);
18342+ if (unlikely(err))
7f207e10 18343+ goto out_si;
1308ab2a 18344+
9dbd164d 18345+ inode = NULL;
027c5e7a 18346+ npositive = 0; /* suppress a warning */
4a4d8108
AM
18347+ parent = dentry->d_parent; /* dir inode is locked */
18348+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
18349+ err = au_alive_dir(parent);
18350+ if (!err)
18351+ err = au_digen_test(parent, au_sigen(sb));
18352+ if (!err) {
18353+ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
537831f9 18354+ /*type*/0);
027c5e7a
AM
18355+ err = npositive;
18356+ }
4a4d8108 18357+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
18358+ ret = ERR_PTR(err);
18359+ if (unlikely(err < 0))
18360+ goto out_unlock;
1308ab2a 18361+
4a4d8108 18362+ if (npositive) {
b752ccd1 18363+ inode = au_new_inode(dentry, /*must_new*/0);
c1595e42
JR
18364+ if (IS_ERR(inode)) {
18365+ ret = (void *)inode;
18366+ inode = NULL;
18367+ goto out_unlock;
18368+ }
9dbd164d 18369+ }
4a4d8108 18370+
c1595e42
JR
18371+ if (inode)
18372+ atomic_inc(&inode->i_count);
4a4d8108 18373+ ret = d_splice_alias(inode, dentry);
537831f9
AM
18374+#if 0
18375+ if (unlikely(d_need_lookup(dentry))) {
18376+ spin_lock(&dentry->d_lock);
18377+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
18378+ spin_unlock(&dentry->d_lock);
18379+ } else
18380+#endif
c1595e42 18381+ if (inode) {
2000de60 18382+ if (!IS_ERR(ret)) {
c1595e42 18383+ iput(inode);
2000de60
JR
18384+ if (ret && ret != dentry)
18385+ ii_write_unlock(inode);
18386+ } else {
c1595e42
JR
18387+ ii_write_unlock(inode);
18388+ iput(inode);
18389+ inode = NULL;
18390+ }
7f207e10 18391+ }
1facf9fc 18392+
4f0767ce 18393+out_unlock:
4a4d8108 18394+ di_write_unlock(dentry);
2dfbb274 18395+ if (inode) {
1716fcea
AM
18396+ /* verbose coding for lock class name */
18397+ if (unlikely(S_ISLNK(inode->i_mode)))
18398+ au_rw_class(&au_di(dentry)->di_rwsem,
18399+ au_lc_key + AuLcSymlink_DIINFO);
18400+ else if (unlikely(S_ISDIR(inode->i_mode)))
18401+ au_rw_class(&au_di(dentry)->di_rwsem,
18402+ au_lc_key + AuLcDir_DIINFO);
18403+ else /* likely */
18404+ au_rw_class(&au_di(dentry)->di_rwsem,
18405+ au_lc_key + AuLcNonDir_DIINFO);
9dbd164d 18406+ }
7f207e10 18407+out_si:
4a4d8108 18408+ si_read_unlock(sb);
7f207e10 18409+out:
4a4d8108
AM
18410+ return ret;
18411+}
1facf9fc 18412+
4a4d8108 18413+/* ---------------------------------------------------------------------- */
1facf9fc 18414+
b912730e
AM
18415+struct aopen_node {
18416+ struct hlist_node hlist;
18417+ struct file *file, *h_file;
18418+};
18419+
18420+static int au_do_aopen(struct inode *inode, struct file *file)
18421+{
18422+ struct au_sphlhead *aopen;
18423+ struct aopen_node *node;
18424+ struct au_do_open_args args = {
18425+ .no_lock = 1,
18426+ .open = au_do_open_nondir
18427+ };
18428+
18429+ aopen = &au_sbi(inode->i_sb)->si_aopen;
18430+ spin_lock(&aopen->spin);
18431+ hlist_for_each_entry(node, &aopen->head, hlist)
18432+ if (node->file == file) {
18433+ args.h_file = node->h_file;
18434+ break;
18435+ }
18436+ spin_unlock(&aopen->spin);
18437+ /* AuDebugOn(!args.h_file); */
18438+
18439+ return au_do_open(file, &args);
18440+}
18441+
18442+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
18443+ struct file *file, unsigned int open_flag,
18444+ umode_t create_mode, int *opened)
18445+{
18446+ int err, h_opened = *opened;
18447+ struct dentry *parent;
18448+ struct dentry *d;
18449+ struct au_sphlhead *aopen;
18450+ struct vfsub_aopen_args args = {
18451+ .open_flag = open_flag,
18452+ .create_mode = create_mode,
18453+ .opened = &h_opened
18454+ };
18455+ struct aopen_node aopen_node = {
18456+ .file = file
18457+ };
18458+
18459+ IMustLock(dir);
18460+ AuDbg("open_flag 0x%x\n", open_flag);
18461+ AuDbgDentry(dentry);
18462+
18463+ err = 0;
18464+ if (!au_di(dentry)) {
18465+ d = aufs_lookup(dir, dentry, /*flags*/0);
18466+ if (IS_ERR(d)) {
18467+ err = PTR_ERR(d);
18468+ goto out;
18469+ } else if (d) {
18470+ /*
18471+ * obsoleted dentry found.
18472+ * another error will be returned later.
18473+ */
18474+ d_drop(d);
18475+ dput(d);
18476+ AuDbgDentry(d);
18477+ }
18478+ AuDbgDentry(dentry);
18479+ }
18480+
18481+ if (d_is_positive(dentry)
18482+ || d_unhashed(dentry)
18483+ || d_unlinked(dentry)
18484+ || !(open_flag & O_CREAT))
18485+ goto out_no_open;
18486+
18487+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
18488+ if (unlikely(err))
18489+ goto out;
18490+
18491+ parent = dentry->d_parent; /* dir is locked */
18492+ di_write_lock_parent(parent);
18493+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
18494+ if (unlikely(err))
18495+ goto out_unlock;
18496+
18497+ AuDbgDentry(dentry);
18498+ if (d_is_positive(dentry))
18499+ goto out_unlock;
18500+
18501+ args.file = get_empty_filp();
18502+ err = PTR_ERR(args.file);
18503+ if (IS_ERR(args.file))
18504+ goto out_unlock;
18505+
18506+ args.file->f_flags = file->f_flags;
18507+ err = au_aopen_or_create(dir, dentry, &args);
18508+ AuTraceErr(err);
18509+ AuDbgFile(args.file);
18510+ if (unlikely(err < 0)) {
18511+ if (h_opened & FILE_OPENED)
18512+ fput(args.file);
18513+ else
18514+ put_filp(args.file);
18515+ goto out_unlock;
18516+ }
18517+
18518+ /* some filesystems don't set FILE_CREATED even on success? */
18519+ *opened |= FILE_CREATED;
18520+ if (h_opened & FILE_OPENED)
18521+ aopen_node.h_file = args.file;
18522+ else {
18523+ put_filp(args.file);
18524+ args.file = NULL;
18525+ }
18526+ aopen = &au_sbi(dir->i_sb)->si_aopen;
18527+ au_sphl_add(&aopen_node.hlist, aopen);
18528+ err = finish_open(file, dentry, au_do_aopen, opened);
18529+ au_sphl_del(&aopen_node.hlist, aopen);
18530+ AuTraceErr(err);
18531+ AuDbgFile(file);
18532+ if (aopen_node.h_file)
18533+ fput(aopen_node.h_file);
18534+
18535+out_unlock:
18536+ di_write_unlock(parent);
18537+ aufs_read_unlock(dentry, AuLock_DW);
18538+ AuDbgDentry(dentry);
18539+ if (unlikely(err))
18540+ goto out;
18541+out_no_open:
18542+ if (!err && !(*opened & FILE_CREATED)) {
18543+ AuLabel(out_no_open);
18544+ dget(dentry);
18545+ err = finish_no_open(file, dentry);
18546+ }
18547+out:
18548+ AuDbg("%pd%s%s\n", dentry,
18549+ (*opened & FILE_CREATED) ? " created" : "",
18550+ (*opened & FILE_OPENED) ? " opened" : "");
18551+ AuTraceErr(err);
18552+ return err;
18553+}
18554+
18555+
18556+/* ---------------------------------------------------------------------- */
18557+
4a4d8108
AM
18558+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
18559+ const unsigned char add_entry, aufs_bindex_t bcpup,
18560+ aufs_bindex_t bstart)
18561+{
18562+ int err;
18563+ struct dentry *h_parent;
18564+ struct inode *h_dir;
1facf9fc 18565+
027c5e7a 18566+ if (add_entry)
5527c038 18567+ IMustLock(d_inode(parent));
027c5e7a 18568+ else
4a4d8108
AM
18569+ di_write_lock_parent(parent);
18570+
18571+ err = 0;
18572+ if (!au_h_dptr(parent, bcpup)) {
c2b27bf2
AM
18573+ if (bstart > bcpup)
18574+ err = au_cpup_dirs(dentry, bcpup);
18575+ else if (bstart < bcpup)
4a4d8108
AM
18576+ err = au_cpdown_dirs(dentry, bcpup);
18577+ else
c2b27bf2 18578+ BUG();
4a4d8108 18579+ }
38d290e6 18580+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
4a4d8108 18581+ h_parent = au_h_dptr(parent, bcpup);
5527c038 18582+ h_dir = d_inode(h_parent);
4a4d8108 18583+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
7e9cd9fe 18584+ err = au_lkup_neg(dentry, bcpup, /*wh*/0);
4a4d8108
AM
18585+ /* todo: no unlock here */
18586+ mutex_unlock(&h_dir->i_mutex);
027c5e7a
AM
18587+
18588+ AuDbg("bcpup %d\n", bcpup);
18589+ if (!err) {
5527c038 18590+ if (d_really_is_negative(dentry))
027c5e7a 18591+ au_set_h_dptr(dentry, bstart, NULL);
4a4d8108
AM
18592+ au_update_dbrange(dentry, /*do_put_zero*/0);
18593+ }
1308ab2a 18594+ }
1facf9fc 18595+
4a4d8108
AM
18596+ if (!add_entry)
18597+ di_write_unlock(parent);
18598+ if (!err)
18599+ err = bcpup; /* success */
1308ab2a 18600+
027c5e7a 18601+ AuTraceErr(err);
4a4d8108
AM
18602+ return err;
18603+}
1facf9fc 18604+
4a4d8108
AM
18605+/*
18606+ * decide the branch and the parent dir where we will create a new entry.
18607+ * returns new bindex or an error.
18608+ * copyup the parent dir if needed.
18609+ */
18610+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
18611+ struct au_wr_dir_args *args)
18612+{
18613+ int err;
392086de 18614+ unsigned int flags;
4a4d8108 18615+ aufs_bindex_t bcpup, bstart, src_bstart;
86dc4139
AM
18616+ const unsigned char add_entry
18617+ = au_ftest_wrdir(args->flags, ADD_ENTRY)
38d290e6 18618+ | au_ftest_wrdir(args->flags, TMPFILE);
4a4d8108
AM
18619+ struct super_block *sb;
18620+ struct dentry *parent;
18621+ struct au_sbinfo *sbinfo;
1facf9fc 18622+
4a4d8108
AM
18623+ sb = dentry->d_sb;
18624+ sbinfo = au_sbi(sb);
18625+ parent = dget_parent(dentry);
18626+ bstart = au_dbstart(dentry);
18627+ bcpup = bstart;
18628+ if (args->force_btgt < 0) {
18629+ if (src_dentry) {
18630+ src_bstart = au_dbstart(src_dentry);
18631+ if (src_bstart < bstart)
18632+ bcpup = src_bstart;
18633+ } else if (add_entry) {
392086de
AM
18634+ flags = 0;
18635+ if (au_ftest_wrdir(args->flags, ISDIR))
18636+ au_fset_wbr(flags, DIR);
18637+ err = AuWbrCreate(sbinfo, dentry, flags);
4a4d8108
AM
18638+ bcpup = err;
18639+ }
1facf9fc 18640+
5527c038 18641+ if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
4a4d8108
AM
18642+ if (add_entry)
18643+ err = AuWbrCopyup(sbinfo, dentry);
18644+ else {
18645+ if (!IS_ROOT(dentry)) {
18646+ di_read_lock_parent(parent, !AuLock_IR);
18647+ err = AuWbrCopyup(sbinfo, dentry);
18648+ di_read_unlock(parent, !AuLock_IR);
18649+ } else
18650+ err = AuWbrCopyup(sbinfo, dentry);
18651+ }
18652+ bcpup = err;
18653+ if (unlikely(err < 0))
18654+ goto out;
18655+ }
18656+ } else {
18657+ bcpup = args->force_btgt;
5527c038 18658+ AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
1308ab2a 18659+ }
027c5e7a 18660+
4a4d8108
AM
18661+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
18662+ err = bcpup;
18663+ if (bcpup == bstart)
18664+ goto out; /* success */
4a4d8108
AM
18665+
18666+ /* copyup the new parent into the branch we process */
18667+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
027c5e7a 18668+ if (err >= 0) {
5527c038 18669+ if (d_really_is_negative(dentry)) {
027c5e7a
AM
18670+ au_set_h_dptr(dentry, bstart, NULL);
18671+ au_set_dbstart(dentry, bcpup);
18672+ au_set_dbend(dentry, bcpup);
18673+ }
38d290e6
JR
18674+ AuDebugOn(add_entry
18675+ && !au_ftest_wrdir(args->flags, TMPFILE)
18676+ && !au_h_dptr(dentry, bcpup));
027c5e7a 18677+ }
86dc4139
AM
18678+
18679+out:
18680+ dput(parent);
18681+ return err;
18682+}
18683+
18684+/* ---------------------------------------------------------------------- */
18685+
18686+void au_pin_hdir_unlock(struct au_pin *p)
18687+{
18688+ if (p->hdir)
18689+ au_hn_imtx_unlock(p->hdir);
18690+}
18691+
c1595e42 18692+int au_pin_hdir_lock(struct au_pin *p)
86dc4139
AM
18693+{
18694+ int err;
18695+
18696+ err = 0;
18697+ if (!p->hdir)
18698+ goto out;
18699+
18700+ /* even if an error happens later, keep this lock */
18701+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
18702+
18703+ err = -EBUSY;
5527c038 18704+ if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
86dc4139
AM
18705+ goto out;
18706+
18707+ err = 0;
18708+ if (p->h_dentry)
18709+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
18710+ p->h_parent, p->br);
18711+
18712+out:
18713+ return err;
18714+}
18715+
18716+int au_pin_hdir_relock(struct au_pin *p)
18717+{
18718+ int err, i;
18719+ struct inode *h_i;
18720+ struct dentry *h_d[] = {
18721+ p->h_dentry,
18722+ p->h_parent
18723+ };
18724+
18725+ err = au_pin_hdir_lock(p);
18726+ if (unlikely(err))
18727+ goto out;
18728+
18729+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
18730+ if (!h_d[i])
18731+ continue;
5527c038
JR
18732+ if (d_is_positive(h_d[i])) {
18733+ h_i = d_inode(h_d[i]);
86dc4139 18734+ err = !h_i->i_nlink;
5527c038 18735+ }
86dc4139
AM
18736+ }
18737+
18738+out:
18739+ return err;
18740+}
18741+
18742+void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
18743+{
18744+#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
18745+ p->hdir->hi_inode->i_mutex.owner = task;
18746+#endif
18747+}
18748+
18749+void au_pin_hdir_acquire_nest(struct au_pin *p)
18750+{
18751+ if (p->hdir) {
18752+ mutex_acquire_nest(&p->hdir->hi_inode->i_mutex.dep_map,
18753+ p->lsc_hi, 0, NULL, _RET_IP_);
18754+ au_pin_hdir_set_owner(p, current);
18755+ }
dece6358 18756+}
1facf9fc 18757+
86dc4139
AM
18758+void au_pin_hdir_release(struct au_pin *p)
18759+{
18760+ if (p->hdir) {
18761+ au_pin_hdir_set_owner(p, p->task);
18762+ mutex_release(&p->hdir->hi_inode->i_mutex.dep_map, 1, _RET_IP_);
18763+ }
18764+}
1308ab2a 18765+
4a4d8108 18766+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 18767+{
4a4d8108
AM
18768+ if (pin && pin->parent)
18769+ return au_h_dptr(pin->parent, pin->bindex);
18770+ return NULL;
dece6358 18771+}
1facf9fc 18772+
4a4d8108 18773+void au_unpin(struct au_pin *p)
dece6358 18774+{
86dc4139
AM
18775+ if (p->hdir)
18776+ au_pin_hdir_unlock(p);
e49829fe 18777+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
b4510431 18778+ vfsub_mnt_drop_write(p->h_mnt);
4a4d8108
AM
18779+ if (!p->hdir)
18780+ return;
1facf9fc 18781+
4a4d8108
AM
18782+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18783+ di_read_unlock(p->parent, AuLock_IR);
18784+ iput(p->hdir->hi_inode);
18785+ dput(p->parent);
18786+ p->parent = NULL;
18787+ p->hdir = NULL;
18788+ p->h_mnt = NULL;
86dc4139 18789+ /* do not clear p->task */
4a4d8108 18790+}
1308ab2a 18791+
4a4d8108
AM
18792+int au_do_pin(struct au_pin *p)
18793+{
18794+ int err;
18795+ struct super_block *sb;
4a4d8108
AM
18796+ struct inode *h_dir;
18797+
18798+ err = 0;
18799+ sb = p->dentry->d_sb;
86dc4139 18800+ p->br = au_sbr(sb, p->bindex);
4a4d8108
AM
18801+ if (IS_ROOT(p->dentry)) {
18802+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 18803+ p->h_mnt = au_br_mnt(p->br);
b4510431 18804+ err = vfsub_mnt_want_write(p->h_mnt);
4a4d8108
AM
18805+ if (unlikely(err)) {
18806+ au_fclr_pin(p->flags, MNT_WRITE);
18807+ goto out_err;
18808+ }
18809+ }
dece6358 18810+ goto out;
1facf9fc 18811+ }
18812+
86dc4139 18813+ p->h_dentry = NULL;
4a4d8108 18814+ if (p->bindex <= au_dbend(p->dentry))
86dc4139 18815+ p->h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 18816+
4a4d8108
AM
18817+ p->parent = dget_parent(p->dentry);
18818+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18819+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 18820+
4a4d8108 18821+ h_dir = NULL;
86dc4139 18822+ p->h_parent = au_h_dptr(p->parent, p->bindex);
5527c038 18823+ p->hdir = au_hi(d_inode(p->parent), p->bindex);
4a4d8108
AM
18824+ if (p->hdir)
18825+ h_dir = p->hdir->hi_inode;
dece6358 18826+
b752ccd1
AM
18827+ /*
18828+ * udba case, or
18829+ * if DI_LOCKED is not set, then p->parent may be different
18830+ * and h_parent can be NULL.
18831+ */
86dc4139 18832+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
e49829fe 18833+ err = -EBUSY;
4a4d8108
AM
18834+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18835+ di_read_unlock(p->parent, AuLock_IR);
18836+ dput(p->parent);
18837+ p->parent = NULL;
18838+ goto out_err;
18839+ }
1308ab2a 18840+
4a4d8108 18841+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 18842+ p->h_mnt = au_br_mnt(p->br);
b4510431 18843+ err = vfsub_mnt_want_write(p->h_mnt);
dece6358 18844+ if (unlikely(err)) {
4a4d8108 18845+ au_fclr_pin(p->flags, MNT_WRITE);
86dc4139
AM
18846+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18847+ di_read_unlock(p->parent, AuLock_IR);
18848+ dput(p->parent);
18849+ p->parent = NULL;
18850+ goto out_err;
dece6358
AM
18851+ }
18852+ }
4a4d8108 18853+
86dc4139
AM
18854+ au_igrab(h_dir);
18855+ err = au_pin_hdir_lock(p);
18856+ if (!err)
18857+ goto out; /* success */
18858+
076b876e
AM
18859+ au_unpin(p);
18860+
4f0767ce 18861+out_err:
4a4d8108
AM
18862+ pr_err("err %d\n", err);
18863+ err = au_busy_or_stale();
4f0767ce 18864+out:
1facf9fc 18865+ return err;
18866+}
18867+
4a4d8108
AM
18868+void au_pin_init(struct au_pin *p, struct dentry *dentry,
18869+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
18870+ unsigned int udba, unsigned char flags)
18871+{
18872+ p->dentry = dentry;
18873+ p->udba = udba;
18874+ p->lsc_di = lsc_di;
18875+ p->lsc_hi = lsc_hi;
18876+ p->flags = flags;
18877+ p->bindex = bindex;
18878+
18879+ p->parent = NULL;
18880+ p->hdir = NULL;
18881+ p->h_mnt = NULL;
86dc4139
AM
18882+
18883+ p->h_dentry = NULL;
18884+ p->h_parent = NULL;
18885+ p->br = NULL;
18886+ p->task = current;
4a4d8108
AM
18887+}
18888+
18889+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
18890+ unsigned int udba, unsigned char flags)
18891+{
18892+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
18893+ udba, flags);
18894+ return au_do_pin(pin);
18895+}
18896+
dece6358
AM
18897+/* ---------------------------------------------------------------------- */
18898+
1308ab2a 18899+/*
4a4d8108
AM
18900+ * ->setattr() and ->getattr() are called in various cases.
18901+ * chmod, stat: dentry is revalidated.
18902+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
18903+ * unhashed.
18904+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 18905+ */
027c5e7a 18906+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
c1595e42 18907+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 18908+{
4a4d8108 18909+ int err;
4a4d8108 18910+ struct dentry *parent;
1facf9fc 18911+
1308ab2a 18912+ err = 0;
027c5e7a 18913+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
18914+ parent = dget_parent(dentry);
18915+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 18916+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
18917+ di_read_unlock(parent, AuLock_IR);
18918+ dput(parent);
dece6358 18919+ }
1facf9fc 18920+
4a4d8108 18921+ AuTraceErr(err);
1308ab2a 18922+ return err;
18923+}
dece6358 18924+
c1595e42
JR
18925+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
18926+ struct au_icpup_args *a)
1308ab2a 18927+{
18928+ int err;
4a4d8108 18929+ loff_t sz;
e49829fe 18930+ aufs_bindex_t bstart, ibstart;
4a4d8108
AM
18931+ struct dentry *hi_wh, *parent;
18932+ struct inode *inode;
4a4d8108
AM
18933+ struct au_wr_dir_args wr_dir_args = {
18934+ .force_btgt = -1,
18935+ .flags = 0
18936+ };
18937+
2000de60 18938+ if (d_is_dir(dentry))
4a4d8108
AM
18939+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
18940+ /* plink or hi_wh() case */
2000de60 18941+ bstart = au_dbstart(dentry);
5527c038 18942+ inode = d_inode(dentry);
e49829fe 18943+ ibstart = au_ibstart(inode);
027c5e7a 18944+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
e49829fe 18945+ wr_dir_args.force_btgt = ibstart;
4a4d8108
AM
18946+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
18947+ if (unlikely(err < 0))
18948+ goto out;
18949+ a->btgt = err;
18950+ if (err != bstart)
18951+ au_fset_icpup(a->flags, DID_CPUP);
18952+
18953+ err = 0;
18954+ a->pin_flags = AuPin_MNT_WRITE;
18955+ parent = NULL;
18956+ if (!IS_ROOT(dentry)) {
18957+ au_fset_pin(a->pin_flags, DI_LOCKED);
18958+ parent = dget_parent(dentry);
18959+ di_write_lock_parent(parent);
18960+ }
18961+
18962+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
18963+ if (unlikely(err))
18964+ goto out_parent;
18965+
18966+ a->h_path.dentry = au_h_dptr(dentry, bstart);
4a4d8108 18967+ sz = -1;
5527c038 18968+ a->h_inode = d_inode(a->h_path.dentry);
c1595e42
JR
18969+ if (ia && (ia->ia_valid & ATTR_SIZE)) {
18970+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
18971+ if (ia->ia_size < i_size_read(a->h_inode))
18972+ sz = ia->ia_size;
18973+ mutex_unlock(&a->h_inode->i_mutex);
18974+ }
4a4d8108 18975+
4a4d8108 18976+ hi_wh = NULL;
027c5e7a 18977+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
18978+ hi_wh = au_hi_wh(inode, a->btgt);
18979+ if (!hi_wh) {
c2b27bf2
AM
18980+ struct au_cp_generic cpg = {
18981+ .dentry = dentry,
18982+ .bdst = a->btgt,
18983+ .bsrc = -1,
18984+ .len = sz,
18985+ .pin = &a->pin
18986+ };
18987+ err = au_sio_cpup_wh(&cpg, /*file*/NULL);
4a4d8108
AM
18988+ if (unlikely(err))
18989+ goto out_unlock;
18990+ hi_wh = au_hi_wh(inode, a->btgt);
18991+ /* todo: revalidate hi_wh? */
18992+ }
18993+ }
18994+
18995+ if (parent) {
18996+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
18997+ di_downgrade_lock(parent, AuLock_IR);
18998+ dput(parent);
18999+ parent = NULL;
19000+ }
19001+ if (!au_ftest_icpup(a->flags, DID_CPUP))
19002+ goto out; /* success */
19003+
19004+ if (!d_unhashed(dentry)) {
c2b27bf2
AM
19005+ struct au_cp_generic cpg = {
19006+ .dentry = dentry,
19007+ .bdst = a->btgt,
19008+ .bsrc = bstart,
19009+ .len = sz,
19010+ .pin = &a->pin,
19011+ .flags = AuCpup_DTIME | AuCpup_HOPEN
19012+ };
19013+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
19014+ if (!err)
19015+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19016+ } else if (!hi_wh)
19017+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19018+ else
19019+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 19020+
4f0767ce 19021+out_unlock:
5527c038 19022+ a->h_inode = d_inode(a->h_path.dentry);
86dc4139 19023+ if (!err)
dece6358 19024+ goto out; /* success */
4a4d8108 19025+ au_unpin(&a->pin);
4f0767ce 19026+out_parent:
4a4d8108
AM
19027+ if (parent) {
19028+ di_write_unlock(parent);
19029+ dput(parent);
19030+ }
4f0767ce 19031+out:
86dc4139
AM
19032+ if (!err)
19033+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
1facf9fc 19034+ return err;
19035+}
19036+
4a4d8108 19037+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 19038+{
4a4d8108 19039+ int err;
523b37e3 19040+ struct inode *inode, *delegated;
4a4d8108
AM
19041+ struct super_block *sb;
19042+ struct file *file;
19043+ struct au_icpup_args *a;
1facf9fc 19044+
5527c038 19045+ inode = d_inode(dentry);
4a4d8108 19046+ IMustLock(inode);
dece6358 19047+
4a4d8108
AM
19048+ err = -ENOMEM;
19049+ a = kzalloc(sizeof(*a), GFP_NOFS);
19050+ if (unlikely(!a))
19051+ goto out;
1facf9fc 19052+
4a4d8108
AM
19053+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
19054+ ia->ia_valid &= ~ATTR_MODE;
dece6358 19055+
4a4d8108
AM
19056+ file = NULL;
19057+ sb = dentry->d_sb;
e49829fe
JR
19058+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19059+ if (unlikely(err))
19060+ goto out_kfree;
19061+
4a4d8108
AM
19062+ if (ia->ia_valid & ATTR_FILE) {
19063+ /* currently ftruncate(2) only */
7e9cd9fe 19064+ AuDebugOn(!d_is_reg(dentry));
4a4d8108
AM
19065+ file = ia->ia_file;
19066+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
19067+ if (unlikely(err))
19068+ goto out_si;
19069+ ia->ia_file = au_hf_top(file);
19070+ a->udba = AuOpt_UDBA_NONE;
19071+ } else {
19072+ /* fchmod() doesn't pass ia_file */
19073+ a->udba = au_opt_udba(sb);
027c5e7a
AM
19074+ di_write_lock_child(dentry);
19075+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
19076+ if (d_unhashed(dentry))
19077+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
19078+ if (a->udba != AuOpt_UDBA_NONE) {
19079+ AuDebugOn(IS_ROOT(dentry));
19080+ err = au_reval_for_attr(dentry, au_sigen(sb));
19081+ if (unlikely(err))
19082+ goto out_dentry;
19083+ }
dece6358 19084+ }
dece6358 19085+
4a4d8108
AM
19086+ err = au_pin_and_icpup(dentry, ia, a);
19087+ if (unlikely(err < 0))
19088+ goto out_dentry;
19089+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
19090+ ia->ia_file = NULL;
19091+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 19092+ }
dece6358 19093+
4a4d8108
AM
19094+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
19095+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
19096+ == (ATTR_MODE | ATTR_CTIME)) {
7eafdf33 19097+ err = security_path_chmod(&a->h_path, ia->ia_mode);
4a4d8108
AM
19098+ if (unlikely(err))
19099+ goto out_unlock;
19100+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
19101+ && (ia->ia_valid & ATTR_CTIME)) {
86dc4139 19102+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
4a4d8108
AM
19103+ if (unlikely(err))
19104+ goto out_unlock;
19105+ }
dece6358 19106+
4a4d8108
AM
19107+ if (ia->ia_valid & ATTR_SIZE) {
19108+ struct file *f;
1308ab2a 19109+
953406b4 19110+ if (ia->ia_size < i_size_read(inode))
4a4d8108 19111+ /* unmap only */
953406b4 19112+ truncate_setsize(inode, ia->ia_size);
1308ab2a 19113+
4a4d8108
AM
19114+ f = NULL;
19115+ if (ia->ia_valid & ATTR_FILE)
19116+ f = ia->ia_file;
19117+ mutex_unlock(&a->h_inode->i_mutex);
19118+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
19119+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
523b37e3
AM
19120+ } else {
19121+ delegated = NULL;
19122+ while (1) {
19123+ err = vfsub_notify_change(&a->h_path, ia, &delegated);
19124+ if (delegated) {
19125+ err = break_deleg_wait(&delegated);
19126+ if (!err)
19127+ continue;
19128+ }
19129+ break;
19130+ }
19131+ }
4a4d8108
AM
19132+ if (!err)
19133+ au_cpup_attr_changeable(inode);
1308ab2a 19134+
4f0767ce 19135+out_unlock:
4a4d8108
AM
19136+ mutex_unlock(&a->h_inode->i_mutex);
19137+ au_unpin(&a->pin);
027c5e7a
AM
19138+ if (unlikely(err))
19139+ au_update_dbstart(dentry);
4f0767ce 19140+out_dentry:
4a4d8108
AM
19141+ di_write_unlock(dentry);
19142+ if (file) {
19143+ fi_write_unlock(file);
19144+ ia->ia_file = file;
19145+ ia->ia_valid |= ATTR_FILE;
19146+ }
4f0767ce 19147+out_si:
4a4d8108 19148+ si_read_unlock(sb);
e49829fe 19149+out_kfree:
4a4d8108 19150+ kfree(a);
4f0767ce 19151+out:
4a4d8108
AM
19152+ AuTraceErr(err);
19153+ return err;
1facf9fc 19154+}
19155+
c1595e42
JR
19156+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
19157+static int au_h_path_to_set_attr(struct dentry *dentry,
19158+ struct au_icpup_args *a, struct path *h_path)
19159+{
19160+ int err;
19161+ struct super_block *sb;
19162+
19163+ sb = dentry->d_sb;
19164+ a->udba = au_opt_udba(sb);
19165+ /* no d_unlinked(), to set UDBA_NONE for root */
19166+ if (d_unhashed(dentry))
19167+ a->udba = AuOpt_UDBA_NONE;
19168+ if (a->udba != AuOpt_UDBA_NONE) {
19169+ AuDebugOn(IS_ROOT(dentry));
19170+ err = au_reval_for_attr(dentry, au_sigen(sb));
19171+ if (unlikely(err))
19172+ goto out;
19173+ }
19174+ err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
19175+ if (unlikely(err < 0))
19176+ goto out;
19177+
19178+ h_path->dentry = a->h_path.dentry;
19179+ h_path->mnt = au_sbr_mnt(sb, a->btgt);
19180+
19181+out:
19182+ return err;
19183+}
19184+
19185+ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg)
19186+{
19187+ int err;
19188+ struct path h_path;
19189+ struct super_block *sb;
19190+ struct au_icpup_args *a;
19191+ struct inode *inode, *h_inode;
19192+
5527c038 19193+ inode = d_inode(dentry);
c1595e42
JR
19194+ IMustLock(inode);
19195+
19196+ err = -ENOMEM;
19197+ a = kzalloc(sizeof(*a), GFP_NOFS);
19198+ if (unlikely(!a))
19199+ goto out;
19200+
19201+ sb = dentry->d_sb;
19202+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19203+ if (unlikely(err))
19204+ goto out_kfree;
19205+
19206+ h_path.dentry = NULL; /* silence gcc */
19207+ di_write_lock_child(dentry);
19208+ err = au_h_path_to_set_attr(dentry, a, &h_path);
19209+ if (unlikely(err))
19210+ goto out_di;
19211+
19212+ mutex_unlock(&a->h_inode->i_mutex);
19213+ switch (arg->type) {
19214+ case AU_XATTR_SET:
19215+ err = vfsub_setxattr(h_path.dentry,
19216+ arg->u.set.name, arg->u.set.value,
19217+ arg->u.set.size, arg->u.set.flags);
19218+ break;
19219+ case AU_XATTR_REMOVE:
19220+ err = vfsub_removexattr(h_path.dentry, arg->u.remove.name);
19221+ break;
19222+ case AU_ACL_SET:
19223+ err = -EOPNOTSUPP;
5527c038 19224+ h_inode = d_inode(h_path.dentry);
c1595e42
JR
19225+ if (h_inode->i_op->set_acl)
19226+ err = h_inode->i_op->set_acl(h_inode,
19227+ arg->u.acl_set.acl,
19228+ arg->u.acl_set.type);
19229+ break;
19230+ }
19231+ if (!err)
19232+ au_cpup_attr_timesizes(inode);
19233+
19234+ au_unpin(&a->pin);
19235+ if (unlikely(err))
19236+ au_update_dbstart(dentry);
19237+
19238+out_di:
19239+ di_write_unlock(dentry);
19240+ si_read_unlock(sb);
19241+out_kfree:
19242+ kfree(a);
19243+out:
19244+ AuTraceErr(err);
19245+ return err;
19246+}
19247+#endif
19248+
4a4d8108
AM
19249+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
19250+ unsigned int nlink)
1facf9fc 19251+{
9dbd164d
AM
19252+ unsigned int n;
19253+
4a4d8108 19254+ inode->i_mode = st->mode;
86dc4139
AM
19255+ /* don't i_[ug]id_write() here */
19256+ inode->i_uid = st->uid;
19257+ inode->i_gid = st->gid;
4a4d8108
AM
19258+ inode->i_atime = st->atime;
19259+ inode->i_mtime = st->mtime;
19260+ inode->i_ctime = st->ctime;
1facf9fc 19261+
4a4d8108
AM
19262+ au_cpup_attr_nlink(inode, /*force*/0);
19263+ if (S_ISDIR(inode->i_mode)) {
9dbd164d
AM
19264+ n = inode->i_nlink;
19265+ n -= nlink;
19266+ n += st->nlink;
f6b6e03d 19267+ smp_mb(); /* for i_nlink */
7eafdf33 19268+ /* 0 can happen */
92d182d2 19269+ set_nlink(inode, n);
4a4d8108 19270+ }
1facf9fc 19271+
4a4d8108
AM
19272+ spin_lock(&inode->i_lock);
19273+ inode->i_blocks = st->blocks;
19274+ i_size_write(inode, st->size);
19275+ spin_unlock(&inode->i_lock);
1facf9fc 19276+}
19277+
c1595e42
JR
19278+/*
19279+ * common routine for aufs_getattr() and aufs_getxattr().
19280+ * returns zero or negative (an error).
19281+ * @dentry will be read-locked in success.
19282+ */
19283+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
1facf9fc 19284+{
4a4d8108 19285+ int err;
076b876e 19286+ unsigned int mnt_flags, sigen;
c1595e42 19287+ unsigned char udba_none;
4a4d8108 19288+ aufs_bindex_t bindex;
4a4d8108
AM
19289+ struct super_block *sb, *h_sb;
19290+ struct inode *inode;
1facf9fc 19291+
c1595e42
JR
19292+ h_path->mnt = NULL;
19293+ h_path->dentry = NULL;
19294+
19295+ err = 0;
4a4d8108 19296+ sb = dentry->d_sb;
4a4d8108
AM
19297+ mnt_flags = au_mntflags(sb);
19298+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 19299+
4a4d8108 19300+ /* support fstat(2) */
027c5e7a 19301+ if (!d_unlinked(dentry) && !udba_none) {
076b876e 19302+ sigen = au_sigen(sb);
027c5e7a
AM
19303+ err = au_digen_test(dentry, sigen);
19304+ if (!err) {
4a4d8108 19305+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a 19306+ err = au_dbrange_test(dentry);
c1595e42
JR
19307+ if (unlikely(err)) {
19308+ di_read_unlock(dentry, AuLock_IR);
19309+ goto out;
19310+ }
027c5e7a 19311+ } else {
4a4d8108
AM
19312+ AuDebugOn(IS_ROOT(dentry));
19313+ di_write_lock_child(dentry);
027c5e7a
AM
19314+ err = au_dbrange_test(dentry);
19315+ if (!err)
19316+ err = au_reval_for_attr(dentry, sigen);
c1595e42
JR
19317+ if (!err)
19318+ di_downgrade_lock(dentry, AuLock_IR);
19319+ else {
19320+ di_write_unlock(dentry);
19321+ goto out;
19322+ }
4a4d8108
AM
19323+ }
19324+ } else
19325+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 19326+
5527c038 19327+ inode = d_inode(dentry);
4a4d8108 19328+ bindex = au_ibstart(inode);
c1595e42
JR
19329+ h_path->mnt = au_sbr_mnt(sb, bindex);
19330+ h_sb = h_path->mnt->mnt_sb;
19331+ if (!force
19332+ && !au_test_fs_bad_iattr(h_sb)
19333+ && udba_none)
19334+ goto out; /* success */
1facf9fc 19335+
4a4d8108 19336+ if (au_dbstart(dentry) == bindex)
c1595e42 19337+ h_path->dentry = au_h_dptr(dentry, bindex);
4a4d8108 19338+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
c1595e42
JR
19339+ h_path->dentry = au_plink_lkup(inode, bindex);
19340+ if (IS_ERR(h_path->dentry))
19341+ /* pretending success */
19342+ h_path->dentry = NULL;
19343+ else
19344+ dput(h_path->dentry);
4a4d8108 19345+ }
c1595e42
JR
19346+
19347+out:
19348+ return err;
19349+}
19350+
19351+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
19352+ struct dentry *dentry, struct kstat *st)
19353+{
19354+ int err;
19355+ unsigned char positive;
19356+ struct path h_path;
19357+ struct inode *inode;
19358+ struct super_block *sb;
19359+
5527c038 19360+ inode = d_inode(dentry);
c1595e42
JR
19361+ sb = dentry->d_sb;
19362+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19363+ if (unlikely(err))
19364+ goto out;
19365+ err = au_h_path_getattr(dentry, /*force*/0, &h_path);
19366+ if (unlikely(err))
19367+ goto out_si;
c06a8ce3 19368+ if (unlikely(!h_path.dentry))
c1595e42 19369+ /* illegally overlapped or something */
4a4d8108
AM
19370+ goto out_fill; /* pretending success */
19371+
5527c038 19372+ positive = d_is_positive(h_path.dentry);
4a4d8108 19373+ if (positive)
c06a8ce3 19374+ err = vfs_getattr(&h_path, st);
4a4d8108
AM
19375+ if (!err) {
19376+ if (positive)
c06a8ce3 19377+ au_refresh_iattr(inode, st,
5527c038 19378+ d_inode(h_path.dentry)->i_nlink);
4a4d8108 19379+ goto out_fill; /* success */
1facf9fc 19380+ }
7f207e10 19381+ AuTraceErr(err);
c1595e42 19382+ goto out_di;
4a4d8108 19383+
4f0767ce 19384+out_fill:
4a4d8108 19385+ generic_fillattr(inode, st);
c1595e42 19386+out_di:
4a4d8108 19387+ di_read_unlock(dentry, AuLock_IR);
c1595e42 19388+out_si:
4a4d8108 19389+ si_read_unlock(sb);
7f207e10
AM
19390+out:
19391+ AuTraceErr(err);
4a4d8108 19392+ return err;
1facf9fc 19393+}
19394+
19395+/* ---------------------------------------------------------------------- */
19396+
c2c0f25c
AM
19397+/*
19398+ * Assumption:
19399+ * - the number of symlinks is not so many.
19400+ *
19401+ * Structure:
19402+ * - sbinfo (instead of iinfo) contains an hlist of struct au_symlink.
19403+ * If iinfo contained the hlist, then it would be rather large waste of memory
19404+ * I am afraid.
19405+ * - struct au_symlink contains the necessary info for h_inode follow_link() and
19406+ * put_link().
19407+ */
1facf9fc 19408+
c2c0f25c
AM
19409+struct au_symlink {
19410+ union {
19411+ struct hlist_node hlist;
19412+ struct rcu_head rcu;
19413+ };
1facf9fc 19414+
c2c0f25c
AM
19415+ struct inode *h_inode;
19416+ void *h_cookie;
19417+};
1facf9fc 19418+
c2c0f25c
AM
19419+static void au_symlink_add(struct super_block *sb, struct au_symlink *slink,
19420+ struct inode *h_inode, void *cookie)
19421+{
19422+ struct au_sbinfo *sbinfo;
1facf9fc 19423+
c2c0f25c
AM
19424+ ihold(h_inode);
19425+ slink->h_inode = h_inode;
19426+ slink->h_cookie = cookie;
19427+ sbinfo = au_sbi(sb);
19428+ au_sphl_add(&slink->hlist, &sbinfo->si_symlink);
4a4d8108 19429+}
1facf9fc 19430+
c2c0f25c 19431+static void au_symlink_del(struct super_block *sb, struct au_symlink *slink)
4a4d8108 19432+{
c2c0f25c 19433+ struct au_sbinfo *sbinfo;
1facf9fc 19434+
c2c0f25c
AM
19435+ /* do not iput() within rcu */
19436+ iput(slink->h_inode);
19437+ slink->h_inode = NULL;
19438+ sbinfo = au_sbi(sb);
19439+ au_sphl_del_rcu(&slink->hlist, &sbinfo->si_symlink);
19440+ kfree_rcu(slink, rcu);
4a4d8108 19441+}
1facf9fc 19442+
c2c0f25c 19443+static const char *aufs_follow_link(struct dentry *dentry, void **cookie)
4a4d8108 19444+{
c2c0f25c
AM
19445+ const char *ret;
19446+ struct inode *inode, *h_inode;
19447+ struct dentry *h_dentry;
19448+ struct au_symlink *slink;
4a4d8108 19449+ int err;
c2c0f25c 19450+ aufs_bindex_t bindex;
1facf9fc 19451+
79b8bda9 19452+ ret = NULL; /* suppress a warning */
027c5e7a
AM
19453+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
19454+ if (unlikely(err))
c2c0f25c 19455+ goto out;
027c5e7a
AM
19456+
19457+ err = au_d_hashed_positive(dentry);
c2c0f25c
AM
19458+ if (unlikely(err))
19459+ goto out_unlock;
19460+
19461+ err = -EINVAL;
19462+ inode = d_inode(dentry);
19463+ bindex = au_ibstart(inode);
19464+ h_inode = au_h_iptr(inode, bindex);
19465+ if (unlikely(!h_inode->i_op->follow_link))
19466+ goto out_unlock;
19467+
19468+ err = -ENOMEM;
19469+ slink = kmalloc(sizeof(*slink), GFP_NOFS);
19470+ if (unlikely(!slink))
19471+ goto out_unlock;
19472+
19473+ err = -EBUSY;
19474+ h_dentry = NULL;
19475+ if (au_dbstart(dentry) <= bindex) {
19476+ h_dentry = au_h_dptr(dentry, bindex);
19477+ if (h_dentry)
19478+ dget(h_dentry);
027c5e7a 19479+ }
c2c0f25c
AM
19480+ if (!h_dentry) {
19481+ h_dentry = d_find_any_alias(h_inode);
19482+ if (IS_ERR(h_dentry)) {
19483+ err = PTR_ERR(h_dentry);
19484+ goto out_free;
19485+ }
19486+ }
19487+ if (unlikely(!h_dentry))
19488+ goto out_free;
1facf9fc 19489+
c2c0f25c
AM
19490+ err = 0;
19491+ AuDbg("%pf\n", h_inode->i_op->follow_link);
19492+ AuDbgDentry(h_dentry);
19493+ ret = h_inode->i_op->follow_link(h_dentry, cookie);
19494+ dput(h_dentry);
19495+
19496+ if (!IS_ERR_OR_NULL(ret)) {
19497+ au_symlink_add(inode->i_sb, slink, h_inode, *cookie);
19498+ *cookie = slink;
19499+ AuDbg("slink %p\n", slink);
19500+ goto out_unlock; /* success */
1308ab2a 19501+ }
1facf9fc 19502+
c2c0f25c
AM
19503+out_free:
19504+ slink->h_inode = NULL;
19505+ kfree_rcu(slink, rcu);
19506+out_unlock:
19507+ aufs_read_unlock(dentry, AuLock_IR);
4f0767ce 19508+out:
c2c0f25c
AM
19509+ if (unlikely(err))
19510+ ret = ERR_PTR(err);
19511+ AuTraceErrPtr(ret);
19512+ return ret;
4a4d8108 19513+}
1facf9fc 19514+
c2c0f25c 19515+static void aufs_put_link(struct inode *inode, void *cookie)
4a4d8108 19516+{
c2c0f25c
AM
19517+ struct au_symlink *slink;
19518+ struct inode *h_inode;
537831f9 19519+
c2c0f25c
AM
19520+ slink = cookie;
19521+ AuDbg("slink %p\n", slink);
19522+ h_inode = slink->h_inode;
19523+ AuDbg("%pf\n", h_inode->i_op->put_link);
19524+ AuDbgInode(h_inode);
19525+ if (h_inode->i_op->put_link)
19526+ h_inode->i_op->put_link(h_inode, slink->h_cookie);
19527+ au_symlink_del(inode->i_sb, slink);
4a4d8108 19528+}
1facf9fc 19529+
4a4d8108 19530+/* ---------------------------------------------------------------------- */
1facf9fc 19531+
0c3ec466 19532+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
4a4d8108 19533+{
0c3ec466
AM
19534+ int err;
19535+ struct super_block *sb;
19536+ struct inode *h_inode;
19537+
19538+ sb = inode->i_sb;
19539+ /* mmap_sem might be acquired already, cf. aufs_mmap() */
19540+ lockdep_off();
19541+ si_read_lock(sb, AuLock_FLUSH);
19542+ ii_write_lock_child(inode);
19543+ lockdep_on();
19544+ h_inode = au_h_iptr(inode, au_ibstart(inode));
19545+ err = vfsub_update_time(h_inode, ts, flags);
19546+ lockdep_off();
38d290e6
JR
19547+ if (!err)
19548+ au_cpup_attr_timesizes(inode);
0c3ec466
AM
19549+ ii_write_unlock(inode);
19550+ si_read_unlock(sb);
19551+ lockdep_on();
38d290e6
JR
19552+
19553+ if (!err && (flags & S_VERSION))
19554+ inode_inc_iversion(inode);
19555+
0c3ec466 19556+ return err;
4a4d8108 19557+}
1facf9fc 19558+
4a4d8108 19559+/* ---------------------------------------------------------------------- */
1308ab2a 19560+
b95c5147
AM
19561+/* no getattr version will be set by module.c:aufs_init() */
19562+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
19563+ aufs_iop[] = {
19564+ [AuIop_SYMLINK] = {
19565+ .permission = aufs_permission,
c1595e42 19566+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19567+ .get_acl = aufs_get_acl,
19568+ .set_acl = aufs_set_acl, /* unsupport for symlink? */
c1595e42
JR
19569+#endif
19570+
b95c5147
AM
19571+ .setattr = aufs_setattr,
19572+ .getattr = aufs_getattr,
0c3ec466 19573+
c1595e42 19574+#ifdef CONFIG_AUFS_XATTR
b95c5147
AM
19575+ .setxattr = aufs_setxattr,
19576+ .getxattr = aufs_getxattr,
19577+ .listxattr = aufs_listxattr,
19578+ .removexattr = aufs_removexattr,
c1595e42
JR
19579+#endif
19580+
b95c5147
AM
19581+ .readlink = generic_readlink,
19582+ .follow_link = aufs_follow_link,
19583+ .put_link = aufs_put_link,
0c3ec466 19584+
b95c5147
AM
19585+ /* .update_time = aufs_update_time */
19586+ },
19587+ [AuIop_DIR] = {
19588+ .create = aufs_create,
19589+ .lookup = aufs_lookup,
19590+ .link = aufs_link,
19591+ .unlink = aufs_unlink,
19592+ .symlink = aufs_symlink,
19593+ .mkdir = aufs_mkdir,
19594+ .rmdir = aufs_rmdir,
19595+ .mknod = aufs_mknod,
19596+ .rename = aufs_rename,
19597+
19598+ .permission = aufs_permission,
c1595e42 19599+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19600+ .get_acl = aufs_get_acl,
19601+ .set_acl = aufs_set_acl,
c1595e42
JR
19602+#endif
19603+
b95c5147
AM
19604+ .setattr = aufs_setattr,
19605+ .getattr = aufs_getattr,
0c3ec466 19606+
c1595e42 19607+#ifdef CONFIG_AUFS_XATTR
b95c5147
AM
19608+ .setxattr = aufs_setxattr,
19609+ .getxattr = aufs_getxattr,
19610+ .listxattr = aufs_listxattr,
19611+ .removexattr = aufs_removexattr,
c1595e42
JR
19612+#endif
19613+
b95c5147
AM
19614+ .update_time = aufs_update_time,
19615+ .atomic_open = aufs_atomic_open,
19616+ .tmpfile = aufs_tmpfile
19617+ },
19618+ [AuIop_OTHER] = {
19619+ .permission = aufs_permission,
c1595e42 19620+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19621+ .get_acl = aufs_get_acl,
19622+ .set_acl = aufs_set_acl,
c1595e42
JR
19623+#endif
19624+
b95c5147
AM
19625+ .setattr = aufs_setattr,
19626+ .getattr = aufs_getattr,
0c3ec466 19627+
c1595e42 19628+#ifdef CONFIG_AUFS_XATTR
b95c5147
AM
19629+ .setxattr = aufs_setxattr,
19630+ .getxattr = aufs_getxattr,
19631+ .listxattr = aufs_listxattr,
19632+ .removexattr = aufs_removexattr,
c1595e42
JR
19633+#endif
19634+
b95c5147
AM
19635+ .update_time = aufs_update_time
19636+ }
4a4d8108 19637+};
7f207e10
AM
19638diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
19639--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 19640+++ linux/fs/aufs/i_op_del.c 2015-09-24 10:47:58.254719746 +0200
5527c038 19641@@ -0,0 +1,510 @@
1facf9fc 19642+/*
2000de60 19643+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 19644+ *
19645+ * This program, aufs is free software; you can redistribute it and/or modify
19646+ * it under the terms of the GNU General Public License as published by
19647+ * the Free Software Foundation; either version 2 of the License, or
19648+ * (at your option) any later version.
dece6358
AM
19649+ *
19650+ * This program is distributed in the hope that it will be useful,
19651+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19652+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19653+ * GNU General Public License for more details.
19654+ *
19655+ * You should have received a copy of the GNU General Public License
523b37e3 19656+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 19657+ */
19658+
19659+/*
4a4d8108 19660+ * inode operations (del entry)
1308ab2a 19661+ */
dece6358 19662+
1308ab2a 19663+#include "aufs.h"
dece6358 19664+
4a4d8108
AM
19665+/*
19666+ * decide if a new whiteout for @dentry is necessary or not.
19667+ * when it is necessary, prepare the parent dir for the upper branch whose
19668+ * branch index is @bcpup for creation. the actual creation of the whiteout will
19669+ * be done by caller.
19670+ * return value:
19671+ * 0: wh is unnecessary
19672+ * plus: wh is necessary
19673+ * minus: error
19674+ */
19675+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 19676+{
4a4d8108
AM
19677+ int need_wh, err;
19678+ aufs_bindex_t bstart;
19679+ struct super_block *sb;
dece6358 19680+
4a4d8108
AM
19681+ sb = dentry->d_sb;
19682+ bstart = au_dbstart(dentry);
19683+ if (*bcpup < 0) {
19684+ *bcpup = bstart;
5527c038 19685+ if (au_test_ro(sb, bstart, d_inode(dentry))) {
4a4d8108
AM
19686+ err = AuWbrCopyup(au_sbi(sb), dentry);
19687+ *bcpup = err;
19688+ if (unlikely(err < 0))
19689+ goto out;
19690+ }
19691+ } else
19692+ AuDebugOn(bstart < *bcpup
5527c038 19693+ || au_test_ro(sb, *bcpup, d_inode(dentry)));
4a4d8108 19694+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
1308ab2a 19695+
4a4d8108
AM
19696+ if (*bcpup != bstart) {
19697+ err = au_cpup_dirs(dentry, *bcpup);
19698+ if (unlikely(err))
19699+ goto out;
19700+ need_wh = 1;
19701+ } else {
027c5e7a 19702+ struct au_dinfo *dinfo, *tmp;
4a4d8108 19703+
027c5e7a
AM
19704+ need_wh = -ENOMEM;
19705+ dinfo = au_di(dentry);
19706+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
19707+ if (tmp) {
19708+ au_di_cp(tmp, dinfo);
19709+ au_di_swap(tmp, dinfo);
19710+ /* returns the number of positive dentries */
537831f9 19711+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0);
027c5e7a
AM
19712+ au_di_swap(tmp, dinfo);
19713+ au_rw_write_unlock(&tmp->di_rwsem);
19714+ au_di_free(tmp);
4a4d8108
AM
19715+ }
19716+ }
19717+ AuDbg("need_wh %d\n", need_wh);
19718+ err = need_wh;
19719+
4f0767ce 19720+out:
4a4d8108 19721+ return err;
1facf9fc 19722+}
19723+
4a4d8108
AM
19724+/*
19725+ * simple tests for the del-entry operations.
19726+ * following the checks in vfs, plus the parent-child relationship.
19727+ */
19728+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
19729+ struct dentry *h_parent, int isdir)
1facf9fc 19730+{
4a4d8108
AM
19731+ int err;
19732+ umode_t h_mode;
19733+ struct dentry *h_dentry, *h_latest;
1308ab2a 19734+ struct inode *h_inode;
1facf9fc 19735+
4a4d8108 19736+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 19737+ if (d_really_is_positive(dentry)) {
4a4d8108 19738+ err = -ENOENT;
5527c038
JR
19739+ if (unlikely(d_is_negative(h_dentry)))
19740+ goto out;
19741+ h_inode = d_inode(h_dentry);
19742+ if (unlikely(!h_inode->i_nlink))
4a4d8108 19743+ goto out;
1facf9fc 19744+
4a4d8108
AM
19745+ h_mode = h_inode->i_mode;
19746+ if (!isdir) {
19747+ err = -EISDIR;
19748+ if (unlikely(S_ISDIR(h_mode)))
19749+ goto out;
19750+ } else if (unlikely(!S_ISDIR(h_mode))) {
19751+ err = -ENOTDIR;
19752+ goto out;
19753+ }
19754+ } else {
19755+ /* rename(2) case */
19756+ err = -EIO;
5527c038 19757+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
19758+ goto out;
19759+ }
1facf9fc 19760+
4a4d8108
AM
19761+ err = -ENOENT;
19762+ /* expected parent dir is locked */
19763+ if (unlikely(h_parent != h_dentry->d_parent))
19764+ goto out;
19765+ err = 0;
19766+
19767+ /*
19768+ * rmdir a dir may break the consistency on some filesystem.
19769+ * let's try heavy test.
19770+ */
19771+ err = -EACCES;
076b876e 19772+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
5527c038 19773+ && au_test_h_perm(d_inode(h_parent),
076b876e 19774+ MAY_EXEC | MAY_WRITE)))
4a4d8108
AM
19775+ goto out;
19776+
076b876e 19777+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
4a4d8108
AM
19778+ err = -EIO;
19779+ if (IS_ERR(h_latest))
19780+ goto out;
19781+ if (h_latest == h_dentry)
19782+ err = 0;
19783+ dput(h_latest);
19784+
4f0767ce 19785+out:
4a4d8108 19786+ return err;
1308ab2a 19787+}
1facf9fc 19788+
4a4d8108
AM
19789+/*
19790+ * decide the branch where we operate for @dentry. the branch index will be set
19791+ * @rbcpup. after deciding it, 'pin' it and store the timestamps of the parent
19792+ * dir for reverting.
19793+ * when a new whiteout is necessary, create it.
19794+ */
19795+static struct dentry*
19796+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
19797+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 19798+{
4a4d8108
AM
19799+ struct dentry *wh_dentry;
19800+ struct super_block *sb;
19801+ struct path h_path;
19802+ int err, need_wh;
19803+ unsigned int udba;
19804+ aufs_bindex_t bcpup;
dece6358 19805+
4a4d8108
AM
19806+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
19807+ wh_dentry = ERR_PTR(need_wh);
19808+ if (unlikely(need_wh < 0))
19809+ goto out;
19810+
19811+ sb = dentry->d_sb;
19812+ udba = au_opt_udba(sb);
19813+ bcpup = *rbcpup;
19814+ err = au_pin(pin, dentry, bcpup, udba,
19815+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
19816+ wh_dentry = ERR_PTR(err);
19817+ if (unlikely(err))
19818+ goto out;
19819+
19820+ h_path.dentry = au_pinned_h_parent(pin);
19821+ if (udba != AuOpt_UDBA_NONE
19822+ && au_dbstart(dentry) == bcpup) {
19823+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
19824+ wh_dentry = ERR_PTR(err);
19825+ if (unlikely(err))
19826+ goto out_unpin;
19827+ }
19828+
19829+ h_path.mnt = au_sbr_mnt(sb, bcpup);
19830+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
19831+ wh_dentry = NULL;
19832+ if (!need_wh)
19833+ goto out; /* success, no need to create whiteout */
19834+
19835+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
19836+ if (IS_ERR(wh_dentry))
19837+ goto out_unpin;
19838+
19839+ /* returns with the parent is locked and wh_dentry is dget-ed */
19840+ goto out; /* success */
19841+
4f0767ce 19842+out_unpin:
4a4d8108 19843+ au_unpin(pin);
4f0767ce 19844+out:
4a4d8108 19845+ return wh_dentry;
1facf9fc 19846+}
19847+
4a4d8108
AM
19848+/*
19849+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
19850+ * in order to be revertible and save time for removing many child whiteouts
19851+ * under the dir.
19852+ * returns 1 when there are too many child whiteouts and caller should remove
19853+ * them asynchronously. returns 0 when the number of children is small enough to
19854+ * remove now or the branch fs is a remote fs.
19855+ * otherwise return an error.
19856+ */
19857+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
19858+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 19859+{
4a4d8108
AM
19860+ int rmdir_later, err, dirwh;
19861+ struct dentry *h_dentry;
19862+ struct super_block *sb;
5527c038 19863+ struct inode *inode;
4a4d8108
AM
19864+
19865+ sb = dentry->d_sb;
19866+ SiMustAnyLock(sb);
19867+ h_dentry = au_h_dptr(dentry, bindex);
19868+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
19869+ if (unlikely(err))
19870+ goto out;
19871+
19872+ /* stop monitoring */
5527c038
JR
19873+ inode = d_inode(dentry);
19874+ au_hn_free(au_hi(inode, bindex));
4a4d8108
AM
19875+
19876+ if (!au_test_fs_remote(h_dentry->d_sb)) {
19877+ dirwh = au_sbi(sb)->si_dirwh;
19878+ rmdir_later = (dirwh <= 1);
19879+ if (!rmdir_later)
19880+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
19881+ dirwh);
19882+ if (rmdir_later)
19883+ return rmdir_later;
19884+ }
1facf9fc 19885+
4a4d8108
AM
19886+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
19887+ if (unlikely(err)) {
523b37e3
AM
19888+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
19889+ h_dentry, bindex, err);
4a4d8108
AM
19890+ err = 0;
19891+ }
dece6358 19892+
4f0767ce 19893+out:
4a4d8108
AM
19894+ AuTraceErr(err);
19895+ return err;
19896+}
1308ab2a 19897+
4a4d8108
AM
19898+/*
19899+ * final procedure for deleting an entry.
19900+ * maintain dentry and iattr.
19901+ */
19902+static void epilog(struct inode *dir, struct dentry *dentry,
19903+ aufs_bindex_t bindex)
19904+{
19905+ struct inode *inode;
1308ab2a 19906+
5527c038 19907+ inode = d_inode(dentry);
4a4d8108
AM
19908+ d_drop(dentry);
19909+ inode->i_ctime = dir->i_ctime;
1308ab2a 19910+
b912730e 19911+ au_dir_ts(dir, bindex);
4a4d8108 19912+ dir->i_version++;
1facf9fc 19913+}
19914+
4a4d8108
AM
19915+/*
19916+ * when an error happened, remove the created whiteout and revert everything.
19917+ */
7f207e10
AM
19918+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
19919+ aufs_bindex_t bwh, struct dentry *wh_dentry,
19920+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 19921+{
4a4d8108
AM
19922+ int rerr;
19923+ struct path h_path = {
19924+ .dentry = wh_dentry,
7f207e10 19925+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 19926+ };
dece6358 19927+
7f207e10 19928+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
19929+ if (!rerr) {
19930+ au_set_dbwh(dentry, bwh);
19931+ au_dtime_revert(dt);
19932+ return 0;
19933+ }
dece6358 19934+
523b37e3 19935+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
4a4d8108 19936+ return -EIO;
1facf9fc 19937+}
19938+
4a4d8108 19939+/* ---------------------------------------------------------------------- */
1facf9fc 19940+
4a4d8108 19941+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 19942+{
4a4d8108
AM
19943+ int err;
19944+ aufs_bindex_t bwh, bindex, bstart;
523b37e3 19945+ struct inode *inode, *h_dir, *delegated;
4a4d8108 19946+ struct dentry *parent, *wh_dentry;
c2b27bf2
AM
19947+ /* to reduce stack size */
19948+ struct {
19949+ struct au_dtime dt;
19950+ struct au_pin pin;
19951+ struct path h_path;
19952+ } *a;
1facf9fc 19953+
4a4d8108 19954+ IMustLock(dir);
027c5e7a 19955+
c2b27bf2
AM
19956+ err = -ENOMEM;
19957+ a = kmalloc(sizeof(*a), GFP_NOFS);
19958+ if (unlikely(!a))
19959+ goto out;
19960+
027c5e7a
AM
19961+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
19962+ if (unlikely(err))
c2b27bf2 19963+ goto out_free;
027c5e7a
AM
19964+ err = au_d_hashed_positive(dentry);
19965+ if (unlikely(err))
19966+ goto out_unlock;
5527c038 19967+ inode = d_inode(dentry);
4a4d8108 19968+ IMustLock(inode);
027c5e7a 19969+ err = -EISDIR;
2000de60 19970+ if (unlikely(d_is_dir(dentry)))
027c5e7a 19971+ goto out_unlock; /* possible? */
1facf9fc 19972+
4a4d8108
AM
19973+ bstart = au_dbstart(dentry);
19974+ bwh = au_dbwh(dentry);
19975+ bindex = -1;
027c5e7a
AM
19976+ parent = dentry->d_parent; /* dir inode is locked */
19977+ di_write_lock_parent(parent);
c2b27bf2
AM
19978+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
19979+ &a->pin);
4a4d8108
AM
19980+ err = PTR_ERR(wh_dentry);
19981+ if (IS_ERR(wh_dentry))
027c5e7a 19982+ goto out_parent;
1facf9fc 19983+
c2b27bf2
AM
19984+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
19985+ a->h_path.dentry = au_h_dptr(dentry, bstart);
19986+ dget(a->h_path.dentry);
4a4d8108 19987+ if (bindex == bstart) {
c2b27bf2 19988+ h_dir = au_pinned_h_dir(&a->pin);
523b37e3
AM
19989+ delegated = NULL;
19990+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
19991+ if (unlikely(err == -EWOULDBLOCK)) {
19992+ pr_warn("cannot retry for NFSv4 delegation"
19993+ " for an internal unlink\n");
19994+ iput(delegated);
19995+ }
4a4d8108
AM
19996+ } else {
19997+ /* dir inode is locked */
5527c038 19998+ h_dir = d_inode(wh_dentry->d_parent);
4a4d8108
AM
19999+ IMustLock(h_dir);
20000+ err = 0;
20001+ }
dece6358 20002+
4a4d8108 20003+ if (!err) {
7f207e10 20004+ vfsub_drop_nlink(inode);
4a4d8108
AM
20005+ epilog(dir, dentry, bindex);
20006+
20007+ /* update target timestamps */
20008+ if (bindex == bstart) {
c2b27bf2
AM
20009+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
20010+ /*ignore*/
5527c038 20011+ inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
4a4d8108
AM
20012+ } else
20013+ /* todo: this timestamp may be reverted later */
20014+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 20015+ goto out_unpin; /* success */
1facf9fc 20016+ }
20017+
4a4d8108
AM
20018+ /* revert */
20019+ if (wh_dentry) {
20020+ int rerr;
20021+
c2b27bf2
AM
20022+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20023+ &a->dt);
4a4d8108
AM
20024+ if (rerr)
20025+ err = rerr;
dece6358 20026+ }
1facf9fc 20027+
027c5e7a 20028+out_unpin:
c2b27bf2 20029+ au_unpin(&a->pin);
4a4d8108 20030+ dput(wh_dentry);
c2b27bf2 20031+ dput(a->h_path.dentry);
027c5e7a 20032+out_parent:
4a4d8108 20033+ di_write_unlock(parent);
027c5e7a 20034+out_unlock:
4a4d8108 20035+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
20036+out_free:
20037+ kfree(a);
027c5e7a 20038+out:
4a4d8108 20039+ return err;
dece6358
AM
20040+}
20041+
4a4d8108 20042+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 20043+{
4a4d8108
AM
20044+ int err, rmdir_later;
20045+ aufs_bindex_t bwh, bindex, bstart;
4a4d8108
AM
20046+ struct inode *inode;
20047+ struct dentry *parent, *wh_dentry, *h_dentry;
20048+ struct au_whtmp_rmdir *args;
c2b27bf2
AM
20049+ /* to reduce stack size */
20050+ struct {
20051+ struct au_dtime dt;
20052+ struct au_pin pin;
20053+ } *a;
1facf9fc 20054+
4a4d8108 20055+ IMustLock(dir);
027c5e7a 20056+
c2b27bf2
AM
20057+ err = -ENOMEM;
20058+ a = kmalloc(sizeof(*a), GFP_NOFS);
20059+ if (unlikely(!a))
20060+ goto out;
20061+
027c5e7a
AM
20062+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
20063+ if (unlikely(err))
c2b27bf2 20064+ goto out_free;
53392da6
AM
20065+ err = au_alive_dir(dentry);
20066+ if (unlikely(err))
027c5e7a 20067+ goto out_unlock;
5527c038 20068+ inode = d_inode(dentry);
4a4d8108 20069+ IMustLock(inode);
027c5e7a 20070+ err = -ENOTDIR;
2000de60 20071+ if (unlikely(!d_is_dir(dentry)))
027c5e7a 20072+ goto out_unlock; /* possible? */
dece6358 20073+
4a4d8108
AM
20074+ err = -ENOMEM;
20075+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
20076+ if (unlikely(!args))
20077+ goto out_unlock;
dece6358 20078+
4a4d8108
AM
20079+ parent = dentry->d_parent; /* dir inode is locked */
20080+ di_write_lock_parent(parent);
20081+ err = au_test_empty(dentry, &args->whlist);
20082+ if (unlikely(err))
027c5e7a 20083+ goto out_parent;
1facf9fc 20084+
4a4d8108
AM
20085+ bstart = au_dbstart(dentry);
20086+ bwh = au_dbwh(dentry);
20087+ bindex = -1;
c2b27bf2
AM
20088+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
20089+ &a->pin);
4a4d8108
AM
20090+ err = PTR_ERR(wh_dentry);
20091+ if (IS_ERR(wh_dentry))
027c5e7a 20092+ goto out_parent;
1facf9fc 20093+
4a4d8108
AM
20094+ h_dentry = au_h_dptr(dentry, bstart);
20095+ dget(h_dentry);
20096+ rmdir_later = 0;
20097+ if (bindex == bstart) {
20098+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
20099+ if (err > 0) {
20100+ rmdir_later = err;
20101+ err = 0;
20102+ }
20103+ } else {
20104+ /* stop monitoring */
20105+ au_hn_free(au_hi(inode, bstart));
20106+
20107+ /* dir inode is locked */
5527c038 20108+ IMustLock(d_inode(wh_dentry->d_parent));
1facf9fc 20109+ err = 0;
20110+ }
20111+
4a4d8108 20112+ if (!err) {
027c5e7a 20113+ vfsub_dead_dir(inode);
4a4d8108
AM
20114+ au_set_dbdiropq(dentry, -1);
20115+ epilog(dir, dentry, bindex);
1308ab2a 20116+
4a4d8108
AM
20117+ if (rmdir_later) {
20118+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
20119+ args = NULL;
20120+ }
1308ab2a 20121+
4a4d8108 20122+ goto out_unpin; /* success */
1facf9fc 20123+ }
20124+
4a4d8108
AM
20125+ /* revert */
20126+ AuLabel(revert);
20127+ if (wh_dentry) {
20128+ int rerr;
1308ab2a 20129+
c2b27bf2
AM
20130+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20131+ &a->dt);
4a4d8108
AM
20132+ if (rerr)
20133+ err = rerr;
1facf9fc 20134+ }
20135+
4f0767ce 20136+out_unpin:
c2b27bf2 20137+ au_unpin(&a->pin);
4a4d8108
AM
20138+ dput(wh_dentry);
20139+ dput(h_dentry);
027c5e7a 20140+out_parent:
4a4d8108
AM
20141+ di_write_unlock(parent);
20142+ if (args)
20143+ au_whtmp_rmdir_free(args);
4f0767ce 20144+out_unlock:
4a4d8108 20145+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
20146+out_free:
20147+ kfree(a);
4f0767ce 20148+out:
4a4d8108
AM
20149+ AuTraceErr(err);
20150+ return err;
dece6358 20151+}
7f207e10
AM
20152diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
20153--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
b95c5147
AM
20154+++ linux/fs/aufs/i_op_ren.c 2015-12-10 17:59:16.836166410 +0100
20155@@ -0,0 +1,1015 @@
1facf9fc 20156+/*
2000de60 20157+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 20158+ *
20159+ * This program, aufs is free software; you can redistribute it and/or modify
20160+ * it under the terms of the GNU General Public License as published by
20161+ * the Free Software Foundation; either version 2 of the License, or
20162+ * (at your option) any later version.
dece6358
AM
20163+ *
20164+ * This program is distributed in the hope that it will be useful,
20165+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20166+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20167+ * GNU General Public License for more details.
20168+ *
20169+ * You should have received a copy of the GNU General Public License
523b37e3 20170+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 20171+ */
20172+
20173+/*
4a4d8108
AM
20174+ * inode operation (rename entry)
20175+ * todo: this is crazy monster
1facf9fc 20176+ */
20177+
20178+#include "aufs.h"
20179+
4a4d8108
AM
20180+enum { AuSRC, AuDST, AuSrcDst };
20181+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 20182+
4a4d8108
AM
20183+#define AuRen_ISDIR 1
20184+#define AuRen_ISSAMEDIR (1 << 1)
20185+#define AuRen_WHSRC (1 << 2)
20186+#define AuRen_WHDST (1 << 3)
20187+#define AuRen_MNT_WRITE (1 << 4)
20188+#define AuRen_DT_DSTDIR (1 << 5)
20189+#define AuRen_DIROPQ (1 << 6)
4a4d8108 20190+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
20191+#define au_fset_ren(flags, name) \
20192+ do { (flags) |= AuRen_##name; } while (0)
20193+#define au_fclr_ren(flags, name) \
20194+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 20195+
4a4d8108
AM
20196+struct au_ren_args {
20197+ struct {
20198+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
20199+ *wh_dentry;
20200+ struct inode *dir, *inode;
20201+ struct au_hinode *hdir;
20202+ struct au_dtime dt[AuParentChild];
20203+ aufs_bindex_t bstart;
20204+ } sd[AuSrcDst];
1facf9fc 20205+
4a4d8108
AM
20206+#define src_dentry sd[AuSRC].dentry
20207+#define src_dir sd[AuSRC].dir
20208+#define src_inode sd[AuSRC].inode
20209+#define src_h_dentry sd[AuSRC].h_dentry
20210+#define src_parent sd[AuSRC].parent
20211+#define src_h_parent sd[AuSRC].h_parent
20212+#define src_wh_dentry sd[AuSRC].wh_dentry
20213+#define src_hdir sd[AuSRC].hdir
20214+#define src_h_dir sd[AuSRC].hdir->hi_inode
20215+#define src_dt sd[AuSRC].dt
20216+#define src_bstart sd[AuSRC].bstart
1facf9fc 20217+
4a4d8108
AM
20218+#define dst_dentry sd[AuDST].dentry
20219+#define dst_dir sd[AuDST].dir
20220+#define dst_inode sd[AuDST].inode
20221+#define dst_h_dentry sd[AuDST].h_dentry
20222+#define dst_parent sd[AuDST].parent
20223+#define dst_h_parent sd[AuDST].h_parent
20224+#define dst_wh_dentry sd[AuDST].wh_dentry
20225+#define dst_hdir sd[AuDST].hdir
20226+#define dst_h_dir sd[AuDST].hdir->hi_inode
20227+#define dst_dt sd[AuDST].dt
20228+#define dst_bstart sd[AuDST].bstart
20229+
20230+ struct dentry *h_trap;
20231+ struct au_branch *br;
20232+ struct au_hinode *src_hinode;
20233+ struct path h_path;
20234+ struct au_nhash whlist;
027c5e7a 20235+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
1facf9fc 20236+
1308ab2a 20237+ unsigned int flags;
1facf9fc 20238+
4a4d8108
AM
20239+ struct au_whtmp_rmdir *thargs;
20240+ struct dentry *h_dst;
20241+};
1308ab2a 20242+
4a4d8108 20243+/* ---------------------------------------------------------------------- */
1308ab2a 20244+
4a4d8108
AM
20245+/*
20246+ * functions for reverting.
20247+ * when an error happened in a single rename systemcall, we should revert
79b8bda9 20248+ * everything as if nothing happened.
4a4d8108
AM
20249+ * we don't need to revert the copied-up/down the parent dir since they are
20250+ * harmless.
20251+ */
1facf9fc 20252+
4a4d8108
AM
20253+#define RevertFailure(fmt, ...) do { \
20254+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
20255+ ##__VA_ARGS__, err, rerr); \
20256+ err = -EIO; \
20257+} while (0)
1facf9fc 20258+
4a4d8108 20259+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
1facf9fc 20260+{
4a4d8108 20261+ int rerr;
1facf9fc 20262+
4a4d8108
AM
20263+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
20264+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
20265+ au_hn_imtx_unlock(a->src_hinode);
027c5e7a 20266+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
4a4d8108 20267+ if (rerr)
523b37e3 20268+ RevertFailure("remove diropq %pd", a->src_dentry);
4a4d8108 20269+}
1facf9fc 20270+
4a4d8108
AM
20271+static void au_ren_rev_rename(int err, struct au_ren_args *a)
20272+{
20273+ int rerr;
523b37e3 20274+ struct inode *delegated;
1facf9fc 20275+
b4510431
AM
20276+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
20277+ a->src_h_parent);
4a4d8108
AM
20278+ rerr = PTR_ERR(a->h_path.dentry);
20279+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20280+ RevertFailure("lkup one %pd", a->src_dentry);
4a4d8108 20281+ return;
1facf9fc 20282+ }
20283+
523b37e3 20284+ delegated = NULL;
4a4d8108
AM
20285+ rerr = vfsub_rename(a->dst_h_dir,
20286+ au_h_dptr(a->src_dentry, a->btgt),
523b37e3
AM
20287+ a->src_h_dir, &a->h_path, &delegated);
20288+ if (unlikely(rerr == -EWOULDBLOCK)) {
20289+ pr_warn("cannot retry for NFSv4 delegation"
20290+ " for an internal rename\n");
20291+ iput(delegated);
20292+ }
4a4d8108
AM
20293+ d_drop(a->h_path.dentry);
20294+ dput(a->h_path.dentry);
20295+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
20296+ if (rerr)
523b37e3 20297+ RevertFailure("rename %pd", a->src_dentry);
1facf9fc 20298+}
20299+
4a4d8108 20300+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 20301+{
4a4d8108 20302+ int rerr;
523b37e3 20303+ struct inode *delegated;
dece6358 20304+
b4510431
AM
20305+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
20306+ a->dst_h_parent);
4a4d8108
AM
20307+ rerr = PTR_ERR(a->h_path.dentry);
20308+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20309+ RevertFailure("lkup one %pd", a->dst_dentry);
4a4d8108
AM
20310+ return;
20311+ }
5527c038 20312+ if (d_is_positive(a->h_path.dentry)) {
4a4d8108
AM
20313+ d_drop(a->h_path.dentry);
20314+ dput(a->h_path.dentry);
20315+ return;
dece6358
AM
20316+ }
20317+
523b37e3
AM
20318+ delegated = NULL;
20319+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
20320+ &delegated);
20321+ if (unlikely(rerr == -EWOULDBLOCK)) {
20322+ pr_warn("cannot retry for NFSv4 delegation"
20323+ " for an internal rename\n");
20324+ iput(delegated);
20325+ }
4a4d8108
AM
20326+ d_drop(a->h_path.dentry);
20327+ dput(a->h_path.dentry);
20328+ if (!rerr)
20329+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
20330+ else
523b37e3 20331+ RevertFailure("rename %pd", a->h_dst);
4a4d8108 20332+}
1308ab2a 20333+
4a4d8108
AM
20334+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
20335+{
20336+ int rerr;
1308ab2a 20337+
4a4d8108
AM
20338+ a->h_path.dentry = a->src_wh_dentry;
20339+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 20340+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108 20341+ if (rerr)
523b37e3 20342+ RevertFailure("unlink %pd", a->src_wh_dentry);
4a4d8108 20343+}
4a4d8108 20344+#undef RevertFailure
1facf9fc 20345+
1308ab2a 20346+/* ---------------------------------------------------------------------- */
20347+
4a4d8108
AM
20348+/*
20349+ * when we have to copyup the renaming entry, do it with the rename-target name
20350+ * in order to minimize the cost (the later actual rename is unnecessary).
20351+ * otherwise rename it on the target branch.
20352+ */
20353+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 20354+{
dece6358 20355+ int err;
4a4d8108 20356+ struct dentry *d;
523b37e3 20357+ struct inode *delegated;
1facf9fc 20358+
4a4d8108
AM
20359+ d = a->src_dentry;
20360+ if (au_dbstart(d) == a->btgt) {
20361+ a->h_path.dentry = a->dst_h_dentry;
20362+ if (au_ftest_ren(a->flags, DIROPQ)
20363+ && au_dbdiropq(d) == a->btgt)
20364+ au_fclr_ren(a->flags, DIROPQ);
20365+ AuDebugOn(au_dbstart(d) != a->btgt);
523b37e3 20366+ delegated = NULL;
4a4d8108 20367+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
523b37e3
AM
20368+ a->dst_h_dir, &a->h_path, &delegated);
20369+ if (unlikely(err == -EWOULDBLOCK)) {
20370+ pr_warn("cannot retry for NFSv4 delegation"
20371+ " for an internal rename\n");
20372+ iput(delegated);
20373+ }
c2b27bf2 20374+ } else
86dc4139 20375+ BUG();
1308ab2a 20376+
027c5e7a
AM
20377+ if (!err && a->h_dst)
20378+ /* it will be set to dinfo later */
20379+ dget(a->h_dst);
1facf9fc 20380+
dece6358
AM
20381+ return err;
20382+}
1facf9fc 20383+
4a4d8108
AM
20384+/* cf. aufs_rmdir() */
20385+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 20386+{
4a4d8108
AM
20387+ int err;
20388+ struct inode *dir;
1facf9fc 20389+
4a4d8108
AM
20390+ dir = a->dst_dir;
20391+ SiMustAnyLock(dir->i_sb);
20392+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
20393+ au_sbi(dir->i_sb)->si_dirwh)
20394+ || au_test_fs_remote(a->h_dst->d_sb)) {
20395+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
20396+ if (unlikely(err))
523b37e3
AM
20397+ pr_warn("failed removing whtmp dir %pd (%d), "
20398+ "ignored.\n", a->h_dst, err);
4a4d8108
AM
20399+ } else {
20400+ au_nhash_wh_free(&a->thargs->whlist);
20401+ a->thargs->whlist = a->whlist;
20402+ a->whlist.nh_num = 0;
20403+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
20404+ dput(a->h_dst);
20405+ a->thargs = NULL;
20406+ }
20407+
20408+ return 0;
1308ab2a 20409+}
1facf9fc 20410+
4a4d8108
AM
20411+/* make it 'opaque' dir. */
20412+static int au_ren_diropq(struct au_ren_args *a)
20413+{
20414+ int err;
20415+ struct dentry *diropq;
1facf9fc 20416+
4a4d8108 20417+ err = 0;
027c5e7a 20418+ a->src_bdiropq = au_dbdiropq(a->src_dentry);
4a4d8108
AM
20419+ a->src_hinode = au_hi(a->src_inode, a->btgt);
20420+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
20421+ diropq = au_diropq_create(a->src_dentry, a->btgt);
20422+ au_hn_imtx_unlock(a->src_hinode);
20423+ if (IS_ERR(diropq))
20424+ err = PTR_ERR(diropq);
076b876e
AM
20425+ else
20426+ dput(diropq);
1facf9fc 20427+
4a4d8108
AM
20428+ return err;
20429+}
1facf9fc 20430+
4a4d8108
AM
20431+static int do_rename(struct au_ren_args *a)
20432+{
20433+ int err;
20434+ struct dentry *d, *h_d;
1facf9fc 20435+
4a4d8108
AM
20436+ /* prepare workqueue args for asynchronous rmdir */
20437+ h_d = a->dst_h_dentry;
5527c038 20438+ if (au_ftest_ren(a->flags, ISDIR) && d_is_positive(h_d)) {
4a4d8108
AM
20439+ err = -ENOMEM;
20440+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
20441+ if (unlikely(!a->thargs))
20442+ goto out;
20443+ a->h_dst = dget(h_d);
20444+ }
1facf9fc 20445+
4a4d8108
AM
20446+ /* create whiteout for src_dentry */
20447+ if (au_ftest_ren(a->flags, WHSRC)) {
027c5e7a
AM
20448+ a->src_bwh = au_dbwh(a->src_dentry);
20449+ AuDebugOn(a->src_bwh >= 0);
4a4d8108
AM
20450+ a->src_wh_dentry
20451+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
20452+ err = PTR_ERR(a->src_wh_dentry);
20453+ if (IS_ERR(a->src_wh_dentry))
20454+ goto out_thargs;
20455+ }
1facf9fc 20456+
4a4d8108
AM
20457+ /* lookup whiteout for dentry */
20458+ if (au_ftest_ren(a->flags, WHDST)) {
20459+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
20460+ a->br);
20461+ err = PTR_ERR(h_d);
20462+ if (IS_ERR(h_d))
20463+ goto out_whsrc;
5527c038 20464+ if (d_is_negative(h_d))
4a4d8108
AM
20465+ dput(h_d);
20466+ else
20467+ a->dst_wh_dentry = h_d;
20468+ }
1facf9fc 20469+
4a4d8108
AM
20470+ /* rename dentry to tmpwh */
20471+ if (a->thargs) {
20472+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
20473+ if (unlikely(err))
20474+ goto out_whdst;
dece6358 20475+
4a4d8108
AM
20476+ d = a->dst_dentry;
20477+ au_set_h_dptr(d, a->btgt, NULL);
86dc4139 20478+ err = au_lkup_neg(d, a->btgt, /*wh*/0);
4a4d8108
AM
20479+ if (unlikely(err))
20480+ goto out_whtmp;
20481+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
20482+ }
1facf9fc 20483+
5527c038 20484+ BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_bstart != a->btgt);
1facf9fc 20485+
4a4d8108
AM
20486+ /* rename by vfs_rename or cpup */
20487+ d = a->dst_dentry;
20488+ if (au_ftest_ren(a->flags, ISDIR)
20489+ && (a->dst_wh_dentry
20490+ || au_dbdiropq(d) == a->btgt
20491+ /* hide the lower to keep xino */
20492+ || a->btgt < au_dbend(d)
20493+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
20494+ au_fset_ren(a->flags, DIROPQ);
20495+ err = au_ren_or_cpup(a);
20496+ if (unlikely(err))
20497+ /* leave the copied-up one */
20498+ goto out_whtmp;
1308ab2a 20499+
4a4d8108
AM
20500+ /* make dir opaque */
20501+ if (au_ftest_ren(a->flags, DIROPQ)) {
20502+ err = au_ren_diropq(a);
20503+ if (unlikely(err))
20504+ goto out_rename;
20505+ }
1308ab2a 20506+
4a4d8108
AM
20507+ /* update target timestamps */
20508+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
20509+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
20510+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
5527c038 20511+ a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
1facf9fc 20512+
4a4d8108
AM
20513+ /* remove whiteout for dentry */
20514+ if (a->dst_wh_dentry) {
20515+ a->h_path.dentry = a->dst_wh_dentry;
20516+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
20517+ a->dst_dentry);
20518+ if (unlikely(err))
20519+ goto out_diropq;
20520+ }
1facf9fc 20521+
4a4d8108
AM
20522+ /* remove whtmp */
20523+ if (a->thargs)
20524+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 20525+
076b876e 20526+ au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
4a4d8108
AM
20527+ err = 0;
20528+ goto out_success;
20529+
4f0767ce 20530+out_diropq:
4a4d8108
AM
20531+ if (au_ftest_ren(a->flags, DIROPQ))
20532+ au_ren_rev_diropq(err, a);
4f0767ce 20533+out_rename:
7e9cd9fe 20534+ au_ren_rev_rename(err, a);
027c5e7a 20535+ dput(a->h_dst);
4f0767ce 20536+out_whtmp:
4a4d8108
AM
20537+ if (a->thargs)
20538+ au_ren_rev_whtmp(err, a);
4f0767ce 20539+out_whdst:
4a4d8108
AM
20540+ dput(a->dst_wh_dentry);
20541+ a->dst_wh_dentry = NULL;
4f0767ce 20542+out_whsrc:
4a4d8108
AM
20543+ if (a->src_wh_dentry)
20544+ au_ren_rev_whsrc(err, a);
4f0767ce 20545+out_success:
4a4d8108
AM
20546+ dput(a->src_wh_dentry);
20547+ dput(a->dst_wh_dentry);
4f0767ce 20548+out_thargs:
4a4d8108
AM
20549+ if (a->thargs) {
20550+ dput(a->h_dst);
20551+ au_whtmp_rmdir_free(a->thargs);
20552+ a->thargs = NULL;
20553+ }
4f0767ce 20554+out:
4a4d8108 20555+ return err;
dece6358 20556+}
1facf9fc 20557+
1308ab2a 20558+/* ---------------------------------------------------------------------- */
1facf9fc 20559+
4a4d8108
AM
20560+/*
20561+ * test if @dentry dir can be rename destination or not.
20562+ * success means, it is a logically empty dir.
20563+ */
20564+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
1308ab2a 20565+{
4a4d8108 20566+ return au_test_empty(dentry, whlist);
1308ab2a 20567+}
1facf9fc 20568+
4a4d8108
AM
20569+/*
20570+ * test if @dentry dir can be rename source or not.
20571+ * if it can, return 0 and @children is filled.
20572+ * success means,
20573+ * - it is a logically empty dir.
20574+ * - or, it exists on writable branch and has no children including whiteouts
20575+ * on the lower branch.
20576+ */
20577+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
20578+{
20579+ int err;
20580+ unsigned int rdhash;
20581+ aufs_bindex_t bstart;
1facf9fc 20582+
4a4d8108
AM
20583+ bstart = au_dbstart(dentry);
20584+ if (bstart != btgt) {
20585+ struct au_nhash whlist;
dece6358 20586+
4a4d8108
AM
20587+ SiMustAnyLock(dentry->d_sb);
20588+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
20589+ if (!rdhash)
20590+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
20591+ dentry));
20592+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
20593+ if (unlikely(err))
20594+ goto out;
20595+ err = au_test_empty(dentry, &whlist);
20596+ au_nhash_wh_free(&whlist);
20597+ goto out;
20598+ }
dece6358 20599+
4a4d8108
AM
20600+ if (bstart == au_dbtaildir(dentry))
20601+ return 0; /* success */
dece6358 20602+
4a4d8108 20603+ err = au_test_empty_lower(dentry);
1facf9fc 20604+
4f0767ce 20605+out:
4a4d8108
AM
20606+ if (err == -ENOTEMPTY) {
20607+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
20608+ " is not supported\n");
20609+ err = -EXDEV;
20610+ }
20611+ return err;
20612+}
1308ab2a 20613+
4a4d8108
AM
20614+/* side effect: sets whlist and h_dentry */
20615+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 20616+{
4a4d8108
AM
20617+ int err;
20618+ unsigned int rdhash;
20619+ struct dentry *d;
1facf9fc 20620+
4a4d8108
AM
20621+ d = a->dst_dentry;
20622+ SiMustAnyLock(d->d_sb);
1facf9fc 20623+
4a4d8108
AM
20624+ err = 0;
20625+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
20626+ rdhash = au_sbi(d->d_sb)->si_rdhash;
20627+ if (!rdhash)
20628+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
20629+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
20630+ if (unlikely(err))
20631+ goto out;
1308ab2a 20632+
4a4d8108
AM
20633+ au_set_dbstart(d, a->dst_bstart);
20634+ err = may_rename_dstdir(d, &a->whlist);
20635+ au_set_dbstart(d, a->btgt);
20636+ }
20637+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
20638+ if (unlikely(err))
20639+ goto out;
20640+
20641+ d = a->src_dentry;
20642+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
20643+ if (au_ftest_ren(a->flags, ISDIR)) {
20644+ err = may_rename_srcdir(d, a->btgt);
20645+ if (unlikely(err)) {
20646+ au_nhash_wh_free(&a->whlist);
20647+ a->whlist.nh_num = 0;
20648+ }
20649+ }
4f0767ce 20650+out:
4a4d8108 20651+ return err;
1facf9fc 20652+}
20653+
4a4d8108 20654+/* ---------------------------------------------------------------------- */
1facf9fc 20655+
4a4d8108
AM
20656+/*
20657+ * simple tests for rename.
20658+ * following the checks in vfs, plus the parent-child relationship.
20659+ */
20660+static int au_may_ren(struct au_ren_args *a)
20661+{
20662+ int err, isdir;
20663+ struct inode *h_inode;
1facf9fc 20664+
4a4d8108
AM
20665+ if (a->src_bstart == a->btgt) {
20666+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
20667+ au_ftest_ren(a->flags, ISDIR));
20668+ if (unlikely(err))
20669+ goto out;
20670+ err = -EINVAL;
20671+ if (unlikely(a->src_h_dentry == a->h_trap))
20672+ goto out;
20673+ }
1facf9fc 20674+
4a4d8108
AM
20675+ err = 0;
20676+ if (a->dst_bstart != a->btgt)
20677+ goto out;
1facf9fc 20678+
027c5e7a
AM
20679+ err = -ENOTEMPTY;
20680+ if (unlikely(a->dst_h_dentry == a->h_trap))
20681+ goto out;
20682+
4a4d8108 20683+ err = -EIO;
4a4d8108 20684+ isdir = !!au_ftest_ren(a->flags, ISDIR);
5527c038
JR
20685+ if (d_really_is_negative(a->dst_dentry)) {
20686+ if (d_is_negative(a->dst_h_dentry))
20687+ err = au_may_add(a->dst_dentry, a->btgt,
20688+ a->dst_h_parent, isdir);
4a4d8108 20689+ } else {
5527c038 20690+ if (unlikely(d_is_negative(a->dst_h_dentry)))
4a4d8108 20691+ goto out;
5527c038
JR
20692+ h_inode = d_inode(a->dst_h_dentry);
20693+ if (h_inode->i_nlink)
20694+ err = au_may_del(a->dst_dentry, a->btgt,
20695+ a->dst_h_parent, isdir);
4a4d8108 20696+ }
1facf9fc 20697+
4f0767ce 20698+out:
4a4d8108
AM
20699+ if (unlikely(err == -ENOENT || err == -EEXIST))
20700+ err = -EIO;
20701+ AuTraceErr(err);
20702+ return err;
20703+}
1facf9fc 20704+
1308ab2a 20705+/* ---------------------------------------------------------------------- */
1facf9fc 20706+
4a4d8108
AM
20707+/*
20708+ * locking order
20709+ * (VFS)
20710+ * - src_dir and dir by lock_rename()
20711+ * - inode if exitsts
20712+ * (aufs)
20713+ * - lock all
20714+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
20715+ * + si_read_lock
20716+ * + di_write_lock2_child()
20717+ * + di_write_lock_child()
20718+ * + ii_write_lock_child()
20719+ * + di_write_lock_child2()
20720+ * + ii_write_lock_child2()
20721+ * + src_parent and parent
20722+ * + di_write_lock_parent()
20723+ * + ii_write_lock_parent()
20724+ * + di_write_lock_parent2()
20725+ * + ii_write_lock_parent2()
20726+ * + lower src_dir and dir by vfsub_lock_rename()
20727+ * + verify the every relationships between child and parent. if any
20728+ * of them failed, unlock all and return -EBUSY.
20729+ */
20730+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 20731+{
4a4d8108
AM
20732+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
20733+ a->dst_h_parent, a->dst_hdir);
86dc4139
AM
20734+ if (au_ftest_ren(a->flags, MNT_WRITE))
20735+ vfsub_mnt_drop_write(au_br_mnt(a->br));
1308ab2a 20736+}
20737+
4a4d8108 20738+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 20739+{
4a4d8108
AM
20740+ int err;
20741+ unsigned int udba;
1308ab2a 20742+
4a4d8108
AM
20743+ err = 0;
20744+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
20745+ a->src_hdir = au_hi(a->src_dir, a->btgt);
20746+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
20747+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
86dc4139
AM
20748+
20749+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
20750+ if (unlikely(err))
20751+ goto out;
20752+ au_fset_ren(a->flags, MNT_WRITE);
4a4d8108
AM
20753+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
20754+ a->dst_h_parent, a->dst_hdir);
20755+ udba = au_opt_udba(a->src_dentry->d_sb);
5527c038
JR
20756+ if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
20757+ || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
4a4d8108
AM
20758+ err = au_busy_or_stale();
20759+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
20760+ err = au_h_verify(a->src_h_dentry, udba,
5527c038 20761+ d_inode(a->src_h_parent), a->src_h_parent,
4a4d8108
AM
20762+ a->br);
20763+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
20764+ err = au_h_verify(a->dst_h_dentry, udba,
5527c038 20765+ d_inode(a->dst_h_parent), a->dst_h_parent,
4a4d8108 20766+ a->br);
86dc4139 20767+ if (!err)
4a4d8108 20768+ goto out; /* success */
4a4d8108
AM
20769+
20770+ err = au_busy_or_stale();
4a4d8108 20771+ au_ren_unlock(a);
86dc4139 20772+
4f0767ce 20773+out:
4a4d8108 20774+ return err;
1facf9fc 20775+}
20776+
20777+/* ---------------------------------------------------------------------- */
20778+
4a4d8108 20779+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 20780+{
4a4d8108 20781+ struct inode *dir;
dece6358 20782+
4a4d8108
AM
20783+ dir = a->dst_dir;
20784+ dir->i_version++;
20785+ if (au_ftest_ren(a->flags, ISDIR)) {
20786+ /* is this updating defined in POSIX? */
20787+ au_cpup_attr_timesizes(a->src_inode);
20788+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 20789+ }
027c5e7a 20790+
b912730e 20791+ au_dir_ts(dir, a->btgt);
dece6358 20792+
4a4d8108
AM
20793+ if (au_ftest_ren(a->flags, ISSAMEDIR))
20794+ return;
dece6358 20795+
4a4d8108
AM
20796+ dir = a->src_dir;
20797+ dir->i_version++;
20798+ if (au_ftest_ren(a->flags, ISDIR))
20799+ au_cpup_attr_nlink(dir, /*force*/1);
b912730e 20800+ au_dir_ts(dir, a->btgt);
1facf9fc 20801+}
20802+
4a4d8108 20803+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 20804+{
4a4d8108
AM
20805+ aufs_bindex_t bend, bindex;
20806+ struct dentry *d, *h_d;
20807+ struct inode *i, *h_i;
20808+ struct super_block *sb;
dece6358 20809+
027c5e7a
AM
20810+ d = a->dst_dentry;
20811+ d_drop(d);
20812+ if (a->h_dst)
20813+ /* already dget-ed by au_ren_or_cpup() */
20814+ au_set_h_dptr(d, a->btgt, a->h_dst);
20815+
20816+ i = a->dst_inode;
20817+ if (i) {
20818+ if (!au_ftest_ren(a->flags, ISDIR))
20819+ vfsub_drop_nlink(i);
20820+ else {
20821+ vfsub_dead_dir(i);
20822+ au_cpup_attr_timesizes(i);
20823+ }
20824+ au_update_dbrange(d, /*do_put_zero*/1);
20825+ } else {
20826+ bend = a->btgt;
20827+ for (bindex = au_dbstart(d); bindex < bend; bindex++)
20828+ au_set_h_dptr(d, bindex, NULL);
20829+ bend = au_dbend(d);
20830+ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
20831+ au_set_h_dptr(d, bindex, NULL);
20832+ au_update_dbrange(d, /*do_put_zero*/0);
20833+ }
20834+
4a4d8108
AM
20835+ d = a->src_dentry;
20836+ au_set_dbwh(d, -1);
20837+ bend = au_dbend(d);
20838+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
20839+ h_d = au_h_dptr(d, bindex);
20840+ if (h_d)
20841+ au_set_h_dptr(d, bindex, NULL);
20842+ }
20843+ au_set_dbend(d, a->btgt);
20844+
20845+ sb = d->d_sb;
20846+ i = a->src_inode;
20847+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
20848+ return; /* success */
20849+
20850+ bend = au_ibend(i);
20851+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
20852+ h_i = au_h_iptr(i, bindex);
20853+ if (h_i) {
20854+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
20855+ /* ignore this error */
20856+ au_set_h_iptr(i, bindex, NULL, 0);
20857+ }
20858+ }
20859+ au_set_ibend(i, a->btgt);
1308ab2a 20860+}
dece6358 20861+
4a4d8108
AM
20862+/* ---------------------------------------------------------------------- */
20863+
20864+/* mainly for link(2) and rename(2) */
20865+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 20866+{
4a4d8108
AM
20867+ aufs_bindex_t bdiropq, bwh;
20868+ struct dentry *parent;
20869+ struct au_branch *br;
20870+
20871+ parent = dentry->d_parent;
5527c038 20872+ IMustLock(d_inode(parent)); /* dir is locked */
4a4d8108
AM
20873+
20874+ bdiropq = au_dbdiropq(parent);
20875+ bwh = au_dbwh(dentry);
20876+ br = au_sbr(dentry->d_sb, btgt);
20877+ if (au_br_rdonly(br)
20878+ || (0 <= bdiropq && bdiropq < btgt)
20879+ || (0 <= bwh && bwh < btgt))
20880+ btgt = -1;
20881+
20882+ AuDbg("btgt %d\n", btgt);
20883+ return btgt;
1facf9fc 20884+}
20885+
4a4d8108
AM
20886+/* sets src_bstart, dst_bstart and btgt */
20887+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 20888+{
4a4d8108
AM
20889+ int err;
20890+ struct au_wr_dir_args wr_dir_args = {
20891+ /* .force_btgt = -1, */
20892+ .flags = AuWrDir_ADD_ENTRY
20893+ };
dece6358 20894+
4a4d8108
AM
20895+ a->src_bstart = au_dbstart(a->src_dentry);
20896+ a->dst_bstart = au_dbstart(a->dst_dentry);
20897+ if (au_ftest_ren(a->flags, ISDIR))
20898+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
20899+ wr_dir_args.force_btgt = a->src_bstart;
20900+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
20901+ wr_dir_args.force_btgt = a->dst_bstart;
20902+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
20903+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
20904+ a->btgt = err;
dece6358 20905+
4a4d8108 20906+ return err;
1facf9fc 20907+}
20908+
4a4d8108 20909+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 20910+{
4a4d8108
AM
20911+ a->h_path.dentry = a->src_h_parent;
20912+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
20913+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
20914+ a->h_path.dentry = a->dst_h_parent;
20915+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
20916+ }
1facf9fc 20917+
4a4d8108
AM
20918+ au_fclr_ren(a->flags, DT_DSTDIR);
20919+ if (!au_ftest_ren(a->flags, ISDIR))
20920+ return;
dece6358 20921+
4a4d8108
AM
20922+ a->h_path.dentry = a->src_h_dentry;
20923+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
5527c038 20924+ if (d_is_positive(a->dst_h_dentry)) {
4a4d8108
AM
20925+ au_fset_ren(a->flags, DT_DSTDIR);
20926+ a->h_path.dentry = a->dst_h_dentry;
20927+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
20928+ }
1308ab2a 20929+}
dece6358 20930+
4a4d8108 20931+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 20932+{
4a4d8108
AM
20933+ struct dentry *h_d;
20934+ struct mutex *h_mtx;
20935+
20936+ au_dtime_revert(a->src_dt + AuPARENT);
20937+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
20938+ au_dtime_revert(a->dst_dt + AuPARENT);
20939+
20940+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
20941+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
5527c038 20942+ h_mtx = &d_inode(h_d)->i_mutex;
4a4d8108
AM
20943+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
20944+ au_dtime_revert(a->src_dt + AuCHILD);
20945+ mutex_unlock(h_mtx);
20946+
20947+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
20948+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
5527c038 20949+ h_mtx = &d_inode(h_d)->i_mutex;
4a4d8108
AM
20950+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
20951+ au_dtime_revert(a->dst_dt + AuCHILD);
20952+ mutex_unlock(h_mtx);
1facf9fc 20953+ }
20954+ }
20955+}
20956+
4a4d8108
AM
20957+/* ---------------------------------------------------------------------- */
20958+
20959+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
20960+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 20961+{
e49829fe 20962+ int err, flags;
4a4d8108
AM
20963+ /* reduce stack space */
20964+ struct au_ren_args *a;
20965+
523b37e3 20966+ AuDbg("%pd, %pd\n", _src_dentry, _dst_dentry);
4a4d8108
AM
20967+ IMustLock(_src_dir);
20968+ IMustLock(_dst_dir);
20969+
20970+ err = -ENOMEM;
20971+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
20972+ a = kzalloc(sizeof(*a), GFP_NOFS);
20973+ if (unlikely(!a))
20974+ goto out;
20975+
20976+ a->src_dir = _src_dir;
20977+ a->src_dentry = _src_dentry;
5527c038
JR
20978+ a->src_inode = NULL;
20979+ if (d_really_is_positive(a->src_dentry))
20980+ a->src_inode = d_inode(a->src_dentry);
4a4d8108
AM
20981+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
20982+ a->dst_dir = _dst_dir;
20983+ a->dst_dentry = _dst_dentry;
5527c038
JR
20984+ a->dst_inode = NULL;
20985+ if (d_really_is_positive(a->dst_dentry))
20986+ a->dst_inode = d_inode(a->dst_dentry);
4a4d8108
AM
20987+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
20988+ if (a->dst_inode) {
20989+ IMustLock(a->dst_inode);
20990+ au_igrab(a->dst_inode);
1facf9fc 20991+ }
1facf9fc 20992+
4a4d8108 20993+ err = -ENOTDIR;
027c5e7a 20994+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
2000de60 20995+ if (d_is_dir(a->src_dentry)) {
4a4d8108 20996+ au_fset_ren(a->flags, ISDIR);
5527c038 20997+ if (unlikely(d_really_is_positive(a->dst_dentry)
2000de60 20998+ && !d_is_dir(a->dst_dentry)))
4a4d8108 20999+ goto out_free;
b95c5147
AM
21000+ flags |= AuLock_DIRS;
21001+ }
21002+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, flags);
e49829fe
JR
21003+ if (unlikely(err))
21004+ goto out_free;
1facf9fc 21005+
027c5e7a
AM
21006+ err = au_d_hashed_positive(a->src_dentry);
21007+ if (unlikely(err))
21008+ goto out_unlock;
21009+ err = -ENOENT;
21010+ if (a->dst_inode) {
21011+ /*
21012+ * If it is a dir, VFS unhash dst_dentry before this
21013+ * function. It means we cannot rely upon d_unhashed().
21014+ */
21015+ if (unlikely(!a->dst_inode->i_nlink))
21016+ goto out_unlock;
21017+ if (!S_ISDIR(a->dst_inode->i_mode)) {
21018+ err = au_d_hashed_positive(a->dst_dentry);
21019+ if (unlikely(err))
21020+ goto out_unlock;
21021+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
21022+ goto out_unlock;
21023+ } else if (unlikely(d_unhashed(a->dst_dentry)))
21024+ goto out_unlock;
21025+
7eafdf33
AM
21026+ /*
21027+ * is it possible?
79b8bda9 21028+ * yes, it happened (in linux-3.3-rcN) but I don't know why.
7eafdf33
AM
21029+ * there may exist a problem somewhere else.
21030+ */
21031+ err = -EINVAL;
5527c038 21032+ if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
7eafdf33
AM
21033+ goto out_unlock;
21034+
4a4d8108
AM
21035+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
21036+ di_write_lock_parent(a->dst_parent);
1facf9fc 21037+
4a4d8108
AM
21038+ /* which branch we process */
21039+ err = au_ren_wbr(a);
21040+ if (unlikely(err < 0))
027c5e7a 21041+ goto out_parent;
4a4d8108 21042+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
86dc4139 21043+ a->h_path.mnt = au_br_mnt(a->br);
1facf9fc 21044+
4a4d8108
AM
21045+ /* are they available to be renamed */
21046+ err = au_ren_may_dir(a);
21047+ if (unlikely(err))
21048+ goto out_children;
1facf9fc 21049+
4a4d8108
AM
21050+ /* prepare the writable parent dir on the same branch */
21051+ if (a->dst_bstart == a->btgt) {
21052+ au_fset_ren(a->flags, WHDST);
21053+ } else {
21054+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
21055+ if (unlikely(err))
21056+ goto out_children;
21057+ }
1facf9fc 21058+
4a4d8108
AM
21059+ if (a->src_dir != a->dst_dir) {
21060+ /*
21061+ * this temporary unlock is safe,
21062+ * because both dir->i_mutex are locked.
21063+ */
21064+ di_write_unlock(a->dst_parent);
21065+ di_write_lock_parent(a->src_parent);
21066+ err = au_wr_dir_need_wh(a->src_dentry,
21067+ au_ftest_ren(a->flags, ISDIR),
21068+ &a->btgt);
21069+ di_write_unlock(a->src_parent);
21070+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
21071+ au_fclr_ren(a->flags, ISSAMEDIR);
21072+ } else
21073+ err = au_wr_dir_need_wh(a->src_dentry,
21074+ au_ftest_ren(a->flags, ISDIR),
21075+ &a->btgt);
21076+ if (unlikely(err < 0))
21077+ goto out_children;
21078+ if (err)
21079+ au_fset_ren(a->flags, WHSRC);
1facf9fc 21080+
86dc4139
AM
21081+ /* cpup src */
21082+ if (a->src_bstart != a->btgt) {
86dc4139
AM
21083+ struct au_pin pin;
21084+
21085+ err = au_pin(&pin, a->src_dentry, a->btgt,
21086+ au_opt_udba(a->src_dentry->d_sb),
21087+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 21088+ if (!err) {
c2b27bf2
AM
21089+ struct au_cp_generic cpg = {
21090+ .dentry = a->src_dentry,
21091+ .bdst = a->btgt,
21092+ .bsrc = a->src_bstart,
21093+ .len = -1,
21094+ .pin = &pin,
21095+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21096+ };
367653fa 21097+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
c2b27bf2 21098+ err = au_sio_cpup_simple(&cpg);
367653fa 21099+ au_unpin(&pin);
86dc4139 21100+ }
86dc4139
AM
21101+ if (unlikely(err))
21102+ goto out_children;
21103+ a->src_bstart = a->btgt;
21104+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
21105+ au_fset_ren(a->flags, WHSRC);
21106+ }
21107+
4a4d8108
AM
21108+ /* lock them all */
21109+ err = au_ren_lock(a);
21110+ if (unlikely(err))
86dc4139 21111+ /* leave the copied-up one */
4a4d8108 21112+ goto out_children;
1facf9fc 21113+
4a4d8108
AM
21114+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
21115+ err = au_may_ren(a);
21116+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
21117+ err = -ENAMETOOLONG;
21118+ if (unlikely(err))
21119+ goto out_hdir;
1facf9fc 21120+
4a4d8108
AM
21121+ /* store timestamps to be revertible */
21122+ au_ren_dt(a);
1facf9fc 21123+
4a4d8108
AM
21124+ /* here we go */
21125+ err = do_rename(a);
21126+ if (unlikely(err))
21127+ goto out_dt;
21128+
21129+ /* update dir attributes */
21130+ au_ren_refresh_dir(a);
21131+
21132+ /* dput/iput all lower dentries */
21133+ au_ren_refresh(a);
21134+
21135+ goto out_hdir; /* success */
21136+
4f0767ce 21137+out_dt:
4a4d8108 21138+ au_ren_rev_dt(err, a);
4f0767ce 21139+out_hdir:
4a4d8108 21140+ au_ren_unlock(a);
4f0767ce 21141+out_children:
4a4d8108 21142+ au_nhash_wh_free(&a->whlist);
027c5e7a
AM
21143+ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
21144+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
21145+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
21146+ au_set_dbstart(a->dst_dentry, a->dst_bstart);
4a4d8108 21147+ }
027c5e7a 21148+out_parent:
4a4d8108
AM
21149+ if (!err)
21150+ d_move(a->src_dentry, a->dst_dentry);
027c5e7a
AM
21151+ else {
21152+ au_update_dbstart(a->dst_dentry);
21153+ if (!a->dst_inode)
21154+ d_drop(a->dst_dentry);
21155+ }
4a4d8108
AM
21156+ if (au_ftest_ren(a->flags, ISSAMEDIR))
21157+ di_write_unlock(a->dst_parent);
21158+ else
21159+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 21160+out_unlock:
4a4d8108 21161+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 21162+out_free:
4a4d8108
AM
21163+ iput(a->dst_inode);
21164+ if (a->thargs)
21165+ au_whtmp_rmdir_free(a->thargs);
21166+ kfree(a);
4f0767ce 21167+out:
4a4d8108
AM
21168+ AuTraceErr(err);
21169+ return err;
1308ab2a 21170+}
7f207e10
AM
21171diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
21172--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 21173+++ linux/fs/aufs/Kconfig 2015-09-24 10:47:58.248052907 +0200
c1595e42 21174@@ -0,0 +1,185 @@
4a4d8108
AM
21175+config AUFS_FS
21176+ tristate "Aufs (Advanced multi layered unification filesystem) support"
4a4d8108
AM
21177+ help
21178+ Aufs is a stackable unification filesystem such as Unionfs,
21179+ which unifies several directories and provides a merged single
21180+ directory.
21181+ In the early days, aufs was entirely re-designed and
21182+ re-implemented Unionfs Version 1.x series. Introducing many
21183+ original ideas, approaches and improvements, it becomes totally
21184+ different from Unionfs while keeping the basic features.
1facf9fc 21185+
4a4d8108
AM
21186+if AUFS_FS
21187+choice
21188+ prompt "Maximum number of branches"
21189+ default AUFS_BRANCH_MAX_127
21190+ help
21191+ Specifies the maximum number of branches (or member directories)
21192+ in a single aufs. The larger value consumes more system
21193+ resources and has a minor impact to performance.
21194+config AUFS_BRANCH_MAX_127
21195+ bool "127"
21196+ help
21197+ Specifies the maximum number of branches (or member directories)
21198+ in a single aufs. The larger value consumes more system
21199+ resources and has a minor impact to performance.
21200+config AUFS_BRANCH_MAX_511
21201+ bool "511"
21202+ help
21203+ Specifies the maximum number of branches (or member directories)
21204+ in a single aufs. The larger value consumes more system
21205+ resources and has a minor impact to performance.
21206+config AUFS_BRANCH_MAX_1023
21207+ bool "1023"
21208+ help
21209+ Specifies the maximum number of branches (or member directories)
21210+ in a single aufs. The larger value consumes more system
21211+ resources and has a minor impact to performance.
21212+config AUFS_BRANCH_MAX_32767
21213+ bool "32767"
21214+ help
21215+ Specifies the maximum number of branches (or member directories)
21216+ in a single aufs. The larger value consumes more system
21217+ resources and has a minor impact to performance.
21218+endchoice
1facf9fc 21219+
e49829fe
JR
21220+config AUFS_SBILIST
21221+ bool
21222+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
21223+ default y
21224+ help
21225+ Automatic configuration for internal use.
21226+ When aufs supports Magic SysRq or /proc, enabled automatically.
21227+
4a4d8108
AM
21228+config AUFS_HNOTIFY
21229+ bool "Detect direct branch access (bypassing aufs)"
21230+ help
21231+ If you want to modify files on branches directly, eg. bypassing aufs,
21232+ and want aufs to detect the changes of them fully, then enable this
21233+ option and use 'udba=notify' mount option.
7f207e10 21234+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
21235+ It will have a negative impact to the performance.
21236+ See detail in aufs.5.
dece6358 21237+
4a4d8108
AM
21238+choice
21239+ prompt "method" if AUFS_HNOTIFY
21240+ default AUFS_HFSNOTIFY
21241+config AUFS_HFSNOTIFY
21242+ bool "fsnotify"
21243+ select FSNOTIFY
4a4d8108 21244+endchoice
1facf9fc 21245+
4a4d8108
AM
21246+config AUFS_EXPORT
21247+ bool "NFS-exportable aufs"
2cbb1c4b 21248+ depends on EXPORTFS
4a4d8108
AM
21249+ help
21250+ If you want to export your mounted aufs via NFS, then enable this
21251+ option. There are several requirements for this configuration.
21252+ See detail in aufs.5.
1facf9fc 21253+
4a4d8108
AM
21254+config AUFS_INO_T_64
21255+ bool
21256+ depends on AUFS_EXPORT
21257+ depends on 64BIT && !(ALPHA || S390)
21258+ default y
21259+ help
21260+ Automatic configuration for internal use.
21261+ /* typedef unsigned long/int __kernel_ino_t */
21262+ /* alpha and s390x are int */
1facf9fc 21263+
c1595e42
JR
21264+config AUFS_XATTR
21265+ bool "support for XATTR/EA (including Security Labels)"
21266+ help
21267+ If your branch fs supports XATTR/EA and you want to make them
21268+ available in aufs too, then enable this opsion and specify the
21269+ branch attributes for EA.
21270+ See detail in aufs.5.
21271+
076b876e
AM
21272+config AUFS_FHSM
21273+ bool "File-based Hierarchical Storage Management"
21274+ help
21275+ Hierarchical Storage Management (or HSM) is a well-known feature
21276+ in the storage world. Aufs provides this feature as file-based.
21277+ with multiple branches.
21278+ These multiple branches are prioritized, ie. the topmost one
21279+ should be the fastest drive and be used heavily.
21280+
4a4d8108
AM
21281+config AUFS_RDU
21282+ bool "Readdir in userspace"
21283+ help
21284+ Aufs has two methods to provide a merged view for a directory,
21285+ by a user-space library and by kernel-space natively. The latter
21286+ is always enabled but sometimes large and slow.
21287+ If you enable this option, install the library in aufs2-util
21288+ package, and set some environment variables for your readdir(3),
21289+ then the work will be handled in user-space which generally
21290+ shows better performance in most cases.
21291+ See detail in aufs.5.
1facf9fc 21292+
4a4d8108
AM
21293+config AUFS_SHWH
21294+ bool "Show whiteouts"
21295+ help
21296+ If you want to make the whiteouts in aufs visible, then enable
21297+ this option and specify 'shwh' mount option. Although it may
21298+ sounds like philosophy or something, but in technically it
21299+ simply shows the name of whiteout with keeping its behaviour.
1facf9fc 21300+
4a4d8108
AM
21301+config AUFS_BR_RAMFS
21302+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
21303+ help
21304+ If you want to use ramfs as an aufs branch fs, then enable this
21305+ option. Generally tmpfs is recommended.
21306+ Aufs prohibited them to be a branch fs by default, because
21307+ initramfs becomes unusable after switch_root or something
21308+ generally. If you sets initramfs as an aufs branch and boot your
21309+ system by switch_root, you will meet a problem easily since the
21310+ files in initramfs may be inaccessible.
21311+ Unless you are going to use ramfs as an aufs branch fs without
21312+ switch_root or something, leave it N.
1facf9fc 21313+
4a4d8108
AM
21314+config AUFS_BR_FUSE
21315+ bool "Fuse fs as an aufs branch"
21316+ depends on FUSE_FS
21317+ select AUFS_POLL
21318+ help
21319+ If you want to use fuse-based userspace filesystem as an aufs
21320+ branch fs, then enable this option.
21321+ It implements the internal poll(2) operation which is
21322+ implemented by fuse only (curretnly).
1facf9fc 21323+
4a4d8108
AM
21324+config AUFS_POLL
21325+ bool
21326+ help
21327+ Automatic configuration for internal use.
1facf9fc 21328+
4a4d8108
AM
21329+config AUFS_BR_HFSPLUS
21330+ bool "Hfsplus as an aufs branch"
21331+ depends on HFSPLUS_FS
21332+ default y
21333+ help
21334+ If you want to use hfsplus fs as an aufs branch fs, then enable
21335+ this option. This option introduces a small overhead at
21336+ copying-up a file on hfsplus.
1facf9fc 21337+
4a4d8108
AM
21338+config AUFS_BDEV_LOOP
21339+ bool
21340+ depends on BLK_DEV_LOOP
21341+ default y
21342+ help
21343+ Automatic configuration for internal use.
21344+ Convert =[ym] into =y.
1308ab2a 21345+
4a4d8108
AM
21346+config AUFS_DEBUG
21347+ bool "Debug aufs"
21348+ help
21349+ Enable this to compile aufs internal debug code.
21350+ It will have a negative impact to the performance.
21351+
21352+config AUFS_MAGIC_SYSRQ
21353+ bool
21354+ depends on AUFS_DEBUG && MAGIC_SYSRQ
21355+ default y
21356+ help
21357+ Automatic configuration for internal use.
21358+ When aufs supports Magic SysRq, enabled automatically.
21359+endif
7f207e10
AM
21360diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
21361--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
79b8bda9
AM
21362+++ linux/fs/aufs/loop.c 2015-11-11 17:21:46.918863802 +0100
21363@@ -0,0 +1,146 @@
1facf9fc 21364+/*
2000de60 21365+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 21366+ *
21367+ * This program, aufs is free software; you can redistribute it and/or modify
21368+ * it under the terms of the GNU General Public License as published by
21369+ * the Free Software Foundation; either version 2 of the License, or
21370+ * (at your option) any later version.
dece6358
AM
21371+ *
21372+ * This program is distributed in the hope that it will be useful,
21373+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21374+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21375+ * GNU General Public License for more details.
21376+ *
21377+ * You should have received a copy of the GNU General Public License
523b37e3 21378+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21379+ */
21380+
21381+/*
21382+ * support for loopback block device as a branch
21383+ */
21384+
1facf9fc 21385+#include "aufs.h"
21386+
392086de
AM
21387+/* added into drivers/block/loop.c */
21388+static struct file *(*backing_file_func)(struct super_block *sb);
21389+
1facf9fc 21390+/*
21391+ * test if two lower dentries have overlapping branches.
21392+ */
b752ccd1 21393+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 21394+{
b752ccd1 21395+ struct super_block *h_sb;
392086de
AM
21396+ struct file *backing_file;
21397+
21398+ if (unlikely(!backing_file_func)) {
21399+ /* don't load "loop" module here */
21400+ backing_file_func = symbol_get(loop_backing_file);
21401+ if (unlikely(!backing_file_func))
21402+ /* "loop" module is not loaded */
21403+ return 0;
21404+ }
1facf9fc 21405+
b752ccd1 21406+ h_sb = h_adding->d_sb;
392086de
AM
21407+ backing_file = backing_file_func(h_sb);
21408+ if (!backing_file)
1facf9fc 21409+ return 0;
21410+
2000de60 21411+ h_adding = backing_file->f_path.dentry;
b752ccd1
AM
21412+ /*
21413+ * h_adding can be local NFS.
21414+ * in this case aufs cannot detect the loop.
21415+ */
21416+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 21417+ return 1;
b752ccd1 21418+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 21419+}
21420+
21421+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
21422+int au_test_loopback_kthread(void)
21423+{
b752ccd1
AM
21424+ int ret;
21425+ struct task_struct *tsk = current;
a2a7ad62 21426+ char c, comm[sizeof(tsk->comm)];
b752ccd1
AM
21427+
21428+ ret = 0;
21429+ if (tsk->flags & PF_KTHREAD) {
a2a7ad62
AM
21430+ get_task_comm(comm, tsk);
21431+ c = comm[4];
b752ccd1 21432+ ret = ('0' <= c && c <= '9'
a2a7ad62 21433+ && !strncmp(comm, "loop", 4));
b752ccd1 21434+ }
1facf9fc 21435+
b752ccd1 21436+ return ret;
1facf9fc 21437+}
87a755f4
AM
21438+
21439+/* ---------------------------------------------------------------------- */
21440+
21441+#define au_warn_loopback_step 16
21442+static int au_warn_loopback_nelem = au_warn_loopback_step;
21443+static unsigned long *au_warn_loopback_array;
21444+
21445+void au_warn_loopback(struct super_block *h_sb)
21446+{
21447+ int i, new_nelem;
21448+ unsigned long *a, magic;
21449+ static DEFINE_SPINLOCK(spin);
21450+
21451+ magic = h_sb->s_magic;
21452+ spin_lock(&spin);
21453+ a = au_warn_loopback_array;
21454+ for (i = 0; i < au_warn_loopback_nelem && *a; i++)
21455+ if (a[i] == magic) {
21456+ spin_unlock(&spin);
21457+ return;
21458+ }
21459+
21460+ /* h_sb is new to us, print it */
21461+ if (i < au_warn_loopback_nelem) {
21462+ a[i] = magic;
21463+ goto pr;
21464+ }
21465+
21466+ /* expand the array */
21467+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
21468+ a = au_kzrealloc(au_warn_loopback_array,
21469+ au_warn_loopback_nelem * sizeof(unsigned long),
21470+ new_nelem * sizeof(unsigned long), GFP_ATOMIC);
21471+ if (a) {
21472+ au_warn_loopback_nelem = new_nelem;
21473+ au_warn_loopback_array = a;
21474+ a[i] = magic;
21475+ goto pr;
21476+ }
21477+
21478+ spin_unlock(&spin);
21479+ AuWarn1("realloc failed, ignored\n");
21480+ return;
21481+
21482+pr:
21483+ spin_unlock(&spin);
0c3ec466
AM
21484+ pr_warn("you may want to try another patch for loopback file "
21485+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
87a755f4
AM
21486+}
21487+
21488+int au_loopback_init(void)
21489+{
21490+ int err;
21491+ struct super_block *sb __maybe_unused;
21492+
79b8bda9 21493+ BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
87a755f4
AM
21494+
21495+ err = 0;
21496+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
21497+ sizeof(unsigned long), GFP_NOFS);
21498+ if (unlikely(!au_warn_loopback_array))
21499+ err = -ENOMEM;
21500+
21501+ return err;
21502+}
21503+
21504+void au_loopback_fin(void)
21505+{
79b8bda9
AM
21506+ if (backing_file_func)
21507+ symbol_put(loop_backing_file);
87a755f4
AM
21508+ kfree(au_warn_loopback_array);
21509+}
7f207e10
AM
21510diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
21511--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 21512+++ linux/fs/aufs/loop.h 2015-09-24 10:47:58.254719746 +0200
523b37e3 21513@@ -0,0 +1,52 @@
1facf9fc 21514+/*
2000de60 21515+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 21516+ *
21517+ * This program, aufs is free software; you can redistribute it and/or modify
21518+ * it under the terms of the GNU General Public License as published by
21519+ * the Free Software Foundation; either version 2 of the License, or
21520+ * (at your option) any later version.
dece6358
AM
21521+ *
21522+ * This program is distributed in the hope that it will be useful,
21523+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21524+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21525+ * GNU General Public License for more details.
21526+ *
21527+ * You should have received a copy of the GNU General Public License
523b37e3 21528+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21529+ */
21530+
21531+/*
21532+ * support for loopback mount as a branch
21533+ */
21534+
21535+#ifndef __AUFS_LOOP_H__
21536+#define __AUFS_LOOP_H__
21537+
21538+#ifdef __KERNEL__
21539+
dece6358
AM
21540+struct dentry;
21541+struct super_block;
1facf9fc 21542+
21543+#ifdef CONFIG_AUFS_BDEV_LOOP
392086de
AM
21544+/* drivers/block/loop.c */
21545+struct file *loop_backing_file(struct super_block *sb);
21546+
1facf9fc 21547+/* loop.c */
b752ccd1 21548+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 21549+int au_test_loopback_kthread(void);
87a755f4
AM
21550+void au_warn_loopback(struct super_block *h_sb);
21551+
21552+int au_loopback_init(void);
21553+void au_loopback_fin(void);
1facf9fc 21554+#else
4a4d8108 21555+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 21556+ struct dentry *h_adding)
4a4d8108 21557+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
21558+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
21559+
21560+AuStubInt0(au_loopback_init, void)
21561+AuStubVoid(au_loopback_fin, void)
1facf9fc 21562+#endif /* CONFIG_AUFS_BDEV_LOOP */
21563+
21564+#endif /* __KERNEL__ */
21565+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
21566diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
21567--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 21568+++ linux/fs/aufs/magic.mk 2015-09-24 10:47:58.254719746 +0200
7e9cd9fe 21569@@ -0,0 +1,30 @@
1facf9fc 21570+
21571+# defined in ${srctree}/fs/fuse/inode.c
21572+# tristate
21573+ifdef CONFIG_FUSE_FS
21574+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
21575+endif
21576+
1facf9fc 21577+# defined in ${srctree}/fs/xfs/xfs_sb.h
21578+# tristate
21579+ifdef CONFIG_XFS_FS
21580+ccflags-y += -DXFS_SB_MAGIC=0x58465342
21581+endif
21582+
21583+# defined in ${srctree}/fs/configfs/mount.c
21584+# tristate
21585+ifdef CONFIG_CONFIGFS_FS
21586+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
21587+endif
21588+
1facf9fc 21589+# defined in ${srctree}/fs/ubifs/ubifs.h
21590+# tristate
21591+ifdef CONFIG_UBIFS_FS
21592+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
21593+endif
4a4d8108
AM
21594+
21595+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
21596+# tristate
21597+ifdef CONFIG_HFSPLUS_FS
21598+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
21599+endif
7f207e10
AM
21600diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
21601--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 21602+++ linux/fs/aufs/Makefile 2015-09-24 10:47:58.248052907 +0200
c1595e42 21603@@ -0,0 +1,44 @@
4a4d8108
AM
21604+
21605+include ${src}/magic.mk
21606+ifeq (${CONFIG_AUFS_FS},m)
21607+include ${src}/conf.mk
21608+endif
21609+-include ${src}/priv_def.mk
21610+
21611+# cf. include/linux/kernel.h
21612+# enable pr_debug
21613+ccflags-y += -DDEBUG
f6c5ef8b
AM
21614+# sparse requires the full pathname
21615+ifdef M
523b37e3 21616+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
f6c5ef8b 21617+else
523b37e3 21618+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
f6c5ef8b 21619+endif
4a4d8108
AM
21620+
21621+obj-$(CONFIG_AUFS_FS) += aufs.o
21622+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
21623+ wkq.o vfsub.o dcsub.o \
e49829fe 21624+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
21625+ dinfo.o dentry.o \
21626+ dynop.o \
21627+ finfo.o file.o f_op.o \
21628+ dir.o vdir.o \
21629+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
c2b27bf2 21630+ mvdown.o ioctl.o
4a4d8108
AM
21631+
21632+# all are boolean
e49829fe 21633+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
21634+aufs-$(CONFIG_SYSFS) += sysfs.o
21635+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
21636+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
21637+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
21638+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108 21639+aufs-$(CONFIG_AUFS_EXPORT) += export.o
c1595e42
JR
21640+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
21641+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
076b876e 21642+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
4a4d8108
AM
21643+aufs-$(CONFIG_AUFS_POLL) += poll.o
21644+aufs-$(CONFIG_AUFS_RDU) += rdu.o
4a4d8108
AM
21645+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
21646+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
21647+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
21648diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
21649--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
b95c5147
AM
21650+++ linux/fs/aufs/module.c 2015-12-10 17:59:16.839499823 +0100
21651@@ -0,0 +1,221 @@
1facf9fc 21652+/*
2000de60 21653+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 21654+ *
21655+ * This program, aufs is free software; you can redistribute it and/or modify
21656+ * it under the terms of the GNU General Public License as published by
21657+ * the Free Software Foundation; either version 2 of the License, or
21658+ * (at your option) any later version.
dece6358
AM
21659+ *
21660+ * This program is distributed in the hope that it will be useful,
21661+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21662+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21663+ * GNU General Public License for more details.
21664+ *
21665+ * You should have received a copy of the GNU General Public License
523b37e3 21666+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21667+ */
21668+
21669+/*
21670+ * module global variables and operations
21671+ */
21672+
21673+#include <linux/module.h>
21674+#include <linux/seq_file.h>
21675+#include "aufs.h"
21676+
21677+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
21678+{
21679+ if (new_sz <= nused)
21680+ return p;
21681+
21682+ p = krealloc(p, new_sz, gfp);
21683+ if (p)
21684+ memset(p + nused, 0, new_sz - nused);
21685+ return p;
21686+}
21687+
21688+/* ---------------------------------------------------------------------- */
21689+
21690+/*
21691+ * aufs caches
21692+ */
21693+struct kmem_cache *au_cachep[AuCache_Last];
21694+static int __init au_cache_init(void)
21695+{
4a4d8108 21696+ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
1facf9fc 21697+ if (au_cachep[AuCache_DINFO])
027c5e7a 21698+ /* SLAB_DESTROY_BY_RCU */
4a4d8108
AM
21699+ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
21700+ au_icntnr_init_once);
1facf9fc 21701+ if (au_cachep[AuCache_ICNTNR])
4a4d8108
AM
21702+ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo,
21703+ au_fi_init_once);
1facf9fc 21704+ if (au_cachep[AuCache_FINFO])
21705+ au_cachep[AuCache_VDIR] = AuCache(au_vdir);
21706+ if (au_cachep[AuCache_VDIR])
21707+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
21708+ if (au_cachep[AuCache_DEHSTR])
21709+ return 0;
21710+
21711+ return -ENOMEM;
21712+}
21713+
21714+static void au_cache_fin(void)
21715+{
21716+ int i;
4a4d8108 21717+
537831f9
AM
21718+ /*
21719+ * Make sure all delayed rcu free inodes are flushed before we
21720+ * destroy cache.
21721+ */
21722+ rcu_barrier();
21723+
7eafdf33
AM
21724+ /* excluding AuCache_HNOTIFY */
21725+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
79b8bda9
AM
21726+ for (i = 0; i < AuCache_HNOTIFY; i++) {
21727+ kmem_cache_destroy(au_cachep[i]);
21728+ au_cachep[i] = NULL;
21729+ }
1facf9fc 21730+}
21731+
21732+/* ---------------------------------------------------------------------- */
21733+
21734+int au_dir_roflags;
21735+
e49829fe 21736+#ifdef CONFIG_AUFS_SBILIST
1e00d052
AM
21737+/*
21738+ * iterate_supers_type() doesn't protect us from
21739+ * remounting (branch management)
21740+ */
e49829fe
JR
21741+struct au_splhead au_sbilist;
21742+#endif
21743+
9dbd164d
AM
21744+struct lock_class_key au_lc_key[AuLcKey_Last];
21745+
1facf9fc 21746+/*
21747+ * functions for module interface.
21748+ */
21749+MODULE_LICENSE("GPL");
21750+/* MODULE_LICENSE("GPL v2"); */
dece6358 21751+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 21752+MODULE_DESCRIPTION(AUFS_NAME
21753+ " -- Advanced multi layered unification filesystem");
21754+MODULE_VERSION(AUFS_VERSION);
c06a8ce3 21755+MODULE_ALIAS_FS(AUFS_NAME);
1facf9fc 21756+
1facf9fc 21757+/* this module parameter has no meaning when SYSFS is disabled */
21758+int sysaufs_brs = 1;
21759+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
21760+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
21761+
076b876e
AM
21762+/* this module parameter has no meaning when USER_NS is disabled */
21763+static bool au_userns;
21764+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
21765+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
21766+
1facf9fc 21767+/* ---------------------------------------------------------------------- */
21768+
21769+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
21770+
21771+int au_seq_path(struct seq_file *seq, struct path *path)
21772+{
79b8bda9
AM
21773+ int err;
21774+
21775+ err = seq_path(seq, path, au_esc_chars);
21776+ if (err > 0)
21777+ err = 0;
21778+ else if (err < 0)
21779+ err = -ENOMEM;
21780+
21781+ return err;
1facf9fc 21782+}
21783+
21784+/* ---------------------------------------------------------------------- */
21785+
21786+static int __init aufs_init(void)
21787+{
21788+ int err, i;
21789+ char *p;
21790+
21791+ p = au_esc_chars;
21792+ for (i = 1; i <= ' '; i++)
21793+ *p++ = i;
21794+ *p++ = '\\';
21795+ *p++ = '\x7f';
21796+ *p = 0;
21797+
21798+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
21799+
b95c5147
AM
21800+ memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
21801+ for (i = 0; i < AuIop_Last; i++)
21802+ aufs_iop_nogetattr[i].getattr = NULL;
21803+
e49829fe 21804+ au_sbilist_init();
1facf9fc 21805+ sysaufs_brs_init();
21806+ au_debug_init();
4a4d8108 21807+ au_dy_init();
1facf9fc 21808+ err = sysaufs_init();
21809+ if (unlikely(err))
21810+ goto out;
e49829fe 21811+ err = au_procfs_init();
4f0767ce 21812+ if (unlikely(err))
953406b4 21813+ goto out_sysaufs;
e49829fe
JR
21814+ err = au_wkq_init();
21815+ if (unlikely(err))
21816+ goto out_procfs;
87a755f4 21817+ err = au_loopback_init();
1facf9fc 21818+ if (unlikely(err))
21819+ goto out_wkq;
87a755f4
AM
21820+ err = au_hnotify_init();
21821+ if (unlikely(err))
21822+ goto out_loopback;
1facf9fc 21823+ err = au_sysrq_init();
21824+ if (unlikely(err))
21825+ goto out_hin;
21826+ err = au_cache_init();
21827+ if (unlikely(err))
21828+ goto out_sysrq;
076b876e
AM
21829+
21830+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
1facf9fc 21831+ err = register_filesystem(&aufs_fs_type);
21832+ if (unlikely(err))
21833+ goto out_cache;
076b876e 21834+
4a4d8108
AM
21835+ /* since we define pr_fmt, call printk directly */
21836+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 21837+ goto out; /* success */
21838+
4f0767ce 21839+out_cache:
1facf9fc 21840+ au_cache_fin();
4f0767ce 21841+out_sysrq:
1facf9fc 21842+ au_sysrq_fin();
4f0767ce 21843+out_hin:
4a4d8108 21844+ au_hnotify_fin();
87a755f4
AM
21845+out_loopback:
21846+ au_loopback_fin();
4f0767ce 21847+out_wkq:
1facf9fc 21848+ au_wkq_fin();
e49829fe
JR
21849+out_procfs:
21850+ au_procfs_fin();
4f0767ce 21851+out_sysaufs:
1facf9fc 21852+ sysaufs_fin();
4a4d8108 21853+ au_dy_fin();
4f0767ce 21854+out:
1facf9fc 21855+ return err;
21856+}
21857+
21858+static void __exit aufs_exit(void)
21859+{
21860+ unregister_filesystem(&aufs_fs_type);
21861+ au_cache_fin();
21862+ au_sysrq_fin();
4a4d8108 21863+ au_hnotify_fin();
87a755f4 21864+ au_loopback_fin();
1facf9fc 21865+ au_wkq_fin();
e49829fe 21866+ au_procfs_fin();
1facf9fc 21867+ sysaufs_fin();
4a4d8108 21868+ au_dy_fin();
1facf9fc 21869+}
21870+
21871+module_init(aufs_init);
21872+module_exit(aufs_exit);
7f207e10
AM
21873diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
21874--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 21875+++ linux/fs/aufs/module.h 2015-09-24 10:47:58.254719746 +0200
523b37e3 21876@@ -0,0 +1,104 @@
1facf9fc 21877+/*
2000de60 21878+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 21879+ *
21880+ * This program, aufs is free software; you can redistribute it and/or modify
21881+ * it under the terms of the GNU General Public License as published by
21882+ * the Free Software Foundation; either version 2 of the License, or
21883+ * (at your option) any later version.
dece6358
AM
21884+ *
21885+ * This program is distributed in the hope that it will be useful,
21886+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21887+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21888+ * GNU General Public License for more details.
21889+ *
21890+ * You should have received a copy of the GNU General Public License
523b37e3 21891+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21892+ */
21893+
21894+/*
21895+ * module initialization and module-global
21896+ */
21897+
21898+#ifndef __AUFS_MODULE_H__
21899+#define __AUFS_MODULE_H__
21900+
21901+#ifdef __KERNEL__
21902+
21903+#include <linux/slab.h>
21904+
dece6358
AM
21905+struct path;
21906+struct seq_file;
21907+
1facf9fc 21908+/* module parameters */
1facf9fc 21909+extern int sysaufs_brs;
21910+
21911+/* ---------------------------------------------------------------------- */
21912+
21913+extern int au_dir_roflags;
21914+
9dbd164d
AM
21915+enum {
21916+ AuLcNonDir_FIINFO,
21917+ AuLcNonDir_DIINFO,
21918+ AuLcNonDir_IIINFO,
21919+
21920+ AuLcDir_FIINFO,
21921+ AuLcDir_DIINFO,
21922+ AuLcDir_IIINFO,
21923+
21924+ AuLcSymlink_DIINFO,
21925+ AuLcSymlink_IIINFO,
21926+
21927+ AuLcKey_Last
21928+};
21929+extern struct lock_class_key au_lc_key[AuLcKey_Last];
21930+
1facf9fc 21931+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
21932+int au_seq_path(struct seq_file *seq, struct path *path);
21933+
e49829fe
JR
21934+#ifdef CONFIG_PROC_FS
21935+/* procfs.c */
21936+int __init au_procfs_init(void);
21937+void au_procfs_fin(void);
21938+#else
21939+AuStubInt0(au_procfs_init, void);
21940+AuStubVoid(au_procfs_fin, void);
21941+#endif
21942+
4f0767ce
JR
21943+/* ---------------------------------------------------------------------- */
21944+
21945+/* kmem cache */
1facf9fc 21946+enum {
21947+ AuCache_DINFO,
21948+ AuCache_ICNTNR,
21949+ AuCache_FINFO,
21950+ AuCache_VDIR,
21951+ AuCache_DEHSTR,
7eafdf33 21952+ AuCache_HNOTIFY, /* must be last */
1facf9fc 21953+ AuCache_Last
21954+};
21955+
4a4d8108
AM
21956+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
21957+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
21958+#define AuCacheCtor(type, ctor) \
21959+ kmem_cache_create(#type, sizeof(struct type), \
21960+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 21961+
21962+extern struct kmem_cache *au_cachep[];
21963+
21964+#define AuCacheFuncs(name, index) \
4a4d8108 21965+static inline struct au_##name *au_cache_alloc_##name(void) \
1facf9fc 21966+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
4a4d8108 21967+static inline void au_cache_free_##name(struct au_##name *p) \
1facf9fc 21968+{ kmem_cache_free(au_cachep[AuCache_##index], p); }
21969+
21970+AuCacheFuncs(dinfo, DINFO);
21971+AuCacheFuncs(icntnr, ICNTNR);
21972+AuCacheFuncs(finfo, FINFO);
21973+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
21974+AuCacheFuncs(vdir_dehstr, DEHSTR);
21975+#ifdef CONFIG_AUFS_HNOTIFY
21976+AuCacheFuncs(hnotify, HNOTIFY);
21977+#endif
1facf9fc 21978+
4a4d8108
AM
21979+#endif /* __KERNEL__ */
21980+#endif /* __AUFS_MODULE_H__ */
c2b27bf2
AM
21981diff -urN /usr/share/empty/fs/aufs/mvdown.c linux/fs/aufs/mvdown.c
21982--- /usr/share/empty/fs/aufs/mvdown.c 1970-01-01 01:00:00.000000000 +0100
b95c5147 21983+++ linux/fs/aufs/mvdown.c 2015-12-10 17:59:16.839499823 +0100
79b8bda9 21984@@ -0,0 +1,703 @@
c2b27bf2 21985+/*
2000de60 21986+ * Copyright (C) 2011-2015 Junjiro R. Okajima
c2b27bf2
AM
21987+ *
21988+ * This program, aufs is free software; you can redistribute it and/or modify
21989+ * it under the terms of the GNU General Public License as published by
21990+ * the Free Software Foundation; either version 2 of the License, or
21991+ * (at your option) any later version.
21992+ *
21993+ * This program is distributed in the hope that it will be useful,
21994+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21995+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21996+ * GNU General Public License for more details.
21997+ *
21998+ * You should have received a copy of the GNU General Public License
523b37e3
AM
21999+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22000+ */
22001+
22002+/*
22003+ * move-down, opposite of copy-up
c2b27bf2
AM
22004+ */
22005+
22006+#include "aufs.h"
22007+
c2b27bf2
AM
22008+struct au_mvd_args {
22009+ struct {
c2b27bf2
AM
22010+ struct super_block *h_sb;
22011+ struct dentry *h_parent;
22012+ struct au_hinode *hdir;
392086de 22013+ struct inode *h_dir, *h_inode;
c1595e42 22014+ struct au_pin pin;
c2b27bf2
AM
22015+ } info[AUFS_MVDOWN_NARRAY];
22016+
22017+ struct aufs_mvdown mvdown;
22018+ struct dentry *dentry, *parent;
22019+ struct inode *inode, *dir;
22020+ struct super_block *sb;
22021+ aufs_bindex_t bopq, bwh, bfound;
22022+ unsigned char rename_lock;
c2b27bf2
AM
22023+};
22024+
392086de 22025+#define mvd_errno mvdown.au_errno
076b876e
AM
22026+#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
22027+#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
22028+#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
22029+#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
c2b27bf2 22030+
392086de
AM
22031+#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb
22032+#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
22033+#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
22034+#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
22035+#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
c1595e42 22036+#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
392086de
AM
22037+
22038+#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb
22039+#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
22040+#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
22041+#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
22042+#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
c1595e42 22043+#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
c2b27bf2
AM
22044+
22045+#define AU_MVD_PR(flag, ...) do { \
22046+ if (flag) \
22047+ pr_err(__VA_ARGS__); \
22048+ } while (0)
22049+
076b876e
AM
22050+static int find_lower_writable(struct au_mvd_args *a)
22051+{
22052+ struct super_block *sb;
22053+ aufs_bindex_t bindex, bend;
22054+ struct au_branch *br;
22055+
22056+ sb = a->sb;
22057+ bindex = a->mvd_bsrc;
22058+ bend = au_sbend(sb);
22059+ if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
22060+ for (bindex++; bindex <= bend; bindex++) {
22061+ br = au_sbr(sb, bindex);
22062+ if (au_br_fhsm(br->br_perm)
22063+ && (!(au_br_sb(br)->s_flags & MS_RDONLY)))
22064+ return bindex;
22065+ }
22066+ else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
22067+ for (bindex++; bindex <= bend; bindex++) {
22068+ br = au_sbr(sb, bindex);
22069+ if (!au_br_rdonly(br))
22070+ return bindex;
22071+ }
22072+ else
22073+ for (bindex++; bindex <= bend; bindex++) {
22074+ br = au_sbr(sb, bindex);
22075+ if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
22076+ if (au_br_rdonly(br))
22077+ a->mvdown.flags
22078+ |= AUFS_MVDOWN_ROLOWER_R;
22079+ return bindex;
22080+ }
22081+ }
22082+
22083+ return -1;
22084+}
22085+
c2b27bf2 22086+/* make the parent dir on bdst */
392086de 22087+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22088+{
22089+ int err;
22090+
22091+ err = 0;
22092+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
22093+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
22094+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
22095+ a->mvd_h_dst_parent = NULL;
22096+ if (au_dbend(a->parent) >= a->mvd_bdst)
22097+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22098+ if (!a->mvd_h_dst_parent) {
22099+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
22100+ if (unlikely(err)) {
392086de 22101+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
c2b27bf2
AM
22102+ goto out;
22103+ }
22104+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22105+ }
22106+
22107+out:
22108+ AuTraceErr(err);
22109+ return err;
22110+}
22111+
22112+/* lock them all */
392086de 22113+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22114+{
22115+ int err;
22116+ struct dentry *h_trap;
22117+
22118+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
22119+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
c1595e42
JR
22120+ err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
22121+ au_opt_udba(a->sb),
22122+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22123+ AuTraceErr(err);
22124+ if (unlikely(err)) {
22125+ AU_MVD_PR(dmsg, "pin_dst failed\n");
22126+ goto out;
22127+ }
22128+
c2b27bf2
AM
22129+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
22130+ a->rename_lock = 0;
c1595e42
JR
22131+ au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22132+ AuLsc_DI_PARENT, AuLsc_I_PARENT3,
22133+ au_opt_udba(a->sb),
22134+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22135+ err = au_do_pin(&a->mvd_pin_src);
22136+ AuTraceErr(err);
5527c038 22137+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22138+ if (unlikely(err)) {
22139+ AU_MVD_PR(dmsg, "pin_src failed\n");
22140+ goto out_dst;
22141+ }
22142+ goto out; /* success */
c2b27bf2
AM
22143+ }
22144+
c2b27bf2 22145+ a->rename_lock = 1;
c1595e42
JR
22146+ au_pin_hdir_unlock(&a->mvd_pin_dst);
22147+ err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22148+ au_opt_udba(a->sb),
22149+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22150+ AuTraceErr(err);
5527c038 22151+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22152+ if (unlikely(err)) {
22153+ AU_MVD_PR(dmsg, "pin_src failed\n");
22154+ au_pin_hdir_lock(&a->mvd_pin_dst);
22155+ goto out_dst;
22156+ }
22157+ au_pin_hdir_unlock(&a->mvd_pin_src);
c2b27bf2
AM
22158+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22159+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22160+ if (h_trap) {
22161+ err = (h_trap != a->mvd_h_src_parent);
22162+ if (err)
22163+ err = (h_trap != a->mvd_h_dst_parent);
22164+ }
22165+ BUG_ON(err); /* it should never happen */
c1595e42
JR
22166+ if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
22167+ err = -EBUSY;
22168+ AuTraceErr(err);
22169+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22170+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22171+ au_pin_hdir_lock(&a->mvd_pin_src);
22172+ au_unpin(&a->mvd_pin_src);
22173+ au_pin_hdir_lock(&a->mvd_pin_dst);
22174+ goto out_dst;
22175+ }
22176+ goto out; /* success */
c2b27bf2 22177+
c1595e42
JR
22178+out_dst:
22179+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22180+out:
22181+ AuTraceErr(err);
22182+ return err;
22183+}
22184+
392086de 22185+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2 22186+{
c1595e42
JR
22187+ if (!a->rename_lock)
22188+ au_unpin(&a->mvd_pin_src);
22189+ else {
c2b27bf2
AM
22190+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22191+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
c1595e42
JR
22192+ au_pin_hdir_lock(&a->mvd_pin_src);
22193+ au_unpin(&a->mvd_pin_src);
22194+ au_pin_hdir_lock(&a->mvd_pin_dst);
22195+ }
22196+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22197+}
22198+
22199+/* copy-down the file */
392086de 22200+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22201+{
22202+ int err;
22203+ struct au_cp_generic cpg = {
22204+ .dentry = a->dentry,
22205+ .bdst = a->mvd_bdst,
22206+ .bsrc = a->mvd_bsrc,
22207+ .len = -1,
c1595e42 22208+ .pin = &a->mvd_pin_dst,
c2b27bf2
AM
22209+ .flags = AuCpup_DTIME | AuCpup_HOPEN
22210+ };
22211+
22212+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
392086de
AM
22213+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22214+ au_fset_cpup(cpg.flags, OVERWRITE);
22215+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
22216+ au_fset_cpup(cpg.flags, RWDST);
c2b27bf2
AM
22217+ err = au_sio_cpdown_simple(&cpg);
22218+ if (unlikely(err))
392086de 22219+ AU_MVD_PR(dmsg, "cpdown failed\n");
c2b27bf2
AM
22220+
22221+ AuTraceErr(err);
22222+ return err;
22223+}
22224+
22225+/*
22226+ * unlink the whiteout on bdst, if it exists; it may have been created by UDBA
22227+ * while we were sleeping
22228+ */
392086de 22229+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22230+{
22231+ int err;
22232+ struct path h_path;
22233+ struct au_branch *br;
523b37e3 22234+ struct inode *delegated;
c2b27bf2
AM
22235+
22236+ br = au_sbr(a->sb, a->mvd_bdst);
22237+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
22238+ err = PTR_ERR(h_path.dentry);
22239+ if (IS_ERR(h_path.dentry)) {
392086de 22240+ AU_MVD_PR(dmsg, "wh_lkup failed\n");
c2b27bf2
AM
22241+ goto out;
22242+ }
22243+
22244+ err = 0;
5527c038 22245+ if (d_is_positive(h_path.dentry)) {
c2b27bf2 22246+ h_path.mnt = au_br_mnt(br);
523b37e3 22247+ delegated = NULL;
5527c038 22248+ err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
523b37e3
AM
22249+ &delegated, /*force*/0);
22250+ if (unlikely(err == -EWOULDBLOCK)) {
22251+ pr_warn("cannot retry for NFSv4 delegation"
22252+ " for an internal unlink\n");
22253+ iput(delegated);
22254+ }
c2b27bf2 22255+ if (unlikely(err))
392086de 22256+ AU_MVD_PR(dmsg, "wh_unlink failed\n");
c2b27bf2
AM
22257+ }
22258+ dput(h_path.dentry);
22259+
22260+out:
22261+ AuTraceErr(err);
22262+ return err;
22263+}
22264+
22265+/*
22266+ * unlink the topmost h_dentry
c2b27bf2 22267+ */
392086de 22268+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22269+{
22270+ int err;
22271+ struct path h_path;
523b37e3 22272+ struct inode *delegated;
c2b27bf2
AM
22273+
22274+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
22275+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
523b37e3
AM
22276+ delegated = NULL;
22277+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
22278+ if (unlikely(err == -EWOULDBLOCK)) {
22279+ pr_warn("cannot retry for NFSv4 delegation"
22280+ " for an internal unlink\n");
22281+ iput(delegated);
22282+ }
c2b27bf2 22283+ if (unlikely(err))
392086de 22284+ AU_MVD_PR(dmsg, "unlink failed\n");
c2b27bf2
AM
22285+
22286+ AuTraceErr(err);
22287+ return err;
22288+}
22289+
076b876e
AM
22290+/* Since mvdown succeeded, we ignore an error of this function */
22291+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
22292+{
22293+ int err;
22294+ struct au_branch *br;
22295+
22296+ a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
22297+ br = au_sbr(a->sb, a->mvd_bsrc);
22298+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
22299+ if (!err) {
22300+ br = au_sbr(a->sb, a->mvd_bdst);
22301+ a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
22302+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
22303+ }
22304+ if (!err)
22305+ a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
22306+ else
22307+ AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
22308+}
22309+
c2b27bf2
AM
22310+/*
22311+ * copy-down the file and unlink the bsrc file.
22312+ * - unlink the bdst whout if exist
22313+ * - copy-down the file (with whtmp name and rename)
22314+ * - unlink the bsrc file
22315+ */
392086de 22316+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22317+{
22318+ int err;
22319+
392086de 22320+ err = au_do_mkdir(dmsg, a);
c2b27bf2 22321+ if (!err)
392086de 22322+ err = au_do_lock(dmsg, a);
c2b27bf2
AM
22323+ if (unlikely(err))
22324+ goto out;
22325+
22326+ /*
22327+ * do not revert the activities we made on bdst since they should be
22328+ * harmless in aufs.
22329+ */
22330+
392086de 22331+ err = au_do_cpdown(dmsg, a);
c2b27bf2 22332+ if (!err)
392086de
AM
22333+ err = au_do_unlink_wh(dmsg, a);
22334+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
22335+ err = au_do_unlink(dmsg, a);
c2b27bf2
AM
22336+ if (unlikely(err))
22337+ goto out_unlock;
22338+
c1595e42
JR
22339+ AuDbg("%pd2, 0x%x, %d --> %d\n",
22340+ a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
076b876e
AM
22341+ if (find_lower_writable(a) < 0)
22342+ a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
22343+
22344+ if (a->mvdown.flags & AUFS_MVDOWN_STFS)
22345+ au_do_stfs(dmsg, a);
22346+
c2b27bf2 22347+ /* maintain internal array */
392086de
AM
22348+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
22349+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
22350+ au_set_dbstart(a->dentry, a->mvd_bdst);
22351+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
22352+ au_set_ibstart(a->inode, a->mvd_bdst);
79b8bda9
AM
22353+ } else {
22354+ /* hide the lower */
22355+ au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
22356+ au_set_dbend(a->dentry, a->mvd_bsrc);
22357+ au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
22358+ au_set_ibend(a->inode, a->mvd_bsrc);
392086de 22359+ }
c2b27bf2
AM
22360+ if (au_dbend(a->dentry) < a->mvd_bdst)
22361+ au_set_dbend(a->dentry, a->mvd_bdst);
c2b27bf2
AM
22362+ if (au_ibend(a->inode) < a->mvd_bdst)
22363+ au_set_ibend(a->inode, a->mvd_bdst);
22364+
22365+out_unlock:
392086de 22366+ au_do_unlock(dmsg, a);
c2b27bf2
AM
22367+out:
22368+ AuTraceErr(err);
22369+ return err;
22370+}
22371+
22372+/* ---------------------------------------------------------------------- */
22373+
c2b27bf2 22374+/* make sure the file is idle */
392086de 22375+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22376+{
22377+ int err, plinked;
c2b27bf2
AM
22378+
22379+ err = 0;
c2b27bf2
AM
22380+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
22381+ if (au_dbstart(a->dentry) == a->mvd_bsrc
c1595e42 22382+ && au_dcount(a->dentry) == 1
c2b27bf2 22383+ && atomic_read(&a->inode->i_count) == 1
392086de 22384+ /* && a->mvd_h_src_inode->i_nlink == 1 */
c2b27bf2
AM
22385+ && (!plinked || !au_plink_test(a->inode))
22386+ && a->inode->i_nlink == 1)
22387+ goto out;
22388+
22389+ err = -EBUSY;
392086de 22390+ AU_MVD_PR(dmsg,
c1595e42
JR
22391+ "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
22392+ a->mvd_bsrc, au_dbstart(a->dentry), au_dcount(a->dentry),
c2b27bf2 22393+ atomic_read(&a->inode->i_count), a->inode->i_nlink,
392086de 22394+ a->mvd_h_src_inode->i_nlink,
c2b27bf2
AM
22395+ plinked, plinked ? au_plink_test(a->inode) : 0);
22396+
22397+out:
22398+ AuTraceErr(err);
22399+ return err;
22400+}
22401+
22402+/* make sure the parent dir is fine */
392086de 22403+static int au_mvd_args_parent(const unsigned char dmsg,
c2b27bf2
AM
22404+ struct au_mvd_args *a)
22405+{
22406+ int err;
22407+ aufs_bindex_t bindex;
22408+
22409+ err = 0;
22410+ if (unlikely(au_alive_dir(a->parent))) {
22411+ err = -ENOENT;
392086de 22412+ AU_MVD_PR(dmsg, "parent dir is dead\n");
c2b27bf2
AM
22413+ goto out;
22414+ }
22415+
22416+ a->bopq = au_dbdiropq(a->parent);
22417+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
22418+ AuDbg("b%d\n", bindex);
22419+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
22420+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
22421+ err = -EINVAL;
392086de
AM
22422+ a->mvd_errno = EAU_MVDOWN_OPAQUE;
22423+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
c2b27bf2
AM
22424+ a->bopq, a->mvd_bdst);
22425+ }
22426+
22427+out:
22428+ AuTraceErr(err);
22429+ return err;
22430+}
22431+
392086de 22432+static int au_mvd_args_intermediate(const unsigned char dmsg,
c2b27bf2
AM
22433+ struct au_mvd_args *a)
22434+{
22435+ int err;
22436+ struct au_dinfo *dinfo, *tmp;
22437+
22438+ /* lookup the next lower positive entry */
22439+ err = -ENOMEM;
22440+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
22441+ if (unlikely(!tmp))
22442+ goto out;
22443+
22444+ a->bfound = -1;
22445+ a->bwh = -1;
22446+ dinfo = au_di(a->dentry);
22447+ au_di_cp(tmp, dinfo);
22448+ au_di_swap(tmp, dinfo);
22449+
22450+ /* returns the number of positive dentries */
22451+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1, /*type*/0);
22452+ if (!err)
22453+ a->bwh = au_dbwh(a->dentry);
22454+ else if (err > 0)
22455+ a->bfound = au_dbstart(a->dentry);
22456+
22457+ au_di_swap(tmp, dinfo);
22458+ au_rw_write_unlock(&tmp->di_rwsem);
22459+ au_di_free(tmp);
22460+ if (unlikely(err < 0))
392086de 22461+ AU_MVD_PR(dmsg, "failed look-up lower\n");
c2b27bf2
AM
22462+
22463+ /*
22464+ * here, we have these cases.
22465+ * bfound == -1
22466+ * no positive dentry under bsrc. there are more sub-cases.
22467+ * bwh < 0
22468+ * there no whiteout, we can safely move-down.
22469+ * bwh <= bsrc
22470+ * impossible
22471+ * bsrc < bwh && bwh < bdst
22472+ * there is a whiteout on RO branch. cannot proceed.
22473+ * bwh == bdst
22474+ * there is a whiteout on the RW target branch. it should
22475+ * be removed.
22476+ * bdst < bwh
22477+ * there is a whiteout somewhere unrelated branch.
22478+ * -1 < bfound && bfound <= bsrc
22479+ * impossible.
22480+ * bfound < bdst
22481+ * found, but it is on RO branch between bsrc and bdst. cannot
22482+ * proceed.
22483+ * bfound == bdst
22484+ * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
22485+ * error.
22486+ * bdst < bfound
22487+ * found, after we create the file on bdst, it will be hidden.
22488+ */
22489+
22490+ AuDebugOn(a->bfound == -1
22491+ && a->bwh != -1
22492+ && a->bwh <= a->mvd_bsrc);
22493+ AuDebugOn(-1 < a->bfound
22494+ && a->bfound <= a->mvd_bsrc);
22495+
22496+ err = -EINVAL;
22497+ if (a->bfound == -1
22498+ && a->mvd_bsrc < a->bwh
22499+ && a->bwh != -1
22500+ && a->bwh < a->mvd_bdst) {
392086de
AM
22501+ a->mvd_errno = EAU_MVDOWN_WHITEOUT;
22502+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
c2b27bf2
AM
22503+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
22504+ goto out;
22505+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
392086de
AM
22506+ a->mvd_errno = EAU_MVDOWN_UPPER;
22507+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
c2b27bf2
AM
22508+ a->mvd_bdst, a->bfound);
22509+ goto out;
22510+ }
22511+
22512+ err = 0; /* success */
22513+
22514+out:
22515+ AuTraceErr(err);
22516+ return err;
22517+}
22518+
392086de 22519+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22520+{
22521+ int err;
22522+
392086de
AM
22523+ err = 0;
22524+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22525+ && a->bfound == a->mvd_bdst)
22526+ err = -EEXIST;
c2b27bf2
AM
22527+ AuTraceErr(err);
22528+ return err;
22529+}
22530+
392086de 22531+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22532+{
22533+ int err;
22534+ struct au_branch *br;
22535+
22536+ err = -EISDIR;
22537+ if (unlikely(S_ISDIR(a->inode->i_mode)))
22538+ goto out;
22539+
22540+ err = -EINVAL;
392086de
AM
22541+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
22542+ a->mvd_bsrc = au_ibstart(a->inode);
22543+ else {
22544+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
22545+ if (unlikely(a->mvd_bsrc < 0
22546+ || (a->mvd_bsrc < au_dbstart(a->dentry)
22547+ || au_dbend(a->dentry) < a->mvd_bsrc
22548+ || !au_h_dptr(a->dentry, a->mvd_bsrc))
22549+ || (a->mvd_bsrc < au_ibstart(a->inode)
22550+ || au_ibend(a->inode) < a->mvd_bsrc
22551+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
22552+ a->mvd_errno = EAU_MVDOWN_NOUPPER;
22553+ AU_MVD_PR(dmsg, "no upper\n");
22554+ goto out;
22555+ }
22556+ }
c2b27bf2 22557+ if (unlikely(a->mvd_bsrc == au_sbend(a->sb))) {
392086de
AM
22558+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
22559+ AU_MVD_PR(dmsg, "on the bottom\n");
c2b27bf2
AM
22560+ goto out;
22561+ }
392086de 22562+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
c2b27bf2
AM
22563+ br = au_sbr(a->sb, a->mvd_bsrc);
22564+ err = au_br_rdonly(br);
392086de
AM
22565+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
22566+ if (unlikely(err))
22567+ goto out;
22568+ } else if (!(vfsub_native_ro(a->mvd_h_src_inode)
22569+ || IS_APPEND(a->mvd_h_src_inode))) {
22570+ if (err)
22571+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
22572+ /* go on */
22573+ } else
c2b27bf2
AM
22574+ goto out;
22575+
22576+ err = -EINVAL;
392086de
AM
22577+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
22578+ a->mvd_bdst = find_lower_writable(a);
22579+ if (unlikely(a->mvd_bdst < 0)) {
22580+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
22581+ AU_MVD_PR(dmsg, "no writable lower branch\n");
22582+ goto out;
22583+ }
22584+ } else {
22585+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
22586+ if (unlikely(a->mvd_bdst < 0
22587+ || au_sbend(a->sb) < a->mvd_bdst)) {
22588+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
22589+ AU_MVD_PR(dmsg, "no lower brid\n");
22590+ goto out;
22591+ }
c2b27bf2
AM
22592+ }
22593+
392086de 22594+ err = au_mvd_args_busy(dmsg, a);
c2b27bf2 22595+ if (!err)
392086de 22596+ err = au_mvd_args_parent(dmsg, a);
c2b27bf2 22597+ if (!err)
392086de 22598+ err = au_mvd_args_intermediate(dmsg, a);
c2b27bf2 22599+ if (!err)
392086de 22600+ err = au_mvd_args_exist(dmsg, a);
c2b27bf2
AM
22601+ if (!err)
22602+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
22603+
22604+out:
22605+ AuTraceErr(err);
22606+ return err;
22607+}
22608+
22609+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
22610+{
392086de
AM
22611+ int err, e;
22612+ unsigned char dmsg;
22613+ struct au_mvd_args *args;
79b8bda9 22614+ struct inode *inode;
c2b27bf2 22615+
79b8bda9 22616+ inode = d_inode(dentry);
c2b27bf2
AM
22617+ err = -EPERM;
22618+ if (unlikely(!capable(CAP_SYS_ADMIN)))
22619+ goto out;
22620+
392086de
AM
22621+ err = -ENOMEM;
22622+ args = kmalloc(sizeof(*args), GFP_NOFS);
22623+ if (unlikely(!args))
22624+ goto out;
22625+
22626+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
22627+ if (!err)
22628+ err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
c2b27bf2
AM
22629+ if (unlikely(err)) {
22630+ err = -EFAULT;
392086de
AM
22631+ AuTraceErr(err);
22632+ goto out_free;
c2b27bf2 22633+ }
392086de
AM
22634+ AuDbg("flags 0x%x\n", args->mvdown.flags);
22635+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
22636+ args->mvdown.au_errno = 0;
22637+ args->dentry = dentry;
79b8bda9 22638+ args->inode = inode;
392086de 22639+ args->sb = dentry->d_sb;
c2b27bf2 22640+
392086de
AM
22641+ err = -ENOENT;
22642+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
22643+ args->parent = dget_parent(dentry);
5527c038 22644+ args->dir = d_inode(args->parent);
392086de
AM
22645+ mutex_lock_nested(&args->dir->i_mutex, I_MUTEX_PARENT);
22646+ dput(args->parent);
22647+ if (unlikely(args->parent != dentry->d_parent)) {
22648+ AU_MVD_PR(dmsg, "parent dir is moved\n");
c2b27bf2
AM
22649+ goto out_dir;
22650+ }
22651+
79b8bda9 22652+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
b95c5147 22653+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
c2b27bf2
AM
22654+ if (unlikely(err))
22655+ goto out_inode;
22656+
392086de
AM
22657+ di_write_lock_parent(args->parent);
22658+ err = au_mvd_args(dmsg, args);
c2b27bf2
AM
22659+ if (unlikely(err))
22660+ goto out_parent;
22661+
392086de 22662+ err = au_do_mvdown(dmsg, args);
c2b27bf2
AM
22663+ if (unlikely(err))
22664+ goto out_parent;
c2b27bf2 22665+
392086de 22666+ au_cpup_attr_timesizes(args->dir);
79b8bda9
AM
22667+ au_cpup_attr_timesizes(inode);
22668+ if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
22669+ au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
c2b27bf2
AM
22670+ /* au_digen_dec(dentry); */
22671+
22672+out_parent:
392086de 22673+ di_write_unlock(args->parent);
c2b27bf2
AM
22674+ aufs_read_unlock(dentry, AuLock_DW);
22675+out_inode:
79b8bda9 22676+ mutex_unlock(&inode->i_mutex);
c2b27bf2 22677+out_dir:
392086de
AM
22678+ mutex_unlock(&args->dir->i_mutex);
22679+out_free:
22680+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
22681+ if (unlikely(e))
22682+ err = -EFAULT;
22683+ kfree(args);
c2b27bf2
AM
22684+out:
22685+ AuTraceErr(err);
22686+ return err;
22687+}
22688diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
22689--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
b95c5147 22690+++ linux/fs/aufs/opts.c 2015-12-10 17:59:16.839499823 +0100
79b8bda9 22691@@ -0,0 +1,1859 @@
1facf9fc 22692+/*
2000de60 22693+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 22694+ *
22695+ * This program, aufs is free software; you can redistribute it and/or modify
22696+ * it under the terms of the GNU General Public License as published by
22697+ * the Free Software Foundation; either version 2 of the License, or
22698+ * (at your option) any later version.
dece6358
AM
22699+ *
22700+ * This program is distributed in the hope that it will be useful,
22701+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22702+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22703+ * GNU General Public License for more details.
22704+ *
22705+ * You should have received a copy of the GNU General Public License
523b37e3 22706+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 22707+ */
22708+
22709+/*
22710+ * mount options/flags
22711+ */
22712+
dece6358 22713+#include <linux/namei.h>
1facf9fc 22714+#include <linux/types.h> /* a distribution requires */
22715+#include <linux/parser.h>
22716+#include "aufs.h"
22717+
22718+/* ---------------------------------------------------------------------- */
22719+
22720+enum {
22721+ Opt_br,
7e9cd9fe
AM
22722+ Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
22723+ Opt_idel, Opt_imod,
22724+ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
dece6358 22725+ Opt_rdblk_def, Opt_rdhash_def,
7e9cd9fe 22726+ Opt_xino, Opt_noxino,
1facf9fc 22727+ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
22728+ Opt_trunc_xino_path, Opt_itrunc_xino,
22729+ Opt_trunc_xib, Opt_notrunc_xib,
dece6358 22730+ Opt_shwh, Opt_noshwh,
1facf9fc 22731+ Opt_plink, Opt_noplink, Opt_list_plink,
22732+ Opt_udba,
4a4d8108 22733+ Opt_dio, Opt_nodio,
1facf9fc 22734+ Opt_diropq_a, Opt_diropq_w,
22735+ Opt_warn_perm, Opt_nowarn_perm,
22736+ Opt_wbr_copyup, Opt_wbr_create,
076b876e 22737+ Opt_fhsm_sec,
1facf9fc 22738+ Opt_verbose, Opt_noverbose,
22739+ Opt_sum, Opt_nosum, Opt_wsum,
076b876e 22740+ Opt_dirperm1, Opt_nodirperm1,
c1595e42 22741+ Opt_acl, Opt_noacl,
1facf9fc 22742+ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
22743+};
22744+
22745+static match_table_t options = {
22746+ {Opt_br, "br=%s"},
22747+ {Opt_br, "br:%s"},
22748+
22749+ {Opt_add, "add=%d:%s"},
22750+ {Opt_add, "add:%d:%s"},
22751+ {Opt_add, "ins=%d:%s"},
22752+ {Opt_add, "ins:%d:%s"},
22753+ {Opt_append, "append=%s"},
22754+ {Opt_append, "append:%s"},
22755+ {Opt_prepend, "prepend=%s"},
22756+ {Opt_prepend, "prepend:%s"},
22757+
22758+ {Opt_del, "del=%s"},
22759+ {Opt_del, "del:%s"},
22760+ /* {Opt_idel, "idel:%d"}, */
22761+ {Opt_mod, "mod=%s"},
22762+ {Opt_mod, "mod:%s"},
22763+ /* {Opt_imod, "imod:%d:%s"}, */
22764+
22765+ {Opt_dirwh, "dirwh=%d"},
22766+
22767+ {Opt_xino, "xino=%s"},
22768+ {Opt_noxino, "noxino"},
22769+ {Opt_trunc_xino, "trunc_xino"},
22770+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
22771+ {Opt_notrunc_xino, "notrunc_xino"},
22772+ {Opt_trunc_xino_path, "trunc_xino=%s"},
22773+ {Opt_itrunc_xino, "itrunc_xino=%d"},
22774+ /* {Opt_zxino, "zxino=%s"}, */
22775+ {Opt_trunc_xib, "trunc_xib"},
22776+ {Opt_notrunc_xib, "notrunc_xib"},
22777+
e49829fe 22778+#ifdef CONFIG_PROC_FS
1facf9fc 22779+ {Opt_plink, "plink"},
e49829fe
JR
22780+#else
22781+ {Opt_ignore_silent, "plink"},
22782+#endif
22783+
1facf9fc 22784+ {Opt_noplink, "noplink"},
e49829fe 22785+
1facf9fc 22786+#ifdef CONFIG_AUFS_DEBUG
22787+ {Opt_list_plink, "list_plink"},
22788+#endif
22789+
22790+ {Opt_udba, "udba=%s"},
22791+
4a4d8108
AM
22792+ {Opt_dio, "dio"},
22793+ {Opt_nodio, "nodio"},
22794+
076b876e
AM
22795+#ifdef CONFIG_AUFS_FHSM
22796+ {Opt_fhsm_sec, "fhsm_sec=%d"},
22797+#else
22798+ {Opt_ignore_silent, "fhsm_sec=%d"},
22799+#endif
22800+
1facf9fc 22801+ {Opt_diropq_a, "diropq=always"},
22802+ {Opt_diropq_a, "diropq=a"},
22803+ {Opt_diropq_w, "diropq=whiteouted"},
22804+ {Opt_diropq_w, "diropq=w"},
22805+
22806+ {Opt_warn_perm, "warn_perm"},
22807+ {Opt_nowarn_perm, "nowarn_perm"},
22808+
22809+ /* keep them temporary */
1facf9fc 22810+ {Opt_ignore_silent, "nodlgt"},
1facf9fc 22811+ {Opt_ignore_silent, "clean_plink"},
22812+
dece6358
AM
22813+#ifdef CONFIG_AUFS_SHWH
22814+ {Opt_shwh, "shwh"},
22815+#endif
22816+ {Opt_noshwh, "noshwh"},
22817+
076b876e
AM
22818+ {Opt_dirperm1, "dirperm1"},
22819+ {Opt_nodirperm1, "nodirperm1"},
22820+
1facf9fc 22821+ {Opt_verbose, "verbose"},
22822+ {Opt_verbose, "v"},
22823+ {Opt_noverbose, "noverbose"},
22824+ {Opt_noverbose, "quiet"},
22825+ {Opt_noverbose, "q"},
22826+ {Opt_noverbose, "silent"},
22827+
22828+ {Opt_sum, "sum"},
22829+ {Opt_nosum, "nosum"},
22830+ {Opt_wsum, "wsum"},
22831+
22832+ {Opt_rdcache, "rdcache=%d"},
22833+ {Opt_rdblk, "rdblk=%d"},
dece6358 22834+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 22835+ {Opt_rdhash, "rdhash=%d"},
dece6358 22836+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 22837+
22838+ {Opt_wbr_create, "create=%s"},
22839+ {Opt_wbr_create, "create_policy=%s"},
22840+ {Opt_wbr_copyup, "cpup=%s"},
22841+ {Opt_wbr_copyup, "copyup=%s"},
22842+ {Opt_wbr_copyup, "copyup_policy=%s"},
22843+
c1595e42
JR
22844+ /* generic VFS flag */
22845+#ifdef CONFIG_FS_POSIX_ACL
22846+ {Opt_acl, "acl"},
22847+ {Opt_noacl, "noacl"},
22848+#else
22849+ {Opt_ignore_silent, "acl"},
22850+ {Opt_ignore_silent, "noacl"},
22851+#endif
22852+
1facf9fc 22853+ /* internal use for the scripts */
22854+ {Opt_ignore_silent, "si=%s"},
22855+
22856+ {Opt_br, "dirs=%s"},
22857+ {Opt_ignore, "debug=%d"},
22858+ {Opt_ignore, "delete=whiteout"},
22859+ {Opt_ignore, "delete=all"},
22860+ {Opt_ignore, "imap=%s"},
22861+
1308ab2a 22862+ /* temporary workaround, due to old mount(8)? */
22863+ {Opt_ignore_silent, "relatime"},
22864+
1facf9fc 22865+ {Opt_err, NULL}
22866+};
22867+
22868+/* ---------------------------------------------------------------------- */
22869+
076b876e 22870+static const char *au_parser_pattern(int val, match_table_t tbl)
1facf9fc 22871+{
076b876e
AM
22872+ struct match_token *p;
22873+
22874+ p = tbl;
22875+ while (p->pattern) {
22876+ if (p->token == val)
22877+ return p->pattern;
22878+ p++;
1facf9fc 22879+ }
22880+ BUG();
22881+ return "??";
22882+}
22883+
076b876e
AM
22884+static const char *au_optstr(int *val, match_table_t tbl)
22885+{
22886+ struct match_token *p;
22887+ int v;
22888+
22889+ v = *val;
2000de60
JR
22890+ if (!v)
22891+ goto out;
076b876e 22892+ p = tbl;
2000de60
JR
22893+ while (p->pattern) {
22894+ if (p->token
22895+ && (v & p->token) == p->token) {
076b876e
AM
22896+ *val &= ~p->token;
22897+ return p->pattern;
22898+ }
22899+ p++;
22900+ }
2000de60
JR
22901+
22902+out:
076b876e
AM
22903+ return NULL;
22904+}
22905+
1facf9fc 22906+/* ---------------------------------------------------------------------- */
22907+
1e00d052 22908+static match_table_t brperm = {
1facf9fc 22909+ {AuBrPerm_RO, AUFS_BRPERM_RO},
22910+ {AuBrPerm_RR, AUFS_BRPERM_RR},
22911+ {AuBrPerm_RW, AUFS_BRPERM_RW},
1e00d052
AM
22912+ {0, NULL}
22913+};
1facf9fc 22914+
86dc4139 22915+static match_table_t brattr = {
076b876e
AM
22916+ /* general */
22917+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
22918+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
c1595e42 22919+ /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
86dc4139 22920+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
2000de60 22921+#ifdef CONFIG_AUFS_FHSM
076b876e 22922+ {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
2000de60
JR
22923+#endif
22924+#ifdef CONFIG_AUFS_XATTR
c1595e42
JR
22925+ {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
22926+ {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
22927+ {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
22928+ {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
22929+ {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
22930+ {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
2000de60 22931+#endif
076b876e
AM
22932+
22933+ /* ro/rr branch */
1e00d052 22934+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
076b876e
AM
22935+
22936+ /* rw branch */
22937+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
1e00d052 22938+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
076b876e 22939+
1e00d052 22940+ {0, NULL}
1facf9fc 22941+};
22942+
1e00d052
AM
22943+static int br_attr_val(char *str, match_table_t table, substring_t args[])
22944+{
22945+ int attr, v;
22946+ char *p;
22947+
22948+ attr = 0;
22949+ do {
22950+ p = strchr(str, '+');
22951+ if (p)
22952+ *p = 0;
22953+ v = match_token(str, table, args);
076b876e
AM
22954+ if (v) {
22955+ if (v & AuBrAttr_CMOO_Mask)
22956+ attr &= ~AuBrAttr_CMOO_Mask;
1e00d052 22957+ attr |= v;
076b876e 22958+ } else {
1e00d052
AM
22959+ if (p)
22960+ *p = '+';
0c3ec466 22961+ pr_warn("ignored branch attribute %s\n", str);
1e00d052
AM
22962+ break;
22963+ }
22964+ if (p)
22965+ str = p + 1;
22966+ } while (p);
22967+
22968+ return attr;
22969+}
22970+
076b876e
AM
22971+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
22972+{
22973+ int sz;
22974+ const char *p;
22975+ char *q;
22976+
076b876e
AM
22977+ q = str->a;
22978+ *q = 0;
22979+ p = au_optstr(&perm, brattr);
22980+ if (p) {
22981+ sz = strlen(p);
22982+ memcpy(q, p, sz + 1);
22983+ q += sz;
22984+ } else
22985+ goto out;
22986+
22987+ do {
22988+ p = au_optstr(&perm, brattr);
22989+ if (p) {
22990+ *q++ = '+';
22991+ sz = strlen(p);
22992+ memcpy(q, p, sz + 1);
22993+ q += sz;
22994+ }
22995+ } while (p);
22996+
22997+out:
c1595e42 22998+ return q - str->a;
076b876e
AM
22999+}
23000+
4a4d8108 23001+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 23002+{
076b876e
AM
23003+ int val, bad, sz;
23004+ char *p;
1facf9fc 23005+ substring_t args[MAX_OPT_ARGS];
076b876e 23006+ au_br_perm_str_t attr;
1facf9fc 23007+
1e00d052
AM
23008+ p = strchr(perm, '+');
23009+ if (p)
23010+ *p = 0;
23011+ val = match_token(perm, brperm, args);
23012+ if (!val) {
23013+ if (p)
23014+ *p = '+';
0c3ec466 23015+ pr_warn("ignored branch permission %s\n", perm);
1e00d052
AM
23016+ val = AuBrPerm_RO;
23017+ goto out;
23018+ }
23019+ if (!p)
23020+ goto out;
23021+
076b876e
AM
23022+ val |= br_attr_val(p + 1, brattr, args);
23023+
23024+ bad = 0;
86dc4139 23025+ switch (val & AuBrPerm_Mask) {
1e00d052
AM
23026+ case AuBrPerm_RO:
23027+ case AuBrPerm_RR:
076b876e
AM
23028+ bad = val & AuBrWAttr_Mask;
23029+ val &= ~AuBrWAttr_Mask;
1e00d052
AM
23030+ break;
23031+ case AuBrPerm_RW:
076b876e
AM
23032+ bad = val & AuBrRAttr_Mask;
23033+ val &= ~AuBrRAttr_Mask;
1e00d052
AM
23034+ break;
23035+ }
c1595e42
JR
23036+
23037+ /*
23038+ * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
23039+ * does not treat it as an error, just warning.
23040+ * this is a tiny guard for the user operation.
23041+ */
23042+ if (val & AuBrAttr_UNPIN) {
23043+ bad |= AuBrAttr_UNPIN;
23044+ val &= ~AuBrAttr_UNPIN;
23045+ }
23046+
076b876e
AM
23047+ if (unlikely(bad)) {
23048+ sz = au_do_optstr_br_attr(&attr, bad);
23049+ AuDebugOn(!sz);
23050+ pr_warn("ignored branch attribute %s\n", attr.a);
23051+ }
1e00d052
AM
23052+
23053+out:
1facf9fc 23054+ return val;
23055+}
23056+
076b876e 23057+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
1facf9fc 23058+{
076b876e
AM
23059+ au_br_perm_str_t attr;
23060+ const char *p;
23061+ char *q;
1e00d052
AM
23062+ int sz;
23063+
076b876e
AM
23064+ q = str->a;
23065+ p = au_optstr(&perm, brperm);
23066+ AuDebugOn(!p || !*p);
23067+ sz = strlen(p);
23068+ memcpy(q, p, sz + 1);
23069+ q += sz;
1e00d052 23070+
076b876e
AM
23071+ sz = au_do_optstr_br_attr(&attr, perm);
23072+ if (sz) {
23073+ *q++ = '+';
23074+ memcpy(q, attr.a, sz + 1);
1e00d052
AM
23075+ }
23076+
076b876e 23077+ AuDebugOn(strlen(str->a) >= sizeof(str->a));
1facf9fc 23078+}
23079+
23080+/* ---------------------------------------------------------------------- */
23081+
23082+static match_table_t udbalevel = {
23083+ {AuOpt_UDBA_REVAL, "reval"},
23084+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
23085+#ifdef CONFIG_AUFS_HNOTIFY
23086+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
23087+#ifdef CONFIG_AUFS_HFSNOTIFY
23088+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 23089+#endif
1facf9fc 23090+#endif
23091+ {-1, NULL}
23092+};
23093+
4a4d8108 23094+static int noinline_for_stack udba_val(char *str)
1facf9fc 23095+{
23096+ substring_t args[MAX_OPT_ARGS];
23097+
7f207e10 23098+ return match_token(str, udbalevel, args);
1facf9fc 23099+}
23100+
23101+const char *au_optstr_udba(int udba)
23102+{
076b876e 23103+ return au_parser_pattern(udba, udbalevel);
1facf9fc 23104+}
23105+
23106+/* ---------------------------------------------------------------------- */
23107+
23108+static match_table_t au_wbr_create_policy = {
23109+ {AuWbrCreate_TDP, "tdp"},
23110+ {AuWbrCreate_TDP, "top-down-parent"},
23111+ {AuWbrCreate_RR, "rr"},
23112+ {AuWbrCreate_RR, "round-robin"},
23113+ {AuWbrCreate_MFS, "mfs"},
23114+ {AuWbrCreate_MFS, "most-free-space"},
23115+ {AuWbrCreate_MFSV, "mfs:%d"},
23116+ {AuWbrCreate_MFSV, "most-free-space:%d"},
23117+
23118+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
23119+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
23120+ {AuWbrCreate_PMFS, "pmfs"},
23121+ {AuWbrCreate_PMFSV, "pmfs:%d"},
392086de
AM
23122+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
23123+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
1facf9fc 23124+
23125+ {-1, NULL}
23126+};
23127+
dece6358
AM
23128+/*
23129+ * cf. linux/lib/parser.c and cmdline.c
23130+ * gave up calling memparse() since it uses simple_strtoull() instead of
9dbd164d 23131+ * kstrto...().
dece6358 23132+ */
4a4d8108
AM
23133+static int noinline_for_stack
23134+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 23135+{
23136+ int err;
23137+ unsigned int len;
23138+ char a[32];
23139+
23140+ err = -ERANGE;
23141+ len = s->to - s->from;
23142+ if (len + 1 <= sizeof(a)) {
23143+ memcpy(a, s->from, len);
23144+ a[len] = '\0';
9dbd164d 23145+ err = kstrtoull(a, 0, result);
1facf9fc 23146+ }
23147+ return err;
23148+}
23149+
23150+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
23151+ struct au_opt_wbr_create *create)
23152+{
23153+ int err;
23154+ unsigned long long ull;
23155+
23156+ err = 0;
23157+ if (!au_match_ull(arg, &ull))
23158+ create->mfsrr_watermark = ull;
23159+ else {
4a4d8108 23160+ pr_err("bad integer in %s\n", str);
1facf9fc 23161+ err = -EINVAL;
23162+ }
23163+
23164+ return err;
23165+}
23166+
23167+static int au_wbr_mfs_sec(substring_t *arg, char *str,
23168+ struct au_opt_wbr_create *create)
23169+{
23170+ int n, err;
23171+
23172+ err = 0;
027c5e7a 23173+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 23174+ create->mfs_second = n;
23175+ else {
4a4d8108 23176+ pr_err("bad integer in %s\n", str);
1facf9fc 23177+ err = -EINVAL;
23178+ }
23179+
23180+ return err;
23181+}
23182+
4a4d8108
AM
23183+static int noinline_for_stack
23184+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 23185+{
23186+ int err, e;
23187+ substring_t args[MAX_OPT_ARGS];
23188+
23189+ err = match_token(str, au_wbr_create_policy, args);
23190+ create->wbr_create = err;
23191+ switch (err) {
23192+ case AuWbrCreate_MFSRRV:
392086de 23193+ case AuWbrCreate_PMFSRRV:
1facf9fc 23194+ e = au_wbr_mfs_wmark(&args[0], str, create);
23195+ if (!e)
23196+ e = au_wbr_mfs_sec(&args[1], str, create);
23197+ if (unlikely(e))
23198+ err = e;
23199+ break;
23200+ case AuWbrCreate_MFSRR:
392086de 23201+ case AuWbrCreate_PMFSRR:
1facf9fc 23202+ e = au_wbr_mfs_wmark(&args[0], str, create);
23203+ if (unlikely(e)) {
23204+ err = e;
23205+ break;
23206+ }
23207+ /*FALLTHROUGH*/
23208+ case AuWbrCreate_MFS:
23209+ case AuWbrCreate_PMFS:
027c5e7a 23210+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 23211+ break;
23212+ case AuWbrCreate_MFSV:
23213+ case AuWbrCreate_PMFSV:
23214+ e = au_wbr_mfs_sec(&args[0], str, create);
23215+ if (unlikely(e))
23216+ err = e;
23217+ break;
23218+ }
23219+
23220+ return err;
23221+}
23222+
23223+const char *au_optstr_wbr_create(int wbr_create)
23224+{
076b876e 23225+ return au_parser_pattern(wbr_create, au_wbr_create_policy);
1facf9fc 23226+}
23227+
23228+static match_table_t au_wbr_copyup_policy = {
23229+ {AuWbrCopyup_TDP, "tdp"},
23230+ {AuWbrCopyup_TDP, "top-down-parent"},
23231+ {AuWbrCopyup_BUP, "bup"},
23232+ {AuWbrCopyup_BUP, "bottom-up-parent"},
23233+ {AuWbrCopyup_BU, "bu"},
23234+ {AuWbrCopyup_BU, "bottom-up"},
23235+ {-1, NULL}
23236+};
23237+
4a4d8108 23238+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 23239+{
23240+ substring_t args[MAX_OPT_ARGS];
23241+
23242+ return match_token(str, au_wbr_copyup_policy, args);
23243+}
23244+
23245+const char *au_optstr_wbr_copyup(int wbr_copyup)
23246+{
076b876e 23247+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
1facf9fc 23248+}
23249+
23250+/* ---------------------------------------------------------------------- */
23251+
23252+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
23253+
/*
 * Debug aid: print every parsed option of @opts, walking the array until
 * the Opt_tail terminator.  The body is compiled only when
 * CONFIG_AUFS_DEBUG is set; an option type without a case here hits BUG().
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	struct au_opt *opt;
	struct au_opt_wbr_create *create;

	for (opt = opts->opt; opt->type != Opt_tail; opt++) {
		switch (opt->type) {
		/* branch management */
		case Opt_add:
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_append:
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_prepend:
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			AuDbg("del {%s, %p}\n",
			      opt->del.pathname, opt->del.h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			AuDbg("mod {%s, 0x%x, %p}\n",
			      opt->mod.path, opt->mod.perm, opt->mod.h_root);
			break;

		/* readdir tunables */
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;

		/* xino */
		case Opt_xino:
			AuDbg("xino {%s %pD}\n",
			      opt->xino.path, opt->xino.file);
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			AuDbg("trunc_xino %d\n", opt->xino_itrunc.bindex);
			break;
		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;

		/* plain on/off switches */
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_dirperm1:
			AuLabel(dirperm1);
			break;
		case Opt_nodirperm1:
			AuLabel(nodirperm1);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
			      opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_dio:
			AuLabel(dio);
			break;
		case Opt_nodio:
			AuLabel(nodio);
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;

		/* writable-branch policies */
		case Opt_wbr_create:
			create = &opt->wbr_create;
			AuDbg("create %d, %s\n", create->wbr_create,
			      au_optstr_wbr_create(create->wbr_create));
			/* some policies carry extra parameters */
			switch (create->wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", create->mfs_second);
				break;
			case AuWbrCreate_MFSRR:
				AuDbg("%llu watermark\n",
				      create->mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
			case AuWbrCreate_PMFSRRV:
				AuDbg("%llu watermark, %d sec\n",
				      create->mfsrr_watermark,
				      create->mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
			      au_optstr_wbr_copyup(opt->wbr_copyup));
			break;

		case Opt_fhsm_sec:
			AuDbg("fhsm_sec %u\n", opt->fhsm_second);
			break;
		case Opt_acl:
			AuLabel(acl);
			break;
		case Opt_noacl:
			AuLabel(noacl);
			break;

		default:
			BUG();
		}
	}
#endif
}
23442+
23443+void au_opts_free(struct au_opts *opts)
23444+{
23445+ struct au_opt *opt;
23446+
23447+ opt = opts->opt;
23448+ while (opt->type != Opt_tail) {
23449+ switch (opt->type) {
23450+ case Opt_add:
23451+ case Opt_append:
23452+ case Opt_prepend:
23453+ path_put(&opt->add.path);
23454+ break;
23455+ case Opt_del:
23456+ case Opt_idel:
23457+ path_put(&opt->del.h_path);
23458+ break;
23459+ case Opt_mod:
23460+ case Opt_imod:
23461+ dput(opt->mod.h_root);
23462+ break;
23463+ case Opt_xino:
23464+ fput(opt->xino.file);
23465+ break;
23466+ }
23467+ opt++;
23468+ }
23469+}
23470+
23471+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
23472+ aufs_bindex_t bindex)
23473+{
23474+ int err;
23475+ struct au_opt_add *add = &opt->add;
23476+ char *p;
23477+
23478+ add->bindex = bindex;
1e00d052 23479+ add->perm = AuBrPerm_RO;
1facf9fc 23480+ add->pathname = opt_str;
23481+ p = strchr(opt_str, '=');
23482+ if (p) {
23483+ *p++ = 0;
23484+ if (*p)
23485+ add->perm = br_perm_val(p);
23486+ }
23487+
23488+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
23489+ if (!err) {
23490+ if (!p) {
23491+ add->perm = AuBrPerm_RO;
23492+ if (au_test_fs_rr(add->path.dentry->d_sb))
23493+ add->perm = AuBrPerm_RR;
23494+ else if (!bindex && !(sb_flags & MS_RDONLY))
23495+ add->perm = AuBrPerm_RW;
23496+ }
23497+ opt->type = Opt_add;
23498+ goto out;
23499+ }
4a4d8108 23500+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 23501+ err = -EINVAL;
23502+
4f0767ce 23503+out:
1facf9fc 23504+ return err;
23505+}
23506+
23507+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
23508+{
23509+ int err;
23510+
23511+ del->pathname = args[0].from;
23512+ AuDbg("del path %s\n", del->pathname);
23513+
23514+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
23515+ if (unlikely(err))
4a4d8108 23516+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 23517+
23518+ return err;
23519+}
23520+
#if 0 /* reserved for future use */
/*
 * Indexed variant of "del": take the branch at @bindex instead of a path.
 * Returns 0 with references held in del->h_path, or -EINVAL when @bindex
 * does not name an existing branch.
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (0 <= bindex && bindex <= au_sbend(sb)) {
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
		err = 0;
	} else {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
23545+
4a4d8108
AM
23546+static int noinline_for_stack
23547+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 23548+{
23549+ int err;
23550+ struct path path;
23551+ char *p;
23552+
23553+ err = -EINVAL;
23554+ mod->path = args[0].from;
23555+ p = strchr(mod->path, '=');
23556+ if (unlikely(!p)) {
4a4d8108 23557+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 23558+ goto out;
23559+ }
23560+
23561+ *p++ = 0;
23562+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
23563+ if (unlikely(err)) {
4a4d8108 23564+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 23565+ goto out;
23566+ }
23567+
23568+ mod->perm = br_perm_val(p);
23569+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
23570+ mod->h_root = dget(path.dentry);
23571+ path_put(&path);
23572+
4f0767ce 23573+out:
1facf9fc 23574+ return err;
23575+}
23576+
#if 0 /* reserved for future use */
/*
 * Indexed variant of "mod": change the permission of the branch at
 * @bindex, taking the new permission from args[1].
 * Returns 0 with a reference held in mod->h_root, or -EINVAL when @bindex
 * does not name an existing branch.
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (0 <= bindex && bindex <= au_sbend(sb)) {
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
		err = 0;
	} else {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
23603+
23604+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
23605+ substring_t args[])
23606+{
23607+ int err;
23608+ struct file *file;
23609+
23610+ file = au_xino_create(sb, args[0].from, /*silent*/0);
23611+ err = PTR_ERR(file);
23612+ if (IS_ERR(file))
23613+ goto out;
23614+
23615+ err = -EINVAL;
2000de60 23616+ if (unlikely(file->f_path.dentry->d_sb == sb)) {
1facf9fc 23617+ fput(file);
4a4d8108 23618+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 23619+ goto out;
23620+ }
23621+
23622+ err = 0;
23623+ xino->file = file;
23624+ xino->path = args[0].from;
23625+
4f0767ce 23626+out:
1facf9fc 23627+ return err;
23628+}
23629+
4a4d8108
AM
23630+static int noinline_for_stack
23631+au_opts_parse_xino_itrunc_path(struct super_block *sb,
23632+ struct au_opt_xino_itrunc *xino_itrunc,
23633+ substring_t args[])
1facf9fc 23634+{
23635+ int err;
23636+ aufs_bindex_t bend, bindex;
23637+ struct path path;
23638+ struct dentry *root;
23639+
23640+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
23641+ if (unlikely(err)) {
4a4d8108 23642+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 23643+ goto out;
23644+ }
23645+
23646+ xino_itrunc->bindex = -1;
23647+ root = sb->s_root;
23648+ aufs_read_lock(root, AuLock_FLUSH);
23649+ bend = au_sbend(sb);
23650+ for (bindex = 0; bindex <= bend; bindex++) {
23651+ if (au_h_dptr(root, bindex) == path.dentry) {
23652+ xino_itrunc->bindex = bindex;
23653+ break;
23654+ }
23655+ }
23656+ aufs_read_unlock(root, !AuLock_IR);
23657+ path_put(&path);
23658+
23659+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 23660+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 23661+ err = -EINVAL;
23662+ }
23663+
4f0767ce 23664+out:
1facf9fc 23665+ return err;
23666+}
23667+
23668+/* called without aufs lock */
23669+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
23670+{
23671+ int err, n, token;
23672+ aufs_bindex_t bindex;
23673+ unsigned char skipped;
23674+ struct dentry *root;
23675+ struct au_opt *opt, *opt_tail;
23676+ char *opt_str;
23677+ /* reduce the stack space */
23678+ union {
23679+ struct au_opt_xino_itrunc *xino_itrunc;
23680+ struct au_opt_wbr_create *create;
23681+ } u;
23682+ struct {
23683+ substring_t args[MAX_OPT_ARGS];
23684+ } *a;
23685+
23686+ err = -ENOMEM;
23687+ a = kmalloc(sizeof(*a), GFP_NOFS);
23688+ if (unlikely(!a))
23689+ goto out;
23690+
23691+ root = sb->s_root;
23692+ err = 0;
23693+ bindex = 0;
23694+ opt = opts->opt;
23695+ opt_tail = opt + opts->max_opt - 1;
23696+ opt->type = Opt_tail;
23697+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
23698+ err = -EINVAL;
23699+ skipped = 0;
23700+ token = match_token(opt_str, options, a->args);
23701+ switch (token) {
23702+ case Opt_br:
23703+ err = 0;
23704+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
23705+ && *opt_str) {
23706+ err = opt_add(opt, opt_str, opts->sb_flags,
23707+ bindex++);
23708+ if (unlikely(!err && ++opt > opt_tail)) {
23709+ err = -E2BIG;
23710+ break;
23711+ }
23712+ opt->type = Opt_tail;
23713+ skipped = 1;
23714+ }
23715+ break;
23716+ case Opt_add:
23717+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23718+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23719+ break;
23720+ }
23721+ bindex = n;
23722+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
23723+ bindex);
23724+ if (!err)
23725+ opt->type = token;
23726+ break;
23727+ case Opt_append:
23728+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
23729+ /*dummy bindex*/1);
23730+ if (!err)
23731+ opt->type = token;
23732+ break;
23733+ case Opt_prepend:
23734+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
23735+ /*bindex*/0);
23736+ if (!err)
23737+ opt->type = token;
23738+ break;
23739+ case Opt_del:
23740+ err = au_opts_parse_del(&opt->del, a->args);
23741+ if (!err)
23742+ opt->type = token;
23743+ break;
23744+#if 0 /* reserved for future use */
23745+ case Opt_idel:
23746+ del->pathname = "(indexed)";
23747+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 23748+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23749+ break;
23750+ }
23751+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
23752+ if (!err)
23753+ opt->type = token;
23754+ break;
23755+#endif
23756+ case Opt_mod:
23757+ err = au_opts_parse_mod(&opt->mod, a->args);
23758+ if (!err)
23759+ opt->type = token;
23760+ break;
23761+#ifdef IMOD /* reserved for future use */
23762+ case Opt_imod:
23763+ u.mod->path = "(indexed)";
23764+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23765+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23766+ break;
23767+ }
23768+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
23769+ if (!err)
23770+ opt->type = token;
23771+ break;
23772+#endif
23773+ case Opt_xino:
23774+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
23775+ if (!err)
23776+ opt->type = token;
23777+ break;
23778+
23779+ case Opt_trunc_xino_path:
23780+ err = au_opts_parse_xino_itrunc_path
23781+ (sb, &opt->xino_itrunc, a->args);
23782+ if (!err)
23783+ opt->type = token;
23784+ break;
23785+
23786+ case Opt_itrunc_xino:
23787+ u.xino_itrunc = &opt->xino_itrunc;
23788+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23789+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23790+ break;
23791+ }
23792+ u.xino_itrunc->bindex = n;
23793+ aufs_read_lock(root, AuLock_FLUSH);
23794+ if (n < 0 || au_sbend(sb) < n) {
4a4d8108 23795+ pr_err("out of bounds, %d\n", n);
1facf9fc 23796+ aufs_read_unlock(root, !AuLock_IR);
23797+ break;
23798+ }
23799+ aufs_read_unlock(root, !AuLock_IR);
23800+ err = 0;
23801+ opt->type = token;
23802+ break;
23803+
23804+ case Opt_dirwh:
23805+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
23806+ break;
23807+ err = 0;
23808+ opt->type = token;
23809+ break;
23810+
23811+ case Opt_rdcache:
027c5e7a
AM
23812+ if (unlikely(match_int(&a->args[0], &n))) {
23813+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23814+ break;
027c5e7a
AM
23815+ }
23816+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
23817+ pr_err("rdcache must be smaller than %d\n",
23818+ AUFS_RDCACHE_MAX);
23819+ break;
23820+ }
23821+ opt->rdcache = n;
1facf9fc 23822+ err = 0;
23823+ opt->type = token;
23824+ break;
23825+ case Opt_rdblk:
23826+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 23827+ || n < 0
1facf9fc 23828+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 23829+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23830+ break;
23831+ }
1308ab2a 23832+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
23833+ pr_err("rdblk must be larger than %d\n",
23834+ NAME_MAX);
1facf9fc 23835+ break;
23836+ }
23837+ opt->rdblk = n;
23838+ err = 0;
23839+ opt->type = token;
23840+ break;
23841+ case Opt_rdhash:
23842+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 23843+ || n < 0
1facf9fc 23844+ || n * sizeof(struct hlist_head)
23845+ > KMALLOC_MAX_SIZE)) {
4a4d8108 23846+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23847+ break;
23848+ }
23849+ opt->rdhash = n;
23850+ err = 0;
23851+ opt->type = token;
23852+ break;
23853+
23854+ case Opt_trunc_xino:
23855+ case Opt_notrunc_xino:
23856+ case Opt_noxino:
23857+ case Opt_trunc_xib:
23858+ case Opt_notrunc_xib:
dece6358
AM
23859+ case Opt_shwh:
23860+ case Opt_noshwh:
076b876e
AM
23861+ case Opt_dirperm1:
23862+ case Opt_nodirperm1:
1facf9fc 23863+ case Opt_plink:
23864+ case Opt_noplink:
23865+ case Opt_list_plink:
4a4d8108
AM
23866+ case Opt_dio:
23867+ case Opt_nodio:
1facf9fc 23868+ case Opt_diropq_a:
23869+ case Opt_diropq_w:
23870+ case Opt_warn_perm:
23871+ case Opt_nowarn_perm:
1facf9fc 23872+ case Opt_verbose:
23873+ case Opt_noverbose:
23874+ case Opt_sum:
23875+ case Opt_nosum:
23876+ case Opt_wsum:
dece6358
AM
23877+ case Opt_rdblk_def:
23878+ case Opt_rdhash_def:
c1595e42
JR
23879+ case Opt_acl:
23880+ case Opt_noacl:
1facf9fc 23881+ err = 0;
23882+ opt->type = token;
23883+ break;
23884+
23885+ case Opt_udba:
23886+ opt->udba = udba_val(a->args[0].from);
23887+ if (opt->udba >= 0) {
23888+ err = 0;
23889+ opt->type = token;
23890+ } else
4a4d8108 23891+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 23892+ break;
23893+
23894+ case Opt_wbr_create:
23895+ u.create = &opt->wbr_create;
23896+ u.create->wbr_create
23897+ = au_wbr_create_val(a->args[0].from, u.create);
23898+ if (u.create->wbr_create >= 0) {
23899+ err = 0;
23900+ opt->type = token;
23901+ } else
4a4d8108 23902+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 23903+ break;
23904+ case Opt_wbr_copyup:
23905+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
23906+ if (opt->wbr_copyup >= 0) {
23907+ err = 0;
23908+ opt->type = token;
23909+ } else
4a4d8108 23910+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 23911+ break;
23912+
076b876e
AM
23913+ case Opt_fhsm_sec:
23914+ if (unlikely(match_int(&a->args[0], &n)
23915+ || n < 0)) {
23916+ pr_err("bad integer in %s\n", opt_str);
23917+ break;
23918+ }
23919+ if (sysaufs_brs) {
23920+ opt->fhsm_second = n;
23921+ opt->type = token;
23922+ } else
23923+ pr_warn("ignored %s\n", opt_str);
23924+ err = 0;
23925+ break;
23926+
1facf9fc 23927+ case Opt_ignore:
0c3ec466 23928+ pr_warn("ignored %s\n", opt_str);
1facf9fc 23929+ /*FALLTHROUGH*/
23930+ case Opt_ignore_silent:
23931+ skipped = 1;
23932+ err = 0;
23933+ break;
23934+ case Opt_err:
4a4d8108 23935+ pr_err("unknown option %s\n", opt_str);
1facf9fc 23936+ break;
23937+ }
23938+
23939+ if (!err && !skipped) {
23940+ if (unlikely(++opt > opt_tail)) {
23941+ err = -E2BIG;
23942+ opt--;
23943+ opt->type = Opt_tail;
23944+ break;
23945+ }
23946+ opt->type = Opt_tail;
23947+ }
23948+ }
23949+
23950+ kfree(a);
23951+ dump_opts(opts);
23952+ if (unlikely(err))
23953+ au_opts_free(opts);
23954+
4f0767ce 23955+out:
1facf9fc 23956+ return err;
23957+}
23958+
23959+static int au_opt_wbr_create(struct super_block *sb,
23960+ struct au_opt_wbr_create *create)
23961+{
23962+ int err;
23963+ struct au_sbinfo *sbinfo;
23964+
dece6358
AM
23965+ SiMustWriteLock(sb);
23966+
1facf9fc 23967+ err = 1; /* handled */
23968+ sbinfo = au_sbi(sb);
23969+ if (sbinfo->si_wbr_create_ops->fin) {
23970+ err = sbinfo->si_wbr_create_ops->fin(sb);
23971+ if (!err)
23972+ err = 1;
23973+ }
23974+
23975+ sbinfo->si_wbr_create = create->wbr_create;
23976+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
23977+ switch (create->wbr_create) {
23978+ case AuWbrCreate_MFSRRV:
23979+ case AuWbrCreate_MFSRR:
392086de
AM
23980+ case AuWbrCreate_PMFSRR:
23981+ case AuWbrCreate_PMFSRRV:
1facf9fc 23982+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
23983+ /*FALLTHROUGH*/
23984+ case AuWbrCreate_MFS:
23985+ case AuWbrCreate_MFSV:
23986+ case AuWbrCreate_PMFS:
23987+ case AuWbrCreate_PMFSV:
e49829fe
JR
23988+ sbinfo->si_wbr_mfs.mfs_expire
23989+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 23990+ break;
23991+ }
23992+
23993+ if (sbinfo->si_wbr_create_ops->init)
23994+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
23995+
23996+ return err;
23997+}
23998+
23999+/*
24000+ * returns,
24001+ * plus: processed without an error
24002+ * zero: unprocessed
24003+ */
24004+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
24005+ struct au_opts *opts)
24006+{
24007+ int err;
24008+ struct au_sbinfo *sbinfo;
24009+
dece6358
AM
24010+ SiMustWriteLock(sb);
24011+
1facf9fc 24012+ err = 1; /* handled */
24013+ sbinfo = au_sbi(sb);
24014+ switch (opt->type) {
24015+ case Opt_udba:
24016+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
24017+ sbinfo->si_mntflags |= opt->udba;
24018+ opts->given_udba |= opt->udba;
24019+ break;
24020+
24021+ case Opt_plink:
24022+ au_opt_set(sbinfo->si_mntflags, PLINK);
24023+ break;
24024+ case Opt_noplink:
24025+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 24026+ au_plink_put(sb, /*verbose*/1);
1facf9fc 24027+ au_opt_clr(sbinfo->si_mntflags, PLINK);
24028+ break;
24029+ case Opt_list_plink:
24030+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
24031+ au_plink_list(sb);
24032+ break;
24033+
4a4d8108
AM
24034+ case Opt_dio:
24035+ au_opt_set(sbinfo->si_mntflags, DIO);
24036+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24037+ break;
24038+ case Opt_nodio:
24039+ au_opt_clr(sbinfo->si_mntflags, DIO);
24040+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24041+ break;
24042+
076b876e
AM
24043+ case Opt_fhsm_sec:
24044+ au_fhsm_set(sbinfo, opt->fhsm_second);
24045+ break;
24046+
1facf9fc 24047+ case Opt_diropq_a:
24048+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24049+ break;
24050+ case Opt_diropq_w:
24051+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24052+ break;
24053+
24054+ case Opt_warn_perm:
24055+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
24056+ break;
24057+ case Opt_nowarn_perm:
24058+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
24059+ break;
24060+
1facf9fc 24061+ case Opt_verbose:
24062+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
24063+ break;
24064+ case Opt_noverbose:
24065+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
24066+ break;
24067+
24068+ case Opt_sum:
24069+ au_opt_set(sbinfo->si_mntflags, SUM);
24070+ break;
24071+ case Opt_wsum:
24072+ au_opt_clr(sbinfo->si_mntflags, SUM);
24073+ au_opt_set(sbinfo->si_mntflags, SUM_W);
24074+ case Opt_nosum:
24075+ au_opt_clr(sbinfo->si_mntflags, SUM);
24076+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
24077+ break;
24078+
24079+ case Opt_wbr_create:
24080+ err = au_opt_wbr_create(sb, &opt->wbr_create);
24081+ break;
24082+ case Opt_wbr_copyup:
24083+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
24084+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
24085+ break;
24086+
24087+ case Opt_dirwh:
24088+ sbinfo->si_dirwh = opt->dirwh;
24089+ break;
24090+
24091+ case Opt_rdcache:
e49829fe
JR
24092+ sbinfo->si_rdcache
24093+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 24094+ break;
24095+ case Opt_rdblk:
24096+ sbinfo->si_rdblk = opt->rdblk;
24097+ break;
dece6358
AM
24098+ case Opt_rdblk_def:
24099+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
24100+ break;
1facf9fc 24101+ case Opt_rdhash:
24102+ sbinfo->si_rdhash = opt->rdhash;
24103+ break;
dece6358
AM
24104+ case Opt_rdhash_def:
24105+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
24106+ break;
24107+
24108+ case Opt_shwh:
24109+ au_opt_set(sbinfo->si_mntflags, SHWH);
24110+ break;
24111+ case Opt_noshwh:
24112+ au_opt_clr(sbinfo->si_mntflags, SHWH);
24113+ break;
1facf9fc 24114+
076b876e
AM
24115+ case Opt_dirperm1:
24116+ au_opt_set(sbinfo->si_mntflags, DIRPERM1);
24117+ break;
24118+ case Opt_nodirperm1:
24119+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
24120+ break;
24121+
1facf9fc 24122+ case Opt_trunc_xino:
24123+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
24124+ break;
24125+ case Opt_notrunc_xino:
24126+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
24127+ break;
24128+
24129+ case Opt_trunc_xino_path:
24130+ case Opt_itrunc_xino:
24131+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
24132+ if (!err)
24133+ err = 1;
24134+ break;
24135+
24136+ case Opt_trunc_xib:
24137+ au_fset_opts(opts->flags, TRUNC_XIB);
24138+ break;
24139+ case Opt_notrunc_xib:
24140+ au_fclr_opts(opts->flags, TRUNC_XIB);
24141+ break;
24142+
c1595e42
JR
24143+ case Opt_acl:
24144+ sb->s_flags |= MS_POSIXACL;
24145+ break;
24146+ case Opt_noacl:
24147+ sb->s_flags &= ~MS_POSIXACL;
24148+ break;
24149+
1facf9fc 24150+ default:
24151+ err = 0;
24152+ break;
24153+ }
24154+
24155+ return err;
24156+}
24157+
24158+/*
24159+ * returns tri-state.
24160+ * plus: processed without an error
24161+ * zero: unprocessed
24162+ * minus: error
24163+ */
24164+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
24165+ struct au_opts *opts)
24166+{
24167+ int err, do_refresh;
24168+
24169+ err = 0;
24170+ switch (opt->type) {
24171+ case Opt_append:
24172+ opt->add.bindex = au_sbend(sb) + 1;
24173+ if (opt->add.bindex < 0)
24174+ opt->add.bindex = 0;
24175+ goto add;
24176+ case Opt_prepend:
24177+ opt->add.bindex = 0;
f6b6e03d 24178+ add: /* indented label */
1facf9fc 24179+ case Opt_add:
24180+ err = au_br_add(sb, &opt->add,
24181+ au_ftest_opts(opts->flags, REMOUNT));
24182+ if (!err) {
24183+ err = 1;
027c5e7a 24184+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24185+ }
24186+ break;
24187+
24188+ case Opt_del:
24189+ case Opt_idel:
24190+ err = au_br_del(sb, &opt->del,
24191+ au_ftest_opts(opts->flags, REMOUNT));
24192+ if (!err) {
24193+ err = 1;
24194+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 24195+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24196+ }
24197+ break;
24198+
24199+ case Opt_mod:
24200+ case Opt_imod:
24201+ err = au_br_mod(sb, &opt->mod,
24202+ au_ftest_opts(opts->flags, REMOUNT),
24203+ &do_refresh);
24204+ if (!err) {
24205+ err = 1;
027c5e7a
AM
24206+ if (do_refresh)
24207+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24208+ }
24209+ break;
24210+ }
24211+
24212+ return err;
24213+}
24214+
24215+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
24216+ struct au_opt_xino **opt_xino,
24217+ struct au_opts *opts)
24218+{
24219+ int err;
24220+ aufs_bindex_t bend, bindex;
24221+ struct dentry *root, *parent, *h_root;
24222+
24223+ err = 0;
24224+ switch (opt->type) {
24225+ case Opt_xino:
24226+ err = au_xino_set(sb, &opt->xino,
24227+ !!au_ftest_opts(opts->flags, REMOUNT));
24228+ if (unlikely(err))
24229+ break;
24230+
24231+ *opt_xino = &opt->xino;
24232+ au_xino_brid_set(sb, -1);
24233+
24234+ /* safe d_parent access */
2000de60 24235+ parent = opt->xino.file->f_path.dentry->d_parent;
1facf9fc 24236+ root = sb->s_root;
24237+ bend = au_sbend(sb);
24238+ for (bindex = 0; bindex <= bend; bindex++) {
24239+ h_root = au_h_dptr(root, bindex);
24240+ if (h_root == parent) {
24241+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
24242+ break;
24243+ }
24244+ }
24245+ break;
24246+
24247+ case Opt_noxino:
24248+ au_xino_clr(sb);
24249+ au_xino_brid_set(sb, -1);
24250+ *opt_xino = (void *)-1;
24251+ break;
24252+ }
24253+
24254+ return err;
24255+}
24256+
24257+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24258+ unsigned int pending)
24259+{
076b876e 24260+ int err, fhsm;
1facf9fc 24261+ aufs_bindex_t bindex, bend;
79b8bda9 24262+ unsigned char do_plink, skip, do_free, can_no_dreval;
1facf9fc 24263+ struct au_branch *br;
24264+ struct au_wbr *wbr;
79b8bda9 24265+ struct dentry *root, *dentry;
1facf9fc 24266+ struct inode *dir, *h_dir;
24267+ struct au_sbinfo *sbinfo;
24268+ struct au_hinode *hdir;
24269+
dece6358
AM
24270+ SiMustAnyLock(sb);
24271+
1facf9fc 24272+ sbinfo = au_sbi(sb);
24273+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
24274+
dece6358
AM
24275+ if (!(sb_flags & MS_RDONLY)) {
24276+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
0c3ec466 24277+ pr_warn("first branch should be rw\n");
dece6358 24278+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
0c3ec466 24279+ pr_warn("shwh should be used with ro\n");
dece6358 24280+ }
1facf9fc 24281+
4a4d8108 24282+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 24283+ && !au_opt_test(sbinfo->si_mntflags, XINO))
0c3ec466 24284+ pr_warn("udba=*notify requires xino\n");
1facf9fc 24285+
076b876e
AM
24286+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
24287+ pr_warn("dirperm1 breaks the protection"
24288+ " by the permission bits on the lower branch\n");
24289+
1facf9fc 24290+ err = 0;
076b876e 24291+ fhsm = 0;
1facf9fc 24292+ root = sb->s_root;
5527c038 24293+ dir = d_inode(root);
1facf9fc 24294+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
79b8bda9
AM
24295+ can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
24296+ UDBA_NONE);
1facf9fc 24297+ bend = au_sbend(sb);
24298+ for (bindex = 0; !err && bindex <= bend; bindex++) {
24299+ skip = 0;
24300+ h_dir = au_h_iptr(dir, bindex);
24301+ br = au_sbr(sb, bindex);
1facf9fc 24302+
c1595e42
JR
24303+ if ((br->br_perm & AuBrAttr_ICEX)
24304+ && !h_dir->i_op->listxattr)
24305+ br->br_perm &= ~AuBrAttr_ICEX;
24306+#if 0
24307+ if ((br->br_perm & AuBrAttr_ICEX_SEC)
24308+ && (au_br_sb(br)->s_flags & MS_NOSEC))
24309+ br->br_perm &= ~AuBrAttr_ICEX_SEC;
24310+#endif
24311+
24312+ do_free = 0;
1facf9fc 24313+ wbr = br->br_wbr;
24314+ if (wbr)
24315+ wbr_wh_read_lock(wbr);
24316+
1e00d052 24317+ if (!au_br_writable(br->br_perm)) {
1facf9fc 24318+ do_free = !!wbr;
24319+ skip = (!wbr
24320+ || (!wbr->wbr_whbase
24321+ && !wbr->wbr_plink
24322+ && !wbr->wbr_orph));
1e00d052 24323+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 24324+ /* skip = (!br->br_whbase && !br->br_orph); */
24325+ skip = (!wbr || !wbr->wbr_whbase);
24326+ if (skip && wbr) {
24327+ if (do_plink)
24328+ skip = !!wbr->wbr_plink;
24329+ else
24330+ skip = !wbr->wbr_plink;
24331+ }
1e00d052 24332+ } else {
1facf9fc 24333+ /* skip = (br->br_whbase && br->br_ohph); */
24334+ skip = (wbr && wbr->wbr_whbase);
24335+ if (skip) {
24336+ if (do_plink)
24337+ skip = !!wbr->wbr_plink;
24338+ else
24339+ skip = !wbr->wbr_plink;
24340+ }
1facf9fc 24341+ }
24342+ if (wbr)
24343+ wbr_wh_read_unlock(wbr);
24344+
79b8bda9
AM
24345+ if (can_no_dreval) {
24346+ dentry = br->br_path.dentry;
24347+ spin_lock(&dentry->d_lock);
24348+ if (dentry->d_flags &
24349+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
24350+ can_no_dreval = 0;
24351+ spin_unlock(&dentry->d_lock);
24352+ }
24353+
076b876e
AM
24354+ if (au_br_fhsm(br->br_perm)) {
24355+ fhsm++;
24356+ AuDebugOn(!br->br_fhsm);
24357+ }
24358+
1facf9fc 24359+ if (skip)
24360+ continue;
24361+
24362+ hdir = au_hi(dir, bindex);
4a4d8108 24363+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 24364+ if (wbr)
24365+ wbr_wh_write_lock(wbr);
86dc4139 24366+ err = au_wh_init(br, sb);
1facf9fc 24367+ if (wbr)
24368+ wbr_wh_write_unlock(wbr);
4a4d8108 24369+ au_hn_imtx_unlock(hdir);
1facf9fc 24370+
24371+ if (!err && do_free) {
24372+ kfree(wbr);
24373+ br->br_wbr = NULL;
24374+ }
24375+ }
24376+
79b8bda9
AM
24377+ if (can_no_dreval)
24378+ au_fset_si(sbinfo, NO_DREVAL);
24379+ else
24380+ au_fclr_si(sbinfo, NO_DREVAL);
24381+
c1595e42 24382+ if (fhsm >= 2) {
076b876e 24383+ au_fset_si(sbinfo, FHSM);
c1595e42
JR
24384+ for (bindex = bend; bindex >= 0; bindex--) {
24385+ br = au_sbr(sb, bindex);
24386+ if (au_br_fhsm(br->br_perm)) {
24387+ au_fhsm_set_bottom(sb, bindex);
24388+ break;
24389+ }
24390+ }
24391+ } else {
076b876e 24392+ au_fclr_si(sbinfo, FHSM);
c1595e42
JR
24393+ au_fhsm_set_bottom(sb, -1);
24394+ }
076b876e 24395+
1facf9fc 24396+ return err;
24397+}
24398+
24399+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
24400+{
24401+ int err;
24402+ unsigned int tmp;
027c5e7a 24403+ aufs_bindex_t bindex, bend;
1facf9fc 24404+ struct au_opt *opt;
24405+ struct au_opt_xino *opt_xino, xino;
24406+ struct au_sbinfo *sbinfo;
027c5e7a 24407+ struct au_branch *br;
076b876e 24408+ struct inode *dir;
1facf9fc 24409+
dece6358
AM
24410+ SiMustWriteLock(sb);
24411+
1facf9fc 24412+ err = 0;
24413+ opt_xino = NULL;
24414+ opt = opts->opt;
24415+ while (err >= 0 && opt->type != Opt_tail)
24416+ err = au_opt_simple(sb, opt++, opts);
24417+ if (err > 0)
24418+ err = 0;
24419+ else if (unlikely(err < 0))
24420+ goto out;
24421+
24422+ /* disable xino and udba temporary */
24423+ sbinfo = au_sbi(sb);
24424+ tmp = sbinfo->si_mntflags;
24425+ au_opt_clr(sbinfo->si_mntflags, XINO);
24426+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
24427+
24428+ opt = opts->opt;
24429+ while (err >= 0 && opt->type != Opt_tail)
24430+ err = au_opt_br(sb, opt++, opts);
24431+ if (err > 0)
24432+ err = 0;
24433+ else if (unlikely(err < 0))
24434+ goto out;
24435+
24436+ bend = au_sbend(sb);
24437+ if (unlikely(bend < 0)) {
24438+ err = -EINVAL;
4a4d8108 24439+ pr_err("no branches\n");
1facf9fc 24440+ goto out;
24441+ }
24442+
24443+ if (au_opt_test(tmp, XINO))
24444+ au_opt_set(sbinfo->si_mntflags, XINO);
24445+ opt = opts->opt;
24446+ while (!err && opt->type != Opt_tail)
24447+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
24448+ if (unlikely(err))
24449+ goto out;
24450+
24451+ err = au_opts_verify(sb, sb->s_flags, tmp);
24452+ if (unlikely(err))
24453+ goto out;
24454+
24455+ /* restore xino */
24456+ if (au_opt_test(tmp, XINO) && !opt_xino) {
24457+ xino.file = au_xino_def(sb);
24458+ err = PTR_ERR(xino.file);
24459+ if (IS_ERR(xino.file))
24460+ goto out;
24461+
24462+ err = au_xino_set(sb, &xino, /*remount*/0);
24463+ fput(xino.file);
24464+ if (unlikely(err))
24465+ goto out;
24466+ }
24467+
24468+ /* restore udba */
027c5e7a 24469+ tmp &= AuOptMask_UDBA;
1facf9fc 24470+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a
AM
24471+ sbinfo->si_mntflags |= tmp;
24472+ bend = au_sbend(sb);
24473+ for (bindex = 0; bindex <= bend; bindex++) {
24474+ br = au_sbr(sb, bindex);
24475+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
24476+ if (unlikely(err))
24477+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
24478+ bindex, err);
24479+ /* go on even if err */
24480+ }
4a4d8108 24481+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
5527c038 24482+ dir = d_inode(sb->s_root);
4a4d8108 24483+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 24484+ }
24485+
4f0767ce 24486+out:
1facf9fc 24487+ return err;
24488+}
24489+
24490+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
24491+{
24492+ int err, rerr;
79b8bda9 24493+ unsigned char no_dreval;
1facf9fc 24494+ struct inode *dir;
24495+ struct au_opt_xino *opt_xino;
24496+ struct au_opt *opt;
24497+ struct au_sbinfo *sbinfo;
24498+
dece6358
AM
24499+ SiMustWriteLock(sb);
24500+
79b8bda9 24501+ err = 0;
5527c038 24502+ dir = d_inode(sb->s_root);
1facf9fc 24503+ sbinfo = au_sbi(sb);
1facf9fc 24504+ opt_xino = NULL;
24505+ opt = opts->opt;
24506+ while (err >= 0 && opt->type != Opt_tail) {
24507+ err = au_opt_simple(sb, opt, opts);
24508+ if (!err)
24509+ err = au_opt_br(sb, opt, opts);
24510+ if (!err)
24511+ err = au_opt_xino(sb, opt, &opt_xino, opts);
24512+ opt++;
24513+ }
24514+ if (err > 0)
24515+ err = 0;
24516+ AuTraceErr(err);
24517+ /* go on even err */
24518+
79b8bda9 24519+ no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
1facf9fc 24520+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
24521+ if (unlikely(rerr && !err))
24522+ err = rerr;
24523+
79b8bda9 24524+ if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
b95c5147 24525+ au_fset_opts(opts->flags, REFRESH_IDOP);
79b8bda9 24526+
1facf9fc 24527+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
24528+ rerr = au_xib_trunc(sb);
24529+ if (unlikely(rerr && !err))
24530+ err = rerr;
24531+ }
24532+
24533+ /* will be handled by the caller */
027c5e7a 24534+ if (!au_ftest_opts(opts->flags, REFRESH)
79b8bda9
AM
24535+ && (opts->given_udba
24536+ || au_opt_test(sbinfo->si_mntflags, XINO)
b95c5147 24537+ || au_ftest_opts(opts->flags, REFRESH_IDOP)
79b8bda9 24538+ ))
027c5e7a 24539+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24540+
24541+ AuDbg("status 0x%x\n", opts->flags);
24542+ return err;
24543+}
24544+
24545+/* ---------------------------------------------------------------------- */
24546+
24547+unsigned int au_opt_udba(struct super_block *sb)
24548+{
24549+ return au_mntflags(sb) & AuOptMask_UDBA;
24550+}
7f207e10
AM
24551diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
24552--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
b95c5147 24553+++ linux/fs/aufs/opts.h 2015-12-10 17:59:16.839499823 +0100
79b8bda9 24554@@ -0,0 +1,211 @@
1facf9fc 24555+/*
2000de60 24556+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 24557+ *
24558+ * This program, aufs is free software; you can redistribute it and/or modify
24559+ * it under the terms of the GNU General Public License as published by
24560+ * the Free Software Foundation; either version 2 of the License, or
24561+ * (at your option) any later version.
dece6358
AM
24562+ *
24563+ * This program is distributed in the hope that it will be useful,
24564+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24565+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24566+ * GNU General Public License for more details.
24567+ *
24568+ * You should have received a copy of the GNU General Public License
523b37e3 24569+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 24570+ */
24571+
24572+/*
24573+ * mount options/flags
24574+ */
24575+
24576+#ifndef __AUFS_OPTS_H__
24577+#define __AUFS_OPTS_H__
24578+
24579+#ifdef __KERNEL__
24580+
dece6358 24581+#include <linux/path.h>
1facf9fc 24582+
dece6358
AM
24583+struct file;
24584+struct super_block;
24585+
1facf9fc 24586+/* ---------------------------------------------------------------------- */
24587+
24588+/* mount flags */
24589+#define AuOpt_XINO 1 /* external inode number bitmap
24590+ and translation table */
24591+#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
24592+#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
24593+#define AuOpt_UDBA_REVAL (1 << 3)
4a4d8108 24594+#define AuOpt_UDBA_HNOTIFY (1 << 4)
dece6358
AM
24595+#define AuOpt_SHWH (1 << 5) /* show whiteout */
24596+#define AuOpt_PLINK (1 << 6) /* pseudo-link */
076b876e
AM
24597+#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm
24598+ bits */
dece6358
AM
24599+#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */
24600+#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */
24601+#define AuOpt_SUM_W (1 << 11) /* unimplemented */
24602+#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */
24603+#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */
4a4d8108 24604+#define AuOpt_DIO (1 << 14) /* direct io */
1facf9fc 24605+
4a4d8108
AM
24606+#ifndef CONFIG_AUFS_HNOTIFY
24607+#undef AuOpt_UDBA_HNOTIFY
24608+#define AuOpt_UDBA_HNOTIFY 0
1facf9fc 24609+#endif
dece6358
AM
24610+#ifndef CONFIG_AUFS_SHWH
24611+#undef AuOpt_SHWH
24612+#define AuOpt_SHWH 0
24613+#endif
1facf9fc 24614+
24615+#define AuOpt_Def (AuOpt_XINO \
24616+ | AuOpt_UDBA_REVAL \
24617+ | AuOpt_PLINK \
24618+ /* | AuOpt_DIRPERM1 */ \
24619+ | AuOpt_WARN_PERM)
24620+#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
24621+ | AuOpt_UDBA_REVAL \
4a4d8108 24622+ | AuOpt_UDBA_HNOTIFY)
1facf9fc 24623+
24624+#define au_opt_test(flags, name) (flags & AuOpt_##name)
24625+#define au_opt_set(flags, name) do { \
24626+ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
24627+ ((flags) |= AuOpt_##name); \
24628+} while (0)
24629+#define au_opt_set_udba(flags, name) do { \
24630+ (flags) &= ~AuOptMask_UDBA; \
24631+ ((flags) |= AuOpt_##name); \
24632+} while (0)
7f207e10
AM
24633+#define au_opt_clr(flags, name) do { \
24634+ ((flags) &= ~AuOpt_##name); \
24635+} while (0)
1facf9fc 24636+
e49829fe
JR
24637+static inline unsigned int au_opts_plink(unsigned int mntflags)
24638+{
24639+#ifdef CONFIG_PROC_FS
24640+ return mntflags;
24641+#else
24642+ return mntflags & ~AuOpt_PLINK;
24643+#endif
24644+}
24645+
1facf9fc 24646+/* ---------------------------------------------------------------------- */
24647+
24648+/* policies to select one among multiple writable branches */
24649+enum {
24650+ AuWbrCreate_TDP, /* top down parent */
24651+ AuWbrCreate_RR, /* round robin */
24652+ AuWbrCreate_MFS, /* most free space */
24653+ AuWbrCreate_MFSV, /* mfs with seconds */
24654+ AuWbrCreate_MFSRR, /* mfs then rr */
24655+ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */
24656+ AuWbrCreate_PMFS, /* parent and mfs */
24657+ AuWbrCreate_PMFSV, /* parent and mfs with seconds */
392086de
AM
24658+ AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */
24659+ AuWbrCreate_PMFSRRV, /* plus seconds */
1facf9fc 24660+
24661+ AuWbrCreate_Def = AuWbrCreate_TDP
24662+};
24663+
24664+enum {
24665+ AuWbrCopyup_TDP, /* top down parent */
24666+ AuWbrCopyup_BUP, /* bottom up parent */
24667+ AuWbrCopyup_BU, /* bottom up */
24668+
24669+ AuWbrCopyup_Def = AuWbrCopyup_TDP
24670+};
24671+
24672+/* ---------------------------------------------------------------------- */
24673+
24674+struct au_opt_add {
24675+ aufs_bindex_t bindex;
24676+ char *pathname;
24677+ int perm;
24678+ struct path path;
24679+};
24680+
24681+struct au_opt_del {
24682+ char *pathname;
24683+ struct path h_path;
24684+};
24685+
24686+struct au_opt_mod {
24687+ char *path;
24688+ int perm;
24689+ struct dentry *h_root;
24690+};
24691+
24692+struct au_opt_xino {
24693+ char *path;
24694+ struct file *file;
24695+};
24696+
24697+struct au_opt_xino_itrunc {
24698+ aufs_bindex_t bindex;
24699+};
24700+
24701+struct au_opt_wbr_create {
24702+ int wbr_create;
24703+ int mfs_second;
24704+ unsigned long long mfsrr_watermark;
24705+};
24706+
24707+struct au_opt {
24708+ int type;
24709+ union {
24710+ struct au_opt_xino xino;
24711+ struct au_opt_xino_itrunc xino_itrunc;
24712+ struct au_opt_add add;
24713+ struct au_opt_del del;
24714+ struct au_opt_mod mod;
24715+ int dirwh;
24716+ int rdcache;
24717+ unsigned int rdblk;
24718+ unsigned int rdhash;
24719+ int udba;
24720+ struct au_opt_wbr_create wbr_create;
24721+ int wbr_copyup;
076b876e 24722+ unsigned int fhsm_second;
1facf9fc 24723+ };
24724+};
24725+
24726+/* opts flags */
24727+#define AuOpts_REMOUNT 1
027c5e7a
AM
24728+#define AuOpts_REFRESH (1 << 1)
24729+#define AuOpts_TRUNC_XIB (1 << 2)
24730+#define AuOpts_REFRESH_DYAOP (1 << 3)
b95c5147 24731+#define AuOpts_REFRESH_IDOP (1 << 4)
1facf9fc 24732+#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
7f207e10
AM
24733+#define au_fset_opts(flags, name) \
24734+ do { (flags) |= AuOpts_##name; } while (0)
24735+#define au_fclr_opts(flags, name) \
24736+ do { (flags) &= ~AuOpts_##name; } while (0)
1facf9fc 24737+
24738+struct au_opts {
24739+ struct au_opt *opt;
24740+ int max_opt;
24741+
24742+ unsigned int given_udba;
24743+ unsigned int flags;
24744+ unsigned long sb_flags;
24745+};
24746+
24747+/* ---------------------------------------------------------------------- */
24748+
7e9cd9fe 24749+/* opts.c */
076b876e 24750+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
1facf9fc 24751+const char *au_optstr_udba(int udba);
24752+const char *au_optstr_wbr_copyup(int wbr_copyup);
24753+const char *au_optstr_wbr_create(int wbr_create);
24754+
24755+void au_opts_free(struct au_opts *opts);
24756+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
24757+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24758+ unsigned int pending);
24759+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
24760+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
24761+
24762+unsigned int au_opt_udba(struct super_block *sb);
24763+
1facf9fc 24764+#endif /* __KERNEL__ */
24765+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
24766diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
24767--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 24768+++ linux/fs/aufs/plink.c 2015-09-24 10:47:58.254719746 +0200
5527c038 24769@@ -0,0 +1,528 @@
1facf9fc 24770+/*
2000de60 24771+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 24772+ *
24773+ * This program, aufs is free software; you can redistribute it and/or modify
24774+ * it under the terms of the GNU General Public License as published by
24775+ * the Free Software Foundation; either version 2 of the License, or
24776+ * (at your option) any later version.
dece6358
AM
24777+ *
24778+ * This program is distributed in the hope that it will be useful,
24779+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24780+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24781+ * GNU General Public License for more details.
24782+ *
24783+ * You should have received a copy of the GNU General Public License
523b37e3 24784+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 24785+ */
24786+
24787+/*
24788+ * pseudo-link
24789+ */
24790+
24791+#include "aufs.h"
24792+
24793+/*
e49829fe 24794+ * the pseudo-link maintenance mode.
1facf9fc 24795+ * during a user process maintains the pseudo-links,
24796+ * prohibit adding a new plink and branch manipulation.
e49829fe
JR
24797+ *
24798+ * Flags
24799+ * NOPLM:
24800+ * For entry functions which will handle plink, and i_mutex is already held
24801+ * in VFS.
24802+ * They cannot wait and should return an error at once.
24803+ * Callers has to check the error.
24804+ * NOPLMW:
24805+ * For entry functions which will handle plink, but i_mutex is not held
24806+ * in VFS.
24807+ * They can wait the plink maintenance mode to finish.
24808+ *
24809+ * They behave like F_SETLK and F_SETLKW.
24810+ * If the caller never handle plink, then both flags are unnecessary.
1facf9fc 24811+ */
e49829fe
JR
24812+
24813+int au_plink_maint(struct super_block *sb, int flags)
1facf9fc 24814+{
e49829fe
JR
24815+ int err;
24816+ pid_t pid, ppid;
24817+ struct au_sbinfo *sbi;
dece6358
AM
24818+
24819+ SiMustAnyLock(sb);
24820+
e49829fe
JR
24821+ err = 0;
24822+ if (!au_opt_test(au_mntflags(sb), PLINK))
24823+ goto out;
24824+
24825+ sbi = au_sbi(sb);
24826+ pid = sbi->si_plink_maint_pid;
24827+ if (!pid || pid == current->pid)
24828+ goto out;
24829+
24830+ /* todo: it highly depends upon /sbin/mount.aufs */
24831+ rcu_read_lock();
24832+ ppid = task_pid_vnr(rcu_dereference(current->real_parent));
24833+ rcu_read_unlock();
24834+ if (pid == ppid)
24835+ goto out;
24836+
24837+ if (au_ftest_lock(flags, NOPLMW)) {
027c5e7a
AM
24838+ /* if there is no i_mutex lock in VFS, we don't need to wait */
24839+ /* AuDebugOn(!lockdep_depth(current)); */
e49829fe
JR
24840+ while (sbi->si_plink_maint_pid) {
24841+ si_read_unlock(sb);
24842+ /* gave up wake_up_bit() */
24843+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
24844+
24845+ if (au_ftest_lock(flags, FLUSH))
24846+ au_nwt_flush(&sbi->si_nowait);
24847+ si_noflush_read_lock(sb);
24848+ }
24849+ } else if (au_ftest_lock(flags, NOPLM)) {
24850+ AuDbg("ppid %d, pid %d\n", ppid, pid);
24851+ err = -EAGAIN;
24852+ }
24853+
24854+out:
24855+ return err;
4a4d8108
AM
24856+}
24857+
e49829fe 24858+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
4a4d8108 24859+{
4a4d8108 24860+ spin_lock(&sbinfo->si_plink_maint_lock);
027c5e7a 24861+ sbinfo->si_plink_maint_pid = 0;
4a4d8108 24862+ spin_unlock(&sbinfo->si_plink_maint_lock);
027c5e7a 24863+ wake_up_all(&sbinfo->si_plink_wq);
4a4d8108
AM
24864+}
24865+
e49829fe 24866+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
24867+{
24868+ int err;
4a4d8108
AM
24869+ struct au_sbinfo *sbinfo;
24870+
24871+ err = 0;
4a4d8108
AM
24872+ sbinfo = au_sbi(sb);
24873+ /* make sure i am the only one in this fs */
e49829fe
JR
24874+ si_write_lock(sb, AuLock_FLUSH);
24875+ if (au_opt_test(au_mntflags(sb), PLINK)) {
24876+ spin_lock(&sbinfo->si_plink_maint_lock);
24877+ if (!sbinfo->si_plink_maint_pid)
24878+ sbinfo->si_plink_maint_pid = current->pid;
24879+ else
24880+ err = -EBUSY;
24881+ spin_unlock(&sbinfo->si_plink_maint_lock);
24882+ }
4a4d8108
AM
24883+ si_write_unlock(sb);
24884+
24885+ return err;
1facf9fc 24886+}
24887+
24888+/* ---------------------------------------------------------------------- */
24889+
1facf9fc 24890+#ifdef CONFIG_AUFS_DEBUG
24891+void au_plink_list(struct super_block *sb)
24892+{
86dc4139 24893+ int i;
1facf9fc 24894+ struct au_sbinfo *sbinfo;
86dc4139 24895+ struct hlist_head *plink_hlist;
1facf9fc 24896+ struct pseudo_link *plink;
24897+
dece6358
AM
24898+ SiMustAnyLock(sb);
24899+
1facf9fc 24900+ sbinfo = au_sbi(sb);
24901+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 24902+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 24903+
86dc4139
AM
24904+ for (i = 0; i < AuPlink_NHASH; i++) {
24905+ plink_hlist = &sbinfo->si_plink[i].head;
24906+ rcu_read_lock();
24907+ hlist_for_each_entry_rcu(plink, plink_hlist, hlist)
24908+ AuDbg("%lu\n", plink->inode->i_ino);
24909+ rcu_read_unlock();
24910+ }
1facf9fc 24911+}
24912+#endif
24913+
24914+/* is the inode pseudo-linked? */
24915+int au_plink_test(struct inode *inode)
24916+{
86dc4139 24917+ int found, i;
1facf9fc 24918+ struct au_sbinfo *sbinfo;
86dc4139 24919+ struct hlist_head *plink_hlist;
1facf9fc 24920+ struct pseudo_link *plink;
24921+
24922+ sbinfo = au_sbi(inode->i_sb);
dece6358 24923+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 24924+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 24925+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 24926+
24927+ found = 0;
86dc4139
AM
24928+ i = au_plink_hash(inode->i_ino);
24929+ plink_hlist = &sbinfo->si_plink[i].head;
4a4d8108 24930+ rcu_read_lock();
86dc4139 24931+ hlist_for_each_entry_rcu(plink, plink_hlist, hlist)
1facf9fc 24932+ if (plink->inode == inode) {
24933+ found = 1;
24934+ break;
24935+ }
4a4d8108 24936+ rcu_read_unlock();
1facf9fc 24937+ return found;
24938+}
24939+
24940+/* ---------------------------------------------------------------------- */
24941+
24942+/*
24943+ * generate a name for plink.
24944+ * the file will be stored under AUFS_WH_PLINKDIR.
24945+ */
24946+/* 20 is max digits length of ulong 64 */
24947+#define PLINK_NAME_LEN ((20 + 1) * 2)
24948+
24949+static int plink_name(char *name, int len, struct inode *inode,
24950+ aufs_bindex_t bindex)
24951+{
24952+ int rlen;
24953+ struct inode *h_inode;
24954+
24955+ h_inode = au_h_iptr(inode, bindex);
24956+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
24957+ return rlen;
24958+}
24959+
7f207e10
AM
24960+struct au_do_plink_lkup_args {
24961+ struct dentry **errp;
24962+ struct qstr *tgtname;
24963+ struct dentry *h_parent;
24964+ struct au_branch *br;
24965+};
24966+
24967+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
24968+ struct dentry *h_parent,
24969+ struct au_branch *br)
24970+{
24971+ struct dentry *h_dentry;
24972+ struct mutex *h_mtx;
24973+
5527c038 24974+ h_mtx = &d_inode(h_parent)->i_mutex;
7f207e10 24975+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
b4510431 24976+ h_dentry = vfsub_lkup_one(tgtname, h_parent);
7f207e10
AM
24977+ mutex_unlock(h_mtx);
24978+ return h_dentry;
24979+}
24980+
24981+static void au_call_do_plink_lkup(void *args)
24982+{
24983+ struct au_do_plink_lkup_args *a = args;
24984+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
24985+}
24986+
1facf9fc 24987+/* lookup the plink-ed @inode under the branch at @bindex */
24988+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
24989+{
24990+ struct dentry *h_dentry, *h_parent;
24991+ struct au_branch *br;
7f207e10 24992+ int wkq_err;
1facf9fc 24993+ char a[PLINK_NAME_LEN];
0c3ec466 24994+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 24995+
e49829fe
JR
24996+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
24997+
1facf9fc 24998+ br = au_sbr(inode->i_sb, bindex);
24999+ h_parent = br->br_wbr->wbr_plink;
1facf9fc 25000+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25001+
2dfbb274 25002+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
7f207e10
AM
25003+ struct au_do_plink_lkup_args args = {
25004+ .errp = &h_dentry,
25005+ .tgtname = &tgtname,
25006+ .h_parent = h_parent,
25007+ .br = br
25008+ };
25009+
25010+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
25011+ if (unlikely(wkq_err))
25012+ h_dentry = ERR_PTR(wkq_err);
25013+ } else
25014+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
25015+
1facf9fc 25016+ return h_dentry;
25017+}
25018+
25019+/* create a pseudo-link */
25020+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
25021+ struct dentry *h_dentry, struct au_branch *br)
25022+{
25023+ int err;
25024+ struct path h_path = {
86dc4139 25025+ .mnt = au_br_mnt(br)
1facf9fc 25026+ };
523b37e3 25027+ struct inode *h_dir, *delegated;
1facf9fc 25028+
5527c038 25029+ h_dir = d_inode(h_parent);
7f207e10 25030+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
4f0767ce 25031+again:
b4510431 25032+ h_path.dentry = vfsub_lkup_one(tgt, h_parent);
1facf9fc 25033+ err = PTR_ERR(h_path.dentry);
25034+ if (IS_ERR(h_path.dentry))
25035+ goto out;
25036+
25037+ err = 0;
25038+ /* wh.plink dir is not monitored */
7f207e10 25039+ /* todo: is it really safe? */
5527c038
JR
25040+ if (d_is_positive(h_path.dentry)
25041+ && d_inode(h_path.dentry) != d_inode(h_dentry)) {
523b37e3
AM
25042+ delegated = NULL;
25043+ err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
25044+ if (unlikely(err == -EWOULDBLOCK)) {
25045+ pr_warn("cannot retry for NFSv4 delegation"
25046+ " for an internal unlink\n");
25047+ iput(delegated);
25048+ }
1facf9fc 25049+ dput(h_path.dentry);
25050+ h_path.dentry = NULL;
25051+ if (!err)
25052+ goto again;
25053+ }
5527c038 25054+ if (!err && d_is_negative(h_path.dentry)) {
523b37e3
AM
25055+ delegated = NULL;
25056+ err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
25057+ if (unlikely(err == -EWOULDBLOCK)) {
25058+ pr_warn("cannot retry for NFSv4 delegation"
25059+ " for an internal link\n");
25060+ iput(delegated);
25061+ }
25062+ }
1facf9fc 25063+ dput(h_path.dentry);
25064+
4f0767ce 25065+out:
7f207e10 25066+ mutex_unlock(&h_dir->i_mutex);
1facf9fc 25067+ return err;
25068+}
25069+
25070+struct do_whplink_args {
25071+ int *errp;
25072+ struct qstr *tgt;
25073+ struct dentry *h_parent;
25074+ struct dentry *h_dentry;
25075+ struct au_branch *br;
25076+};
25077+
25078+static void call_do_whplink(void *args)
25079+{
25080+ struct do_whplink_args *a = args;
25081+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
25082+}
25083+
25084+static int whplink(struct dentry *h_dentry, struct inode *inode,
25085+ aufs_bindex_t bindex, struct au_branch *br)
25086+{
25087+ int err, wkq_err;
25088+ struct au_wbr *wbr;
25089+ struct dentry *h_parent;
1facf9fc 25090+ char a[PLINK_NAME_LEN];
0c3ec466 25091+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25092+
25093+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
25094+ h_parent = wbr->wbr_plink;
1facf9fc 25095+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25096+
25097+ /* always superio. */
2dfbb274 25098+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
1facf9fc 25099+ struct do_whplink_args args = {
25100+ .errp = &err,
25101+ .tgt = &tgtname,
25102+ .h_parent = h_parent,
25103+ .h_dentry = h_dentry,
25104+ .br = br
25105+ };
25106+ wkq_err = au_wkq_wait(call_do_whplink, &args);
25107+ if (unlikely(wkq_err))
25108+ err = wkq_err;
25109+ } else
25110+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 25111+
25112+ return err;
25113+}
25114+
25115+/* free a single plink */
25116+static void do_put_plink(struct pseudo_link *plink, int do_del)
25117+{
1facf9fc 25118+ if (do_del)
86dc4139 25119+ hlist_del(&plink->hlist);
4a4d8108
AM
25120+ iput(plink->inode);
25121+ kfree(plink);
25122+}
25123+
25124+static void do_put_plink_rcu(struct rcu_head *rcu)
25125+{
25126+ struct pseudo_link *plink;
25127+
25128+ plink = container_of(rcu, struct pseudo_link, rcu);
25129+ iput(plink->inode);
1facf9fc 25130+ kfree(plink);
25131+}
25132+
25133+/*
25134+ * create a new pseudo-link for @h_dentry on @bindex.
25135+ * the linked inode is held in aufs @inode.
25136+ */
25137+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
25138+ struct dentry *h_dentry)
25139+{
25140+ struct super_block *sb;
25141+ struct au_sbinfo *sbinfo;
86dc4139 25142+ struct hlist_head *plink_hlist;
4a4d8108 25143+ struct pseudo_link *plink, *tmp;
86dc4139
AM
25144+ struct au_sphlhead *sphl;
25145+ int found, err, cnt, i;
1facf9fc 25146+
25147+ sb = inode->i_sb;
25148+ sbinfo = au_sbi(sb);
25149+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25150+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25151+
86dc4139 25152+ found = au_plink_test(inode);
4a4d8108 25153+ if (found)
1facf9fc 25154+ return;
4a4d8108 25155+
86dc4139
AM
25156+ i = au_plink_hash(inode->i_ino);
25157+ sphl = sbinfo->si_plink + i;
25158+ plink_hlist = &sphl->head;
4a4d8108
AM
25159+ tmp = kmalloc(sizeof(*plink), GFP_NOFS);
25160+ if (tmp)
25161+ tmp->inode = au_igrab(inode);
25162+ else {
25163+ err = -ENOMEM;
25164+ goto out;
1facf9fc 25165+ }
25166+
86dc4139
AM
25167+ spin_lock(&sphl->spin);
25168+ hlist_for_each_entry(plink, plink_hlist, hlist) {
4a4d8108
AM
25169+ if (plink->inode == inode) {
25170+ found = 1;
25171+ break;
25172+ }
1facf9fc 25173+ }
4a4d8108 25174+ if (!found)
86dc4139
AM
25175+ hlist_add_head_rcu(&tmp->hlist, plink_hlist);
25176+ spin_unlock(&sphl->spin);
4a4d8108 25177+ if (!found) {
86dc4139
AM
25178+ cnt = au_sphl_count(sphl);
25179+#define msg "unexpectedly unblanced or too many pseudo-links"
25180+ if (cnt > AUFS_PLINK_WARN)
25181+ AuWarn1(msg ", %d\n", cnt);
25182+#undef msg
1facf9fc 25183+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
4a4d8108
AM
25184+ } else {
25185+ do_put_plink(tmp, 0);
25186+ return;
1facf9fc 25187+ }
25188+
4a4d8108 25189+out:
1facf9fc 25190+ if (unlikely(err)) {
0c3ec466 25191+ pr_warn("err %d, damaged pseudo link.\n", err);
4a4d8108 25192+ if (tmp) {
86dc4139 25193+ au_sphl_del_rcu(&tmp->hlist, sphl);
4a4d8108
AM
25194+ call_rcu(&tmp->rcu, do_put_plink_rcu);
25195+ }
1facf9fc 25196+ }
25197+}
25198+
25199+/* free all plinks */
e49829fe 25200+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 25201+{
86dc4139 25202+ int i, warned;
1facf9fc 25203+ struct au_sbinfo *sbinfo;
86dc4139
AM
25204+ struct hlist_head *plink_hlist;
25205+ struct hlist_node *tmp;
25206+ struct pseudo_link *plink;
1facf9fc 25207+
dece6358
AM
25208+ SiMustWriteLock(sb);
25209+
1facf9fc 25210+ sbinfo = au_sbi(sb);
25211+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25212+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25213+
1facf9fc 25214+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25215+ warned = 0;
25216+ for (i = 0; i < AuPlink_NHASH; i++) {
25217+ plink_hlist = &sbinfo->si_plink[i].head;
25218+ if (!warned && verbose && !hlist_empty(plink_hlist)) {
25219+ pr_warn("pseudo-link is not flushed");
25220+ warned = 1;
25221+ }
25222+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist)
25223+ do_put_plink(plink, 0);
25224+ INIT_HLIST_HEAD(plink_hlist);
25225+ }
1facf9fc 25226+}
25227+
e49829fe
JR
25228+void au_plink_clean(struct super_block *sb, int verbose)
25229+{
25230+ struct dentry *root;
25231+
25232+ root = sb->s_root;
25233+ aufs_write_lock(root);
25234+ if (au_opt_test(au_mntflags(sb), PLINK))
25235+ au_plink_put(sb, verbose);
25236+ aufs_write_unlock(root);
25237+}
25238+
86dc4139
AM
25239+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
25240+{
25241+ int do_put;
25242+ aufs_bindex_t bstart, bend, bindex;
25243+
25244+ do_put = 0;
25245+ bstart = au_ibstart(inode);
25246+ bend = au_ibend(inode);
25247+ if (bstart >= 0) {
25248+ for (bindex = bstart; bindex <= bend; bindex++) {
25249+ if (!au_h_iptr(inode, bindex)
25250+ || au_ii_br_id(inode, bindex) != br_id)
25251+ continue;
25252+ au_set_h_iptr(inode, bindex, NULL, 0);
25253+ do_put = 1;
25254+ break;
25255+ }
25256+ if (do_put)
25257+ for (bindex = bstart; bindex <= bend; bindex++)
25258+ if (au_h_iptr(inode, bindex)) {
25259+ do_put = 0;
25260+ break;
25261+ }
25262+ } else
25263+ do_put = 1;
25264+
25265+ return do_put;
25266+}
25267+
1facf9fc 25268+/* free the plinks on a branch specified by @br_id */
25269+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
25270+{
25271+ struct au_sbinfo *sbinfo;
86dc4139
AM
25272+ struct hlist_head *plink_hlist;
25273+ struct hlist_node *tmp;
25274+ struct pseudo_link *plink;
1facf9fc 25275+ struct inode *inode;
86dc4139 25276+ int i, do_put;
1facf9fc 25277+
dece6358
AM
25278+ SiMustWriteLock(sb);
25279+
1facf9fc 25280+ sbinfo = au_sbi(sb);
25281+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25282+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25283+
1facf9fc 25284+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25285+ for (i = 0; i < AuPlink_NHASH; i++) {
25286+ plink_hlist = &sbinfo->si_plink[i].head;
25287+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist) {
25288+ inode = au_igrab(plink->inode);
25289+ ii_write_lock_child(inode);
25290+ do_put = au_plink_do_half_refresh(inode, br_id);
dece6358
AM
25291+ if (do_put)
25292+ do_put_plink(plink, 1);
86dc4139
AM
25293+ ii_write_unlock(inode);
25294+ iput(inode);
dece6358 25295+ }
dece6358
AM
25296+ }
25297+}
7f207e10
AM
25298diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
25299--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 25300+++ linux/fs/aufs/poll.c 2015-09-24 10:47:58.254719746 +0200
b912730e 25301@@ -0,0 +1,52 @@
dece6358 25302+/*
2000de60 25303+ * Copyright (C) 2005-2015 Junjiro R. Okajima
dece6358
AM
25304+ *
25305+ * This program, aufs is free software; you can redistribute it and/or modify
25306+ * it under the terms of the GNU General Public License as published by
25307+ * the Free Software Foundation; either version 2 of the License, or
25308+ * (at your option) any later version.
25309+ *
25310+ * This program is distributed in the hope that it will be useful,
25311+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25312+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25313+ * GNU General Public License for more details.
25314+ *
25315+ * You should have received a copy of the GNU General Public License
523b37e3 25316+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358
AM
25317+ */
25318+
1308ab2a 25319+/*
25320+ * poll operation
25321+ * There is only one filesystem which implements ->poll operation, currently.
25322+ */
25323+
25324+#include "aufs.h"
25325+
25326+unsigned int aufs_poll(struct file *file, poll_table *wait)
25327+{
25328+ unsigned int mask;
25329+ int err;
25330+ struct file *h_file;
1308ab2a 25331+ struct super_block *sb;
25332+
25333+ /* We should pretend an error happened. */
25334+ mask = POLLERR /* | POLLIN | POLLOUT */;
b912730e 25335+ sb = file->f_path.dentry->d_sb;
e49829fe 25336+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
25337+
25338+ h_file = au_read_pre(file, /*keep_fi*/0);
25339+ err = PTR_ERR(h_file);
25340+ if (IS_ERR(h_file))
1308ab2a 25341+ goto out;
25342+
25343+ /* it is not an error if h_file has no operation */
25344+ mask = DEFAULT_POLLMASK;
523b37e3 25345+ if (h_file->f_op->poll)
1308ab2a 25346+ mask = h_file->f_op->poll(h_file, wait);
b912730e 25347+ fput(h_file); /* instead of au_read_post() */
1308ab2a 25348+
4f0767ce 25349+out:
1308ab2a 25350+ si_read_unlock(sb);
25351+ AuTraceErr((int)mask);
25352+ return mask;
25353+}
c1595e42
JR
25354diff -urN /usr/share/empty/fs/aufs/posix_acl.c linux/fs/aufs/posix_acl.c
25355--- /usr/share/empty/fs/aufs/posix_acl.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 25356+++ linux/fs/aufs/posix_acl.c 2015-09-24 10:47:58.254719746 +0200
c1595e42
JR
25357@@ -0,0 +1,99 @@
25358+/*
2000de60 25359+ * Copyright (C) 2014-2015 Junjiro R. Okajima
c1595e42
JR
25360+ *
25361+ * This program, aufs is free software; you can redistribute it and/or modify
25362+ * it under the terms of the GNU General Public License as published by
25363+ * the Free Software Foundation; either version 2 of the License, or
25364+ * (at your option) any later version.
25365+ *
25366+ * This program is distributed in the hope that it will be useful,
25367+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25368+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25369+ * GNU General Public License for more details.
25370+ *
25371+ * You should have received a copy of the GNU General Public License
25372+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25373+ */
25374+
25375+/*
25376+ * posix acl operations
25377+ */
25378+
25379+#include <linux/fs.h>
25380+#include <linux/posix_acl.h>
25381+#include "aufs.h"
25382+
25383+struct posix_acl *aufs_get_acl(struct inode *inode, int type) /* fetch the ACL of @type from the lower inode found at au_ibstart(@inode) */
25384+{
25385+ struct posix_acl *acl;
25386+ int err;
25387+ aufs_bindex_t bindex;
25388+ struct inode *h_inode;
25389+ struct super_block *sb;
25390+
25391+ acl = NULL; /* this is the value returned when MS_POSIXACL is not set */
25392+ sb = inode->i_sb;
25393+ si_read_lock(sb, AuLock_FLUSH);
25394+ ii_read_lock_child(inode);
25395+ if (!(sb->s_flags & MS_POSIXACL))
25396+ goto out;
25397+
25398+ bindex = au_ibstart(inode);
25399+ h_inode = au_h_iptr(inode, bindex);
25400+ if (unlikely(!h_inode /* no lower inode, or its S_IFMT bits differ from those of the aufs inode */
25401+ || ((h_inode->i_mode & S_IFMT)
25402+ != (inode->i_mode & S_IFMT)))) {
25403+ err = au_busy_or_stale();
25404+ acl = ERR_PTR(err); /* the error code is handed back through the returned pointer */
25405+ goto out;
25406+ }
25407+
25408+ /* always topmost only */
25409+ acl = get_acl(h_inode, type);
25410+
25411+out:
25412+ ii_read_unlock(inode); /* every path releases both locks, in reverse order of acquisition */
25413+ si_read_unlock(sb);
25414+
25415+ AuTraceErrPtr(acl);
25416+ return acl;
25417+}
25418+
25419+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type) /* set @acl of @type by passing an AU_ACL_SET request to au_srxattr(); returns 0 or a negative errno */
25420+{
25421+ int err;
25422+ ssize_t ssz;
25423+ struct dentry *dentry;
25424+ struct au_srxattr arg = {
25425+ .type = AU_ACL_SET,
25426+ .u.acl_set = {
25427+ .acl = acl,
25428+ .type = type
25429+ },
25430+ };
25431+
25432+ mutex_lock(&inode->i_mutex); /* held until the out: label */
25433+ if (inode->i_ino == AUFS_ROOT_INO)
25434+ dentry = dget(inode->i_sb->s_root);
25435+ else {
25436+ dentry = d_find_alias(inode); /* au_srxattr() takes a dentry, so one has to be found for the inode */
25437+ if (!dentry)
25438+ dentry = d_find_any_alias(inode);
25439+ if (!dentry) {
25440+ pr_warn("cannot handle this inode, "
25441+ "please report to aufs-users ML\n");
25442+ err = -ENOENT;
25443+ goto out;
25444+ }
25445+ }
25446+
25447+ ssz = au_srxattr(dentry, &arg);
25448+ dput(dentry); /* drops the reference taken by dget()/d_find_alias()/d_find_any_alias() above */
25449+ err = ssz;
25450+ if (ssz >= 0) /* any non-negative result is reported to the caller as plain success */
25451+ err = 0;
25452+
25453+out:
25454+ mutex_unlock(&inode->i_mutex);
25455+ return err;
25456+}
7f207e10
AM
25457diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
25458--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 25459+++ linux/fs/aufs/procfs.c 2015-09-24 10:47:58.254719746 +0200
523b37e3 25460@@ -0,0 +1,169 @@
e49829fe 25461+/*
2000de60 25462+ * Copyright (C) 2010-2015 Junjiro R. Okajima
e49829fe
JR
25463+ *
25464+ * This program, aufs is free software; you can redistribute it and/or modify
25465+ * it under the terms of the GNU General Public License as published by
25466+ * the Free Software Foundation; either version 2 of the License, or
25467+ * (at your option) any later version.
25468+ *
25469+ * This program is distributed in the hope that it will be useful,
25470+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25471+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25472+ * GNU General Public License for more details.
25473+ *
25474+ * You should have received a copy of the GNU General Public License
523b37e3 25475+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
25476+ */
25477+
25478+/*
25479+ * procfs interfaces
25480+ */
25481+
25482+#include <linux/proc_fs.h>
25483+#include "aufs.h"
25484+
25485+static int au_procfs_plm_release(struct inode *inode, struct file *file) /* ->release handler: undo what au_procfs_plm_write_si() stored in file->private_data */
25486+{
25487+ struct au_sbinfo *sbinfo;
25488+
25489+ sbinfo = file->private_data;
25490+ if (sbinfo) { /* NULL when no "si=" write was accepted on this file */
25491+ au_plink_maint_leave(sbinfo);
25492+ kobject_put(&sbinfo->si_kobj); /* pairs with the kobject_get() kept by au_procfs_plm_write_si() */
25493+ }
25494+
25495+ return 0;
25496+}
25497+
25498+static void au_procfs_plm_write_clean(struct file *file) /* handler for the "clean" command */
25499+{
25500+ struct au_sbinfo *sbinfo;
25501+
25502+ sbinfo = file->private_data;
25503+ if (sbinfo) /* silently does nothing when no sbinfo has been attached to this file */
25504+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
25505+}
25506+
25507+static int au_procfs_plm_write_si(struct file *file, unsigned long id) /* handler for "si=<id>": find the matching sbinfo and call au_plink_maint_enter() on its super_block */
25508+{
25509+ int err;
25510+ struct super_block *sb;
25511+ struct au_sbinfo *sbinfo;
25512+
25513+ err = -EBUSY;
25514+ if (unlikely(file->private_data)) /* an sbinfo is already attached to this file */
25515+ goto out;
25516+
25517+ sb = NULL;
53392da6 25518+ /* don't use au_sbilist_lock() here */
e49829fe
JR
25519+ spin_lock(&au_sbilist.spin);
25520+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
25521+ if (id == sysaufs_si_id(sbinfo)) {
25522+ kobject_get(&sbinfo->si_kobj); /* reference taken while the list lock is still held */
25523+ sb = sbinfo->si_sb;
25524+ break;
25525+ }
25526+ spin_unlock(&au_sbilist.spin);
25527+
25528+ err = -EINVAL;
25529+ if (unlikely(!sb)) /* no match: sbinfo is not a valid entry here and is not touched again */
25530+ goto out;
25531+
25532+ err = au_plink_maint_enter(sb);
25533+ if (!err)
25534+ /* keep kobject_get() */
25535+ file->private_data = sbinfo;
25536+ else
25537+ kobject_put(&sbinfo->si_kobj);
25538+out:
25539+ return err; /* 0, -EBUSY, -EINVAL, or the error from au_plink_maint_enter() */
25540+}
25541+
25542+/*
25543+ * Accept a valid "si=xxxx" only.
25544+ * Once it is accepted successfully, accept "clean" too. Returns @count on success, a negative errno otherwise.
25545+ */
25546+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
25547+ size_t count, loff_t *ppos)
25548+{
25549+ ssize_t err;
25550+ unsigned long id;
25551+ /* "si=" + hex digits + last newline is allowed + terminating NUL */
25552+ char buf[3 + sizeof(unsigned long) * 2 + 1 + 1];
25553+
25554+ err = -EACCES;
25555+ if (unlikely(!capable(CAP_SYS_ADMIN)))
25556+ goto out;
25557+
25558+ err = -EINVAL;
25559+ if (unlikely(count >= sizeof(buf))) /* one byte must stay free for the NUL stored at buf[count] below */
25560+ goto out;
25561+
25562+ err = copy_from_user(buf, ubuf, count);
25563+ if (unlikely(err)) {
25564+ err = -EFAULT;
25565+ goto out;
25566+ }
25567+ buf[count] = 0; /* in bounds: count < sizeof(buf) was checked above */
25568+
25569+ err = -EINVAL;
25570+ if (!strcmp("clean", buf)) { /* exact match only */
25571+ au_procfs_plm_write_clean(file);
25572+ goto out_success;
25573+ } else if (unlikely(strncmp("si=", buf, 3)))
25574+ goto out;
25575+
9dbd164d 25576+ err = kstrtoul(buf + 3, 16, &id); /* the id after "si=" is parsed as hexadecimal */
e49829fe
JR
25577+ if (unlikely(err))
25578+ goto out;
25579+
25580+ err = au_procfs_plm_write_si(file, id);
25581+ if (unlikely(err))
25582+ goto out;
25583+
25584+out_success:
25585+ err = count; /* success */
25586+out:
25587+ return err;
25588+}
25589+
25590+static const struct file_operations au_procfs_plm_fop = { /* write-only entry: no .read handler is provided */
25591+ .write = au_procfs_plm_write,
25592+ .release = au_procfs_plm_release,
25593+ .owner = THIS_MODULE
25594+};
25595+
25596+/* ---------------------------------------------------------------------- */
25597+
25598+static struct proc_dir_entry *au_procfs_dir; /* assigned by au_procfs_init() from proc_mkdir() */
25599+
25600+void au_procfs_fin(void) /* remove the entry first, then the directory that contains it */
25601+{
25602+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
25603+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
25604+}
25605+
25606+int __init au_procfs_init(void) /* create the AUFS_PLINK_MAINT_DIR directory and the AUFS_PLINK_MAINT_NAME entry in it; returns 0 or -ENOMEM */
25607+{
25608+ int err;
25609+ struct proc_dir_entry *entry;
25610+
25611+ err = -ENOMEM; /* reported for either creation failure below */
25612+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
25613+ if (unlikely(!au_procfs_dir))
25614+ goto out;
25615+
25616+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR, /* regular file, owner-writable only */
25617+ au_procfs_dir, &au_procfs_plm_fop);
25618+ if (unlikely(!entry))
25619+ goto out_dir;
25620+
25621+ err = 0;
25622+ goto out; /* success */
25623+
25624+
25625+out_dir:
25626+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL); /* roll back the proc_mkdir() above */
25627+out:
25628+ return err;
25629+}
7f207e10
AM
25630diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
25631--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 25632+++ linux/fs/aufs/rdu.c 2015-09-24 10:47:58.254719746 +0200
523b37e3 25633@@ -0,0 +1,388 @@
1308ab2a 25634+/*
2000de60 25635+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1308ab2a 25636+ *
25637+ * This program, aufs is free software; you can redistribute it and/or modify
25638+ * it under the terms of the GNU General Public License as published by
25639+ * the Free Software Foundation; either version 2 of the License, or
25640+ * (at your option) any later version.
25641+ *
25642+ * This program is distributed in the hope that it will be useful,
25643+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25644+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25645+ * GNU General Public License for more details.
25646+ *
25647+ * You should have received a copy of the GNU General Public License
523b37e3 25648+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1308ab2a 25649+ */
25650+
25651+/*
25652+ * readdir in userspace.
25653+ */
25654+
b752ccd1 25655+#include <linux/compat.h>
4a4d8108 25656+#include <linux/fs_stack.h>
1308ab2a 25657+#include <linux/security.h>
1308ab2a 25658+#include "aufs.h"
25659+
25660+/* bits for struct aufs_rdu.flags */
25661+#define AuRdu_CALLED 1 /* set by au_rdu_fill() every time it is invoked */
25662+#define AuRdu_CONT (1 << 1) /* set by au_rdu() once a call has completed without error */
25663+#define AuRdu_FULL (1 << 2) /* set by au_rdu_fill() when an entry does not fit in the buffer */
25664+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name) /* non-zero when the bit is set */
7f207e10
AM
25665+#define au_fset_rdu(flags, name) \
25666+ do { (flags) |= AuRdu_##name; } while (0)
25667+#define au_fclr_rdu(flags, name) \
25668+ do { (flags) &= ~AuRdu_##name; } while (0)
1308ab2a 25669+
25670+struct au_rdu_arg { /* per-call state handed to au_rdu_fill() through the embedded dir_context */
392086de 25671+ struct dir_context ctx; /* au_rdu_fill() recovers the whole struct from this via container_of() */
1308ab2a 25672+ struct aufs_rdu *rdu;
25673+ union au_rdu_ent_ul ent; /* current write position in the user buffer */
25674+ unsigned long end; /* au_rdu() sets this to the start of the user buffer plus rdu->sz */
25675+
25676+ struct super_block *sb;
25677+ int err; /* au_rdu_do() uses this as the result when vfsub_iterate_dir() returns >= 0 */
25678+};
25679+
392086de 25680+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen, /* dir_context actor: copy one directory entry into the user buffer */
1308ab2a 25681+ loff_t offset, u64 h_ino, unsigned int d_type)
25682+{
25683+ int err, len;
392086de 25684+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
1308ab2a 25685+ struct aufs_rdu *rdu = arg->rdu;
25686+ struct au_rdu_ent ent;
25687+
25688+ err = 0;
25689+ arg->err = 0;
25690+ au_fset_rdu(rdu->cookie.flags, CALLED); /* au_rdu_do() tests this flag to decide whether to iterate again */
25691+ len = au_rdu_len(nlen);
25692+ if (arg->ent.ul + len < arg->end) { /* the entry fits before the end of the user buffer */
25693+ ent.ino = h_ino;
25694+ ent.bindex = rdu->cookie.bindex;
25695+ ent.type = d_type;
25696+ ent.nlen = nlen;
4a4d8108
AM
25697+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
25698+ ent.type = DT_UNKNOWN;
1308ab2a 25699+
9dbd164d 25700+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 25701+ err = -EFAULT; /* returned if any of the three user-space writes below fails */
25702+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
25703+ goto out;
25704+ if (copy_to_user(arg->ent.e->name, name, nlen))
25705+ goto out;
25706+ /* the terminating NULL */
25707+ if (__put_user(0, arg->ent.e->name + nlen))
25708+ goto out;
25709+ err = 0;
25710+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
25711+ arg->ent.ul += len; /* advance the write position past this entry */
25712+ rdu->rent++;
25713+ } else {
25714+ err = -EFAULT; /* buffer exhausted: record where it stopped and raise FULL */
25715+ au_fset_rdu(rdu->cookie.flags, FULL);
25716+ rdu->full = 1;
25717+ rdu->tail = arg->ent;
25718+ }
25719+
4f0767ce 25720+out:
1308ab2a 25721+ /* AuTraceErr(err); */
25722+ return err;
25723+}
25724+
25725+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg) /* read entries from one lower directory file, starting at cookie->h_pos */
25726+{
25727+ int err;
25728+ loff_t offset;
25729+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
25730+
92d182d2 25731+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
1308ab2a 25732+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
25733+ err = offset;
25734+ if (unlikely(offset != cookie->h_pos)) /* the seek did not land on the requested position; its result is returned */
25735+ goto out;
25736+
25737+ err = 0;
25738+ do {
25739+ arg->err = 0;
25740+ au_fclr_rdu(cookie->flags, CALLED); /* cleared so the loop can tell whether the actor ran in this pass */
25741+ /* smp_mb(); */
392086de 25742+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1308ab2a 25743+ if (err >= 0)
25744+ err = arg->err;
25745+ } while (!err /* repeat while there is no error, the actor was called, and the buffer is not full */
25746+ && au_ftest_rdu(cookie->flags, CALLED)
25747+ && !au_ftest_rdu(cookie->flags, FULL));
25748+ cookie->h_pos = h_file->f_pos; /* remember the lower file position in the cookie */
25749+
4f0767ce 25750+out:
1308ab2a 25751+ AuTraceErr(err);
25752+ return err;
25753+}
25754+
25755+static int au_rdu(struct file *file, struct aufs_rdu *rdu) /* AUFS_CTL_RDU body: walk the lower directory files of @file and fill the user buffer described by @rdu */
25756+{
25757+ int err;
25758+ aufs_bindex_t bend;
392086de
AM
25759+ struct au_rdu_arg arg = {
25760+ .ctx = {
2000de60 25761+ .actor = au_rdu_fill
392086de
AM
25762+ }
25763+ };
1308ab2a 25764+ struct dentry *dentry;
25765+ struct inode *inode;
25766+ struct file *h_file;
25767+ struct au_rdu_cookie *cookie = &rdu->cookie;
25768+
25769+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz); /* the whole user buffer is range-checked once, up front */
25770+ if (unlikely(err)) {
25771+ err = -EFAULT;
25772+ AuTraceErr(err);
25773+ goto out;
25774+ }
25775+ rdu->rent = 0; /* reset the output fields for this call */
25776+ rdu->tail = rdu->ent;
25777+ rdu->full = 0;
25778+ arg.rdu = rdu;
25779+ arg.ent = rdu->ent;
25780+ arg.end = arg.ent.ul;
25781+ arg.end += rdu->sz;
25782+
25783+ err = -ENOTDIR;
523b37e3 25784+ if (unlikely(!file->f_op->iterate)) /* a file without ->iterate is reported as not a directory */
1308ab2a 25785+ goto out;
25786+
25787+ err = security_file_permission(file, MAY_READ);
25788+ AuTraceErr(err);
25789+ if (unlikely(err))
25790+ goto out;
25791+
2000de60 25792+ dentry = file->f_path.dentry;
5527c038 25793+ inode = d_inode(dentry);
1308ab2a 25794+#if 1
25795+ mutex_lock(&inode->i_mutex);
25796+#else
25797+ err = mutex_lock_killable(&inode->i_mutex);
25798+ AuTraceErr(err);
25799+ if (unlikely(err))
25800+ goto out;
25801+#endif
1308ab2a 25802+
25803+ arg.sb = inode->i_sb;
e49829fe
JR
25804+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
25805+ if (unlikely(err))
25806+ goto out_mtx;
027c5e7a
AM
25807+ err = au_alive_dir(dentry);
25808+ if (unlikely(err))
25809+ goto out_si;
e49829fe 25810+ /* todo: reval? */
1308ab2a 25811+ fi_read_lock(file);
25812+
25813+ err = -EAGAIN;
25814+ if (unlikely(au_ftest_rdu(cookie->flags, CONT) /* a continued call whose recorded generation no longer matches au_figen(file) */
25815+ && cookie->generation != au_figen(file)))
25816+ goto out_unlock;
25817+
25818+ err = 0;
25819+ if (!rdu->blk) { /* no block size given by the caller: use si_rdblk, or au_dir_size() if that is zero too */
25820+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
25821+ if (!rdu->blk)
25822+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
25823+ }
25824+ bend = au_fbstart(file); /* bend temporarily holds the start index here */
25825+ if (cookie->bindex < bend)
25826+ cookie->bindex = bend;
4a4d8108 25827+ bend = au_fbend_dir(file);
1308ab2a 25828+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
25829+ for (; !err && cookie->bindex <= bend;
25830+ cookie->bindex++, cookie->h_pos = 0) { /* h_pos restarts at 0 for each following index */
4a4d8108 25831+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 25832+ if (!h_file)
25833+ continue;
25834+
25835+ au_fclr_rdu(cookie->flags, FULL);
25836+ err = au_rdu_do(h_file, &arg);
25837+ AuTraceErr(err);
25838+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err)) /* break skips the increment, so bindex and h_pos stay where reading stopped */
25839+ break;
25840+ }
25841+ AuDbg("rent %llu\n", rdu->rent);
25842+
25843+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) { /* first call without error: record the generation and mark the cookie as continued */
25844+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
25845+ au_fset_rdu(cookie->flags, CONT);
25846+ cookie->generation = au_figen(file);
25847+ }
25848+
25849+ ii_read_lock_child(inode);
25850+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
25851+ ii_read_unlock(inode);
25852+
4f0767ce 25853+out_unlock:
1308ab2a 25854+ fi_read_unlock(file);
027c5e7a 25855+out_si:
1308ab2a 25856+ si_read_unlock(arg.sb);
4f0767ce 25857+out_mtx:
1308ab2a 25858+ mutex_unlock(&inode->i_mutex);
4f0767ce 25859+out:
1308ab2a 25860+ AuTraceErr(err);
25861+ return err;
25862+}
25863+
25864+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu) /* AUFS_CTL_RDU_INO body: rewrite the ino field of rdu->nent entries in the user buffer */
25865+{
25866+ int err;
25867+ ino_t ino;
25868+ unsigned long long nent;
25869+ union au_rdu_ent_ul *u;
25870+ struct au_rdu_ent ent;
25871+ struct super_block *sb;
25872+
25873+ err = 0;
25874+ nent = rdu->nent;
25875+ u = &rdu->ent; /* cursor over the user buffer; it advances inside @rdu itself */
2000de60 25876+ sb = file->f_path.dentry->d_sb;
1308ab2a 25877+ si_read_lock(sb, AuLock_FLUSH);
25878+ while (nent-- > 0) {
9dbd164d 25879+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 25880+ err = copy_from_user(&ent, u->e, sizeof(ent)); /* fetch the fixed-size entry header from user space */
4a4d8108
AM
25881+ if (!err)
25882+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino)); /* checked here because __put_user() is used for the write-back below */
1308ab2a 25883+ if (unlikely(err)) {
25884+ err = -EFAULT;
25885+ AuTraceErr(err);
25886+ break;
25887+ }
25888+
25889+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
25890+ if (!ent.wh) /* entries flagged wh go through au_wh_ino() instead of au_ino() */
25891+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
25892+ else
25893+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
25894+ &ino);
25895+ if (unlikely(err)) {
25896+ AuTraceErr(err);
25897+ break;
25898+ }
25899+
25900+ err = __put_user(ino, &u->e->ino);
25901+ if (unlikely(err)) {
25902+ err = -EFAULT;
25903+ AuTraceErr(err);
25904+ break;
25905+ }
25906+ u->ul += au_rdu_len(ent.nlen); /* step to the next entry using the name length read from user space */
25907+ }
25908+ si_read_unlock(sb);
25909+
25910+ return err;
25911+}
25912+
25913+/* ---------------------------------------------------------------------- */
25914+
25915+static int au_rdu_verify(struct aufs_rdu *rdu) /* returns 0 when the size recorded by the caller matches sizeof(*rdu), -EINVAL otherwise */
25916+{
b752ccd1 25917+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 25918+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 25919+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 25920+ rdu->blk,
25921+ rdu->rent, rdu->shwh, rdu->full,
25922+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
25923+ rdu->cookie.generation);
dece6358 25924+
b752ccd1 25925+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu)) /* the only check performed is this structure-size comparison */
1308ab2a 25926+ return 0;
dece6358 25927+
b752ccd1
AM
25928+ AuDbg("%u:%u\n",
25929+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 25930+ return -EINVAL;
25931+}
25932+
25933+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) /* dispatch AUFS_CTL_RDU and AUFS_CTL_RDU_INO; @arg is a user pointer to struct aufs_rdu */
dece6358 25934+{
1308ab2a 25935+ long err, e;
25936+ struct aufs_rdu rdu;
25937+ void __user *p = (void __user *)arg;
dece6358 25938+
1308ab2a 25939+ err = copy_from_user(&rdu, p, sizeof(rdu)); /* work on a kernel copy of the request */
25940+ if (unlikely(err)) {
25941+ err = -EFAULT;
25942+ AuTraceErr(err);
25943+ goto out;
25944+ }
25945+ err = au_rdu_verify(&rdu);
dece6358
AM
25946+ if (unlikely(err))
25947+ goto out;
25948+
1308ab2a 25949+ switch (cmd) {
25950+ case AUFS_CTL_RDU:
25951+ err = au_rdu(file, &rdu);
25952+ if (unlikely(err))
25953+ break;
dece6358 25954+
1308ab2a 25955+ e = copy_to_user(p, &rdu, sizeof(rdu)); /* the updated request is copied back only when au_rdu() succeeded */
25956+ if (unlikely(e)) {
25957+ err = -EFAULT;
25958+ AuTraceErr(err);
25959+ }
25960+ break;
25961+ case AUFS_CTL_RDU_INO:
25962+ err = au_rdu_ino(file, &rdu); /* nothing is copied back to @p for this command */
25963+ break;
25964+
25965+ default:
4a4d8108 25966+ /* err = -ENOTTY; */
1308ab2a 25967+ err = -EINVAL;
25968+ }
dece6358 25969+
4f0767ce 25970+out:
1308ab2a 25971+ AuTraceErr(err);
25972+ return err;
1facf9fc 25973+}
b752ccd1
AM
25974+
25975+#ifdef CONFIG_COMPAT
25976+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) /* same dispatch as au_rdu_ioctl(), with pointers converted by compat_ptr()/ptr_to_compat() */
25977+{
25978+ long err, e;
25979+ struct aufs_rdu rdu;
25980+ void __user *p = compat_ptr(arg);
25981+
25982+ /* todo: get_user()? */
25983+ err = copy_from_user(&rdu, p, sizeof(rdu));
25984+ if (unlikely(err)) {
25985+ err = -EFAULT;
25986+ AuTraceErr(err);
25987+ goto out;
25988+ }
25989+ rdu.ent.e = compat_ptr(rdu.ent.ul); /* convert the buffer address supplied by the caller into a kernel-usable user pointer */
25990+ err = au_rdu_verify(&rdu);
25991+ if (unlikely(err))
25992+ goto out;
25993+
25994+ switch (cmd) {
25995+ case AUFS_CTL_RDU:
25996+ err = au_rdu(file, &rdu);
25997+ if (unlikely(err))
25998+ break;
25999+
26000+ rdu.ent.ul = ptr_to_compat(rdu.ent.e); /* convert both pointers back before the struct is copied out */
26001+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
26002+ e = copy_to_user(p, &rdu, sizeof(rdu));
26003+ if (unlikely(e)) {
26004+ err = -EFAULT;
26005+ AuTraceErr(err);
26006+ }
26007+ break;
26008+ case AUFS_CTL_RDU_INO:
26009+ err = au_rdu_ino(file, &rdu);
26010+ break;
26011+
26012+ default:
26013+ /* err = -ENOTTY; */
26014+ err = -EINVAL;
26015+ }
26016+
4f0767ce 26017+out:
b752ccd1
AM
26018+ AuTraceErr(err);
26019+ return err;
26020+}
26021+#endif
7f207e10
AM
26022diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
26023--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 26024+++ linux/fs/aufs/rwsem.h 2015-09-24 10:47:58.254719746 +0200
076b876e 26025@@ -0,0 +1,191 @@
1facf9fc 26026+/*
2000de60 26027+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 26028+ *
26029+ * This program, aufs is free software; you can redistribute it and/or modify
26030+ * it under the terms of the GNU General Public License as published by
26031+ * the Free Software Foundation; either version 2 of the License, or
26032+ * (at your option) any later version.
dece6358
AM
26033+ *
26034+ * This program is distributed in the hope that it will be useful,
26035+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26036+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26037+ * GNU General Public License for more details.
26038+ *
26039+ * You should have received a copy of the GNU General Public License
523b37e3 26040+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26041+ */
26042+
26043+/*
26044+ * simple read-write semaphore wrappers
26045+ */
26046+
26047+#ifndef __AUFS_RWSEM_H__
26048+#define __AUFS_RWSEM_H__
26049+
26050+#ifdef __KERNEL__
26051+
4a4d8108 26052+#include "debug.h"
dece6358
AM
26053+
26054+struct au_rwsem { /* rw_semaphore plus, in debug builds, counters maintained by the au_rw_*() wrappers */
26055+ struct rw_semaphore rwsem;
26056+#ifdef CONFIG_AUFS_DEBUG
26057+ /* just for debugging, not almighty counter */
26058+ atomic_t rcnt, wcnt; /* incremented on read/write lock, decremented on unlock */
26059+#endif
26060+};
26061+
26062+#ifdef CONFIG_AUFS_DEBUG
26063+#define AuDbgCntInit(rw) do { \
26064+ atomic_set(&(rw)->rcnt, 0); \
26065+ atomic_set(&(rw)->wcnt, 0); \
26066+ smp_mb(); /* atomic set */ \
26067+} while (0)
26068+
e49829fe 26069+#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt)
dece6358 26070+#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0) /* warns when the counter drops below zero */
e49829fe 26071+#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt)
dece6358
AM
26072+#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0) /* warns when the counter drops below zero */
26073+#else
26074+#define AuDbgCntInit(rw) do {} while (0) /* without CONFIG_AUFS_DEBUG all five macros expand to nothing */
26075+#define AuDbgRcntInc(rw) do {} while (0)
26076+#define AuDbgRcntDec(rw) do {} while (0)
26077+#define AuDbgWcntInc(rw) do {} while (0)
26078+#define AuDbgWcntDec(rw) do {} while (0)
26079+#endif /* CONFIG_AUFS_DEBUG */
26080+
26081+/* to debug easier, do not make them inlined functions */
26082+#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
26083+/* rwsem_is_locked() is unusable */
26084+#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0) /* NOTE(review): rcnt/wcnt exist only under CONFIG_AUFS_DEBUG; presumably AuDebugOn() drops its argument otherwise -- confirm in debug.h */
26085+#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
26086+#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
26087+ && atomic_read(&(rw)->wcnt) <= 0)
26088+#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \
26089+ || atomic_read(&(rw)->wcnt)) /* complains when either counter is still non-zero */
26090+
e49829fe
JR
26091+#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key)
26092+
dece6358
AM
26093+static inline void au_rw_init(struct au_rwsem *rw) /* reset the debug counters and initialise the semaphore, unlocked */
26094+{
26095+ AuDbgCntInit(rw);
26096+ init_rwsem(&rw->rwsem);
26097+}
26098+
26099+static inline void au_rw_init_wlock(struct au_rwsem *rw) /* initialise and return with the write lock already held */
26100+{
26101+ au_rw_init(rw);
26102+ down_write(&rw->rwsem);
26103+ AuDbgWcntInc(rw);
26104+}
26105+
26106+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw, /* as au_rw_init_wlock(), passing @lsc to down_write_nested() */
26107+ unsigned int lsc)
26108+{
26109+ au_rw_init(rw);
26110+ down_write_nested(&rw->rwsem, lsc);
26111+ AuDbgWcntInc(rw);
26112+}
26113+
26114+static inline void au_rw_read_lock(struct au_rwsem *rw) /* take the read lock, then count it */
26115+{
26116+ down_read(&rw->rwsem);
26117+ AuDbgRcntInc(rw);
26118+}
26119+
26120+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc) /* as au_rw_read_lock(), passing @lsc to down_read_nested() */
26121+{
26122+ down_read_nested(&rw->rwsem, lsc);
26123+ AuDbgRcntInc(rw);
26124+}
26125+
26126+static inline void au_rw_read_unlock(struct au_rwsem *rw) /* check and drop the read count, then release the read lock */
26127+{
26128+ AuRwMustReadLock(rw);
26129+ AuDbgRcntDec(rw);
26130+ up_read(&rw->rwsem);
26131+}
26132+
26133+static inline void au_rw_dgrade_lock(struct au_rwsem *rw) /* turn a held write lock into a read lock */
26134+{
26135+ AuRwMustWriteLock(rw);
26136+ AuDbgRcntInc(rw); /* the counters move from writer to reader before the semaphore itself is downgraded */
26137+ AuDbgWcntDec(rw);
26138+ downgrade_write(&rw->rwsem);
26139+}
26140+
26141+static inline void au_rw_write_lock(struct au_rwsem *rw) /* take the write lock, then count it */
26142+{
26143+ down_write(&rw->rwsem);
26144+ AuDbgWcntInc(rw);
26145+}
26146+
26147+static inline void au_rw_write_lock_nested(struct au_rwsem *rw, /* as au_rw_write_lock(), passing @lsc to down_write_nested() */
26148+ unsigned int lsc)
26149+{
26150+ down_write_nested(&rw->rwsem, lsc);
26151+ AuDbgWcntInc(rw);
26152+}
1facf9fc 26153+
dece6358
AM
26154+static inline void au_rw_write_unlock(struct au_rwsem *rw) /* check and drop the write count, then release the write lock */
26155+{
26156+ AuRwMustWriteLock(rw);
26157+ AuDbgWcntDec(rw);
26158+ up_write(&rw->rwsem);
26159+}
26160+
26161+/* why is the _nested version not defined? */
26162+static inline int au_rw_read_trylock(struct au_rwsem *rw) /* returns the result of down_read_trylock() unchanged */
26163+{
076b876e
AM
26164+ int ret;
26165+
26166+ ret = down_read_trylock(&rw->rwsem);
dece6358
AM
26167+ if (ret) /* count the lock only when it was actually obtained */
26168+ AuDbgRcntInc(rw);
26169+ return ret;
26170+}
26171+
26172+static inline int au_rw_write_trylock(struct au_rwsem *rw) /* returns the result of down_write_trylock() unchanged */
26173+{
076b876e
AM
26174+ int ret;
26175+
26176+ ret = down_write_trylock(&rw->rwsem);
dece6358
AM
26177+ if (ret) /* count the lock only when it was actually obtained */
26178+ AuDbgWcntInc(rw);
26179+ return ret;
26180+}
26181+
26182+#undef AuDbgCntInit
26183+#undef AuDbgRcntInc
26184+#undef AuDbgRcntDec
26185+#undef AuDbgWcntInc
26186+#undef AuDbgWcntDec
1facf9fc 26187+
26188+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26189+static inline void prefix##_read_lock(param) \
dece6358 26190+{ au_rw_read_lock(rwsem); } \
1facf9fc 26191+static inline void prefix##_write_lock(param) \
dece6358 26192+{ au_rw_write_lock(rwsem); } \
1facf9fc 26193+static inline int prefix##_read_trylock(param) \
dece6358 26194+{ return au_rw_read_trylock(rwsem); } \
1facf9fc 26195+static inline int prefix##_write_trylock(param) \
dece6358 26196+{ return au_rw_write_trylock(rwsem); } /* generates four prefix##_* lock wrappers that forward to au_rw_*() on @rwsem */
1facf9fc 26197+/* why is the _nested version not defined? */
26198+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 26199+{ au_rw_read_trylock_nested(rwsem, lsc)); }
1facf9fc 26200+static inline void prefix##_write_trylock_nestd(param, lsc)
dece6358 26201+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 26202+
26203+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
26204+static inline void prefix##_read_unlock(param) \
dece6358 26205+{ au_rw_read_unlock(rwsem); } \
1facf9fc 26206+static inline void prefix##_write_unlock(param) \
dece6358 26207+{ au_rw_write_unlock(rwsem); } \
1facf9fc 26208+static inline void prefix##_downgrade_lock(param) \
dece6358 26209+{ au_rw_dgrade_lock(rwsem); } /* generates the unlock and downgrade wrappers that forward to au_rw_*() on @rwsem */
1facf9fc 26210+
26211+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
26212+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26213+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) /* both wrapper sets in one invocation */
26214+
26215+#endif /* __KERNEL__ */
26216+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
26217diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
26218--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
b95c5147
AM
26219+++ linux/fs/aufs/sbinfo.c 2015-12-10 17:59:16.839499823 +0100
26220@@ -0,0 +1,366 @@
1facf9fc 26221+/*
2000de60 26222+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 26223+ *
26224+ * This program, aufs is free software; you can redistribute it and/or modify
26225+ * it under the terms of the GNU General Public License as published by
26226+ * the Free Software Foundation; either version 2 of the License, or
26227+ * (at your option) any later version.
dece6358
AM
26228+ *
26229+ * This program is distributed in the hope that it will be useful,
26230+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26231+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26232+ * GNU General Public License for more details.
26233+ *
26234+ * You should have received a copy of the GNU General Public License
523b37e3 26235+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26236+ */
26237+
26238+/*
26239+ * superblock private data
26240+ */
26241+
26242+#include "aufs.h"
26243+
26244+/*
26245+ * they are necessary regardless of whether sysfs is disabled.
26246+ */
26247+void au_si_free(struct kobject *kobj) /* free the au_sbinfo that embeds @kobj as si_kobj, along with the arrays it owns */
26248+{
86dc4139 26249+ int i;
1facf9fc 26250+ struct au_sbinfo *sbinfo;
b752ccd1 26251+ char *locked __maybe_unused; /* debug only */
1facf9fc 26252+
26253+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
86dc4139
AM
26254+ for (i = 0; i < AuPlink_NHASH; i++) /* debug checks: the plink lists, the nowait count and the symlink list must all be empty */
26255+ AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head));
e49829fe 26256+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
1facf9fc 26257+
c2c0f25c
AM
26258+ AuDebugOn(!hlist_empty(&sbinfo->si_symlink.head));
26259+
e49829fe 26260+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 26261+ au_br_free(sbinfo);
e49829fe 26262+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1
AM
26263+
26264+ AuDebugOn(radix_tree_gang_lookup /* NOTE(review): the result buffer is the single pointer 'locked' while max_items is sizeof(char *)/sizeof(char) -- confirm this cannot store more than one item */
26265+ (&sbinfo->au_si_pid.tree, (void **)&locked,
26266+ /*first_index*/PID_MAX_DEFAULT - 1,
26267+ /*max_items*/sizeof(locked)/sizeof(*locked)));
26268+
1facf9fc 26269+ kfree(sbinfo->si_branch);
b752ccd1 26270+ kfree(sbinfo->au_si_pid.bitmap);
1facf9fc 26271+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 26272+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 26273+
26274+ kfree(sbinfo);
26275+}
26276+
26277+int au_si_alloc(struct super_block *sb) /* allocate and initialise an au_sbinfo and attach it as sb->s_fs_info; returns 0 or a negative errno */
26278+{
86dc4139 26279+ int err, i;
1facf9fc 26280+ struct au_sbinfo *sbinfo;
e49829fe 26281+ static struct lock_class_key aufs_si;
1facf9fc 26282+
26283+ err = -ENOMEM; /* stays in effect for the three allocations below */
4a4d8108 26284+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 26285+ if (unlikely(!sbinfo))
26286+ goto out;
26287+
b752ccd1
AM
26288+ BUILD_BUG_ON(sizeof(unsigned long) !=
26289+ sizeof(*sbinfo->au_si_pid.bitmap));
26290+ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT),
26291+ sizeof(*sbinfo->au_si_pid.bitmap),
26292+ GFP_NOFS);
26293+ if (unlikely(!sbinfo->au_si_pid.bitmap))
26294+ goto out_sbinfo;
26295+
1facf9fc 26296+ /* will be reallocated separately */
26297+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
26298+ if (unlikely(!sbinfo->si_branch))
b752ccd1 26299+ goto out_pidmap;
1facf9fc 26300+
1facf9fc 26301+ err = sysaufs_si_init(sbinfo); /* last step that can fail; everything after it is plain initialisation */
26302+ if (unlikely(err))
26303+ goto out_br;
26304+
26305+ au_nwt_init(&sbinfo->si_nowait);
dece6358 26306+ au_rw_init_wlock(&sbinfo->si_rwsem); /* si_rwsem is write-locked here and is not released in this function */
e49829fe 26307+ au_rw_class(&sbinfo->si_rwsem, &aufs_si);
b752ccd1
AM
26308+ spin_lock_init(&sbinfo->au_si_pid.tree_lock);
26309+ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL);
26310+
7f207e10 26311+ atomic_long_set(&sbinfo->si_ninodes, 0);
7f207e10
AM
26312+ atomic_long_set(&sbinfo->si_nfiles, 0);
26313+
1facf9fc 26314+ sbinfo->si_bend = -1;
392086de 26315+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
1facf9fc 26316+
26317+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
26318+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
26319+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup; /* the ops pointers index the tables with the defaults chosen just above */
26320+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 26321+
076b876e
AM
26322+ au_fhsm_init(sbinfo);
26323+
e49829fe 26324+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 26325+
c2c0f25c
AM
26326+ au_sphl_init(&sbinfo->si_symlink);
26327+
392086de
AM
26328+ sbinfo->si_xino_jiffy = jiffies;
26329+ sbinfo->si_xino_expire
26330+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
1facf9fc 26331+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 26332+ sbinfo->si_xino_brid = -1;
26333+ /* leave si_xib_last_pindex and si_xib_next_bit */
26334+
b912730e
AM
26335+ au_sphl_init(&sbinfo->si_aopen);
26336+
e49829fe 26337+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 26338+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
26339+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
26340+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
26341+
86dc4139
AM
26342+ for (i = 0; i < AuPlink_NHASH; i++)
26343+ au_sphl_init(sbinfo->si_plink + i);
1facf9fc 26344+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 26345+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 26346+
523b37e3
AM
26347+ au_sphl_init(&sbinfo->si_files);
26348+
b95c5147
AM
26349+ /* with getattr by default */
26350+ sbinfo->si_iop_array = aufs_iop;
26351+
1facf9fc 26352+ /* leave other members for sysaufs and si_mnt. */
26353+ sbinfo->si_sb = sb;
26354+ sb->s_fs_info = sbinfo;
b752ccd1 26355+ si_pid_set(sb);
1facf9fc 26356+ return 0; /* success */
26357+
4f0767ce 26358+out_br:
1facf9fc 26359+ kfree(sbinfo->si_branch); /* error unwinding frees in reverse order of allocation */
4f0767ce 26360+out_pidmap:
b752ccd1 26361+ kfree(sbinfo->au_si_pid.bitmap);
4f0767ce 26362+out_sbinfo:
1facf9fc 26363+ kfree(sbinfo);
4f0767ce 26364+out:
1facf9fc 26365+ return err;
26366+}
26367+
26368+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr) /* resize sbinfo->si_branch to hold @nbr pointers; returns 0 or -ENOMEM */
26369+{
26370+ int err, sz;
26371+ struct au_branch **brp;
26372+
dece6358
AM
26373+ AuRwMustWriteLock(&sbinfo->si_rwsem);
26374+
1facf9fc 26375+ err = -ENOMEM;
26376+ sz = sizeof(*brp) * (sbinfo->si_bend + 1); /* current size in bytes, computed from si_bend */
26377+ if (unlikely(!sz)) /* si_bend == -1 gives zero; treat the array as holding one pointer */
26378+ sz = sizeof(*brp);
26379+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
26380+ if (brp) { /* on failure si_branch is left pointing at the old array */
26381+ sbinfo->si_branch = brp;
26382+ err = 0;
26383+ }
26384+
26385+ return err;
26386+}
26387+
26388+/* ---------------------------------------------------------------------- */
26389+
26390+unsigned int au_sigen_inc(struct super_block *sb) /* increment si_generation and return the new value */
26391+{
26392+ unsigned int gen;
5527c038 26393+ struct inode *inode;
1facf9fc 26394+
dece6358
AM
26395+ SiMustWriteLock(sb);
26396+
1facf9fc 26397+ gen = ++au_sbi(sb)->si_generation;
26398+ au_update_digen(sb->s_root); /* the root dentry and root inode are updated right away */
5527c038
JR
26399+ inode = d_inode(sb->s_root);
26400+ au_update_iigen(inode, /*half*/0);
26401+ inode->i_version++;
1facf9fc 26402+ return gen;
26403+}
26404+
26405+aufs_bindex_t au_new_br_id(struct super_block *sb)
26406+{
26407+ aufs_bindex_t br_id;
26408+ int i;
26409+ struct au_sbinfo *sbinfo;
26410+
dece6358
AM
26411+ SiMustWriteLock(sb);
26412+
1facf9fc 26413+ sbinfo = au_sbi(sb);
26414+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
26415+ br_id = ++sbinfo->si_last_br_id;
7f207e10 26416+ AuDebugOn(br_id < 0);
1facf9fc 26417+ if (br_id && au_br_index(sb, br_id) < 0)
26418+ return br_id;
26419+ }
26420+
26421+ return -1;
26422+}
26423+
26424+/* ---------------------------------------------------------------------- */
26425+
e49829fe
JR
26426+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
26427+int si_read_lock(struct super_block *sb, int flags)
26428+{
26429+ int err;
26430+
26431+ err = 0;
26432+ if (au_ftest_lock(flags, FLUSH))
26433+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26434+
26435+ si_noflush_read_lock(sb);
26436+ err = au_plink_maint(sb, flags);
26437+ if (unlikely(err))
26438+ si_read_unlock(sb);
26439+
26440+ return err;
26441+}
26442+
26443+int si_write_lock(struct super_block *sb, int flags)
26444+{
26445+ int err;
26446+
26447+ if (au_ftest_lock(flags, FLUSH))
26448+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26449+
26450+ si_noflush_write_lock(sb);
26451+ err = au_plink_maint(sb, flags);
26452+ if (unlikely(err))
26453+ si_write_unlock(sb);
26454+
26455+ return err;
26456+}
26457+
1facf9fc 26458+/* dentry and super_block lock. call at entry point */
e49829fe 26459+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 26460+{
e49829fe 26461+ int err;
027c5e7a 26462+ struct super_block *sb;
e49829fe 26463+
027c5e7a
AM
26464+ sb = dentry->d_sb;
26465+ err = si_read_lock(sb, flags);
26466+ if (unlikely(err))
26467+ goto out;
26468+
26469+ if (au_ftest_lock(flags, DW))
26470+ di_write_lock_child(dentry);
26471+ else
26472+ di_read_lock_child(dentry, flags);
26473+
26474+ if (au_ftest_lock(flags, GEN)) {
26475+ err = au_digen_test(dentry, au_sigen(sb));
79b8bda9
AM
26476+ if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
26477+ AuDebugOn(!err && au_dbrange_test(dentry));
26478+ else if (!err)
26479+ err = au_dbrange_test(dentry);
027c5e7a
AM
26480+ if (unlikely(err))
26481+ aufs_read_unlock(dentry, flags);
e49829fe
JR
26482+ }
26483+
027c5e7a 26484+out:
e49829fe 26485+ return err;
1facf9fc 26486+}
26487+
26488+void aufs_read_unlock(struct dentry *dentry, int flags)
26489+{
26490+ if (au_ftest_lock(flags, DW))
26491+ di_write_unlock(dentry);
26492+ else
26493+ di_read_unlock(dentry, flags);
26494+ si_read_unlock(dentry->d_sb);
26495+}
26496+
26497+void aufs_write_lock(struct dentry *dentry)
26498+{
e49829fe 26499+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 26500+ di_write_lock_child(dentry);
26501+}
26502+
26503+void aufs_write_unlock(struct dentry *dentry)
26504+{
26505+ di_write_unlock(dentry);
26506+ si_write_unlock(dentry->d_sb);
26507+}
26508+
e49829fe 26509+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 26510+{
e49829fe 26511+ int err;
027c5e7a
AM
26512+ unsigned int sigen;
26513+ struct super_block *sb;
e49829fe 26514+
027c5e7a
AM
26515+ sb = d1->d_sb;
26516+ err = si_read_lock(sb, flags);
26517+ if (unlikely(err))
26518+ goto out;
26519+
b95c5147 26520+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
027c5e7a
AM
26521+
26522+ if (au_ftest_lock(flags, GEN)) {
26523+ sigen = au_sigen(sb);
26524+ err = au_digen_test(d1, sigen);
26525+ AuDebugOn(!err && au_dbrange_test(d1));
26526+ if (!err) {
26527+ err = au_digen_test(d2, sigen);
26528+ AuDebugOn(!err && au_dbrange_test(d2));
26529+ }
26530+ if (unlikely(err))
26531+ aufs_read_and_write_unlock2(d1, d2);
26532+ }
26533+
26534+out:
e49829fe 26535+ return err;
1facf9fc 26536+}
26537+
26538+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
26539+{
26540+ di_write_unlock2(d1, d2);
26541+ si_read_unlock(d1->d_sb);
26542+}
b752ccd1
AM
26543+
26544+/* ---------------------------------------------------------------------- */
26545+
26546+int si_pid_test_slow(struct super_block *sb)
26547+{
26548+ void *p;
26549+
26550+ rcu_read_lock();
26551+ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid);
26552+ rcu_read_unlock();
26553+
027c5e7a 26554+ return (long)!!p;
b752ccd1
AM
26555+}
26556+
26557+void si_pid_set_slow(struct super_block *sb)
26558+{
26559+ int err;
26560+ struct au_sbinfo *sbinfo;
26561+
26562+ AuDebugOn(si_pid_test_slow(sb));
26563+
26564+ sbinfo = au_sbi(sb);
26565+ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
26566+ AuDebugOn(err);
26567+ spin_lock(&sbinfo->au_si_pid.tree_lock);
26568+ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid,
027c5e7a 26569+ /*any valid ptr*/sb);
b752ccd1
AM
26570+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
26571+ AuDebugOn(err);
26572+ radix_tree_preload_end();
26573+}
26574+
26575+void si_pid_clr_slow(struct super_block *sb)
26576+{
26577+ void *p;
26578+ struct au_sbinfo *sbinfo;
26579+
26580+ AuDebugOn(!si_pid_test_slow(sb));
26581+
26582+ sbinfo = au_sbi(sb);
26583+ spin_lock(&sbinfo->au_si_pid.tree_lock);
26584+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
26585+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
b752ccd1 26586+}
7f207e10
AM
26587diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
26588--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 26589+++ linux/fs/aufs/spl.h 2015-09-24 10:47:58.254719746 +0200
523b37e3 26590@@ -0,0 +1,111 @@
1facf9fc 26591+/*
2000de60 26592+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 26593+ *
26594+ * This program, aufs is free software; you can redistribute it and/or modify
26595+ * it under the terms of the GNU General Public License as published by
26596+ * the Free Software Foundation; either version 2 of the License, or
26597+ * (at your option) any later version.
dece6358
AM
26598+ *
26599+ * This program is distributed in the hope that it will be useful,
26600+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26601+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26602+ * GNU General Public License for more details.
26603+ *
26604+ * You should have received a copy of the GNU General Public License
523b37e3 26605+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26606+ */
26607+
26608+/*
26609+ * simple list protected by a spinlock
26610+ */
26611+
26612+#ifndef __AUFS_SPL_H__
26613+#define __AUFS_SPL_H__
26614+
26615+#ifdef __KERNEL__
26616+
1facf9fc 26617+struct au_splhead {
26618+ spinlock_t spin;
26619+ struct list_head head;
26620+};
26621+
26622+static inline void au_spl_init(struct au_splhead *spl)
26623+{
26624+ spin_lock_init(&spl->spin);
26625+ INIT_LIST_HEAD(&spl->head);
26626+}
26627+
26628+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
26629+{
26630+ spin_lock(&spl->spin);
26631+ list_add(list, &spl->head);
26632+ spin_unlock(&spl->spin);
26633+}
26634+
26635+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
26636+{
26637+ spin_lock(&spl->spin);
26638+ list_del(list);
26639+ spin_unlock(&spl->spin);
26640+}
26641+
4a4d8108
AM
26642+static inline void au_spl_del_rcu(struct list_head *list,
26643+ struct au_splhead *spl)
26644+{
26645+ spin_lock(&spl->spin);
26646+ list_del_rcu(list);
26647+ spin_unlock(&spl->spin);
26648+}
26649+
86dc4139
AM
26650+/* ---------------------------------------------------------------------- */
26651+
26652+struct au_sphlhead {
26653+ spinlock_t spin;
26654+ struct hlist_head head;
26655+};
26656+
26657+static inline void au_sphl_init(struct au_sphlhead *sphl)
26658+{
26659+ spin_lock_init(&sphl->spin);
26660+ INIT_HLIST_HEAD(&sphl->head);
26661+}
26662+
26663+static inline void au_sphl_add(struct hlist_node *hlist,
26664+ struct au_sphlhead *sphl)
26665+{
26666+ spin_lock(&sphl->spin);
26667+ hlist_add_head(hlist, &sphl->head);
26668+ spin_unlock(&sphl->spin);
26669+}
26670+
26671+static inline void au_sphl_del(struct hlist_node *hlist,
26672+ struct au_sphlhead *sphl)
26673+{
26674+ spin_lock(&sphl->spin);
26675+ hlist_del(hlist);
26676+ spin_unlock(&sphl->spin);
26677+}
26678+
26679+static inline void au_sphl_del_rcu(struct hlist_node *hlist,
26680+ struct au_sphlhead *sphl)
26681+{
26682+ spin_lock(&sphl->spin);
26683+ hlist_del_rcu(hlist);
26684+ spin_unlock(&sphl->spin);
26685+}
26686+
26687+static inline unsigned long au_sphl_count(struct au_sphlhead *sphl)
26688+{
26689+ unsigned long cnt;
26690+ struct hlist_node *pos;
26691+
26692+ cnt = 0;
26693+ spin_lock(&sphl->spin);
26694+ hlist_for_each(pos, &sphl->head)
26695+ cnt++;
26696+ spin_unlock(&sphl->spin);
26697+ return cnt;
26698+}
26699+
1facf9fc 26700+#endif /* __KERNEL__ */
26701+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
26702diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
26703--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
b95c5147
AM
26704+++ linux/fs/aufs/super.c 2015-12-10 17:59:16.842833237 +0100
26705@@ -0,0 +1,1047 @@
1facf9fc 26706+/*
2000de60 26707+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 26708+ *
26709+ * This program, aufs is free software; you can redistribute it and/or modify
26710+ * it under the terms of the GNU General Public License as published by
26711+ * the Free Software Foundation; either version 2 of the License, or
26712+ * (at your option) any later version.
dece6358
AM
26713+ *
26714+ * This program is distributed in the hope that it will be useful,
26715+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26716+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26717+ * GNU General Public License for more details.
26718+ *
26719+ * You should have received a copy of the GNU General Public License
523b37e3 26720+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26721+ */
26722+
26723+/*
26724+ * mount and super_block operations
26725+ */
26726+
f6c5ef8b 26727+#include <linux/mm.h>
1facf9fc 26728+#include <linux/seq_file.h>
26729+#include <linux/statfs.h>
7f207e10 26730+#include <linux/vmalloc.h>
1facf9fc 26731+#include "aufs.h"
26732+
26733+/*
26734+ * super_operations
26735+ */
26736+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
26737+{
26738+ struct au_icntnr *c;
26739+
26740+ c = au_cache_alloc_icntnr();
26741+ if (c) {
027c5e7a 26742+ au_icntnr_init(c);
1facf9fc 26743+ c->vfs_inode.i_version = 1; /* sigen(sb); */
26744+ c->iinfo.ii_hinode = NULL;
26745+ return &c->vfs_inode;
26746+ }
26747+ return NULL;
26748+}
26749+
027c5e7a
AM
26750+static void aufs_destroy_inode_cb(struct rcu_head *head)
26751+{
26752+ struct inode *inode = container_of(head, struct inode, i_rcu);
26753+
b4510431 26754+ INIT_HLIST_HEAD(&inode->i_dentry);
027c5e7a
AM
26755+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
26756+}
26757+
1facf9fc 26758+static void aufs_destroy_inode(struct inode *inode)
26759+{
26760+ au_iinfo_fin(inode);
027c5e7a 26761+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 26762+}
26763+
26764+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
26765+{
26766+ struct inode *inode;
26767+ int err;
26768+
26769+ inode = iget_locked(sb, ino);
26770+ if (unlikely(!inode)) {
26771+ inode = ERR_PTR(-ENOMEM);
26772+ goto out;
26773+ }
26774+ if (!(inode->i_state & I_NEW))
26775+ goto out;
26776+
26777+ err = au_xigen_new(inode);
26778+ if (!err)
26779+ err = au_iinfo_init(inode);
26780+ if (!err)
26781+ inode->i_version++;
26782+ else {
26783+ iget_failed(inode);
26784+ inode = ERR_PTR(err);
26785+ }
26786+
4f0767ce 26787+out:
1facf9fc 26788+ /* never return NULL */
26789+ AuDebugOn(!inode);
26790+ AuTraceErrPtr(inode);
26791+ return inode;
26792+}
26793+
26794+/* lock free root dinfo */
26795+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
26796+{
26797+ int err;
26798+ aufs_bindex_t bindex, bend;
26799+ struct path path;
4a4d8108 26800+ struct au_hdentry *hdp;
1facf9fc 26801+ struct au_branch *br;
076b876e 26802+ au_br_perm_str_t perm;
1facf9fc 26803+
26804+ err = 0;
26805+ bend = au_sbend(sb);
4a4d8108 26806+ hdp = au_di(sb->s_root)->di_hdentry;
1facf9fc 26807+ for (bindex = 0; !err && bindex <= bend; bindex++) {
26808+ br = au_sbr(sb, bindex);
86dc4139 26809+ path.mnt = au_br_mnt(br);
4a4d8108 26810+ path.dentry = hdp[bindex].hd_dentry;
1facf9fc 26811+ err = au_seq_path(seq, &path);
79b8bda9 26812+ if (!err) {
076b876e 26813+ au_optstr_br_perm(&perm, br->br_perm);
79b8bda9
AM
26814+ seq_printf(seq, "=%s", perm.a);
26815+ if (bindex != bend)
26816+ seq_putc(seq, ':');
1e00d052 26817+ }
1facf9fc 26818+ }
79b8bda9
AM
26819+ if (unlikely(err || seq_has_overflowed(seq)))
26820+ err = -E2BIG;
1facf9fc 26821+
26822+ return err;
26823+}
26824+
26825+static void au_show_wbr_create(struct seq_file *m, int v,
26826+ struct au_sbinfo *sbinfo)
26827+{
26828+ const char *pat;
26829+
dece6358
AM
26830+ AuRwMustAnyLock(&sbinfo->si_rwsem);
26831+
c2b27bf2 26832+ seq_puts(m, ",create=");
1facf9fc 26833+ pat = au_optstr_wbr_create(v);
26834+ switch (v) {
26835+ case AuWbrCreate_TDP:
26836+ case AuWbrCreate_RR:
26837+ case AuWbrCreate_MFS:
26838+ case AuWbrCreate_PMFS:
c2b27bf2 26839+ seq_puts(m, pat);
1facf9fc 26840+ break;
26841+ case AuWbrCreate_MFSV:
26842+ seq_printf(m, /*pat*/"mfs:%lu",
e49829fe
JR
26843+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26844+ / MSEC_PER_SEC);
1facf9fc 26845+ break;
26846+ case AuWbrCreate_PMFSV:
26847+ seq_printf(m, /*pat*/"pmfs:%lu",
e49829fe
JR
26848+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26849+ / MSEC_PER_SEC);
1facf9fc 26850+ break;
26851+ case AuWbrCreate_MFSRR:
26852+ seq_printf(m, /*pat*/"mfsrr:%llu",
26853+ sbinfo->si_wbr_mfs.mfsrr_watermark);
26854+ break;
26855+ case AuWbrCreate_MFSRRV:
26856+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
26857+ sbinfo->si_wbr_mfs.mfsrr_watermark,
e49829fe
JR
26858+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26859+ / MSEC_PER_SEC);
1facf9fc 26860+ break;
392086de
AM
26861+ case AuWbrCreate_PMFSRR:
26862+ seq_printf(m, /*pat*/"pmfsrr:%llu",
26863+ sbinfo->si_wbr_mfs.mfsrr_watermark);
26864+ break;
26865+ case AuWbrCreate_PMFSRRV:
26866+ seq_printf(m, /*pat*/"pmfsrr:%llu:%lu",
26867+ sbinfo->si_wbr_mfs.mfsrr_watermark,
26868+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26869+ / MSEC_PER_SEC);
26870+ break;
1facf9fc 26871+ }
26872+}
26873+
7eafdf33 26874+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
1facf9fc 26875+{
26876+#ifdef CONFIG_SYSFS
26877+ return 0;
26878+#else
26879+ int err;
26880+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
26881+ aufs_bindex_t bindex, brid;
1facf9fc 26882+ struct qstr *name;
26883+ struct file *f;
26884+ struct dentry *d, *h_root;
4a4d8108 26885+ struct au_hdentry *hdp;
1facf9fc 26886+
dece6358
AM
26887+ AuRwMustAnyLock(&sbinfo->si_rwsem);
26888+
1facf9fc 26889+ err = 0;
1facf9fc 26890+ f = au_sbi(sb)->si_xib;
26891+ if (!f)
26892+ goto out;
26893+
26894+ /* stop printing the default xino path on the first writable branch */
26895+ h_root = NULL;
26896+ brid = au_xino_brid(sb);
26897+ if (brid >= 0) {
26898+ bindex = au_br_index(sb, brid);
4a4d8108
AM
26899+ hdp = au_di(sb->s_root)->di_hdentry;
26900+ h_root = hdp[0 + bindex].hd_dentry;
1facf9fc 26901+ }
2000de60 26902+ d = f->f_path.dentry;
1facf9fc 26903+ name = &d->d_name;
26904+ /* safe ->d_parent because the file is unlinked */
26905+ if (d->d_parent == h_root
26906+ && name->len == len
26907+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
26908+ goto out;
26909+
26910+ seq_puts(seq, ",xino=");
26911+ err = au_xino_path(seq, f);
26912+
4f0767ce 26913+out:
1facf9fc 26914+ return err;
26915+#endif
26916+}
26917+
26918+/* seq_file will re-call me in case of too long string */
7eafdf33 26919+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
1facf9fc 26920+{
027c5e7a 26921+ int err;
1facf9fc 26922+ unsigned int mnt_flags, v;
26923+ struct super_block *sb;
26924+ struct au_sbinfo *sbinfo;
26925+
26926+#define AuBool(name, str) do { \
26927+ v = au_opt_test(mnt_flags, name); \
26928+ if (v != au_opt_test(AuOpt_Def, name)) \
26929+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
26930+} while (0)
26931+
26932+#define AuStr(name, str) do { \
26933+ v = mnt_flags & AuOptMask_##name; \
26934+ if (v != (AuOpt_Def & AuOptMask_##name)) \
26935+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
26936+} while (0)
26937+
26938+#define AuUInt(name, str, val) do { \
26939+ if (val != AUFS_##name##_DEF) \
26940+ seq_printf(m, "," #str "=%u", val); \
26941+} while (0)
26942+
7eafdf33 26943+ sb = dentry->d_sb;
c1595e42
JR
26944+ if (sb->s_flags & MS_POSIXACL)
26945+ seq_puts(m, ",acl");
26946+
26947+ /* lock free root dinfo */
1facf9fc 26948+ si_noflush_read_lock(sb);
26949+ sbinfo = au_sbi(sb);
26950+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
26951+
26952+ mnt_flags = au_mntflags(sb);
26953+ if (au_opt_test(mnt_flags, XINO)) {
7eafdf33 26954+ err = au_show_xino(m, sb);
1facf9fc 26955+ if (unlikely(err))
26956+ goto out;
26957+ } else
26958+ seq_puts(m, ",noxino");
26959+
26960+ AuBool(TRUNC_XINO, trunc_xino);
26961+ AuStr(UDBA, udba);
dece6358 26962+ AuBool(SHWH, shwh);
1facf9fc 26963+ AuBool(PLINK, plink);
4a4d8108 26964+ AuBool(DIO, dio);
076b876e 26965+ AuBool(DIRPERM1, dirperm1);
1facf9fc 26966+
26967+ v = sbinfo->si_wbr_create;
26968+ if (v != AuWbrCreate_Def)
26969+ au_show_wbr_create(m, v, sbinfo);
26970+
26971+ v = sbinfo->si_wbr_copyup;
26972+ if (v != AuWbrCopyup_Def)
26973+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
26974+
26975+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
26976+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
26977+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
26978+
26979+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
26980+
027c5e7a
AM
26981+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
26982+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 26983+
26984+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
26985+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
26986+
076b876e
AM
26987+ au_fhsm_show(m, sbinfo);
26988+
1facf9fc 26989+ AuBool(SUM, sum);
26990+ /* AuBool(SUM_W, wsum); */
26991+ AuBool(WARN_PERM, warn_perm);
26992+ AuBool(VERBOSE, verbose);
26993+
4f0767ce 26994+out:
1facf9fc 26995+ /* be sure to print "br:" last */
26996+ if (!sysaufs_brs) {
26997+ seq_puts(m, ",br:");
26998+ au_show_brs(m, sb);
26999+ }
27000+ si_read_unlock(sb);
27001+ return 0;
27002+
1facf9fc 27003+#undef AuBool
27004+#undef AuStr
4a4d8108 27005+#undef AuUInt
1facf9fc 27006+}
27007+
27008+/* ---------------------------------------------------------------------- */
27009+
27010+/* sum mode which returns the summation for statfs(2) */
27011+
27012+static u64 au_add_till_max(u64 a, u64 b)
27013+{
27014+ u64 old;
27015+
27016+ old = a;
27017+ a += b;
92d182d2
AM
27018+ if (old <= a)
27019+ return a;
27020+ return ULLONG_MAX;
27021+}
27022+
27023+static u64 au_mul_till_max(u64 a, long mul)
27024+{
27025+ u64 old;
27026+
27027+ old = a;
27028+ a *= mul;
27029+ if (old <= a)
1facf9fc 27030+ return a;
27031+ return ULLONG_MAX;
27032+}
27033+
27034+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
27035+{
27036+ int err;
92d182d2 27037+ long bsize, factor;
1facf9fc 27038+ u64 blocks, bfree, bavail, files, ffree;
27039+ aufs_bindex_t bend, bindex, i;
27040+ unsigned char shared;
7f207e10 27041+ struct path h_path;
1facf9fc 27042+ struct super_block *h_sb;
27043+
92d182d2
AM
27044+ err = 0;
27045+ bsize = LONG_MAX;
27046+ files = 0;
27047+ ffree = 0;
1facf9fc 27048+ blocks = 0;
27049+ bfree = 0;
27050+ bavail = 0;
1facf9fc 27051+ bend = au_sbend(sb);
92d182d2 27052+ for (bindex = 0; bindex <= bend; bindex++) {
7f207e10
AM
27053+ h_path.mnt = au_sbr_mnt(sb, bindex);
27054+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 27055+ shared = 0;
92d182d2 27056+ for (i = 0; !shared && i < bindex; i++)
1facf9fc 27057+ shared = (au_sbr_sb(sb, i) == h_sb);
27058+ if (shared)
27059+ continue;
27060+
27061+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27062+ h_path.dentry = h_path.mnt->mnt_root;
27063+ err = vfs_statfs(&h_path, buf);
1facf9fc 27064+ if (unlikely(err))
27065+ goto out;
27066+
92d182d2
AM
27067+ if (bsize > buf->f_bsize) {
27068+ /*
27069+ * we will reduce bsize, so we have to expand blocks
27070+ * etc. to match them again
27071+ */
27072+ factor = (bsize / buf->f_bsize);
27073+ blocks = au_mul_till_max(blocks, factor);
27074+ bfree = au_mul_till_max(bfree, factor);
27075+ bavail = au_mul_till_max(bavail, factor);
27076+ bsize = buf->f_bsize;
27077+ }
27078+
27079+ factor = (buf->f_bsize / bsize);
27080+ blocks = au_add_till_max(blocks,
27081+ au_mul_till_max(buf->f_blocks, factor));
27082+ bfree = au_add_till_max(bfree,
27083+ au_mul_till_max(buf->f_bfree, factor));
27084+ bavail = au_add_till_max(bavail,
27085+ au_mul_till_max(buf->f_bavail, factor));
1facf9fc 27086+ files = au_add_till_max(files, buf->f_files);
27087+ ffree = au_add_till_max(ffree, buf->f_ffree);
27088+ }
27089+
92d182d2 27090+ buf->f_bsize = bsize;
1facf9fc 27091+ buf->f_blocks = blocks;
27092+ buf->f_bfree = bfree;
27093+ buf->f_bavail = bavail;
27094+ buf->f_files = files;
27095+ buf->f_ffree = ffree;
92d182d2 27096+ buf->f_frsize = 0;
1facf9fc 27097+
4f0767ce 27098+out:
1facf9fc 27099+ return err;
27100+}
27101+
27102+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
27103+{
27104+ int err;
7f207e10 27105+ struct path h_path;
1facf9fc 27106+ struct super_block *sb;
27107+
27108+ /* lock free root dinfo */
27109+ sb = dentry->d_sb;
27110+ si_noflush_read_lock(sb);
7f207e10 27111+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 27112+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27113+ h_path.mnt = au_sbr_mnt(sb, 0);
27114+ h_path.dentry = h_path.mnt->mnt_root;
27115+ err = vfs_statfs(&h_path, buf);
27116+ } else
1facf9fc 27117+ err = au_statfs_sum(sb, buf);
27118+ si_read_unlock(sb);
27119+
27120+ if (!err) {
27121+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 27122+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 27123+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
27124+ }
27125+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
27126+
27127+ return err;
27128+}
27129+
27130+/* ---------------------------------------------------------------------- */
27131+
537831f9
AM
27132+static int aufs_sync_fs(struct super_block *sb, int wait)
27133+{
27134+ int err, e;
27135+ aufs_bindex_t bend, bindex;
27136+ struct au_branch *br;
27137+ struct super_block *h_sb;
27138+
27139+ err = 0;
27140+ si_noflush_read_lock(sb);
27141+ bend = au_sbend(sb);
27142+ for (bindex = 0; bindex <= bend; bindex++) {
27143+ br = au_sbr(sb, bindex);
27144+ if (!au_br_writable(br->br_perm))
27145+ continue;
27146+
27147+ h_sb = au_sbr_sb(sb, bindex);
27148+ if (h_sb->s_op->sync_fs) {
27149+ e = h_sb->s_op->sync_fs(h_sb, wait);
27150+ if (unlikely(e && !err))
27151+ err = e;
27152+ /* go on even if an error happens */
27153+ }
27154+ }
27155+ si_read_unlock(sb);
27156+
27157+ return err;
27158+}
27159+
27160+/* ---------------------------------------------------------------------- */
27161+
1facf9fc 27162+/* final actions when unmounting a file system */
27163+static void aufs_put_super(struct super_block *sb)
27164+{
27165+ struct au_sbinfo *sbinfo;
27166+
27167+ sbinfo = au_sbi(sb);
27168+ if (!sbinfo)
27169+ return;
27170+
1facf9fc 27171+ dbgaufs_si_fin(sbinfo);
27172+ kobject_put(&sbinfo->si_kobj);
27173+}
27174+
27175+/* ---------------------------------------------------------------------- */
27176+
7f207e10
AM
27177+void au_array_free(void *array)
27178+{
27179+ if (array) {
27180+ if (!is_vmalloc_addr(array))
27181+ kfree(array);
27182+ else
27183+ vfree(array);
27184+ }
27185+}
27186+
79b8bda9
AM
27187+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
27188+ struct super_block *sb, void *arg)
7f207e10
AM
27189+{
27190+ void *array;
076b876e 27191+ unsigned long long n, sz;
7f207e10
AM
27192+
27193+ array = NULL;
27194+ n = 0;
27195+ if (!*hint)
27196+ goto out;
27197+
27198+ if (*hint > ULLONG_MAX / sizeof(array)) {
27199+ array = ERR_PTR(-EMFILE);
27200+ pr_err("hint %llu\n", *hint);
27201+ goto out;
27202+ }
27203+
076b876e
AM
27204+ sz = sizeof(array) * *hint;
27205+ array = kzalloc(sz, GFP_NOFS);
7f207e10 27206+ if (unlikely(!array))
076b876e 27207+ array = vzalloc(sz);
7f207e10
AM
27208+ if (unlikely(!array)) {
27209+ array = ERR_PTR(-ENOMEM);
27210+ goto out;
27211+ }
27212+
79b8bda9 27213+ n = cb(sb, array, *hint, arg);
7f207e10
AM
27214+ AuDebugOn(n > *hint);
27215+
27216+out:
27217+ *hint = n;
27218+ return array;
27219+}
27220+
79b8bda9 27221+static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
7f207e10
AM
27222+ unsigned long long max __maybe_unused,
27223+ void *arg)
27224+{
27225+ unsigned long long n;
27226+ struct inode **p, *inode;
27227+ struct list_head *head;
27228+
27229+ n = 0;
27230+ p = a;
27231+ head = arg;
79b8bda9 27232+ spin_lock(&sb->s_inode_list_lock);
7f207e10
AM
27233+ list_for_each_entry(inode, head, i_sb_list) {
27234+ if (!is_bad_inode(inode)
27235+ && au_ii(inode)->ii_bstart >= 0) {
2cbb1c4b
JR
27236+ spin_lock(&inode->i_lock);
27237+ if (atomic_read(&inode->i_count)) {
27238+ au_igrab(inode);
27239+ *p++ = inode;
27240+ n++;
27241+ AuDebugOn(n > max);
27242+ }
27243+ spin_unlock(&inode->i_lock);
7f207e10
AM
27244+ }
27245+ }
79b8bda9 27246+ spin_unlock(&sb->s_inode_list_lock);
7f207e10
AM
27247+
27248+ return n;
27249+}
27250+
27251+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
27252+{
27253+ *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
79b8bda9 27254+ return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
7f207e10
AM
27255+}
27256+
27257+void au_iarray_free(struct inode **a, unsigned long long max)
27258+{
27259+ unsigned long long ull;
27260+
27261+ for (ull = 0; ull < max; ull++)
27262+ iput(a[ull]);
27263+ au_array_free(a);
27264+}
27265+
27266+/* ---------------------------------------------------------------------- */
27267+
1facf9fc 27268+/*
27269+ * refresh dentry and inode at remount time.
27270+ */
027c5e7a
AM
27271+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
27272+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
27273+ struct dentry *parent)
1facf9fc 27274+{
27275+ int err;
1facf9fc 27276+
27277+ di_write_lock_child(dentry);
1facf9fc 27278+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
27279+ err = au_refresh_dentry(dentry, parent);
27280+ if (!err && dir_flags)
5527c038 27281+ au_hn_reset(d_inode(dentry), dir_flags);
1facf9fc 27282+ di_read_unlock(parent, AuLock_IR);
1facf9fc 27283+ di_write_unlock(dentry);
27284+
27285+ return err;
27286+}
27287+
027c5e7a
AM
27288+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
27289+ struct au_sbinfo *sbinfo,
b95c5147 27290+ const unsigned int dir_flags, unsigned int do_idop)
1facf9fc 27291+{
027c5e7a
AM
27292+ int err;
27293+ struct dentry *parent;
027c5e7a
AM
27294+
27295+ err = 0;
27296+ parent = dget_parent(dentry);
27297+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
5527c038
JR
27298+ if (d_really_is_positive(dentry)) {
27299+ if (!d_is_dir(dentry))
027c5e7a
AM
27300+ err = au_do_refresh(dentry, /*dir_flags*/0,
27301+ parent);
27302+ else {
27303+ err = au_do_refresh(dentry, dir_flags, parent);
27304+ if (unlikely(err))
27305+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
27306+ }
27307+ } else
27308+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
27309+ AuDbgDentry(dentry);
27310+ }
27311+ dput(parent);
27312+
79b8bda9 27313+ if (!err) {
b95c5147 27314+ if (do_idop)
79b8bda9
AM
27315+ au_refresh_dop(dentry, /*force_reval*/0);
27316+ } else
27317+ au_refresh_dop(dentry, /*force_reval*/1);
27318+
027c5e7a
AM
27319+ AuTraceErr(err);
27320+ return err;
1facf9fc 27321+}
27322+
b95c5147 27323+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
1facf9fc 27324+{
27325+ int err, i, j, ndentry, e;
027c5e7a 27326+ unsigned int sigen;
1facf9fc 27327+ struct au_dcsub_pages dpages;
27328+ struct au_dpage *dpage;
027c5e7a
AM
27329+ struct dentry **dentries, *d;
27330+ struct au_sbinfo *sbinfo;
27331+ struct dentry *root = sb->s_root;
5527c038 27332+ const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
1facf9fc 27333+
b95c5147 27334+ if (do_idop)
79b8bda9
AM
27335+ au_refresh_dop(root, /*force_reval*/0);
27336+
027c5e7a
AM
27337+ err = au_dpages_init(&dpages, GFP_NOFS);
27338+ if (unlikely(err))
1facf9fc 27339+ goto out;
027c5e7a
AM
27340+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
27341+ if (unlikely(err))
1facf9fc 27342+ goto out_dpages;
1facf9fc 27343+
027c5e7a
AM
27344+ sigen = au_sigen(sb);
27345+ sbinfo = au_sbi(sb);
27346+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 27347+ dpage = dpages.dpages + i;
27348+ dentries = dpage->dentries;
27349+ ndentry = dpage->ndentry;
027c5e7a 27350+ for (j = 0; j < ndentry; j++) {
1facf9fc 27351+ d = dentries[j];
79b8bda9 27352+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
b95c5147 27353+ do_idop);
027c5e7a
AM
27354+ if (unlikely(e && !err))
27355+ err = e;
27356+ /* go on even err */
1facf9fc 27357+ }
27358+ }
27359+
4f0767ce 27360+out_dpages:
1facf9fc 27361+ au_dpages_free(&dpages);
4f0767ce 27362+out:
1facf9fc 27363+ return err;
27364+}
27365+
b95c5147 27366+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
1facf9fc 27367+{
027c5e7a
AM
27368+ int err, e;
27369+ unsigned int sigen;
27370+ unsigned long long max, ull;
27371+ struct inode *inode, **array;
1facf9fc 27372+
027c5e7a
AM
27373+ array = au_iarray_alloc(sb, &max);
27374+ err = PTR_ERR(array);
27375+ if (IS_ERR(array))
27376+ goto out;
1facf9fc 27377+
27378+ err = 0;
027c5e7a
AM
27379+ sigen = au_sigen(sb);
27380+ for (ull = 0; ull < max; ull++) {
27381+ inode = array[ull];
076b876e
AM
27382+ if (unlikely(!inode))
27383+ break;
b95c5147
AM
27384+
27385+ e = 0;
27386+ ii_write_lock_child(inode);
537831f9 27387+ if (au_iigen(inode, NULL) != sigen) {
027c5e7a 27388+ e = au_refresh_hinode_self(inode);
1facf9fc 27389+ if (unlikely(e)) {
b95c5147 27390+ au_refresh_iop(inode, /*force_getattr*/1);
027c5e7a 27391+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 27392+ if (!err)
27393+ err = e;
27394+ /* go on even if err */
27395+ }
27396+ }
b95c5147
AM
27397+ if (!e && do_idop)
27398+ au_refresh_iop(inode, /*force_getattr*/0);
27399+ ii_write_unlock(inode);
1facf9fc 27400+ }
27401+
027c5e7a 27402+ au_iarray_free(array, max);
1facf9fc 27403+
4f0767ce 27404+out:
1facf9fc 27405+ return err;
27406+}
27407+
b95c5147 27408+static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
1facf9fc 27409+{
027c5e7a
AM
27410+ int err, e;
27411+ unsigned int udba;
27412+ aufs_bindex_t bindex, bend;
1facf9fc 27413+ struct dentry *root;
27414+ struct inode *inode;
027c5e7a 27415+ struct au_branch *br;
79b8bda9 27416+ struct au_sbinfo *sbi;
1facf9fc 27417+
27418+ au_sigen_inc(sb);
79b8bda9
AM
27419+ sbi = au_sbi(sb);
27420+ au_fclr_si(sbi, FAILED_REFRESH_DIR);
1facf9fc 27421+
27422+ root = sb->s_root;
27423+ DiMustNoWaiters(root);
5527c038 27424+ inode = d_inode(root);
1facf9fc 27425+ IiMustNoWaiters(inode);
1facf9fc 27426+
027c5e7a
AM
27427+ udba = au_opt_udba(sb);
27428+ bend = au_sbend(sb);
27429+ for (bindex = 0; bindex <= bend; bindex++) {
27430+ br = au_sbr(sb, bindex);
27431+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 27432+ if (unlikely(err))
027c5e7a
AM
27433+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
27434+ bindex, err);
27435+ /* go on even if err */
1facf9fc 27436+ }
027c5e7a 27437+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 27438+
b95c5147 27439+ if (do_idop) {
79b8bda9
AM
27440+ if (au_ftest_si(sbi, NO_DREVAL)) {
27441+ AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
27442+ sb->s_d_op = &aufs_dop_noreval;
b95c5147
AM
27443+ AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
27444+ sbi->si_iop_array = aufs_iop_nogetattr;
79b8bda9
AM
27445+ } else {
27446+ AuDebugOn(sb->s_d_op == &aufs_dop);
27447+ sb->s_d_op = &aufs_dop;
b95c5147
AM
27448+ AuDebugOn(sbi->si_iop_array == aufs_iop);
27449+ sbi->si_iop_array = aufs_iop;
79b8bda9 27450+ }
b95c5147
AM
27451+ pr_info("reset to %pf and %pf\n",
27452+ sb->s_d_op, sbi->si_iop_array);
79b8bda9
AM
27453+ }
27454+
027c5e7a 27455+ di_write_unlock(root);
b95c5147
AM
27456+ err = au_refresh_d(sb, do_idop);
27457+ e = au_refresh_i(sb, do_idop);
027c5e7a
AM
27458+ if (unlikely(e && !err))
27459+ err = e;
1facf9fc 27460+ /* aufs_write_lock() calls ..._child() */
27461+ di_write_lock_child(root);
027c5e7a
AM
27462+
27463+ au_cpup_attr_all(inode, /*force*/1);
27464+
27465+ if (unlikely(err))
27466+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 27467+}
27468+
27469+/* stop extra interpretation of errno in mount(8), and strange error messages */
27470+static int cvt_err(int err)
27471+{
27472+ AuTraceErr(err);
27473+
27474+ switch (err) {
27475+ case -ENOENT:
27476+ case -ENOTDIR:
27477+ case -EEXIST:
27478+ case -EIO:
27479+ err = -EINVAL;
27480+ }
27481+ return err;
27482+}
27483+
27484+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
27485+{
4a4d8108
AM
27486+ int err, do_dx;
27487+ unsigned int mntflags;
1facf9fc 27488+ struct au_opts opts;
27489+ struct dentry *root;
27490+ struct inode *inode;
27491+ struct au_sbinfo *sbinfo;
27492+
27493+ err = 0;
27494+ root = sb->s_root;
27495+ if (!data || !*data) {
e49829fe
JR
27496+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27497+ if (!err) {
27498+ di_write_lock_child(root);
27499+ err = au_opts_verify(sb, *flags, /*pending*/0);
27500+ aufs_write_unlock(root);
27501+ }
1facf9fc 27502+ goto out;
27503+ }
27504+
27505+ err = -ENOMEM;
27506+ memset(&opts, 0, sizeof(opts));
27507+ opts.opt = (void *)__get_free_page(GFP_NOFS);
27508+ if (unlikely(!opts.opt))
27509+ goto out;
27510+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
27511+ opts.flags = AuOpts_REMOUNT;
27512+ opts.sb_flags = *flags;
27513+
27514+ /* parse it before aufs lock */
27515+ err = au_opts_parse(sb, data, &opts);
27516+ if (unlikely(err))
27517+ goto out_opts;
27518+
27519+ sbinfo = au_sbi(sb);
5527c038 27520+ inode = d_inode(root);
1facf9fc 27521+ mutex_lock(&inode->i_mutex);
e49829fe
JR
27522+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27523+ if (unlikely(err))
27524+ goto out_mtx;
27525+ di_write_lock_child(root);
1facf9fc 27526+
27527+ /* au_opts_remount() may return an error */
27528+ err = au_opts_remount(sb, &opts);
27529+ au_opts_free(&opts);
27530+
027c5e7a 27531+ if (au_ftest_opts(opts.flags, REFRESH))
b95c5147 27532+ au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
1facf9fc 27533+
4a4d8108
AM
27534+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
27535+ mntflags = au_mntflags(sb);
27536+ do_dx = !!au_opt_test(mntflags, DIO);
27537+ au_dy_arefresh(do_dx);
27538+ }
27539+
076b876e 27540+ au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
1facf9fc 27541+ aufs_write_unlock(root);
953406b4 27542+
e49829fe
JR
27543+out_mtx:
27544+ mutex_unlock(&inode->i_mutex);
4f0767ce 27545+out_opts:
1facf9fc 27546+ free_page((unsigned long)opts.opt);
4f0767ce 27547+out:
1facf9fc 27548+ err = cvt_err(err);
27549+ AuTraceErr(err);
27550+ return err;
27551+}
27552+
4a4d8108 27553+static const struct super_operations aufs_sop = {
1facf9fc 27554+ .alloc_inode = aufs_alloc_inode,
27555+ .destroy_inode = aufs_destroy_inode,
b752ccd1 27556+ /* always deleting, no clearing */
1facf9fc 27557+ .drop_inode = generic_delete_inode,
27558+ .show_options = aufs_show_options,
27559+ .statfs = aufs_statfs,
27560+ .put_super = aufs_put_super,
537831f9 27561+ .sync_fs = aufs_sync_fs,
1facf9fc 27562+ .remount_fs = aufs_remount_fs
27563+};
27564+
27565+/* ---------------------------------------------------------------------- */
27566+
27567+static int alloc_root(struct super_block *sb)
27568+{
27569+ int err;
27570+ struct inode *inode;
27571+ struct dentry *root;
27572+
27573+ err = -ENOMEM;
27574+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
27575+ err = PTR_ERR(inode);
27576+ if (IS_ERR(inode))
27577+ goto out;
27578+
b95c5147 27579+ inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
1facf9fc 27580+ inode->i_fop = &aufs_dir_fop;
27581+ inode->i_mode = S_IFDIR;
9dbd164d 27582+ set_nlink(inode, 2);
1facf9fc 27583+ unlock_new_inode(inode);
27584+
92d182d2 27585+ root = d_make_root(inode);
1facf9fc 27586+ if (unlikely(!root))
92d182d2 27587+ goto out;
1facf9fc 27588+ err = PTR_ERR(root);
27589+ if (IS_ERR(root))
92d182d2 27590+ goto out;
1facf9fc 27591+
4a4d8108 27592+ err = au_di_init(root);
1facf9fc 27593+ if (!err) {
27594+ sb->s_root = root;
27595+ return 0; /* success */
27596+ }
27597+ dput(root);
1facf9fc 27598+
4f0767ce 27599+out:
1facf9fc 27600+ return err;
1facf9fc 27601+}
27602+
27603+static int aufs_fill_super(struct super_block *sb, void *raw_data,
27604+ int silent __maybe_unused)
27605+{
27606+ int err;
27607+ struct au_opts opts;
79b8bda9 27608+ struct au_sbinfo *sbinfo;
1facf9fc 27609+ struct dentry *root;
27610+ struct inode *inode;
27611+ char *arg = raw_data;
27612+
27613+ if (unlikely(!arg || !*arg)) {
27614+ err = -EINVAL;
4a4d8108 27615+ pr_err("no arg\n");
1facf9fc 27616+ goto out;
27617+ }
27618+
27619+ err = -ENOMEM;
27620+ memset(&opts, 0, sizeof(opts));
27621+ opts.opt = (void *)__get_free_page(GFP_NOFS);
27622+ if (unlikely(!opts.opt))
27623+ goto out;
27624+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
27625+ opts.sb_flags = sb->s_flags;
27626+
27627+ err = au_si_alloc(sb);
27628+ if (unlikely(err))
27629+ goto out_opts;
79b8bda9 27630+ sbinfo = au_sbi(sb);
1facf9fc 27631+
27632+ /* all timestamps always follow the ones on the branch */
27633+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
27634+ sb->s_op = &aufs_sop;
027c5e7a 27635+ sb->s_d_op = &aufs_dop;
1facf9fc 27636+ sb->s_magic = AUFS_SUPER_MAGIC;
27637+ sb->s_maxbytes = 0;
c1595e42 27638+ sb->s_stack_depth = 1;
1facf9fc 27639+ au_export_init(sb);
c1595e42 27640+ /* au_xattr_init(sb); */
1facf9fc 27641+
27642+ err = alloc_root(sb);
27643+ if (unlikely(err)) {
27644+ si_write_unlock(sb);
27645+ goto out_info;
27646+ }
27647+ root = sb->s_root;
5527c038 27648+ inode = d_inode(root);
1facf9fc 27649+
27650+ /*
27651+ * actually we can parse options regardless aufs lock here.
27652+ * but at remount time, parsing must be done before aufs lock.
27653+ * so we follow the same rule.
27654+ */
27655+ ii_write_lock_parent(inode);
27656+ aufs_write_unlock(root);
27657+ err = au_opts_parse(sb, arg, &opts);
27658+ if (unlikely(err))
27659+ goto out_root;
27660+
27661+ /* lock vfs_inode first, then aufs. */
27662+ mutex_lock(&inode->i_mutex);
1facf9fc 27663+ aufs_write_lock(root);
27664+ err = au_opts_mount(sb, &opts);
27665+ au_opts_free(&opts);
79b8bda9
AM
27666+ if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
27667+ sb->s_d_op = &aufs_dop_noreval;
27668+ pr_info("%pf\n", sb->s_d_op);
27669+ au_refresh_dop(root, /*force_reval*/0);
b95c5147
AM
27670+ sbinfo->si_iop_array = aufs_iop_nogetattr;
27671+ au_refresh_iop(inode, /*force_getattr*/0);
79b8bda9 27672+ }
1facf9fc 27673+ aufs_write_unlock(root);
27674+ mutex_unlock(&inode->i_mutex);
4a4d8108
AM
27675+ if (!err)
27676+ goto out_opts; /* success */
1facf9fc 27677+
4f0767ce 27678+out_root:
1facf9fc 27679+ dput(root);
27680+ sb->s_root = NULL;
4f0767ce 27681+out_info:
79b8bda9
AM
27682+ dbgaufs_si_fin(sbinfo);
27683+ kobject_put(&sbinfo->si_kobj);
1facf9fc 27684+ sb->s_fs_info = NULL;
4f0767ce 27685+out_opts:
1facf9fc 27686+ free_page((unsigned long)opts.opt);
4f0767ce 27687+out:
1facf9fc 27688+ AuTraceErr(err);
27689+ err = cvt_err(err);
27690+ AuTraceErr(err);
27691+ return err;
27692+}
27693+
27694+/* ---------------------------------------------------------------------- */
27695+
027c5e7a
AM
27696+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
27697+ const char *dev_name __maybe_unused,
27698+ void *raw_data)
1facf9fc 27699+{
027c5e7a 27700+ struct dentry *root;
1facf9fc 27701+ struct super_block *sb;
27702+
27703+ /* all timestamps always follow the ones on the branch */
27704+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
27705+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
27706+ if (IS_ERR(root))
27707+ goto out;
27708+
27709+ sb = root->d_sb;
27710+ si_write_lock(sb, !AuLock_FLUSH);
27711+ sysaufs_brs_add(sb, 0);
27712+ si_write_unlock(sb);
27713+ au_sbilist_add(sb);
27714+
27715+out:
27716+ return root;
1facf9fc 27717+}
27718+
e49829fe
JR
27719+static void aufs_kill_sb(struct super_block *sb)
27720+{
27721+ struct au_sbinfo *sbinfo;
27722+
27723+ sbinfo = au_sbi(sb);
27724+ if (sbinfo) {
27725+ au_sbilist_del(sb);
27726+ aufs_write_lock(sb->s_root);
076b876e 27727+ au_fhsm_fin(sb);
e49829fe
JR
27728+ if (sbinfo->si_wbr_create_ops->fin)
27729+ sbinfo->si_wbr_create_ops->fin(sb);
27730+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
27731+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
b95c5147 27732+ au_remount_refresh(sb, /*do_idop*/0);
e49829fe
JR
27733+ }
27734+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
27735+ au_plink_put(sb, /*verbose*/1);
27736+ au_xino_clr(sb);
1e00d052 27737+ sbinfo->si_sb = NULL;
e49829fe 27738+ aufs_write_unlock(sb->s_root);
e49829fe
JR
27739+ au_nwt_flush(&sbinfo->si_nowait);
27740+ }
98d9a5b1 27741+ kill_anon_super(sb);
e49829fe
JR
27742+}
27743+
1facf9fc 27744+struct file_system_type aufs_fs_type = {
27745+ .name = AUFS_FSTYPE,
c06a8ce3
AM
27746+ /* a race between rename and others */
27747+ .fs_flags = FS_RENAME_DOES_D_MOVE,
027c5e7a 27748+ .mount = aufs_mount,
e49829fe 27749+ .kill_sb = aufs_kill_sb,
1facf9fc 27750+ /* no need to __module_get() and module_put(). */
27751+ .owner = THIS_MODULE,
27752+};
7f207e10
AM
27753diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
27754--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
b95c5147
AM
27755+++ linux/fs/aufs/super.h 2015-12-10 17:59:16.842833237 +0100
27756@@ -0,0 +1,642 @@
1facf9fc 27757+/*
2000de60 27758+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 27759+ *
27760+ * This program, aufs is free software; you can redistribute it and/or modify
27761+ * it under the terms of the GNU General Public License as published by
27762+ * the Free Software Foundation; either version 2 of the License, or
27763+ * (at your option) any later version.
dece6358
AM
27764+ *
27765+ * This program is distributed in the hope that it will be useful,
27766+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27767+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27768+ * GNU General Public License for more details.
27769+ *
27770+ * You should have received a copy of the GNU General Public License
523b37e3 27771+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27772+ */
27773+
27774+/*
27775+ * super_block operations
27776+ */
27777+
27778+#ifndef __AUFS_SUPER_H__
27779+#define __AUFS_SUPER_H__
27780+
27781+#ifdef __KERNEL__
27782+
27783+#include <linux/fs.h>
5527c038 27784+#include <linux/kobject.h>
1facf9fc 27785+#include "rwsem.h"
27786+#include "spl.h"
27787+#include "wkq.h"
27788+
1facf9fc 27789+/* policies to select one among multiple writable branches */
27790+struct au_wbr_copyup_operations {
27791+ int (*copyup)(struct dentry *dentry);
27792+};
27793+
392086de
AM
27794+#define AuWbr_DIR 1 /* target is a dir */
27795+#define AuWbr_PARENT (1 << 1) /* always require a parent */
27796+
27797+#define au_ftest_wbr(flags, name) ((flags) & AuWbr_##name)
27798+#define au_fset_wbr(flags, name) { (flags) |= AuWbr_##name; }
27799+#define au_fclr_wbr(flags, name) { (flags) &= ~AuWbr_##name; }
27800+
1facf9fc 27801+struct au_wbr_create_operations {
392086de 27802+ int (*create)(struct dentry *dentry, unsigned int flags);
1facf9fc 27803+ int (*init)(struct super_block *sb);
27804+ int (*fin)(struct super_block *sb);
27805+};
27806+
27807+struct au_wbr_mfs {
27808+ struct mutex mfs_lock; /* protect this structure */
27809+ unsigned long mfs_jiffy;
27810+ unsigned long mfs_expire;
27811+ aufs_bindex_t mfs_bindex;
27812+
27813+ unsigned long long mfsrr_bytes;
27814+ unsigned long long mfsrr_watermark;
27815+};
27816+
86dc4139
AM
27817+struct pseudo_link {
27818+ union {
27819+ struct hlist_node hlist;
27820+ struct rcu_head rcu;
27821+ };
27822+ struct inode *inode;
27823+};
27824+
27825+#define AuPlink_NHASH 100
27826+static inline int au_plink_hash(ino_t ino)
27827+{
27828+ return ino % AuPlink_NHASH;
27829+}
27830+
076b876e
AM
27831+/* File-based Hierarchical Storage Management */
27832+struct au_fhsm {
27833+#ifdef CONFIG_AUFS_FHSM
27834+ /* allow only one process who can receive the notification */
27835+ spinlock_t fhsm_spin;
27836+ pid_t fhsm_pid;
27837+ wait_queue_head_t fhsm_wqh;
27838+ atomic_t fhsm_readable;
27839+
c1595e42 27840+ /* these are protected by si_rwsem */
076b876e 27841+ unsigned long fhsm_expire;
c1595e42 27842+ aufs_bindex_t fhsm_bottom;
076b876e
AM
27843+#endif
27844+};
27845+
1facf9fc 27846+struct au_branch;
27847+struct au_sbinfo {
27848+ /* nowait tasks in the system-wide workqueue */
27849+ struct au_nowait_tasks si_nowait;
27850+
b752ccd1
AM
27851+ /*
27852+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
27853+ * rwsem for au_sbinfo is necessary.
27854+ */
dece6358 27855+ struct au_rwsem si_rwsem;
1facf9fc 27856+
b752ccd1
AM
27857+ /* prevent recursive locking in deleting inode */
27858+ struct {
27859+ unsigned long *bitmap;
27860+ spinlock_t tree_lock;
27861+ struct radix_tree_root tree;
27862+ } au_si_pid;
27863+
7f207e10 27864+ /*
523b37e3
AM
27865+ * dirty approach to protect sb->s_inodes and ->s_files (gone) from
27866+ * remount.
7f207e10
AM
27867+ */
27868+ atomic_long_t si_ninodes, si_nfiles;
27869+
1facf9fc 27870+ /* branch management */
27871+ unsigned int si_generation;
27872+
2000de60 27873+ /* see AuSi_ flags */
1facf9fc 27874+ unsigned char au_si_status;
27875+
27876+ aufs_bindex_t si_bend;
7f207e10
AM
27877+
27878+ /* dirty trick to keep br_id plus */
27879+ unsigned int si_last_br_id :
27880+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 27881+ struct au_branch **si_branch;
27882+
27883+ /* policy to select a writable branch */
27884+ unsigned char si_wbr_copyup;
27885+ unsigned char si_wbr_create;
27886+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
27887+ struct au_wbr_create_operations *si_wbr_create_ops;
27888+
27889+ /* round robin */
27890+ atomic_t si_wbr_rr_next;
27891+
27892+ /* most free space */
27893+ struct au_wbr_mfs si_wbr_mfs;
27894+
076b876e
AM
27895+ /* File-based Hierarchical Storage Management */
27896+ struct au_fhsm si_fhsm;
27897+
1facf9fc 27898+ /* mount flags */
27899+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
27900+ unsigned int si_mntflags;
27901+
c2c0f25c
AM
27902+ /* symlink to follow_link() and put_link() */
27903+ struct au_sphlhead si_symlink;
27904+
1facf9fc 27905+ /* external inode number (bitmap and translation table) */
5527c038
JR
27906+ vfs_readf_t si_xread;
27907+ vfs_writef_t si_xwrite;
1facf9fc 27908+ struct file *si_xib;
27909+ struct mutex si_xib_mtx; /* protect xib members */
27910+ unsigned long *si_xib_buf;
27911+ unsigned long si_xib_last_pindex;
27912+ int si_xib_next_bit;
27913+ aufs_bindex_t si_xino_brid;
392086de
AM
27914+ unsigned long si_xino_jiffy;
27915+ unsigned long si_xino_expire;
1facf9fc 27916+ /* reserved for future use */
27917+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
27918+
27919+#ifdef CONFIG_AUFS_EXPORT
27920+ /* i_generation */
27921+ struct file *si_xigen;
27922+ atomic_t si_xigen_next;
27923+#endif
27924+
b912730e
AM
27925+ /* dirty trick to support atomic_open */
27926+ struct au_sphlhead si_aopen;
27927+
1facf9fc 27928+ /* vdir parameters */
e49829fe 27929+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 27930+ unsigned int si_rdblk; /* deblk size */
27931+ unsigned int si_rdhash; /* hash size */
27932+
27933+ /*
27934+ * If the number of whiteouts is larger than si_dirwh, leave all of
27935+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
27936+ * future fsck.aufs or kernel thread will remove them later.
27937+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
27938+ */
27939+ unsigned int si_dirwh;
27940+
1facf9fc 27941+ /* pseudo_link list */
86dc4139 27942+ struct au_sphlhead si_plink[AuPlink_NHASH];
1facf9fc 27943+ wait_queue_head_t si_plink_wq;
4a4d8108 27944+ spinlock_t si_plink_maint_lock;
e49829fe 27945+ pid_t si_plink_maint_pid;
1facf9fc 27946+
523b37e3
AM
27947+ /* file list */
27948+ struct au_sphlhead si_files;
27949+
b95c5147
AM
27950+ /* with/without getattr, brother of sb->s_d_op */
27951+ struct inode_operations *si_iop_array;
27952+
1facf9fc 27953+ /*
27954+ * sysfs and lifetime management.
27955+ * this is not a small structure and it may be a waste of memory in case
27956+ * of sysfs is disabled, particularly when many aufs-es are mounted.
27957+ * but using sysfs is majority.
27958+ */
27959+ struct kobject si_kobj;
27960+#ifdef CONFIG_DEBUG_FS
86dc4139
AM
27961+ struct dentry *si_dbgaufs;
27962+ struct dentry *si_dbgaufs_plink;
27963+ struct dentry *si_dbgaufs_xib;
1facf9fc 27964+#ifdef CONFIG_AUFS_EXPORT
27965+ struct dentry *si_dbgaufs_xigen;
27966+#endif
27967+#endif
27968+
e49829fe
JR
27969+#ifdef CONFIG_AUFS_SBILIST
27970+ struct list_head si_list;
27971+#endif
27972+
1facf9fc 27973+ /* dirty, necessary for unmounting, sysfs and sysrq */
27974+ struct super_block *si_sb;
27975+};
27976+
dece6358
AM
27977+/* sbinfo status flags */
27978+/*
27979+ * set true when refresh_dirs() failed at remount time.
27980+ * then try refreshing dirs at access time again.
27981+ * if it is false, refreshing dirs at access time is unnecessary
27982+ */
027c5e7a 27983+#define AuSi_FAILED_REFRESH_DIR 1
076b876e 27984+#define AuSi_FHSM (1 << 1) /* fhsm is active now */
79b8bda9 27985+#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
076b876e
AM
27986+
27987+#ifndef CONFIG_AUFS_FHSM
27988+#undef AuSi_FHSM
27989+#define AuSi_FHSM 0
27990+#endif
27991+
dece6358
AM
27992+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
27993+ unsigned int flag)
27994+{
27995+ AuRwMustAnyLock(&sbi->si_rwsem);
27996+ return sbi->au_si_status & flag;
27997+}
27998+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
27999+#define au_fset_si(sbinfo, name) do { \
28000+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28001+ (sbinfo)->au_si_status |= AuSi_##name; \
28002+} while (0)
28003+#define au_fclr_si(sbinfo, name) do { \
28004+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28005+ (sbinfo)->au_si_status &= ~AuSi_##name; \
28006+} while (0)
28007+
1facf9fc 28008+/* ---------------------------------------------------------------------- */
28009+
28010+/* policy to select one among writable branches */
4a4d8108
AM
28011+#define AuWbrCopyup(sbinfo, ...) \
28012+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
28013+#define AuWbrCreate(sbinfo, ...) \
28014+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 28015+
28016+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
28017+#define AuLock_DW 1 /* write-lock dentry */
28018+#define AuLock_IR (1 << 1) /* read-lock inode */
28019+#define AuLock_IW (1 << 2) /* write-lock inode */
28020+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
b95c5147 28021+#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
e49829fe
JR
28022+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
28023+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
027c5e7a 28024+#define AuLock_GEN (1 << 7) /* test digen/iigen */
1facf9fc 28025+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
7f207e10
AM
28026+#define au_fset_lock(flags, name) \
28027+ do { (flags) |= AuLock_##name; } while (0)
28028+#define au_fclr_lock(flags, name) \
28029+ do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 28030+
28031+/* ---------------------------------------------------------------------- */
28032+
28033+/* super.c */
28034+extern struct file_system_type aufs_fs_type;
28035+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
79b8bda9
AM
28036+typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
28037+ unsigned long long max, void *arg);
7f207e10 28038+void au_array_free(void *array);
79b8bda9
AM
28039+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
28040+ struct super_block *sb, void *arg);
7f207e10
AM
28041+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
28042+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 28043+
28044+/* sbinfo.c */
28045+void au_si_free(struct kobject *kobj);
28046+int au_si_alloc(struct super_block *sb);
28047+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
28048+
28049+unsigned int au_sigen_inc(struct super_block *sb);
28050+aufs_bindex_t au_new_br_id(struct super_block *sb);
28051+
e49829fe
JR
28052+int si_read_lock(struct super_block *sb, int flags);
28053+int si_write_lock(struct super_block *sb, int flags);
28054+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 28055+void aufs_read_unlock(struct dentry *dentry, int flags);
28056+void aufs_write_lock(struct dentry *dentry);
28057+void aufs_write_unlock(struct dentry *dentry);
e49829fe 28058+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 28059+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
28060+
b752ccd1
AM
28061+int si_pid_test_slow(struct super_block *sb);
28062+void si_pid_set_slow(struct super_block *sb);
28063+void si_pid_clr_slow(struct super_block *sb);
28064+
1facf9fc 28065+/* wbr_policy.c */
28066+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
28067+extern struct au_wbr_create_operations au_wbr_create_ops[];
28068+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
c2b27bf2 28069+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
076b876e 28070+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart);
c2b27bf2
AM
28071+
28072+/* mvdown.c */
28073+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
1facf9fc 28074+
076b876e
AM
28075+#ifdef CONFIG_AUFS_FHSM
28076+/* fhsm.c */
28077+
28078+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
28079+{
28080+ pid_t pid;
28081+
28082+ spin_lock(&fhsm->fhsm_spin);
28083+ pid = fhsm->fhsm_pid;
28084+ spin_unlock(&fhsm->fhsm_spin);
28085+
28086+ return pid;
28087+}
28088+
28089+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
28090+void au_fhsm_wrote_all(struct super_block *sb, int force);
28091+int au_fhsm_fd(struct super_block *sb, int oflags);
28092+int au_fhsm_br_alloc(struct au_branch *br);
c1595e42 28093+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
076b876e
AM
28094+void au_fhsm_fin(struct super_block *sb);
28095+void au_fhsm_init(struct au_sbinfo *sbinfo);
28096+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
28097+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
28098+#else
28099+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
28100+ int force)
28101+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
28102+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
c1595e42
JR
28103+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
28104+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
28105+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
076b876e
AM
28106+AuStubVoid(au_fhsm_fin, struct super_block *sb)
28107+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
28108+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
28109+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
28110+#endif
28111+
1facf9fc 28112+/* ---------------------------------------------------------------------- */
28113+
28114+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
28115+{
28116+ return sb->s_fs_info;
28117+}
28118+
28119+/* ---------------------------------------------------------------------- */
28120+
28121+#ifdef CONFIG_AUFS_EXPORT
a2a7ad62 28122+int au_test_nfsd(void);
1facf9fc 28123+void au_export_init(struct super_block *sb);
b752ccd1 28124+void au_xigen_inc(struct inode *inode);
1facf9fc 28125+int au_xigen_new(struct inode *inode);
28126+int au_xigen_set(struct super_block *sb, struct file *base);
28127+void au_xigen_clr(struct super_block *sb);
28128+
28129+static inline int au_busy_or_stale(void)
28130+{
b752ccd1 28131+ if (!au_test_nfsd())
1facf9fc 28132+ return -EBUSY;
28133+ return -ESTALE;
28134+}
28135+#else
b752ccd1 28136+AuStubInt0(au_test_nfsd, void)
a2a7ad62 28137+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1 28138+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
28139+AuStubInt0(au_xigen_new, struct inode *inode)
28140+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
28141+AuStubVoid(au_xigen_clr, struct super_block *sb)
c1595e42 28142+AuStub(int, au_busy_or_stale, return -EBUSY, void)
1facf9fc 28143+#endif /* CONFIG_AUFS_EXPORT */
28144+
28145+/* ---------------------------------------------------------------------- */
28146+
e49829fe
JR
28147+#ifdef CONFIG_AUFS_SBILIST
28148+/* module.c */
28149+extern struct au_splhead au_sbilist;
28150+
28151+static inline void au_sbilist_init(void)
28152+{
28153+ au_spl_init(&au_sbilist);
28154+}
28155+
28156+static inline void au_sbilist_add(struct super_block *sb)
28157+{
28158+ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
28159+}
28160+
28161+static inline void au_sbilist_del(struct super_block *sb)
28162+{
28163+ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
28164+}
53392da6
AM
28165+
28166+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
28167+static inline void au_sbilist_lock(void)
28168+{
28169+ spin_lock(&au_sbilist.spin);
28170+}
28171+
28172+static inline void au_sbilist_unlock(void)
28173+{
28174+ spin_unlock(&au_sbilist.spin);
28175+}
28176+#define AuGFP_SBILIST GFP_ATOMIC
28177+#else
28178+AuStubVoid(au_sbilist_lock, void)
28179+AuStubVoid(au_sbilist_unlock, void)
28180+#define AuGFP_SBILIST GFP_NOFS
28181+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
28182+#else
28183+AuStubVoid(au_sbilist_init, void)
c1595e42
JR
28184+AuStubVoid(au_sbilist_add, struct super_block *sb)
28185+AuStubVoid(au_sbilist_del, struct super_block *sb)
53392da6
AM
28186+AuStubVoid(au_sbilist_lock, void)
28187+AuStubVoid(au_sbilist_unlock, void)
28188+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
28189+#endif
28190+
28191+/* ---------------------------------------------------------------------- */
28192+
1facf9fc 28193+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
28194+{
dece6358 28195+ /*
c1595e42 28196+ * This function is a dynamic '__init' function actually,
dece6358
AM
28197+ * so the tiny check for si_rwsem is unnecessary.
28198+ */
28199+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 28200+#ifdef CONFIG_DEBUG_FS
28201+ sbinfo->si_dbgaufs = NULL;
86dc4139 28202+ sbinfo->si_dbgaufs_plink = NULL;
1facf9fc 28203+ sbinfo->si_dbgaufs_xib = NULL;
28204+#ifdef CONFIG_AUFS_EXPORT
28205+ sbinfo->si_dbgaufs_xigen = NULL;
28206+#endif
28207+#endif
28208+}
28209+
28210+/* ---------------------------------------------------------------------- */
28211+
b752ccd1
AM
28212+static inline pid_t si_pid_bit(void)
28213+{
28214+ /* the origin of pid is 1, but the bitmap's is 0 */
28215+ return current->pid - 1;
28216+}
28217+
28218+static inline int si_pid_test(struct super_block *sb)
28219+{
076b876e
AM
28220+ pid_t bit;
28221+
28222+ bit = si_pid_bit();
b752ccd1
AM
28223+ if (bit < PID_MAX_DEFAULT)
28224+ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
c1595e42 28225+ return si_pid_test_slow(sb);
b752ccd1
AM
28226+}
28227+
28228+static inline void si_pid_set(struct super_block *sb)
28229+{
076b876e
AM
28230+ pid_t bit;
28231+
28232+ bit = si_pid_bit();
b752ccd1
AM
28233+ if (bit < PID_MAX_DEFAULT) {
28234+ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
28235+ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
28236+ /* smp_mb(); */
28237+ } else
28238+ si_pid_set_slow(sb);
28239+}
28240+
28241+static inline void si_pid_clr(struct super_block *sb)
28242+{
076b876e
AM
28243+ pid_t bit;
28244+
28245+ bit = si_pid_bit();
b752ccd1
AM
28246+ if (bit < PID_MAX_DEFAULT) {
28247+ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
28248+ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
28249+ /* smp_mb(); */
28250+ } else
28251+ si_pid_clr_slow(sb);
28252+}
28253+
28254+/* ---------------------------------------------------------------------- */
28255+
1facf9fc 28256+/* lock superblock. mainly for entry point functions */
28257+/*
b752ccd1
AM
28258+ * __si_read_lock, __si_write_lock,
28259+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 28260+ */
b752ccd1 28261+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 28262+
dece6358
AM
28263+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
28264+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
28265+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
28266+
b752ccd1
AM
28267+static inline void si_noflush_read_lock(struct super_block *sb)
28268+{
28269+ __si_read_lock(sb);
28270+ si_pid_set(sb);
28271+}
28272+
28273+static inline int si_noflush_read_trylock(struct super_block *sb)
28274+{
076b876e
AM
28275+ int locked;
28276+
28277+ locked = __si_read_trylock(sb);
b752ccd1
AM
28278+ if (locked)
28279+ si_pid_set(sb);
28280+ return locked;
28281+}
28282+
28283+static inline void si_noflush_write_lock(struct super_block *sb)
28284+{
28285+ __si_write_lock(sb);
28286+ si_pid_set(sb);
28287+}
28288+
28289+static inline int si_noflush_write_trylock(struct super_block *sb)
28290+{
076b876e
AM
28291+ int locked;
28292+
28293+ locked = __si_write_trylock(sb);
b752ccd1
AM
28294+ if (locked)
28295+ si_pid_set(sb);
28296+ return locked;
28297+}
28298+
7e9cd9fe 28299+#if 0 /* reserved */
1facf9fc 28300+static inline int si_read_trylock(struct super_block *sb, int flags)
28301+{
28302+ if (au_ftest_lock(flags, FLUSH))
28303+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28304+ return si_noflush_read_trylock(sb);
28305+}
e49829fe 28306+#endif
1facf9fc 28307+
b752ccd1
AM
28308+static inline void si_read_unlock(struct super_block *sb)
28309+{
28310+ si_pid_clr(sb);
28311+ __si_read_unlock(sb);
28312+}
28313+
7e9cd9fe 28314+#if 0 /* reserved */
1facf9fc 28315+static inline int si_write_trylock(struct super_block *sb, int flags)
28316+{
28317+ if (au_ftest_lock(flags, FLUSH))
28318+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28319+ return si_noflush_write_trylock(sb);
28320+}
b752ccd1
AM
28321+#endif
28322+
28323+static inline void si_write_unlock(struct super_block *sb)
28324+{
28325+ si_pid_clr(sb);
28326+ __si_write_unlock(sb);
28327+}
28328+
7e9cd9fe 28329+#if 0 /* reserved */
b752ccd1
AM
28330+static inline void si_downgrade_lock(struct super_block *sb)
28331+{
28332+ __si_downgrade_lock(sb);
28333+}
28334+#endif
1facf9fc 28335+
28336+/* ---------------------------------------------------------------------- */
28337+
28338+static inline aufs_bindex_t au_sbend(struct super_block *sb)
28339+{
dece6358 28340+ SiMustAnyLock(sb);
1facf9fc 28341+ return au_sbi(sb)->si_bend;
28342+}
28343+
28344+static inline unsigned int au_mntflags(struct super_block *sb)
28345+{
dece6358 28346+ SiMustAnyLock(sb);
1facf9fc 28347+ return au_sbi(sb)->si_mntflags;
28348+}
28349+
28350+static inline unsigned int au_sigen(struct super_block *sb)
28351+{
dece6358 28352+ SiMustAnyLock(sb);
1facf9fc 28353+ return au_sbi(sb)->si_generation;
28354+}
28355+
7f207e10
AM
28356+static inline void au_ninodes_inc(struct super_block *sb)
28357+{
28358+ atomic_long_inc(&au_sbi(sb)->si_ninodes);
28359+}
28360+
28361+static inline void au_ninodes_dec(struct super_block *sb)
28362+{
28363+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
28364+ atomic_long_dec(&au_sbi(sb)->si_ninodes);
28365+}
28366+
28367+static inline void au_nfiles_inc(struct super_block *sb)
28368+{
28369+ atomic_long_inc(&au_sbi(sb)->si_nfiles);
28370+}
28371+
28372+static inline void au_nfiles_dec(struct super_block *sb)
28373+{
28374+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
28375+ atomic_long_dec(&au_sbi(sb)->si_nfiles);
28376+}
28377+
1facf9fc 28378+static inline struct au_branch *au_sbr(struct super_block *sb,
28379+ aufs_bindex_t bindex)
28380+{
dece6358 28381+ SiMustAnyLock(sb);
1facf9fc 28382+ return au_sbi(sb)->si_branch[0 + bindex];
28383+}
28384+
28385+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
28386+{
dece6358 28387+ SiMustWriteLock(sb);
1facf9fc 28388+ au_sbi(sb)->si_xino_brid = brid;
28389+}
28390+
28391+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
28392+{
dece6358 28393+ SiMustAnyLock(sb);
1facf9fc 28394+ return au_sbi(sb)->si_xino_brid;
28395+}
28396+
28397+#endif /* __KERNEL__ */
28398+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
28399diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
28400--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 28401+++ linux/fs/aufs/sysaufs.c 2015-09-24 10:47:58.254719746 +0200
523b37e3 28402@@ -0,0 +1,104 @@
1facf9fc 28403+/*
2000de60 28404+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 28405+ *
28406+ * This program, aufs is free software; you can redistribute it and/or modify
28407+ * it under the terms of the GNU General Public License as published by
28408+ * the Free Software Foundation; either version 2 of the License, or
28409+ * (at your option) any later version.
dece6358
AM
28410+ *
28411+ * This program is distributed in the hope that it will be useful,
28412+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28413+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28414+ * GNU General Public License for more details.
28415+ *
28416+ * You should have received a copy of the GNU General Public License
523b37e3 28417+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28418+ */
28419+
28420+/*
28421+ * sysfs interface and lifetime management
28422+ * they are necessary regardless of whether sysfs is disabled.
28423+ */
28424+
1facf9fc 28425+#include <linux/random.h>
1facf9fc 28426+#include "aufs.h"
28427+
28428+unsigned long sysaufs_si_mask;
e49829fe 28429+struct kset *sysaufs_kset;
1facf9fc 28430+
28431+#define AuSiAttr(_name) { \
28432+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
28433+ .show = sysaufs_si_##_name, \
28434+}
28435+
28436+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
28437+struct attribute *sysaufs_si_attrs[] = {
28438+ &sysaufs_si_attr_xi_path.attr,
28439+ NULL,
28440+};
28441+
4a4d8108 28442+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 28443+ .show = sysaufs_si_show
28444+};
28445+
28446+static struct kobj_type au_sbi_ktype = {
28447+ .release = au_si_free,
28448+ .sysfs_ops = &au_sbi_ops,
28449+ .default_attrs = sysaufs_si_attrs
28450+};
28451+
28452+/* ---------------------------------------------------------------------- */
28453+
28454+int sysaufs_si_init(struct au_sbinfo *sbinfo)
28455+{
28456+ int err;
28457+
e49829fe 28458+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 28459+ /* cf. sysaufs_name() */
28460+ err = kobject_init_and_add
e49829fe 28461+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 28462+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
28463+
28464+ dbgaufs_si_null(sbinfo);
28465+ if (!err) {
28466+ err = dbgaufs_si_init(sbinfo);
28467+ if (unlikely(err))
28468+ kobject_put(&sbinfo->si_kobj);
28469+ }
28470+ return err;
28471+}
28472+
28473+void sysaufs_fin(void)
28474+{
28475+ dbgaufs_fin();
e49829fe
JR
28476+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
28477+ kset_unregister(sysaufs_kset);
1facf9fc 28478+}
28479+
28480+int __init sysaufs_init(void)
28481+{
28482+ int err;
28483+
28484+ do {
28485+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
28486+ } while (!sysaufs_si_mask);
28487+
4a4d8108 28488+ err = -EINVAL;
e49829fe
JR
28489+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
28490+ if (unlikely(!sysaufs_kset))
4a4d8108 28491+ goto out;
e49829fe
JR
28492+ err = PTR_ERR(sysaufs_kset);
28493+ if (IS_ERR(sysaufs_kset))
1facf9fc 28494+ goto out;
e49829fe 28495+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 28496+ if (unlikely(err)) {
e49829fe 28497+ kset_unregister(sysaufs_kset);
1facf9fc 28498+ goto out;
28499+ }
28500+
28501+ err = dbgaufs_init();
28502+ if (unlikely(err))
28503+ sysaufs_fin();
4f0767ce 28504+out:
1facf9fc 28505+ return err;
28506+}
7f207e10
AM
28507diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
28508--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 28509+++ linux/fs/aufs/sysaufs.h 2015-09-24 10:47:58.254719746 +0200
c1595e42 28510@@ -0,0 +1,101 @@
1facf9fc 28511+/*
2000de60 28512+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 28513+ *
28514+ * This program, aufs is free software; you can redistribute it and/or modify
28515+ * it under the terms of the GNU General Public License as published by
28516+ * the Free Software Foundation; either version 2 of the License, or
28517+ * (at your option) any later version.
dece6358
AM
28518+ *
28519+ * This program is distributed in the hope that it will be useful,
28520+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28521+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28522+ * GNU General Public License for more details.
28523+ *
28524+ * You should have received a copy of the GNU General Public License
523b37e3 28525+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28526+ */
28527+
28528+/*
28529+ * sysfs interface and mount lifetime management
28530+ */
28531+
28532+#ifndef __SYSAUFS_H__
28533+#define __SYSAUFS_H__
28534+
28535+#ifdef __KERNEL__
28536+
1facf9fc 28537+#include <linux/sysfs.h>
1facf9fc 28538+#include "module.h"
28539+
dece6358
AM
28540+struct super_block;
28541+struct au_sbinfo;
28542+
1facf9fc 28543+struct sysaufs_si_attr {
28544+ struct attribute attr;
28545+ int (*show)(struct seq_file *seq, struct super_block *sb);
28546+};
28547+
28548+/* ---------------------------------------------------------------------- */
28549+
28550+/* sysaufs.c */
28551+extern unsigned long sysaufs_si_mask;
e49829fe 28552+extern struct kset *sysaufs_kset;
1facf9fc 28553+extern struct attribute *sysaufs_si_attrs[];
28554+int sysaufs_si_init(struct au_sbinfo *sbinfo);
28555+int __init sysaufs_init(void);
28556+void sysaufs_fin(void);
28557+
28558+/* ---------------------------------------------------------------------- */
28559+
28560+/* some people don't like to show a pointer in kernel */
28561+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
28562+{
28563+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
28564+}
28565+
28566+#define SysaufsSiNamePrefix "si_"
28567+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
28568+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
28569+{
28570+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
28571+ sysaufs_si_id(sbinfo));
28572+}
28573+
28574+struct au_branch;
28575+#ifdef CONFIG_SYSFS
28576+/* sysfs.c */
28577+extern struct attribute_group *sysaufs_attr_group;
28578+
28579+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
28580+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
28581+ char *buf);
076b876e
AM
28582+long au_brinfo_ioctl(struct file *file, unsigned long arg);
28583+#ifdef CONFIG_COMPAT
28584+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
28585+#endif
1facf9fc 28586+
28587+void sysaufs_br_init(struct au_branch *br);
28588+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
28589+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
28590+
28591+#define sysaufs_brs_init() do {} while (0)
28592+
28593+#else
28594+#define sysaufs_attr_group NULL
28595+
4a4d8108 28596+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
c1595e42
JR
28597+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
28598+ struct attribute *attr, char *buf)
4a4d8108
AM
28599+AuStubVoid(sysaufs_br_init, struct au_branch *br)
28600+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
28601+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 28602+
28603+static inline void sysaufs_brs_init(void)
28604+{
28605+ sysaufs_brs = 0;
28606+}
28607+
28608+#endif /* CONFIG_SYSFS */
28609+
28610+#endif /* __KERNEL__ */
28611+#endif /* __SYSAUFS_H__ */
7f207e10
AM
28612diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
28613--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
79b8bda9
AM
28614+++ linux/fs/aufs/sysfs.c 2015-11-11 17:21:46.922197217 +0100
28615@@ -0,0 +1,376 @@
1facf9fc 28616+/*
2000de60 28617+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 28618+ *
28619+ * This program, aufs is free software; you can redistribute it and/or modify
28620+ * it under the terms of the GNU General Public License as published by
28621+ * the Free Software Foundation; either version 2 of the License, or
28622+ * (at your option) any later version.
dece6358
AM
28623+ *
28624+ * This program is distributed in the hope that it will be useful,
28625+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28626+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28627+ * GNU General Public License for more details.
28628+ *
28629+ * You should have received a copy of the GNU General Public License
523b37e3 28630+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28631+ */
28632+
28633+/*
28634+ * sysfs interface
28635+ */
28636+
076b876e 28637+#include <linux/compat.h>
1facf9fc 28638+#include <linux/seq_file.h>
1facf9fc 28639+#include "aufs.h"
28640+
4a4d8108
AM
28641+#ifdef CONFIG_AUFS_FS_MODULE
28642+/* this entry violates the "one line per file" policy of sysfs */
28643+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
28644+ char *buf)
28645+{
28646+ ssize_t err;
28647+ static char *conf =
28648+/* this file is generated at compiling */
28649+#include "conf.str"
28650+ ;
28651+
28652+ err = snprintf(buf, PAGE_SIZE, conf);
28653+ if (unlikely(err >= PAGE_SIZE))
28654+ err = -EFBIG;
28655+ return err;
28656+}
28657+
28658+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
28659+#endif
28660+
1facf9fc 28661+static struct attribute *au_attr[] = {
4a4d8108
AM
28662+#ifdef CONFIG_AUFS_FS_MODULE
28663+ &au_config_attr.attr,
28664+#endif
1facf9fc 28665+ NULL, /* need to NULL terminate the list of attributes */
28666+};
28667+
28668+static struct attribute_group sysaufs_attr_group_body = {
28669+ .attrs = au_attr
28670+};
28671+
28672+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
28673+
28674+/* ---------------------------------------------------------------------- */
28675+
28676+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
28677+{
28678+ int err;
28679+
dece6358
AM
28680+ SiMustAnyLock(sb);
28681+
1facf9fc 28682+ err = 0;
28683+ if (au_opt_test(au_mntflags(sb), XINO)) {
28684+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
28685+ seq_putc(seq, '\n');
28686+ }
28687+ return err;
28688+}
28689+
28690+/*
28691+ * the lifetime of branch is independent from the entry under sysfs.
28692+ * sysfs handles the lifetime of the entry, and never calls ->show() after it is
28693+ * unlinked.
28694+ */
28695+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
392086de 28696+ aufs_bindex_t bindex, int idx)
1facf9fc 28697+{
1e00d052 28698+ int err;
1facf9fc 28699+ struct path path;
28700+ struct dentry *root;
28701+ struct au_branch *br;
076b876e 28702+ au_br_perm_str_t perm;
1facf9fc 28703+
28704+ AuDbg("b%d\n", bindex);
28705+
1e00d052 28706+ err = 0;
1facf9fc 28707+ root = sb->s_root;
28708+ di_read_lock_parent(root, !AuLock_IR);
28709+ br = au_sbr(sb, bindex);
392086de
AM
28710+
28711+ switch (idx) {
28712+ case AuBrSysfs_BR:
28713+ path.mnt = au_br_mnt(br);
28714+ path.dentry = au_h_dptr(root, bindex);
79b8bda9
AM
28715+ err = au_seq_path(seq, &path);
28716+ if (!err) {
28717+ au_optstr_br_perm(&perm, br->br_perm);
28718+ seq_printf(seq, "=%s\n", perm.a);
28719+ }
392086de
AM
28720+ break;
28721+ case AuBrSysfs_BRID:
79b8bda9 28722+ seq_printf(seq, "%d\n", br->br_id);
392086de
AM
28723+ break;
28724+ }
076b876e 28725+ di_read_unlock(root, !AuLock_IR);
79b8bda9 28726+ if (unlikely(err || seq_has_overflowed(seq)))
076b876e 28727+ err = -E2BIG;
392086de 28728+
1e00d052 28729+ return err;
1facf9fc 28730+}
28731+
28732+/* ---------------------------------------------------------------------- */
28733+
28734+static struct seq_file *au_seq(char *p, ssize_t len)
28735+{
28736+ struct seq_file *seq;
28737+
28738+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
28739+ if (seq) {
28740+ /* mutex_init(&seq.lock); */
28741+ seq->buf = p;
28742+ seq->size = len;
28743+ return seq; /* success */
28744+ }
28745+
28746+ seq = ERR_PTR(-ENOMEM);
28747+ return seq;
28748+}
28749+
392086de
AM
28750+#define SysaufsBr_PREFIX "br"
28751+#define SysaufsBrid_PREFIX "brid"
1facf9fc 28752+
28753+/* todo: file size may exceed PAGE_SIZE */
28754+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 28755+ char *buf)
1facf9fc 28756+{
28757+ ssize_t err;
392086de 28758+ int idx;
1facf9fc 28759+ long l;
28760+ aufs_bindex_t bend;
28761+ struct au_sbinfo *sbinfo;
28762+ struct super_block *sb;
28763+ struct seq_file *seq;
28764+ char *name;
28765+ struct attribute **cattr;
28766+
28767+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
28768+ sb = sbinfo->si_sb;
1308ab2a 28769+
28770+ /*
28771+ * prevent a race condition between sysfs and aufs.
28772+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
28773+ * prohibits maintaining the sysfs entries.
28774+ * here we acquire read lock after sysfs_get_active_two().
28775+ * on the other hand, the remount process may maintain the sysfs/aufs
28776+ * entries after acquiring write lock.
28777+ * it can cause a deadlock.
28778+ * so we simply give up processing the read here.
28779+ */
28780+ err = -EBUSY;
28781+ if (unlikely(!si_noflush_read_trylock(sb)))
28782+ goto out;
1facf9fc 28783+
28784+ seq = au_seq(buf, PAGE_SIZE);
28785+ err = PTR_ERR(seq);
28786+ if (IS_ERR(seq))
1308ab2a 28787+ goto out_unlock;
1facf9fc 28788+
28789+ name = (void *)attr->name;
28790+ cattr = sysaufs_si_attrs;
28791+ while (*cattr) {
28792+ if (!strcmp(name, (*cattr)->name)) {
28793+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
28794+ ->show(seq, sb);
28795+ goto out_seq;
28796+ }
28797+ cattr++;
28798+ }
28799+
392086de
AM
28800+ if (!strncmp(name, SysaufsBrid_PREFIX,
28801+ sizeof(SysaufsBrid_PREFIX) - 1)) {
28802+ idx = AuBrSysfs_BRID;
28803+ name += sizeof(SysaufsBrid_PREFIX) - 1;
28804+ } else if (!strncmp(name, SysaufsBr_PREFIX,
28805+ sizeof(SysaufsBr_PREFIX) - 1)) {
28806+ idx = AuBrSysfs_BR;
1facf9fc 28807+ name += sizeof(SysaufsBr_PREFIX) - 1;
392086de
AM
28808+ } else
28809+ BUG();
28810+
28811+ err = kstrtol(name, 10, &l);
28812+ if (!err) {
28813+ bend = au_sbend(sb);
28814+ if (l <= bend)
28815+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
28816+ else
28817+ err = -ENOENT;
1facf9fc 28818+ }
1facf9fc 28819+
4f0767ce 28820+out_seq:
1facf9fc 28821+ if (!err) {
28822+ err = seq->count;
28823+ /* sysfs limit */
28824+ if (unlikely(err == PAGE_SIZE))
28825+ err = -EFBIG;
28826+ }
28827+ kfree(seq);
4f0767ce 28828+out_unlock:
1facf9fc 28829+ si_read_unlock(sb);
4f0767ce 28830+out:
1facf9fc 28831+ return err;
28832+}
28833+
28834+/* ---------------------------------------------------------------------- */
28835+
076b876e
AM
28836+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
28837+{
28838+ int err;
28839+ int16_t brid;
28840+ aufs_bindex_t bindex, bend;
28841+ size_t sz;
28842+ char *buf;
28843+ struct seq_file *seq;
28844+ struct au_branch *br;
28845+
28846+ si_read_lock(sb, AuLock_FLUSH);
28847+ bend = au_sbend(sb);
28848+ err = bend + 1;
28849+ if (!arg)
28850+ goto out;
28851+
28852+ err = -ENOMEM;
28853+ buf = (void *)__get_free_page(GFP_NOFS);
28854+ if (unlikely(!buf))
28855+ goto out;
28856+
28857+ seq = au_seq(buf, PAGE_SIZE);
28858+ err = PTR_ERR(seq);
28859+ if (IS_ERR(seq))
28860+ goto out_buf;
28861+
28862+ sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
28863+ for (bindex = 0; bindex <= bend; bindex++, arg++) {
28864+ err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
28865+ if (unlikely(err))
28866+ break;
28867+
28868+ br = au_sbr(sb, bindex);
28869+ brid = br->br_id;
28870+ BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
28871+ err = __put_user(brid, &arg->id);
28872+ if (unlikely(err))
28873+ break;
28874+
28875+ BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
28876+ err = __put_user(br->br_perm, &arg->perm);
28877+ if (unlikely(err))
28878+ break;
28879+
79b8bda9
AM
28880+ err = au_seq_path(seq, &br->br_path);
28881+ if (unlikely(err))
28882+ break;
28883+ seq_putc(seq, '\0');
28884+ if (!seq_has_overflowed(seq)) {
076b876e
AM
28885+ err = copy_to_user(arg->path, seq->buf, seq->count);
28886+ seq->count = 0;
28887+ if (unlikely(err))
28888+ break;
28889+ } else {
28890+ err = -E2BIG;
28891+ goto out_seq;
28892+ }
28893+ }
28894+ if (unlikely(err))
28895+ err = -EFAULT;
28896+
28897+out_seq:
28898+ kfree(seq);
28899+out_buf:
28900+ free_page((unsigned long)buf);
28901+out:
28902+ si_read_unlock(sb);
28903+ return err;
28904+}
28905+
28906+long au_brinfo_ioctl(struct file *file, unsigned long arg)
28907+{
2000de60 28908+ return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
076b876e
AM
28909+}
28910+
28911+#ifdef CONFIG_COMPAT
28912+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
28913+{
2000de60 28914+ return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
076b876e
AM
28915+}
28916+#endif
28917+
28918+/* ---------------------------------------------------------------------- */
28919+
1facf9fc 28920+void sysaufs_br_init(struct au_branch *br)
28921+{
392086de
AM
28922+ int i;
28923+ struct au_brsysfs *br_sysfs;
28924+ struct attribute *attr;
4a4d8108 28925+
392086de
AM
28926+ br_sysfs = br->br_sysfs;
28927+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
28928+ attr = &br_sysfs->attr;
28929+ sysfs_attr_init(attr);
28930+ attr->name = br_sysfs->name;
28931+ attr->mode = S_IRUGO;
28932+ br_sysfs++;
28933+ }
1facf9fc 28934+}
28935+
28936+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
28937+{
28938+ struct au_branch *br;
28939+ struct kobject *kobj;
392086de
AM
28940+ struct au_brsysfs *br_sysfs;
28941+ int i;
1facf9fc 28942+ aufs_bindex_t bend;
28943+
28944+ dbgaufs_brs_del(sb, bindex);
28945+
28946+ if (!sysaufs_brs)
28947+ return;
28948+
28949+ kobj = &au_sbi(sb)->si_kobj;
28950+ bend = au_sbend(sb);
28951+ for (; bindex <= bend; bindex++) {
28952+ br = au_sbr(sb, bindex);
392086de
AM
28953+ br_sysfs = br->br_sysfs;
28954+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
28955+ sysfs_remove_file(kobj, &br_sysfs->attr);
28956+ br_sysfs++;
28957+ }
1facf9fc 28958+ }
28959+}
28960+
28961+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
28962+{
392086de 28963+ int err, i;
1facf9fc 28964+ aufs_bindex_t bend;
28965+ struct kobject *kobj;
28966+ struct au_branch *br;
392086de 28967+ struct au_brsysfs *br_sysfs;
1facf9fc 28968+
28969+ dbgaufs_brs_add(sb, bindex);
28970+
28971+ if (!sysaufs_brs)
28972+ return;
28973+
28974+ kobj = &au_sbi(sb)->si_kobj;
28975+ bend = au_sbend(sb);
28976+ for (; bindex <= bend; bindex++) {
28977+ br = au_sbr(sb, bindex);
392086de
AM
28978+ br_sysfs = br->br_sysfs;
28979+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
28980+ SysaufsBr_PREFIX "%d", bindex);
28981+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
28982+ SysaufsBrid_PREFIX "%d", bindex);
28983+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
28984+ err = sysfs_create_file(kobj, &br_sysfs->attr);
28985+ if (unlikely(err))
28986+ pr_warn("failed %s under sysfs(%d)\n",
28987+ br_sysfs->name, err);
28988+ br_sysfs++;
28989+ }
1facf9fc 28990+ }
28991+}
7f207e10
AM
28992diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
28993--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
79b8bda9 28994+++ linux/fs/aufs/sysrq.c 2015-11-11 17:21:46.922197217 +0100
076b876e 28995@@ -0,0 +1,157 @@
1facf9fc 28996+/*
2000de60 28997+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 28998+ *
28999+ * This program, aufs is free software; you can redistribute it and/or modify
29000+ * it under the terms of the GNU General Public License as published by
29001+ * the Free Software Foundation; either version 2 of the License, or
29002+ * (at your option) any later version.
dece6358
AM
29003+ *
29004+ * This program is distributed in the hope that it will be useful,
29005+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29006+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29007+ * GNU General Public License for more details.
29008+ *
29009+ * You should have received a copy of the GNU General Public License
523b37e3 29010+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29011+ */
29012+
29013+/*
29014+ * magic sysrq handler
29015+ */
29016+
1facf9fc 29017+/* #include <linux/sysrq.h> */
027c5e7a 29018+#include <linux/writeback.h>
1facf9fc 29019+#include "aufs.h"
29020+
29021+/* ---------------------------------------------------------------------- */
29022+
29023+static void sysrq_sb(struct super_block *sb)
29024+{
29025+ char *plevel;
29026+ struct au_sbinfo *sbinfo;
29027+ struct file *file;
523b37e3
AM
29028+ struct au_sphlhead *files;
29029+ struct au_finfo *finfo;
1facf9fc 29030+
29031+ plevel = au_plevel;
29032+ au_plevel = KERN_WARNING;
1facf9fc 29033+
4a4d8108 29034+ /* since we define pr_fmt, call printk directly */
c06a8ce3
AM
29035+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
29036+
29037+ sbinfo = au_sbi(sb);
4a4d8108 29038+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
c06a8ce3 29039+ pr("superblock\n");
1facf9fc 29040+ au_dpri_sb(sb);
027c5e7a
AM
29041+
29042+#if 0
c06a8ce3 29043+ pr("root dentry\n");
1facf9fc 29044+ au_dpri_dentry(sb->s_root);
c06a8ce3 29045+ pr("root inode\n");
5527c038 29046+ au_dpri_inode(d_inode(sb->s_root));
027c5e7a
AM
29047+#endif
29048+
1facf9fc 29049+#if 0
027c5e7a
AM
29050+ do {
29051+ int err, i, j, ndentry;
29052+ struct au_dcsub_pages dpages;
29053+ struct au_dpage *dpage;
29054+
29055+ err = au_dpages_init(&dpages, GFP_ATOMIC);
29056+ if (unlikely(err))
29057+ break;
29058+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
29059+ if (!err)
29060+ for (i = 0; i < dpages.ndpage; i++) {
29061+ dpage = dpages.dpages + i;
29062+ ndentry = dpage->ndentry;
29063+ for (j = 0; j < ndentry; j++)
29064+ au_dpri_dentry(dpage->dentries[j]);
29065+ }
29066+ au_dpages_free(&dpages);
29067+ } while (0);
29068+#endif
29069+
29070+#if 1
29071+ {
29072+ struct inode *i;
076b876e 29073+
c06a8ce3 29074+ pr("isolated inode\n");
79b8bda9 29075+ spin_lock(&sb->s_inode_list_lock);
2cbb1c4b
JR
29076+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
29077+ spin_lock(&i->i_lock);
b4510431 29078+ if (1 || hlist_empty(&i->i_dentry))
027c5e7a 29079+ au_dpri_inode(i);
2cbb1c4b
JR
29080+ spin_unlock(&i->i_lock);
29081+ }
79b8bda9 29082+ spin_unlock(&sb->s_inode_list_lock);
027c5e7a 29083+ }
1facf9fc 29084+#endif
c06a8ce3 29085+ pr("files\n");
523b37e3
AM
29086+ files = &au_sbi(sb)->si_files;
29087+ spin_lock(&files->spin);
29088+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
4a4d8108 29089+ umode_t mode;
076b876e 29090+
523b37e3 29091+ file = finfo->fi_file;
c06a8ce3 29092+ mode = file_inode(file)->i_mode;
38d290e6 29093+ if (!special_file(mode))
1facf9fc 29094+ au_dpri_file(file);
523b37e3
AM
29095+ }
29096+ spin_unlock(&files->spin);
c06a8ce3 29097+ pr("done\n");
1facf9fc 29098+
c06a8ce3 29099+#undef pr
1facf9fc 29100+ au_plevel = plevel;
1facf9fc 29101+}
29102+
29103+/* ---------------------------------------------------------------------- */
29104+
29105+/* module parameter */
29106+static char *aufs_sysrq_key = "a";
29107+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
29108+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
29109+
0c5527e5 29110+static void au_sysrq(int key __maybe_unused)
1facf9fc 29111+{
1facf9fc 29112+ struct au_sbinfo *sbinfo;
29113+
027c5e7a 29114+ lockdep_off();
53392da6 29115+ au_sbilist_lock();
e49829fe 29116+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 29117+ sysrq_sb(sbinfo->si_sb);
53392da6 29118+ au_sbilist_unlock();
027c5e7a 29119+ lockdep_on();
1facf9fc 29120+}
29121+
29122+static struct sysrq_key_op au_sysrq_op = {
29123+ .handler = au_sysrq,
29124+ .help_msg = "Aufs",
29125+ .action_msg = "Aufs",
29126+ .enable_mask = SYSRQ_ENABLE_DUMP
29127+};
29128+
29129+/* ---------------------------------------------------------------------- */
29130+
29131+int __init au_sysrq_init(void)
29132+{
29133+ int err;
29134+ char key;
29135+
29136+ err = -1;
29137+ key = *aufs_sysrq_key;
29138+ if ('a' <= key && key <= 'z')
29139+ err = register_sysrq_key(key, &au_sysrq_op);
29140+ if (unlikely(err))
4a4d8108 29141+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 29142+ return err;
29143+}
29144+
29145+void au_sysrq_fin(void)
29146+{
29147+ int err;
076b876e 29148+
1facf9fc 29149+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
29150+ if (unlikely(err))
4a4d8108 29151+ pr_err("err %d (ignored)\n", err);
1facf9fc 29152+}
7f207e10
AM
29153diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
29154--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
79b8bda9 29155+++ linux/fs/aufs/vdir.c 2015-11-11 17:21:46.922197217 +0100
b912730e 29156@@ -0,0 +1,888 @@
1facf9fc 29157+/*
2000de60 29158+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 29159+ *
29160+ * This program, aufs is free software; you can redistribute it and/or modify
29161+ * it under the terms of the GNU General Public License as published by
29162+ * the Free Software Foundation; either version 2 of the License, or
29163+ * (at your option) any later version.
dece6358
AM
29164+ *
29165+ * This program is distributed in the hope that it will be useful,
29166+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29167+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29168+ * GNU General Public License for more details.
29169+ *
29170+ * You should have received a copy of the GNU General Public License
523b37e3 29171+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29172+ */
29173+
29174+/*
29175+ * virtual or vertical directory
29176+ */
29177+
29178+#include "aufs.h"
29179+
dece6358 29180+static unsigned int calc_size(int nlen)
1facf9fc 29181+{
dece6358 29182+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 29183+}
29184+
29185+static int set_deblk_end(union au_vdir_deblk_p *p,
29186+ union au_vdir_deblk_p *deblk_end)
29187+{
29188+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
29189+ p->de->de_str.len = 0;
29190+ /* smp_mb(); */
29191+ return 0;
29192+ }
29193+ return -1; /* error */
29194+}
29195+
29196+/* returns true or false */
29197+static int is_deblk_end(union au_vdir_deblk_p *p,
29198+ union au_vdir_deblk_p *deblk_end)
29199+{
29200+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
29201+ return !p->de->de_str.len;
29202+ return 1;
29203+}
29204+
29205+static unsigned char *last_deblk(struct au_vdir *vdir)
29206+{
29207+ return vdir->vd_deblk[vdir->vd_nblk - 1];
29208+}
29209+
29210+/* ---------------------------------------------------------------------- */
29211+
79b8bda9 29212+/* estimate the appropriate size for name hash table */
1308ab2a 29213+unsigned int au_rdhash_est(loff_t sz)
29214+{
29215+ unsigned int n;
29216+
29217+ n = UINT_MAX;
29218+ sz >>= 10;
29219+ if (sz < n)
29220+ n = sz;
29221+ if (sz < AUFS_RDHASH_DEF)
29222+ n = AUFS_RDHASH_DEF;
4a4d8108 29223+ /* pr_info("n %u\n", n); */
1308ab2a 29224+ return n;
29225+}
29226+
1facf9fc 29227+/*
29228+ * the allocated memory has to be freed by
dece6358 29229+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 29230+ */
dece6358 29231+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 29232+{
1facf9fc 29233+ struct hlist_head *head;
dece6358 29234+ unsigned int u;
076b876e 29235+ size_t sz;
1facf9fc 29236+
076b876e
AM
29237+ sz = sizeof(*nhash->nh_head) * num_hash;
29238+ head = kmalloc(sz, gfp);
dece6358
AM
29239+ if (head) {
29240+ nhash->nh_num = num_hash;
29241+ nhash->nh_head = head;
29242+ for (u = 0; u < num_hash; u++)
1facf9fc 29243+ INIT_HLIST_HEAD(head++);
dece6358 29244+ return 0; /* success */
1facf9fc 29245+ }
1facf9fc 29246+
dece6358 29247+ return -ENOMEM;
1facf9fc 29248+}
29249+
dece6358
AM
29250+static void nhash_count(struct hlist_head *head)
29251+{
29252+#if 0
29253+ unsigned long n;
29254+ struct hlist_node *pos;
29255+
29256+ n = 0;
29257+ hlist_for_each(pos, head)
29258+ n++;
4a4d8108 29259+ pr_info("%lu\n", n);
dece6358
AM
29260+#endif
29261+}
29262+
29263+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 29264+{
c06a8ce3
AM
29265+ struct au_vdir_wh *pos;
29266+ struct hlist_node *node;
1facf9fc 29267+
c06a8ce3
AM
29268+ hlist_for_each_entry_safe(pos, node, head, wh_hash)
29269+ kfree(pos);
1facf9fc 29270+}
29271+
dece6358 29272+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 29273+{
c06a8ce3
AM
29274+ struct au_vdir_dehstr *pos;
29275+ struct hlist_node *node;
1facf9fc 29276+
c06a8ce3
AM
29277+ hlist_for_each_entry_safe(pos, node, head, hash)
29278+ au_cache_free_vdir_dehstr(pos);
1facf9fc 29279+}
29280+
dece6358
AM
29281+static void au_nhash_do_free(struct au_nhash *nhash,
29282+ void (*free)(struct hlist_head *head))
1facf9fc 29283+{
1308ab2a 29284+ unsigned int n;
1facf9fc 29285+ struct hlist_head *head;
1facf9fc 29286+
dece6358 29287+ n = nhash->nh_num;
1308ab2a 29288+ if (!n)
29289+ return;
29290+
dece6358 29291+ head = nhash->nh_head;
1308ab2a 29292+ while (n-- > 0) {
dece6358
AM
29293+ nhash_count(head);
29294+ free(head++);
1facf9fc 29295+ }
dece6358 29296+ kfree(nhash->nh_head);
1facf9fc 29297+}
29298+
dece6358 29299+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 29300+{
dece6358
AM
29301+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
29302+}
1facf9fc 29303+
dece6358
AM
29304+static void au_nhash_de_free(struct au_nhash *delist)
29305+{
29306+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 29307+}
29308+
29309+/* ---------------------------------------------------------------------- */
29310+
29311+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
29312+ int limit)
29313+{
29314+ int num;
29315+ unsigned int u, n;
29316+ struct hlist_head *head;
c06a8ce3 29317+ struct au_vdir_wh *pos;
1facf9fc 29318+
29319+ num = 0;
29320+ n = whlist->nh_num;
29321+ head = whlist->nh_head;
1308ab2a 29322+ for (u = 0; u < n; u++, head++)
c06a8ce3
AM
29323+ hlist_for_each_entry(pos, head, wh_hash)
29324+ if (pos->wh_bindex == btgt && ++num > limit)
1facf9fc 29325+ return 1;
1facf9fc 29326+ return 0;
29327+}
29328+
29329+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 29330+ unsigned char *name,
1facf9fc 29331+ unsigned int len)
29332+{
dece6358
AM
29333+ unsigned int v;
29334+ /* const unsigned int magic_bit = 12; */
29335+
1308ab2a 29336+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
29337+
dece6358
AM
29338+ v = 0;
29339+ while (len--)
29340+ v += *name++;
29341+ /* v = hash_long(v, magic_bit); */
29342+ v %= nhash->nh_num;
29343+ return nhash->nh_head + v;
29344+}
29345+
29346+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
29347+ int nlen)
29348+{
29349+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 29350+}
29351+
29352+/* returns found or not */
dece6358 29353+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 29354+{
29355+ struct hlist_head *head;
c06a8ce3 29356+ struct au_vdir_wh *pos;
1facf9fc 29357+ struct au_vdir_destr *str;
29358+
dece6358 29359+ head = au_name_hash(whlist, name, nlen);
c06a8ce3
AM
29360+ hlist_for_each_entry(pos, head, wh_hash) {
29361+ str = &pos->wh_str;
1facf9fc 29362+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
29363+ if (au_nhash_test_name(str, name, nlen))
29364+ return 1;
29365+ }
29366+ return 0;
29367+}
29368+
29369+/* returns found(true) or not */
29370+static int test_known(struct au_nhash *delist, char *name, int nlen)
29371+{
29372+ struct hlist_head *head;
c06a8ce3 29373+ struct au_vdir_dehstr *pos;
dece6358
AM
29374+ struct au_vdir_destr *str;
29375+
29376+ head = au_name_hash(delist, name, nlen);
c06a8ce3
AM
29377+ hlist_for_each_entry(pos, head, hash) {
29378+ str = pos->str;
dece6358
AM
29379+ AuDbg("%.*s\n", str->len, str->name);
29380+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 29381+ return 1;
29382+ }
29383+ return 0;
29384+}
29385+
dece6358
AM
29386+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
29387+ unsigned char d_type)
29388+{
29389+#ifdef CONFIG_AUFS_SHWH
29390+ wh->wh_ino = ino;
29391+ wh->wh_type = d_type;
29392+#endif
29393+}
29394+
29395+/* ---------------------------------------------------------------------- */
29396+
29397+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
29398+ unsigned int d_type, aufs_bindex_t bindex,
29399+ unsigned char shwh)
1facf9fc 29400+{
29401+ int err;
29402+ struct au_vdir_destr *str;
29403+ struct au_vdir_wh *wh;
29404+
dece6358 29405+ AuDbg("%.*s\n", nlen, name);
1308ab2a 29406+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
29407+
1facf9fc 29408+ err = -ENOMEM;
dece6358 29409+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 29410+ if (unlikely(!wh))
29411+ goto out;
29412+
29413+ err = 0;
29414+ wh->wh_bindex = bindex;
dece6358
AM
29415+ if (shwh)
29416+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 29417+ str = &wh->wh_str;
dece6358
AM
29418+ str->len = nlen;
29419+ memcpy(str->name, name, nlen);
29420+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 29421+ /* smp_mb(); */
29422+
4f0767ce 29423+out:
1facf9fc 29424+ return err;
29425+}
29426+
1facf9fc 29427+static int append_deblk(struct au_vdir *vdir)
29428+{
29429+ int err;
dece6358 29430+ unsigned long ul;
1facf9fc 29431+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29432+ union au_vdir_deblk_p p, deblk_end;
29433+ unsigned char **o;
29434+
29435+ err = -ENOMEM;
dece6358
AM
29436+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
29437+ GFP_NOFS);
1facf9fc 29438+ if (unlikely(!o))
29439+ goto out;
29440+
29441+ vdir->vd_deblk = o;
29442+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
29443+ if (p.deblk) {
29444+ ul = vdir->vd_nblk++;
29445+ vdir->vd_deblk[ul] = p.deblk;
29446+ vdir->vd_last.ul = ul;
29447+ vdir->vd_last.p.deblk = p.deblk;
29448+ deblk_end.deblk = p.deblk + deblk_sz;
29449+ err = set_deblk_end(&p, &deblk_end);
29450+ }
29451+
4f0767ce 29452+out:
1facf9fc 29453+ return err;
29454+}
29455+
dece6358
AM
29456+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
29457+ unsigned int d_type, struct au_nhash *delist)
29458+{
29459+ int err;
29460+ unsigned int sz;
29461+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29462+ union au_vdir_deblk_p p, *room, deblk_end;
29463+ struct au_vdir_dehstr *dehstr;
29464+
29465+ p.deblk = last_deblk(vdir);
29466+ deblk_end.deblk = p.deblk + deblk_sz;
29467+ room = &vdir->vd_last.p;
29468+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
29469+ || !is_deblk_end(room, &deblk_end));
29470+
29471+ sz = calc_size(nlen);
29472+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
29473+ err = append_deblk(vdir);
29474+ if (unlikely(err))
29475+ goto out;
29476+
29477+ p.deblk = last_deblk(vdir);
29478+ deblk_end.deblk = p.deblk + deblk_sz;
29479+ /* smp_mb(); */
29480+ AuDebugOn(room->deblk != p.deblk);
29481+ }
29482+
29483+ err = -ENOMEM;
4a4d8108 29484+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
29485+ if (unlikely(!dehstr))
29486+ goto out;
29487+
29488+ dehstr->str = &room->de->de_str;
29489+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
29490+ room->de->de_ino = ino;
29491+ room->de->de_type = d_type;
29492+ room->de->de_str.len = nlen;
29493+ memcpy(room->de->de_str.name, name, nlen);
29494+
29495+ err = 0;
29496+ room->deblk += sz;
29497+ if (unlikely(set_deblk_end(room, &deblk_end)))
29498+ err = append_deblk(vdir);
29499+ /* smp_mb(); */
29500+
4f0767ce 29501+out:
dece6358
AM
29502+ return err;
29503+}
29504+
29505+/* ---------------------------------------------------------------------- */
29506+
29507+void au_vdir_free(struct au_vdir *vdir)
29508+{
29509+ unsigned char **deblk;
29510+
29511+ deblk = vdir->vd_deblk;
29512+ while (vdir->vd_nblk--)
29513+ kfree(*deblk++);
29514+ kfree(vdir->vd_deblk);
29515+ au_cache_free_vdir(vdir);
29516+}
29517+
1308ab2a 29518+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 29519+{
29520+ struct au_vdir *vdir;
1308ab2a 29521+ struct super_block *sb;
1facf9fc 29522+ int err;
29523+
2000de60 29524+ sb = file->f_path.dentry->d_sb;
dece6358
AM
29525+ SiMustAnyLock(sb);
29526+
1facf9fc 29527+ err = -ENOMEM;
29528+ vdir = au_cache_alloc_vdir();
29529+ if (unlikely(!vdir))
29530+ goto out;
29531+
29532+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
29533+ if (unlikely(!vdir->vd_deblk))
29534+ goto out_free;
29535+
29536+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 29537+ if (!vdir->vd_deblk_sz) {
79b8bda9 29538+ /* estimate the appropriate size for deblk */
1308ab2a 29539+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 29540+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 29541+ }
1facf9fc 29542+ vdir->vd_nblk = 0;
29543+ vdir->vd_version = 0;
29544+ vdir->vd_jiffy = 0;
29545+ err = append_deblk(vdir);
29546+ if (!err)
29547+ return vdir; /* success */
29548+
29549+ kfree(vdir->vd_deblk);
29550+
4f0767ce 29551+out_free:
1facf9fc 29552+ au_cache_free_vdir(vdir);
4f0767ce 29553+out:
1facf9fc 29554+ vdir = ERR_PTR(err);
29555+ return vdir;
29556+}
29557+
29558+static int reinit_vdir(struct au_vdir *vdir)
29559+{
29560+ int err;
29561+ union au_vdir_deblk_p p, deblk_end;
29562+
29563+ while (vdir->vd_nblk > 1) {
29564+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
29565+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
29566+ vdir->vd_nblk--;
29567+ }
29568+ p.deblk = vdir->vd_deblk[0];
29569+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
29570+ err = set_deblk_end(&p, &deblk_end);
29571+ /* keep vd_dblk_sz */
29572+ vdir->vd_last.ul = 0;
29573+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
29574+ vdir->vd_version = 0;
29575+ vdir->vd_jiffy = 0;
29576+ /* smp_mb(); */
29577+ return err;
29578+}
29579+
29580+/* ---------------------------------------------------------------------- */
29581+
/*
 * Flag bits kept in fillvdir_arg.flags.
 * CALLED - set by fillvdir() every time it is invoked
 * WHABLE - whiteout names are processed for the current branch
 * SHWH   - set by au_do_read_vdir() when the SHWH mount option is on
 */
#define AuFillVdir_CALLED	(1 << 0)
#define AuFillVdir_WHABLE	(1 << 1)
#define AuFillVdir_SHWH		(1 << 2)

/* test/set/clear helpers; @name is the suffix after "AuFillVdir_" */
#define au_ftest_fillvdir(flags, name)	((flags) & AuFillVdir_##name)
#define au_fset_fillvdir(flags, name) \
	do { (flags) |= AuFillVdir_##name; } while (0)
#define au_fclr_fillvdir(flags, name) \
	do { (flags) &= ~AuFillVdir_##name; } while (0)

/* without CONFIG_AUFS_SHWH the SHWH flag is 0, so its test is always false */
#ifndef CONFIG_AUFS_SHWH
#undef AuFillVdir_SHWH
#define AuFillVdir_SHWH		0
#endif
29595+
1facf9fc 29596+struct fillvdir_arg {
392086de 29597+ struct dir_context ctx;
1facf9fc 29598+ struct file *file;
29599+ struct au_vdir *vdir;
dece6358
AM
29600+ struct au_nhash delist;
29601+ struct au_nhash whlist;
1facf9fc 29602+ aufs_bindex_t bindex;
29603+ unsigned int flags;
29604+ int err;
29605+};
29606+
392086de 29607+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
1facf9fc 29608+ loff_t offset __maybe_unused, u64 h_ino,
29609+ unsigned int d_type)
29610+{
392086de 29611+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
1facf9fc 29612+ char *name = (void *)__name;
29613+ struct super_block *sb;
1facf9fc 29614+ ino_t ino;
dece6358 29615+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 29616+
1facf9fc 29617+ arg->err = 0;
2000de60 29618+ sb = arg->file->f_path.dentry->d_sb;
1facf9fc 29619+ au_fset_fillvdir(arg->flags, CALLED);
29620+ /* smp_mb(); */
dece6358 29621+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 29622+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
29623+ if (test_known(&arg->delist, name, nlen)
29624+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
29625+ goto out; /* already exists or whiteouted */
1facf9fc 29626+
dece6358 29627+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
29628+ if (!arg->err) {
29629+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
29630+ d_type = DT_UNKNOWN;
dece6358
AM
29631+ arg->err = append_de(arg->vdir, name, nlen, ino,
29632+ d_type, &arg->delist);
4a4d8108 29633+ }
1facf9fc 29634+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
29635+ name += AUFS_WH_PFX_LEN;
dece6358
AM
29636+ nlen -= AUFS_WH_PFX_LEN;
29637+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
29638+ goto out; /* already whiteouted */
1facf9fc 29639+
dece6358
AM
29640+ if (shwh)
29641+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
29642+ &ino);
4a4d8108
AM
29643+ if (!arg->err) {
29644+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
29645+ d_type = DT_UNKNOWN;
1facf9fc 29646+ arg->err = au_nhash_append_wh
dece6358
AM
29647+ (&arg->whlist, name, nlen, ino, d_type,
29648+ arg->bindex, shwh);
4a4d8108 29649+ }
1facf9fc 29650+ }
29651+
4f0767ce 29652+out:
1facf9fc 29653+ if (!arg->err)
29654+ arg->vdir->vd_jiffy = jiffies;
29655+ /* smp_mb(); */
29656+ AuTraceErr(arg->err);
29657+ return arg->err;
29658+}
29659+
dece6358
AM
/*
 * For the SHWH mount option: append every recorded whiteout to @vdir as a
 * visible entry whose name is the whiteout prefix followed by the original
 * name. A temporary page is used to build the prefixed names.
 * Returns 0 on success or a negative errno; without CONFIG_AUFS_SHWH this is
 * a no-op returning 0.
 */
static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
			  struct au_nhash *whlist, struct au_nhash *delist)
{
#ifdef CONFIG_AUFS_SHWH
	int err;
	unsigned int nh, u;
	struct hlist_head *head;
	struct au_vdir_wh *pos;
	struct hlist_node *n;
	char *p, *o;
	struct au_vdir_destr *destr;

	AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));

	err = -ENOMEM;
	o = p = (void *)__get_free_page(GFP_NOFS);
	if (unlikely(!p))
		goto out;

	err = 0;
	nh = whlist->nh_num;
	/* the prefix is written once; each name is copied right behind it */
	memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
	p += AUFS_WH_PFX_LEN;
	/*
	 * Stop walking the buckets as soon as append_de() fails. The 'break'
	 * below only leaves the inner list walk, and continuing with the next
	 * bucket could overwrite the error with a later success.
	 */
	for (u = 0; !err && u < nh; u++) {
		head = whlist->nh_head + u;
		hlist_for_each_entry_safe(pos, n, head, wh_hash) {
			destr = &pos->wh_str;
			memcpy(p, destr->name, destr->len);
			err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
					pos->wh_ino, pos->wh_type, delist);
			if (unlikely(err))
				break;
		}
	}

	free_page((unsigned long)o);

out:
	AuTraceErr(err);
	return err;
#else
	return 0;
#endif
}
29704+
1facf9fc 29705+static int au_do_read_vdir(struct fillvdir_arg *arg)
29706+{
29707+ int err;
dece6358 29708+ unsigned int rdhash;
1facf9fc 29709+ loff_t offset;
dece6358
AM
29710+ aufs_bindex_t bend, bindex, bstart;
29711+ unsigned char shwh;
1facf9fc 29712+ struct file *hf, *file;
29713+ struct super_block *sb;
29714+
1facf9fc 29715+ file = arg->file;
2000de60 29716+ sb = file->f_path.dentry->d_sb;
dece6358
AM
29717+ SiMustAnyLock(sb);
29718+
29719+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 29720+ if (!rdhash)
29721+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
29722+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
29723+ if (unlikely(err))
1facf9fc 29724+ goto out;
dece6358
AM
29725+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
29726+ if (unlikely(err))
1facf9fc 29727+ goto out_delist;
29728+
29729+ err = 0;
29730+ arg->flags = 0;
dece6358
AM
29731+ shwh = 0;
29732+ if (au_opt_test(au_mntflags(sb), SHWH)) {
29733+ shwh = 1;
29734+ au_fset_fillvdir(arg->flags, SHWH);
29735+ }
29736+ bstart = au_fbstart(file);
4a4d8108 29737+ bend = au_fbend_dir(file);
dece6358 29738+ for (bindex = bstart; !err && bindex <= bend; bindex++) {
4a4d8108 29739+ hf = au_hf_dir(file, bindex);
1facf9fc 29740+ if (!hf)
29741+ continue;
29742+
29743+ offset = vfsub_llseek(hf, 0, SEEK_SET);
29744+ err = offset;
29745+ if (unlikely(offset))
29746+ break;
29747+
29748+ arg->bindex = bindex;
29749+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358
AM
29750+ if (shwh
29751+ || (bindex != bend
29752+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 29753+ au_fset_fillvdir(arg->flags, WHABLE);
29754+ do {
29755+ arg->err = 0;
29756+ au_fclr_fillvdir(arg->flags, CALLED);
29757+ /* smp_mb(); */
392086de 29758+ err = vfsub_iterate_dir(hf, &arg->ctx);
1facf9fc 29759+ if (err >= 0)
29760+ err = arg->err;
29761+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
392086de
AM
29762+
29763+ /*
29764+ * dir_relax() may be good for concurrency, but aufs should not
29765+ * use it since it will cause a lockdep problem.
29766+ */
1facf9fc 29767+ }
dece6358
AM
29768+
29769+ if (!err && shwh)
29770+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
29771+
29772+ au_nhash_wh_free(&arg->whlist);
1facf9fc 29773+
4f0767ce 29774+out_delist:
dece6358 29775+ au_nhash_de_free(&arg->delist);
4f0767ce 29776+out:
1facf9fc 29777+ return err;
29778+}
29779+
29780+static int read_vdir(struct file *file, int may_read)
29781+{
29782+ int err;
29783+ unsigned long expire;
29784+ unsigned char do_read;
392086de
AM
29785+ struct fillvdir_arg arg = {
29786+ .ctx = {
2000de60 29787+ .actor = fillvdir
392086de
AM
29788+ }
29789+ };
1facf9fc 29790+ struct inode *inode;
29791+ struct au_vdir *vdir, *allocated;
29792+
29793+ err = 0;
c06a8ce3 29794+ inode = file_inode(file);
1facf9fc 29795+ IMustLock(inode);
dece6358
AM
29796+ SiMustAnyLock(inode->i_sb);
29797+
1facf9fc 29798+ allocated = NULL;
29799+ do_read = 0;
29800+ expire = au_sbi(inode->i_sb)->si_rdcache;
29801+ vdir = au_ivdir(inode);
29802+ if (!vdir) {
29803+ do_read = 1;
1308ab2a 29804+ vdir = alloc_vdir(file);
1facf9fc 29805+ err = PTR_ERR(vdir);
29806+ if (IS_ERR(vdir))
29807+ goto out;
29808+ err = 0;
29809+ allocated = vdir;
29810+ } else if (may_read
29811+ && (inode->i_version != vdir->vd_version
29812+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
29813+ do_read = 1;
29814+ err = reinit_vdir(vdir);
29815+ if (unlikely(err))
29816+ goto out;
29817+ }
29818+
29819+ if (!do_read)
29820+ return 0; /* success */
29821+
29822+ arg.file = file;
29823+ arg.vdir = vdir;
29824+ err = au_do_read_vdir(&arg);
29825+ if (!err) {
392086de 29826+ /* file->f_pos = 0; */ /* todo: ctx->pos? */
1facf9fc 29827+ vdir->vd_version = inode->i_version;
29828+ vdir->vd_last.ul = 0;
29829+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
29830+ if (allocated)
29831+ au_set_ivdir(inode, allocated);
29832+ } else if (allocated)
29833+ au_vdir_free(allocated);
29834+
4f0767ce 29835+out:
1facf9fc 29836+ return err;
29837+}
29838+
29839+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
29840+{
29841+ int err, rerr;
29842+ unsigned long ul, n;
29843+ const unsigned int deblk_sz = src->vd_deblk_sz;
29844+
29845+ AuDebugOn(tgt->vd_nblk != 1);
29846+
29847+ err = -ENOMEM;
29848+ if (tgt->vd_nblk < src->vd_nblk) {
29849+ unsigned char **p;
29850+
dece6358
AM
29851+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
29852+ GFP_NOFS);
1facf9fc 29853+ if (unlikely(!p))
29854+ goto out;
29855+ tgt->vd_deblk = p;
29856+ }
29857+
1308ab2a 29858+ if (tgt->vd_deblk_sz != deblk_sz) {
29859+ unsigned char *p;
29860+
29861+ tgt->vd_deblk_sz = deblk_sz;
29862+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
29863+ if (unlikely(!p))
29864+ goto out;
29865+ tgt->vd_deblk[0] = p;
29866+ }
1facf9fc 29867+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 29868+ tgt->vd_version = src->vd_version;
29869+ tgt->vd_jiffy = src->vd_jiffy;
29870+
29871+ n = src->vd_nblk;
29872+ for (ul = 1; ul < n; ul++) {
dece6358
AM
29873+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
29874+ GFP_NOFS);
29875+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 29876+ goto out;
1308ab2a 29877+ tgt->vd_nblk++;
1facf9fc 29878+ }
1308ab2a 29879+ tgt->vd_nblk = n;
29880+ tgt->vd_last.ul = tgt->vd_last.ul;
29881+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
29882+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
29883+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 29884+ /* smp_mb(); */
29885+ return 0; /* success */
29886+
4f0767ce 29887+out:
1facf9fc 29888+ rerr = reinit_vdir(tgt);
29889+ BUG_ON(rerr);
29890+ return err;
29891+}
29892+
29893+int au_vdir_init(struct file *file)
29894+{
29895+ int err;
29896+ struct inode *inode;
29897+ struct au_vdir *vdir_cache, *allocated;
29898+
392086de 29899+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 29900+ err = read_vdir(file, !file->f_pos);
29901+ if (unlikely(err))
29902+ goto out;
29903+
29904+ allocated = NULL;
29905+ vdir_cache = au_fvdir_cache(file);
29906+ if (!vdir_cache) {
1308ab2a 29907+ vdir_cache = alloc_vdir(file);
1facf9fc 29908+ err = PTR_ERR(vdir_cache);
29909+ if (IS_ERR(vdir_cache))
29910+ goto out;
29911+ allocated = vdir_cache;
29912+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
392086de 29913+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 29914+ err = reinit_vdir(vdir_cache);
29915+ if (unlikely(err))
29916+ goto out;
29917+ } else
29918+ return 0; /* success */
29919+
c06a8ce3 29920+ inode = file_inode(file);
1facf9fc 29921+ err = copy_vdir(vdir_cache, au_ivdir(inode));
29922+ if (!err) {
29923+ file->f_version = inode->i_version;
29924+ if (allocated)
29925+ au_set_fvdir_cache(file, allocated);
29926+ } else if (allocated)
29927+ au_vdir_free(allocated);
29928+
4f0767ce 29929+out:
1facf9fc 29930+ return err;
29931+}
29932+
29933+static loff_t calc_offset(struct au_vdir *vdir)
29934+{
29935+ loff_t offset;
29936+ union au_vdir_deblk_p p;
29937+
29938+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
29939+ offset = vdir->vd_last.p.deblk - p.deblk;
29940+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
29941+ return offset;
29942+}
29943+
29944+/* returns true or false */
392086de 29945+static int seek_vdir(struct file *file, struct dir_context *ctx)
1facf9fc 29946+{
29947+ int valid;
29948+ unsigned int deblk_sz;
29949+ unsigned long ul, n;
29950+ loff_t offset;
29951+ union au_vdir_deblk_p p, deblk_end;
29952+ struct au_vdir *vdir_cache;
29953+
29954+ valid = 1;
29955+ vdir_cache = au_fvdir_cache(file);
29956+ offset = calc_offset(vdir_cache);
29957+ AuDbg("offset %lld\n", offset);
392086de 29958+ if (ctx->pos == offset)
1facf9fc 29959+ goto out;
29960+
29961+ vdir_cache->vd_last.ul = 0;
29962+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
392086de 29963+ if (!ctx->pos)
1facf9fc 29964+ goto out;
29965+
29966+ valid = 0;
29967+ deblk_sz = vdir_cache->vd_deblk_sz;
392086de 29968+ ul = div64_u64(ctx->pos, deblk_sz);
1facf9fc 29969+ AuDbg("ul %lu\n", ul);
29970+ if (ul >= vdir_cache->vd_nblk)
29971+ goto out;
29972+
29973+ n = vdir_cache->vd_nblk;
29974+ for (; ul < n; ul++) {
29975+ p.deblk = vdir_cache->vd_deblk[ul];
29976+ deblk_end.deblk = p.deblk + deblk_sz;
29977+ offset = ul;
29978+ offset *= deblk_sz;
392086de 29979+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
1facf9fc 29980+ unsigned int l;
29981+
29982+ l = calc_size(p.de->de_str.len);
29983+ offset += l;
29984+ p.deblk += l;
29985+ }
29986+ if (!is_deblk_end(&p, &deblk_end)) {
29987+ valid = 1;
29988+ vdir_cache->vd_last.ul = ul;
29989+ vdir_cache->vd_last.p = p;
29990+ break;
29991+ }
29992+ }
29993+
4f0767ce 29994+out:
1facf9fc 29995+ /* smp_mb(); */
29996+ AuTraceErr(!valid);
29997+ return valid;
29998+}
29999+
392086de 30000+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
1facf9fc 30001+{
1facf9fc 30002+ unsigned int l, deblk_sz;
30003+ union au_vdir_deblk_p deblk_end;
30004+ struct au_vdir *vdir_cache;
30005+ struct au_vdir_de *de;
30006+
30007+ vdir_cache = au_fvdir_cache(file);
392086de 30008+ if (!seek_vdir(file, ctx))
1facf9fc 30009+ return 0;
30010+
30011+ deblk_sz = vdir_cache->vd_deblk_sz;
30012+ while (1) {
30013+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
30014+ deblk_end.deblk += deblk_sz;
30015+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
30016+ de = vdir_cache->vd_last.p.de;
30017+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
392086de 30018+ de->de_str.len, de->de_str.name, ctx->pos,
1facf9fc 30019+ (unsigned long)de->de_ino, de->de_type);
392086de
AM
30020+ if (unlikely(!dir_emit(ctx, de->de_str.name,
30021+ de->de_str.len, de->de_ino,
30022+ de->de_type))) {
1facf9fc 30023+ /* todo: ignore the error caused by udba? */
30024+ /* return err; */
30025+ return 0;
30026+ }
30027+
30028+ l = calc_size(de->de_str.len);
30029+ vdir_cache->vd_last.p.deblk += l;
392086de 30030+ ctx->pos += l;
1facf9fc 30031+ }
30032+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
30033+ vdir_cache->vd_last.ul++;
30034+ vdir_cache->vd_last.p.deblk
30035+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
392086de 30036+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
1facf9fc 30037+ continue;
30038+ }
30039+ break;
30040+ }
30041+
30042+ /* smp_mb(); */
30043+ return 0;
30044+}
7f207e10
AM
30045diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
30046--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 30047+++ linux/fs/aufs/vfsub.c 2015-09-24 10:47:58.258053165 +0200
5527c038 30048@@ -0,0 +1,848 @@
1facf9fc 30049+/*
2000de60 30050+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 30051+ *
30052+ * This program, aufs is free software; you can redistribute it and/or modify
30053+ * it under the terms of the GNU General Public License as published by
30054+ * the Free Software Foundation; either version 2 of the License, or
30055+ * (at your option) any later version.
dece6358
AM
30056+ *
30057+ * This program is distributed in the hope that it will be useful,
30058+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30059+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30060+ * GNU General Public License for more details.
30061+ *
30062+ * You should have received a copy of the GNU General Public License
523b37e3 30063+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30064+ */
30065+
30066+/*
30067+ * sub-routines for VFS
30068+ */
30069+
dece6358
AM
30070+#include <linux/namei.h>
30071+#include <linux/security.h>
30072+#include <linux/splice.h>
1facf9fc 30073+#include "aufs.h"
30074+
30075+int vfsub_update_h_iattr(struct path *h_path, int *did)
30076+{
30077+ int err;
30078+ struct kstat st;
30079+ struct super_block *h_sb;
30080+
30081+ /* for remote fs, leave work for its getattr or d_revalidate */
30082+ /* for bad i_attr fs, handle them in aufs_getattr() */
30083+ /* still some fs may acquire i_mutex. we need to skip them */
30084+ err = 0;
30085+ if (!did)
30086+ did = &err;
30087+ h_sb = h_path->dentry->d_sb;
30088+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
30089+ if (*did)
c06a8ce3 30090+ err = vfs_getattr(h_path, &st);
1facf9fc 30091+
30092+ return err;
30093+}
30094+
30095+/* ---------------------------------------------------------------------- */
30096+
4a4d8108 30097+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 30098+{
30099+ struct file *file;
30100+
b4510431 30101+ file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
7f207e10 30102+ current_cred());
2cbb1c4b
JR
30103+ if (!IS_ERR_OR_NULL(file)
30104+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
5527c038 30105+ i_readcount_inc(d_inode(path->dentry));
4a4d8108 30106+
1308ab2a 30107+ return file;
30108+}
30109+
1facf9fc 30110+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
30111+{
30112+ struct file *file;
30113+
2cbb1c4b 30114+ lockdep_off();
7f207e10 30115+ file = filp_open(path,
2cbb1c4b 30116+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 30117+ mode);
2cbb1c4b 30118+ lockdep_on();
1facf9fc 30119+ if (IS_ERR(file))
30120+ goto out;
30121+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30122+
4f0767ce 30123+out:
1facf9fc 30124+ return file;
30125+}
30126+
b912730e
AM
30127+/*
30128+ * Ideally this function should call VFS:do_last() in order to keep all its
30129+ * checkings. But it is very hard for aufs to regenerate several VFS internal
30130+ * structure such as nameidata. This is a second (or third) best approach.
30131+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
30132+ */
30133+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
30134+ struct vfsub_aopen_args *args, struct au_branch *br)
30135+{
30136+ int err;
30137+ struct file *file = args->file;
30138+ /* copied from linux/fs/namei.c:atomic_open() */
30139+ struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
30140+
30141+ IMustLock(dir);
30142+ AuDebugOn(!dir->i_op->atomic_open);
30143+
30144+ err = au_br_test_oflag(args->open_flag, br);
30145+ if (unlikely(err))
30146+ goto out;
30147+
30148+ args->file->f_path.dentry = DENTRY_NOT_SET;
30149+ args->file->f_path.mnt = au_br_mnt(br);
30150+ err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
30151+ args->create_mode, args->opened);
30152+ if (err >= 0) {
30153+ /* some filesystems don't set FILE_CREATED while succeeded? */
30154+ if (*args->opened & FILE_CREATED)
30155+ fsnotify_create(dir, dentry);
30156+ } else
30157+ goto out;
30158+
30159+
30160+ if (!err) {
30161+ /* todo: call VFS:may_open() here */
30162+ err = open_check_o_direct(file);
30163+ /* todo: ima_file_check() too? */
30164+ if (!err && (args->open_flag & __FMODE_EXEC))
30165+ err = deny_write_access(file);
30166+ if (unlikely(err))
30167+ /* note that the file is created and still opened */
30168+ goto out;
30169+ }
30170+
30171+ atomic_inc(&br->br_count);
30172+ fsnotify_open(file);
30173+
30174+out:
30175+ return err;
30176+}
30177+
1facf9fc 30178+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
30179+{
30180+ int err;
30181+
1facf9fc 30182+ err = kern_path(name, flags, path);
5527c038 30183+ if (!err && d_is_positive(path->dentry))
1facf9fc 30184+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30185+ return err;
30186+}
30187+
30188+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
30189+ int len)
30190+{
30191+ struct path path = {
30192+ .mnt = NULL
30193+ };
30194+
1308ab2a 30195+ /* VFS checks it too, but by WARN_ON_ONCE() */
5527c038 30196+ IMustLock(d_inode(parent));
1facf9fc 30197+
30198+ path.dentry = lookup_one_len(name, parent, len);
30199+ if (IS_ERR(path.dentry))
30200+ goto out;
5527c038 30201+ if (d_is_positive(path.dentry))
1facf9fc 30202+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30203+
4f0767ce 30204+out:
4a4d8108 30205+ AuTraceErrPtr(path.dentry);
1facf9fc 30206+ return path.dentry;
30207+}
30208+
b4510431 30209+void vfsub_call_lkup_one(void *args)
2cbb1c4b 30210+{
b4510431
AM
30211+ struct vfsub_lkup_one_args *a = args;
30212+ *a->errp = vfsub_lkup_one(a->name, a->parent);
2cbb1c4b
JR
30213+}
30214+
1facf9fc 30215+/* ---------------------------------------------------------------------- */
30216+
/*
 * lock_rename() wrapper: the lock is taken with lockdep switched off, then
 * au_hn_suspend() is called for @hdir1 and, if it is a different object,
 * for @hdir2. Returns what lock_rename() returned.
 */
struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
				 struct dentry *d2, struct au_hinode *hdir2)
{
	struct dentry *trap;

	lockdep_off();
	trap = lock_rename(d1, d2);
	lockdep_on();

	au_hn_suspend(hdir1);
	if (hdir2 != hdir1)
		au_hn_suspend(hdir2);

	return trap;
}
30231+
/*
 * Counterpart of vfsub_lock_rename(): au_hn_resume() is called for @hdir1
 * and, if different, @hdir2; then unlock_rename() runs with lockdep
 * switched off.
 */
void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
			 struct dentry *d2, struct au_hinode *hdir2)
{
	au_hn_resume(hdir1);
	if (hdir2 != hdir1)
		au_hn_resume(hdir2);

	lockdep_off();
	unlock_rename(d1, d2);
	lockdep_on();
}
30242+
30243+/* ---------------------------------------------------------------------- */
30244+
b4510431 30245+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
1facf9fc 30246+{
30247+ int err;
30248+ struct dentry *d;
30249+
30250+ IMustLock(dir);
30251+
30252+ d = path->dentry;
30253+ path->dentry = d->d_parent;
b752ccd1 30254+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 30255+ path->dentry = d;
30256+ if (unlikely(err))
30257+ goto out;
30258+
c1595e42 30259+ lockdep_off();
b4510431 30260+ err = vfs_create(dir, path->dentry, mode, want_excl);
c1595e42 30261+ lockdep_on();
1facf9fc 30262+ if (!err) {
30263+ struct path tmp = *path;
30264+ int did;
30265+
30266+ vfsub_update_h_iattr(&tmp, &did);
30267+ if (did) {
30268+ tmp.dentry = path->dentry->d_parent;
30269+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30270+ }
30271+ /*ignore*/
30272+ }
30273+
4f0767ce 30274+out:
1facf9fc 30275+ return err;
30276+}
30277+
30278+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
30279+{
30280+ int err;
30281+ struct dentry *d;
30282+
30283+ IMustLock(dir);
30284+
30285+ d = path->dentry;
30286+ path->dentry = d->d_parent;
b752ccd1 30287+ err = security_path_symlink(path, d, symname);
1facf9fc 30288+ path->dentry = d;
30289+ if (unlikely(err))
30290+ goto out;
30291+
c1595e42 30292+ lockdep_off();
1facf9fc 30293+ err = vfs_symlink(dir, path->dentry, symname);
c1595e42 30294+ lockdep_on();
1facf9fc 30295+ if (!err) {
30296+ struct path tmp = *path;
30297+ int did;
30298+
30299+ vfsub_update_h_iattr(&tmp, &did);
30300+ if (did) {
30301+ tmp.dentry = path->dentry->d_parent;
30302+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30303+ }
30304+ /*ignore*/
30305+ }
30306+
4f0767ce 30307+out:
1facf9fc 30308+ return err;
30309+}
30310+
30311+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
30312+{
30313+ int err;
30314+ struct dentry *d;
30315+
30316+ IMustLock(dir);
30317+
30318+ d = path->dentry;
30319+ path->dentry = d->d_parent;
027c5e7a 30320+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 30321+ path->dentry = d;
30322+ if (unlikely(err))
30323+ goto out;
30324+
c1595e42 30325+ lockdep_off();
1facf9fc 30326+ err = vfs_mknod(dir, path->dentry, mode, dev);
c1595e42 30327+ lockdep_on();
1facf9fc 30328+ if (!err) {
30329+ struct path tmp = *path;
30330+ int did;
30331+
30332+ vfsub_update_h_iattr(&tmp, &did);
30333+ if (did) {
30334+ tmp.dentry = path->dentry->d_parent;
30335+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30336+ }
30337+ /*ignore*/
30338+ }
30339+
4f0767ce 30340+out:
1facf9fc 30341+ return err;
30342+}
30343+
30344+static int au_test_nlink(struct inode *inode)
30345+{
30346+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
30347+
30348+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
30349+ || inode->i_nlink < link_max)
30350+ return 0;
30351+ return -EMLINK;
30352+}
30353+
523b37e3
AM
30354+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
30355+ struct inode **delegated_inode)
1facf9fc 30356+{
30357+ int err;
30358+ struct dentry *d;
30359+
30360+ IMustLock(dir);
30361+
5527c038 30362+ err = au_test_nlink(d_inode(src_dentry));
1facf9fc 30363+ if (unlikely(err))
30364+ return err;
30365+
b4510431 30366+ /* we don't call may_linkat() */
1facf9fc 30367+ d = path->dentry;
30368+ path->dentry = d->d_parent;
b752ccd1 30369+ err = security_path_link(src_dentry, path, d);
1facf9fc 30370+ path->dentry = d;
30371+ if (unlikely(err))
30372+ goto out;
30373+
2cbb1c4b 30374+ lockdep_off();
523b37e3 30375+ err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
2cbb1c4b 30376+ lockdep_on();
1facf9fc 30377+ if (!err) {
30378+ struct path tmp = *path;
30379+ int did;
30380+
30381+ /* fuse has different memory inode for the same inumber */
30382+ vfsub_update_h_iattr(&tmp, &did);
30383+ if (did) {
30384+ tmp.dentry = path->dentry->d_parent;
30385+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30386+ tmp.dentry = src_dentry;
30387+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30388+ }
30389+ /*ignore*/
30390+ }
30391+
4f0767ce 30392+out:
1facf9fc 30393+ return err;
30394+}
30395+
30396+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
523b37e3
AM
30397+ struct inode *dir, struct path *path,
30398+ struct inode **delegated_inode)
1facf9fc 30399+{
30400+ int err;
30401+ struct path tmp = {
30402+ .mnt = path->mnt
30403+ };
30404+ struct dentry *d;
30405+
30406+ IMustLock(dir);
30407+ IMustLock(src_dir);
30408+
30409+ d = path->dentry;
30410+ path->dentry = d->d_parent;
30411+ tmp.dentry = src_dentry->d_parent;
38d290e6 30412+ err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
1facf9fc 30413+ path->dentry = d;
30414+ if (unlikely(err))
30415+ goto out;
30416+
2cbb1c4b 30417+ lockdep_off();
523b37e3 30418+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
38d290e6 30419+ delegated_inode, /*flags*/0);
2cbb1c4b 30420+ lockdep_on();
1facf9fc 30421+ if (!err) {
30422+ int did;
30423+
30424+ tmp.dentry = d->d_parent;
30425+ vfsub_update_h_iattr(&tmp, &did);
30426+ if (did) {
30427+ tmp.dentry = src_dentry;
30428+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30429+ tmp.dentry = src_dentry->d_parent;
30430+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30431+ }
30432+ /*ignore*/
30433+ }
30434+
4f0767ce 30435+out:
1facf9fc 30436+ return err;
30437+}
30438+
30439+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
30440+{
30441+ int err;
30442+ struct dentry *d;
30443+
30444+ IMustLock(dir);
30445+
30446+ d = path->dentry;
30447+ path->dentry = d->d_parent;
b752ccd1 30448+ err = security_path_mkdir(path, d, mode);
1facf9fc 30449+ path->dentry = d;
30450+ if (unlikely(err))
30451+ goto out;
30452+
c1595e42 30453+ lockdep_off();
1facf9fc 30454+ err = vfs_mkdir(dir, path->dentry, mode);
c1595e42 30455+ lockdep_on();
1facf9fc 30456+ if (!err) {
30457+ struct path tmp = *path;
30458+ int did;
30459+
30460+ vfsub_update_h_iattr(&tmp, &did);
30461+ if (did) {
30462+ tmp.dentry = path->dentry->d_parent;
30463+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30464+ }
30465+ /*ignore*/
30466+ }
30467+
4f0767ce 30468+out:
1facf9fc 30469+ return err;
30470+}
30471+
30472+int vfsub_rmdir(struct inode *dir, struct path *path)
30473+{
30474+ int err;
30475+ struct dentry *d;
30476+
30477+ IMustLock(dir);
30478+
30479+ d = path->dentry;
30480+ path->dentry = d->d_parent;
b752ccd1 30481+ err = security_path_rmdir(path, d);
1facf9fc 30482+ path->dentry = d;
30483+ if (unlikely(err))
30484+ goto out;
30485+
2cbb1c4b 30486+ lockdep_off();
1facf9fc 30487+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 30488+ lockdep_on();
1facf9fc 30489+ if (!err) {
30490+ struct path tmp = {
30491+ .dentry = path->dentry->d_parent,
30492+ .mnt = path->mnt
30493+ };
30494+
30495+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
30496+ }
30497+
4f0767ce 30498+out:
1facf9fc 30499+ return err;
30500+}
30501+
30502+/* ---------------------------------------------------------------------- */
30503+
9dbd164d 30504+/* todo: support mmap_sem? */
1facf9fc 30505+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
30506+ loff_t *ppos)
30507+{
30508+ ssize_t err;
30509+
2cbb1c4b 30510+ lockdep_off();
1facf9fc 30511+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 30512+ lockdep_on();
1facf9fc 30513+ if (err >= 0)
30514+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30515+ return err;
30516+}
30517+
30518+/* todo: kernel_read()? */
30519+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
30520+ loff_t *ppos)
30521+{
30522+ ssize_t err;
30523+ mm_segment_t oldfs;
b752ccd1
AM
30524+ union {
30525+ void *k;
30526+ char __user *u;
30527+ } buf;
1facf9fc 30528+
b752ccd1 30529+ buf.k = kbuf;
1facf9fc 30530+ oldfs = get_fs();
30531+ set_fs(KERNEL_DS);
b752ccd1 30532+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 30533+ set_fs(oldfs);
30534+ return err;
30535+}
30536+
30537+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
30538+ loff_t *ppos)
30539+{
30540+ ssize_t err;
30541+
2cbb1c4b 30542+ lockdep_off();
1facf9fc 30543+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 30544+ lockdep_on();
1facf9fc 30545+ if (err >= 0)
30546+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30547+ return err;
30548+}
30549+
30550+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
30551+{
30552+ ssize_t err;
30553+ mm_segment_t oldfs;
b752ccd1
AM
30554+ union {
30555+ void *k;
30556+ const char __user *u;
30557+ } buf;
1facf9fc 30558+
b752ccd1 30559+ buf.k = kbuf;
1facf9fc 30560+ oldfs = get_fs();
30561+ set_fs(KERNEL_DS);
b752ccd1 30562+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 30563+ set_fs(oldfs);
30564+ return err;
30565+}
30566+
4a4d8108
AM
30567+int vfsub_flush(struct file *file, fl_owner_t id)
30568+{
30569+ int err;
30570+
30571+ err = 0;
523b37e3 30572+ if (file->f_op->flush) {
2000de60 30573+ if (!au_test_nfs(file->f_path.dentry->d_sb))
2cbb1c4b
JR
30574+ err = file->f_op->flush(file, id);
30575+ else {
30576+ lockdep_off();
30577+ err = file->f_op->flush(file, id);
30578+ lockdep_on();
30579+ }
4a4d8108
AM
30580+ if (!err)
30581+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
30582+ /*ignore*/
30583+ }
30584+ return err;
30585+}
30586+
392086de 30587+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
1facf9fc 30588+{
30589+ int err;
30590+
523b37e3 30591+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 30592+
2cbb1c4b 30593+ lockdep_off();
392086de 30594+ err = iterate_dir(file, ctx);
2cbb1c4b 30595+ lockdep_on();
1facf9fc 30596+ if (err >= 0)
30597+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30598+ return err;
30599+}
30600+
30601+long vfsub_splice_to(struct file *in, loff_t *ppos,
30602+ struct pipe_inode_info *pipe, size_t len,
30603+ unsigned int flags)
30604+{
30605+ long err;
30606+
2cbb1c4b 30607+ lockdep_off();
0fc653ad 30608+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 30609+ lockdep_on();
4a4d8108 30610+ file_accessed(in);
1facf9fc 30611+ if (err >= 0)
30612+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
30613+ return err;
30614+}
30615+
30616+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
30617+ loff_t *ppos, size_t len, unsigned int flags)
30618+{
30619+ long err;
30620+
2cbb1c4b 30621+ lockdep_off();
0fc653ad 30622+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 30623+ lockdep_on();
1facf9fc 30624+ if (err >= 0)
30625+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
30626+ return err;
30627+}
30628+
53392da6
AM
30629+int vfsub_fsync(struct file *file, struct path *path, int datasync)
30630+{
30631+ int err;
30632+
30633+ /* file can be NULL */
30634+ lockdep_off();
30635+ err = vfs_fsync(file, datasync);
30636+ lockdep_on();
30637+ if (!err) {
30638+ if (!path) {
30639+ AuDebugOn(!file);
30640+ path = &file->f_path;
30641+ }
30642+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30643+ }
30644+ return err;
30645+}
30646+
1facf9fc 30647+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
30648+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
30649+ struct file *h_file)
30650+{
30651+ int err;
30652+ struct inode *h_inode;
c06a8ce3 30653+ struct super_block *h_sb;
1facf9fc 30654+
1facf9fc 30655+ if (!h_file) {
c06a8ce3
AM
30656+ err = vfsub_truncate(h_path, length);
30657+ goto out;
1facf9fc 30658+ }
30659+
5527c038 30660+ h_inode = d_inode(h_path->dentry);
c06a8ce3
AM
30661+ h_sb = h_inode->i_sb;
30662+ lockdep_off();
30663+ sb_start_write(h_sb);
30664+ lockdep_on();
1facf9fc 30665+ err = locks_verify_truncate(h_inode, h_file, length);
30666+ if (!err)
953406b4 30667+ err = security_path_truncate(h_path);
2cbb1c4b
JR
30668+ if (!err) {
30669+ lockdep_off();
1facf9fc 30670+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
30671+ lockdep_on();
30672+ }
c06a8ce3
AM
30673+ lockdep_off();
30674+ sb_end_write(h_sb);
30675+ lockdep_on();
1facf9fc 30676+
4f0767ce 30677+out:
1facf9fc 30678+ return err;
30679+}
30680+
30681+/* ---------------------------------------------------------------------- */
30682+
30683+struct au_vfsub_mkdir_args {
30684+ int *errp;
30685+ struct inode *dir;
30686+ struct path *path;
30687+ int mode;
30688+};
30689+
30690+static void au_call_vfsub_mkdir(void *args)
30691+{
30692+ struct au_vfsub_mkdir_args *a = args;
30693+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
30694+}
30695+
30696+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
30697+{
30698+ int err, do_sio, wkq_err;
30699+
30700+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
30701+ if (!do_sio) {
30702+ lockdep_off();
1facf9fc 30703+ err = vfsub_mkdir(dir, path, mode);
c1595e42
JR
30704+ lockdep_on();
30705+ } else {
1facf9fc 30706+ struct au_vfsub_mkdir_args args = {
30707+ .errp = &err,
30708+ .dir = dir,
30709+ .path = path,
30710+ .mode = mode
30711+ };
30712+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
30713+ if (unlikely(wkq_err))
30714+ err = wkq_err;
30715+ }
30716+
30717+ return err;
30718+}
30719+
30720+struct au_vfsub_rmdir_args {
30721+ int *errp;
30722+ struct inode *dir;
30723+ struct path *path;
30724+};
30725+
30726+static void au_call_vfsub_rmdir(void *args)
30727+{
30728+ struct au_vfsub_rmdir_args *a = args;
30729+ *a->errp = vfsub_rmdir(a->dir, a->path);
30730+}
30731+
30732+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
30733+{
30734+ int err, do_sio, wkq_err;
30735+
30736+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
30737+ if (!do_sio) {
30738+ lockdep_off();
1facf9fc 30739+ err = vfsub_rmdir(dir, path);
c1595e42
JR
30740+ lockdep_on();
30741+ } else {
1facf9fc 30742+ struct au_vfsub_rmdir_args args = {
30743+ .errp = &err,
30744+ .dir = dir,
30745+ .path = path
30746+ };
30747+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
30748+ if (unlikely(wkq_err))
30749+ err = wkq_err;
30750+ }
30751+
30752+ return err;
30753+}
30754+
30755+/* ---------------------------------------------------------------------- */
30756+
30757+struct notify_change_args {
30758+ int *errp;
30759+ struct path *path;
30760+ struct iattr *ia;
523b37e3 30761+ struct inode **delegated_inode;
1facf9fc 30762+};
30763+
30764+static void call_notify_change(void *args)
30765+{
30766+ struct notify_change_args *a = args;
30767+ struct inode *h_inode;
30768+
5527c038 30769+ h_inode = d_inode(a->path->dentry);
1facf9fc 30770+ IMustLock(h_inode);
30771+
30772+ *a->errp = -EPERM;
30773+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
c1595e42 30774+ lockdep_off();
523b37e3
AM
30775+ *a->errp = notify_change(a->path->dentry, a->ia,
30776+ a->delegated_inode);
c1595e42 30777+ lockdep_on();
1facf9fc 30778+ if (!*a->errp)
30779+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
30780+ }
30781+ AuTraceErr(*a->errp);
30782+}
30783+
523b37e3
AM
30784+int vfsub_notify_change(struct path *path, struct iattr *ia,
30785+ struct inode **delegated_inode)
1facf9fc 30786+{
30787+ int err;
30788+ struct notify_change_args args = {
523b37e3
AM
30789+ .errp = &err,
30790+ .path = path,
30791+ .ia = ia,
30792+ .delegated_inode = delegated_inode
1facf9fc 30793+ };
30794+
30795+ call_notify_change(&args);
30796+
30797+ return err;
30798+}
30799+
523b37e3
AM
30800+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
30801+ struct inode **delegated_inode)
1facf9fc 30802+{
30803+ int err, wkq_err;
30804+ struct notify_change_args args = {
523b37e3
AM
30805+ .errp = &err,
30806+ .path = path,
30807+ .ia = ia,
30808+ .delegated_inode = delegated_inode
1facf9fc 30809+ };
30810+
30811+ wkq_err = au_wkq_wait(call_notify_change, &args);
30812+ if (unlikely(wkq_err))
30813+ err = wkq_err;
30814+
30815+ return err;
30816+}
30817+
30818+/* ---------------------------------------------------------------------- */
30819+
30820+struct unlink_args {
30821+ int *errp;
30822+ struct inode *dir;
30823+ struct path *path;
523b37e3 30824+ struct inode **delegated_inode;
1facf9fc 30825+};
30826+
30827+static void call_unlink(void *args)
30828+{
30829+ struct unlink_args *a = args;
30830+ struct dentry *d = a->path->dentry;
30831+ struct inode *h_inode;
30832+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
c1595e42 30833+ && au_dcount(d) == 1);
1facf9fc 30834+
30835+ IMustLock(a->dir);
30836+
30837+ a->path->dentry = d->d_parent;
30838+ *a->errp = security_path_unlink(a->path, d);
30839+ a->path->dentry = d;
30840+ if (unlikely(*a->errp))
30841+ return;
30842+
30843+ if (!stop_sillyrename)
30844+ dget(d);
5527c038
JR
30845+ h_inode = NULL;
30846+ if (d_is_positive(d)) {
30847+ h_inode = d_inode(d);
027c5e7a 30848+ ihold(h_inode);
5527c038 30849+ }
1facf9fc 30850+
2cbb1c4b 30851+ lockdep_off();
523b37e3 30852+ *a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
2cbb1c4b 30853+ lockdep_on();
1facf9fc 30854+ if (!*a->errp) {
30855+ struct path tmp = {
30856+ .dentry = d->d_parent,
30857+ .mnt = a->path->mnt
30858+ };
30859+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
30860+ }
30861+
30862+ if (!stop_sillyrename)
30863+ dput(d);
30864+ if (h_inode)
30865+ iput(h_inode);
30866+
30867+ AuTraceErr(*a->errp);
30868+}
30869+
30870+/*
30871+ * @dir: must be locked.
30872+ * @dentry: target dentry.
30873+ */
523b37e3
AM
30874+int vfsub_unlink(struct inode *dir, struct path *path,
30875+ struct inode **delegated_inode, int force)
1facf9fc 30876+{
30877+ int err;
30878+ struct unlink_args args = {
523b37e3
AM
30879+ .errp = &err,
30880+ .dir = dir,
30881+ .path = path,
30882+ .delegated_inode = delegated_inode
1facf9fc 30883+ };
30884+
30885+ if (!force)
30886+ call_unlink(&args);
30887+ else {
30888+ int wkq_err;
30889+
30890+ wkq_err = au_wkq_wait(call_unlink, &args);
30891+ if (unlikely(wkq_err))
30892+ err = wkq_err;
30893+ }
30894+
30895+ return err;
30896+}
7f207e10
AM
30897diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
30898--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
79b8bda9
AM
30899+++ linux/fs/aufs/vfsub.h 2015-11-11 17:21:46.922197217 +0100
30900@@ -0,0 +1,287 @@
1facf9fc 30901+/*
2000de60 30902+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 30903+ *
30904+ * This program, aufs is free software; you can redistribute it and/or modify
30905+ * it under the terms of the GNU General Public License as published by
30906+ * the Free Software Foundation; either version 2 of the License, or
30907+ * (at your option) any later version.
dece6358
AM
30908+ *
30909+ * This program is distributed in the hope that it will be useful,
30910+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30911+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30912+ * GNU General Public License for more details.
30913+ *
30914+ * You should have received a copy of the GNU General Public License
523b37e3 30915+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30916+ */
30917+
30918+/*
30919+ * sub-routines for VFS
30920+ */
30921+
30922+#ifndef __AUFS_VFSUB_H__
30923+#define __AUFS_VFSUB_H__
30924+
30925+#ifdef __KERNEL__
30926+
30927+#include <linux/fs.h>
b4510431 30928+#include <linux/mount.h>
c1595e42 30929+#include <linux/xattr.h>
7f207e10 30930+#include "debug.h"
1facf9fc 30931+
7f207e10 30932+/* copied from linux/fs/internal.h */
2cbb1c4b 30933+/* todo: BAD approach!! */
c06a8ce3 30934+extern void __mnt_drop_write(struct vfsmount *);
b912730e 30935+extern int open_check_o_direct(struct file *f);
7f207e10
AM
30936+
30937+/* ---------------------------------------------------------------------- */
1facf9fc 30938+
30939+/* lock subclass for lower inode */
30940+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
30941+/* reduce? gave up. */
30942+enum {
c1595e42 30943+ AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
1facf9fc 30944+ AuLsc_I_PARENT, /* lower inode, parent first */
30945+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 30946+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 30947+ AuLsc_I_CHILD,
30948+ AuLsc_I_CHILD2,
30949+ AuLsc_I_End
30950+};
30951+
30952+/* to debug easier, do not make them inlined functions */
30953+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
30954+#define IMustLock(i) MtxMustLock(&(i)->i_mutex)
30955+
30956+/* ---------------------------------------------------------------------- */
30957+
7f207e10
AM
30958+static inline void vfsub_drop_nlink(struct inode *inode)
30959+{
30960+ AuDebugOn(!inode->i_nlink);
30961+ drop_nlink(inode);
30962+}
30963+
027c5e7a
AM
30964+static inline void vfsub_dead_dir(struct inode *inode)
30965+{
30966+ AuDebugOn(!S_ISDIR(inode->i_mode));
30967+ inode->i_flags |= S_DEAD;
30968+ clear_nlink(inode);
30969+}
30970+
392086de
AM
30971+static inline int vfsub_native_ro(struct inode *inode)
30972+{
30973+ return (inode->i_sb->s_flags & MS_RDONLY)
30974+ || IS_RDONLY(inode)
30975+ /* || IS_APPEND(inode) */
30976+ || IS_IMMUTABLE(inode);
30977+}
30978+
7f207e10
AM
30979+/* ---------------------------------------------------------------------- */
30980+
30981+int vfsub_update_h_iattr(struct path *h_path, int *did);
30982+struct file *vfsub_dentry_open(struct path *path, int flags);
30983+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
b912730e
AM
30984+struct vfsub_aopen_args {
30985+ struct file *file;
30986+ unsigned int open_flag;
30987+ umode_t create_mode;
30988+ int *opened;
30989+};
30990+struct au_branch;
30991+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
30992+ struct vfsub_aopen_args *args, struct au_branch *br);
1facf9fc 30993+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
b4510431 30994+
1facf9fc 30995+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
30996+ int len);
b4510431
AM
30997+
30998+struct vfsub_lkup_one_args {
30999+ struct dentry **errp;
31000+ struct qstr *name;
31001+ struct dentry *parent;
31002+};
31003+
31004+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
31005+ struct dentry *parent)
31006+{
31007+ return vfsub_lookup_one_len(name->name, parent, name->len);
31008+}
31009+
31010+void vfsub_call_lkup_one(void *args);
31011+
31012+/* ---------------------------------------------------------------------- */
31013+
31014+static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
31015+{
31016+ int err;
076b876e 31017+
b4510431
AM
31018+ lockdep_off();
31019+ err = mnt_want_write(mnt);
31020+ lockdep_on();
31021+ return err;
31022+}
31023+
31024+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
31025+{
31026+ lockdep_off();
31027+ mnt_drop_write(mnt);
31028+ lockdep_on();
31029+}
1facf9fc 31030+
7e9cd9fe 31031+#if 0 /* reserved */
c06a8ce3
AM
31032+static inline void vfsub_mnt_drop_write_file(struct file *file)
31033+{
31034+ lockdep_off();
31035+ mnt_drop_write_file(file);
31036+ lockdep_on();
31037+}
7e9cd9fe 31038+#endif
c06a8ce3 31039+
1facf9fc 31040+/* ---------------------------------------------------------------------- */
31041+
31042+struct au_hinode;
31043+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
31044+ struct dentry *d2, struct au_hinode *hdir2);
31045+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
31046+ struct dentry *d2, struct au_hinode *hdir2);
31047+
537831f9
AM
31048+int vfsub_create(struct inode *dir, struct path *path, int mode,
31049+ bool want_excl);
1facf9fc 31050+int vfsub_symlink(struct inode *dir, struct path *path,
31051+ const char *symname);
31052+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
31053+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
523b37e3 31054+ struct path *path, struct inode **delegated_inode);
1facf9fc 31055+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
523b37e3
AM
31056+ struct inode *hdir, struct path *path,
31057+ struct inode **delegated_inode);
1facf9fc 31058+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
31059+int vfsub_rmdir(struct inode *dir, struct path *path);
31060+
31061+/* ---------------------------------------------------------------------- */
31062+
31063+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
31064+ loff_t *ppos);
31065+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31066+ loff_t *ppos);
31067+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31068+ loff_t *ppos);
31069+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
31070+ loff_t *ppos);
4a4d8108 31071+int vfsub_flush(struct file *file, fl_owner_t id);
392086de
AM
31072+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
31073+
c06a8ce3
AM
31074+static inline loff_t vfsub_f_size_read(struct file *file)
31075+{
31076+ return i_size_read(file_inode(file));
31077+}
31078+
4a4d8108
AM
31079+static inline unsigned int vfsub_file_flags(struct file *file)
31080+{
31081+ unsigned int flags;
31082+
31083+ spin_lock(&file->f_lock);
31084+ flags = file->f_flags;
31085+ spin_unlock(&file->f_lock);
31086+
31087+ return flags;
31088+}
1308ab2a 31089+
7e9cd9fe 31090+#if 0 /* reserved */
1facf9fc 31091+static inline void vfsub_file_accessed(struct file *h_file)
31092+{
31093+ file_accessed(h_file);
31094+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
31095+}
7e9cd9fe 31096+#endif
1facf9fc 31097+
79b8bda9 31098+#if 0 /* reserved */
1facf9fc 31099+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
31100+ struct dentry *h_dentry)
31101+{
31102+ struct path h_path = {
31103+ .dentry = h_dentry,
31104+ .mnt = h_mnt
31105+ };
92d182d2 31106+ touch_atime(&h_path);
1facf9fc 31107+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
31108+}
79b8bda9 31109+#endif
1facf9fc 31110+
0c3ec466
AM
31111+static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
31112+ int flags)
31113+{
7e9cd9fe 31114+ return generic_update_time(h_inode, ts, flags);
0c3ec466
AM
31115+ /* no vfsub_update_h_iattr() since we don't have struct path */
31116+}
31117+
4a4d8108
AM
31118+long vfsub_splice_to(struct file *in, loff_t *ppos,
31119+ struct pipe_inode_info *pipe, size_t len,
31120+ unsigned int flags);
31121+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31122+ loff_t *ppos, size_t len, unsigned int flags);
c06a8ce3
AM
31123+
31124+static inline long vfsub_truncate(struct path *path, loff_t length)
31125+{
31126+ long err;
076b876e 31127+
c06a8ce3
AM
31128+ lockdep_off();
31129+ err = vfs_truncate(path, length);
31130+ lockdep_on();
31131+ return err;
31132+}
31133+
4a4d8108
AM
31134+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31135+ struct file *h_file);
53392da6 31136+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 31137+
1facf9fc 31138+/* ---------------------------------------------------------------------- */
31139+
31140+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
31141+{
31142+ loff_t err;
31143+
2cbb1c4b 31144+ lockdep_off();
1facf9fc 31145+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 31146+ lockdep_on();
1facf9fc 31147+ return err;
31148+}
31149+
31150+/* ---------------------------------------------------------------------- */
31151+
4a4d8108
AM
31152+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
31153+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
523b37e3
AM
31154+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31155+ struct inode **delegated_inode);
31156+int vfsub_notify_change(struct path *path, struct iattr *ia,
31157+ struct inode **delegated_inode);
31158+int vfsub_unlink(struct inode *dir, struct path *path,
31159+ struct inode **delegated_inode, int force);
4a4d8108 31160+
c1595e42
JR
31161+/* ---------------------------------------------------------------------- */
31162+
31163+static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
31164+ const void *value, size_t size, int flags)
31165+{
31166+ int err;
31167+
31168+ lockdep_off();
31169+ err = vfs_setxattr(dentry, name, value, size, flags);
31170+ lockdep_on();
31171+
31172+ return err;
31173+}
31174+
31175+static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
31176+{
31177+ int err;
31178+
31179+ lockdep_off();
31180+ err = vfs_removexattr(dentry, name);
31181+ lockdep_on();
31182+
31183+ return err;
31184+}
31185+
1facf9fc 31186+#endif /* __KERNEL__ */
31187+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
31188diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
31189--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 31190+++ linux/fs/aufs/wbr_policy.c 2015-09-24 10:47:58.258053165 +0200
076b876e 31191@@ -0,0 +1,765 @@
1facf9fc 31192+/*
2000de60 31193+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 31194+ *
31195+ * This program, aufs is free software; you can redistribute it and/or modify
31196+ * it under the terms of the GNU General Public License as published by
31197+ * the Free Software Foundation; either version 2 of the License, or
31198+ * (at your option) any later version.
dece6358
AM
31199+ *
31200+ * This program is distributed in the hope that it will be useful,
31201+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31202+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31203+ * GNU General Public License for more details.
31204+ *
31205+ * You should have received a copy of the GNU General Public License
523b37e3 31206+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31207+ */
31208+
31209+/*
31210+ * policies for selecting one among multiple writable branches
31211+ */
31212+
31213+#include <linux/statfs.h>
31214+#include "aufs.h"
31215+
31216+/* subset of cpup_attr() */
31217+static noinline_for_stack
31218+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
31219+{
31220+ int err, sbits;
31221+ struct iattr ia;
31222+ struct inode *h_isrc;
31223+
5527c038 31224+ h_isrc = d_inode(h_src);
1facf9fc 31225+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
31226+ ia.ia_mode = h_isrc->i_mode;
31227+ ia.ia_uid = h_isrc->i_uid;
31228+ ia.ia_gid = h_isrc->i_gid;
31229+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
5527c038 31230+ au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
523b37e3
AM
31231+ /* no delegation since it is just created */
31232+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31233+
31234+ /* is this nfs only? */
31235+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
31236+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
31237+ ia.ia_mode = h_isrc->i_mode;
523b37e3 31238+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31239+ }
31240+
31241+ return err;
31242+}
31243+
31244+#define AuCpdown_PARENT_OPQ 1
31245+#define AuCpdown_WHED (1 << 1)
31246+#define AuCpdown_MADE_DIR (1 << 2)
31247+#define AuCpdown_DIROPQ (1 << 3)
31248+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
7f207e10
AM
31249+#define au_fset_cpdown(flags, name) \
31250+ do { (flags) |= AuCpdown_##name; } while (0)
31251+#define au_fclr_cpdown(flags, name) \
31252+ do { (flags) &= ~AuCpdown_##name; } while (0)
1facf9fc 31253+
1facf9fc 31254+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
c2b27bf2 31255+ unsigned int *flags)
1facf9fc 31256+{
31257+ int err;
31258+ struct dentry *opq_dentry;
31259+
31260+ opq_dentry = au_diropq_create(dentry, bdst);
31261+ err = PTR_ERR(opq_dentry);
31262+ if (IS_ERR(opq_dentry))
31263+ goto out;
31264+ dput(opq_dentry);
c2b27bf2 31265+ au_fset_cpdown(*flags, DIROPQ);
1facf9fc 31266+
4f0767ce 31267+out:
1facf9fc 31268+ return err;
31269+}
31270+
31271+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
31272+ struct inode *dir, aufs_bindex_t bdst)
31273+{
31274+ int err;
31275+ struct path h_path;
31276+ struct au_branch *br;
31277+
31278+ br = au_sbr(dentry->d_sb, bdst);
31279+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
31280+ err = PTR_ERR(h_path.dentry);
31281+ if (IS_ERR(h_path.dentry))
31282+ goto out;
31283+
31284+ err = 0;
5527c038 31285+ if (d_is_positive(h_path.dentry)) {
86dc4139 31286+ h_path.mnt = au_br_mnt(br);
1facf9fc 31287+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
31288+ dentry);
31289+ }
31290+ dput(h_path.dentry);
31291+
4f0767ce 31292+out:
1facf9fc 31293+ return err;
31294+}
31295+
31296+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 31297+ struct au_pin *pin,
1facf9fc 31298+ struct dentry *h_parent, void *arg)
31299+{
31300+ int err, rerr;
4a4d8108 31301+ aufs_bindex_t bopq, bstart;
1facf9fc 31302+ struct path h_path;
31303+ struct dentry *parent;
31304+ struct inode *h_dir, *h_inode, *inode, *dir;
c2b27bf2 31305+ unsigned int *flags = arg;
1facf9fc 31306+
31307+ bstart = au_dbstart(dentry);
31308+ /* dentry is di-locked */
31309+ parent = dget_parent(dentry);
5527c038
JR
31310+ dir = d_inode(parent);
31311+ h_dir = d_inode(h_parent);
1facf9fc 31312+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
31313+ IMustLock(h_dir);
31314+
86dc4139 31315+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
1facf9fc 31316+ if (unlikely(err < 0))
31317+ goto out;
31318+ h_path.dentry = au_h_dptr(dentry, bdst);
31319+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
31320+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
31321+ S_IRWXU | S_IRUGO | S_IXUGO);
31322+ if (unlikely(err))
31323+ goto out_put;
c2b27bf2 31324+ au_fset_cpdown(*flags, MADE_DIR);
1facf9fc 31325+
1facf9fc 31326+ bopq = au_dbdiropq(dentry);
c2b27bf2
AM
31327+ au_fclr_cpdown(*flags, WHED);
31328+ au_fclr_cpdown(*flags, DIROPQ);
1facf9fc 31329+ if (au_dbwh(dentry) == bdst)
c2b27bf2
AM
31330+ au_fset_cpdown(*flags, WHED);
31331+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
31332+ au_fset_cpdown(*flags, PARENT_OPQ);
5527c038 31333+ h_inode = d_inode(h_path.dentry);
1facf9fc 31334+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
c2b27bf2
AM
31335+ if (au_ftest_cpdown(*flags, WHED)) {
31336+ err = au_cpdown_dir_opq(dentry, bdst, flags);
1facf9fc 31337+ if (unlikely(err)) {
31338+ mutex_unlock(&h_inode->i_mutex);
31339+ goto out_dir;
31340+ }
31341+ }
31342+
31343+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
31344+ mutex_unlock(&h_inode->i_mutex);
31345+ if (unlikely(err))
31346+ goto out_opq;
31347+
c2b27bf2 31348+ if (au_ftest_cpdown(*flags, WHED)) {
1facf9fc 31349+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
31350+ if (unlikely(err))
31351+ goto out_opq;
31352+ }
31353+
5527c038 31354+ inode = d_inode(dentry);
1facf9fc 31355+ if (au_ibend(inode) < bdst)
31356+ au_set_ibend(inode, bdst);
31357+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
31358+ au_hi_flags(inode, /*isdir*/1));
076b876e 31359+ au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
1facf9fc 31360+ goto out; /* success */
31361+
31362+ /* revert */
4f0767ce 31363+out_opq:
c2b27bf2 31364+ if (au_ftest_cpdown(*flags, DIROPQ)) {
1facf9fc 31365+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
31366+ rerr = au_diropq_remove(dentry, bdst);
31367+ mutex_unlock(&h_inode->i_mutex);
31368+ if (unlikely(rerr)) {
523b37e3
AM
31369+ AuIOErr("failed removing diropq for %pd b%d (%d)\n",
31370+ dentry, bdst, rerr);
1facf9fc 31371+ err = -EIO;
31372+ goto out;
31373+ }
31374+ }
4f0767ce 31375+out_dir:
c2b27bf2 31376+ if (au_ftest_cpdown(*flags, MADE_DIR)) {
1facf9fc 31377+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
31378+ if (unlikely(rerr)) {
523b37e3
AM
31379+ AuIOErr("failed removing %pd b%d (%d)\n",
31380+ dentry, bdst, rerr);
1facf9fc 31381+ err = -EIO;
31382+ }
31383+ }
4f0767ce 31384+out_put:
1facf9fc 31385+ au_set_h_dptr(dentry, bdst, NULL);
31386+ if (au_dbend(dentry) == bdst)
31387+ au_update_dbend(dentry);
4f0767ce 31388+out:
1facf9fc 31389+ dput(parent);
31390+ return err;
31391+}
31392+
31393+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
31394+{
31395+ int err;
c2b27bf2 31396+ unsigned int flags;
1facf9fc 31397+
c2b27bf2
AM
31398+ flags = 0;
31399+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
1facf9fc 31400+
31401+ return err;
31402+}
31403+
31404+/* ---------------------------------------------------------------------- */
31405+
31406+/* policies for create */
31407+
c2b27bf2 31408+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
4a4d8108
AM
31409+{
31410+ int err, i, j, ndentry;
31411+ aufs_bindex_t bopq;
31412+ struct au_dcsub_pages dpages;
31413+ struct au_dpage *dpage;
31414+ struct dentry **dentries, *parent, *d;
31415+
31416+ err = au_dpages_init(&dpages, GFP_NOFS);
31417+ if (unlikely(err))
31418+ goto out;
31419+ parent = dget_parent(dentry);
027c5e7a 31420+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
31421+ if (unlikely(err))
31422+ goto out_free;
31423+
31424+ err = bindex;
31425+ for (i = 0; i < dpages.ndpage; i++) {
31426+ dpage = dpages.dpages + i;
31427+ dentries = dpage->dentries;
31428+ ndentry = dpage->ndentry;
31429+ for (j = 0; j < ndentry; j++) {
31430+ d = dentries[j];
31431+ di_read_lock_parent2(d, !AuLock_IR);
31432+ bopq = au_dbdiropq(d);
31433+ di_read_unlock(d, !AuLock_IR);
31434+ if (bopq >= 0 && bopq < err)
31435+ err = bopq;
31436+ }
31437+ }
31438+
31439+out_free:
31440+ dput(parent);
31441+ au_dpages_free(&dpages);
31442+out:
31443+ return err;
31444+}
31445+
1facf9fc 31446+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
31447+{
31448+ for (; bindex >= 0; bindex--)
31449+ if (!au_br_rdonly(au_sbr(sb, bindex)))
31450+ return bindex;
31451+ return -EROFS;
31452+}
31453+
31454+/* top down parent */
392086de
AM
31455+static int au_wbr_create_tdp(struct dentry *dentry,
31456+ unsigned int flags __maybe_unused)
1facf9fc 31457+{
31458+ int err;
31459+ aufs_bindex_t bstart, bindex;
31460+ struct super_block *sb;
31461+ struct dentry *parent, *h_parent;
31462+
31463+ sb = dentry->d_sb;
31464+ bstart = au_dbstart(dentry);
31465+ err = bstart;
31466+ if (!au_br_rdonly(au_sbr(sb, bstart)))
31467+ goto out;
31468+
31469+ err = -EROFS;
31470+ parent = dget_parent(dentry);
31471+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
31472+ h_parent = au_h_dptr(parent, bindex);
5527c038 31473+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31474+ continue;
31475+
31476+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
31477+ err = bindex;
31478+ break;
31479+ }
31480+ }
31481+ dput(parent);
31482+
31483+ /* bottom up here */
4a4d8108 31484+ if (unlikely(err < 0)) {
1facf9fc 31485+ err = au_wbr_bu(sb, bstart - 1);
4a4d8108
AM
31486+ if (err >= 0)
31487+ err = au_wbr_nonopq(dentry, err);
31488+ }
1facf9fc 31489+
4f0767ce 31490+out:
1facf9fc 31491+ AuDbg("b%d\n", err);
31492+ return err;
31493+}
31494+
31495+/* ---------------------------------------------------------------------- */
31496+
31497+/* an exception for the policy other than tdp */
31498+static int au_wbr_create_exp(struct dentry *dentry)
31499+{
31500+ int err;
31501+ aufs_bindex_t bwh, bdiropq;
31502+ struct dentry *parent;
31503+
31504+ err = -1;
31505+ bwh = au_dbwh(dentry);
31506+ parent = dget_parent(dentry);
31507+ bdiropq = au_dbdiropq(parent);
31508+ if (bwh >= 0) {
31509+ if (bdiropq >= 0)
31510+ err = min(bdiropq, bwh);
31511+ else
31512+ err = bwh;
31513+ AuDbg("%d\n", err);
31514+ } else if (bdiropq >= 0) {
31515+ err = bdiropq;
31516+ AuDbg("%d\n", err);
31517+ }
31518+ dput(parent);
31519+
4a4d8108
AM
31520+ if (err >= 0)
31521+ err = au_wbr_nonopq(dentry, err);
31522+
1facf9fc 31523+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
31524+ err = -1;
31525+
31526+ AuDbg("%d\n", err);
31527+ return err;
31528+}
31529+
31530+/* ---------------------------------------------------------------------- */
31531+
31532+/* round robin */
31533+static int au_wbr_create_init_rr(struct super_block *sb)
31534+{
31535+ int err;
31536+
31537+ err = au_wbr_bu(sb, au_sbend(sb));
31538+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 31539+ /* smp_mb(); */
1facf9fc 31540+
31541+ AuDbg("b%d\n", err);
31542+ return err;
31543+}
31544+
392086de 31545+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
1facf9fc 31546+{
31547+ int err, nbr;
31548+ unsigned int u;
31549+ aufs_bindex_t bindex, bend;
31550+ struct super_block *sb;
31551+ atomic_t *next;
31552+
31553+ err = au_wbr_create_exp(dentry);
31554+ if (err >= 0)
31555+ goto out;
31556+
31557+ sb = dentry->d_sb;
31558+ next = &au_sbi(sb)->si_wbr_rr_next;
31559+ bend = au_sbend(sb);
31560+ nbr = bend + 1;
31561+ for (bindex = 0; bindex <= bend; bindex++) {
392086de 31562+ if (!au_ftest_wbr(flags, DIR)) {
1facf9fc 31563+ err = atomic_dec_return(next) + 1;
31564+ /* modulo for 0 is meaningless */
31565+ if (unlikely(!err))
31566+ err = atomic_dec_return(next) + 1;
31567+ } else
31568+ err = atomic_read(next);
31569+ AuDbg("%d\n", err);
31570+ u = err;
31571+ err = u % nbr;
31572+ AuDbg("%d\n", err);
31573+ if (!au_br_rdonly(au_sbr(sb, err)))
31574+ break;
31575+ err = -EROFS;
31576+ }
31577+
4a4d8108
AM
31578+ if (err >= 0)
31579+ err = au_wbr_nonopq(dentry, err);
31580+
4f0767ce 31581+out:
1facf9fc 31582+ AuDbg("%d\n", err);
31583+ return err;
31584+}
31585+
31586+/* ---------------------------------------------------------------------- */
31587+
31588+/* most free space */
392086de 31589+static void au_mfs(struct dentry *dentry, struct dentry *parent)
1facf9fc 31590+{
31591+ struct super_block *sb;
31592+ struct au_branch *br;
31593+ struct au_wbr_mfs *mfs;
392086de 31594+ struct dentry *h_parent;
1facf9fc 31595+ aufs_bindex_t bindex, bend;
31596+ int err;
31597+ unsigned long long b, bavail;
7f207e10 31598+ struct path h_path;
1facf9fc 31599+ /* reduce the stack usage */
31600+ struct kstatfs *st;
31601+
31602+ st = kmalloc(sizeof(*st), GFP_NOFS);
31603+ if (unlikely(!st)) {
31604+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
31605+ return;
31606+ }
31607+
31608+ bavail = 0;
31609+ sb = dentry->d_sb;
31610+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 31611+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 31612+ mfs->mfs_bindex = -EROFS;
31613+ mfs->mfsrr_bytes = 0;
392086de
AM
31614+ if (!parent) {
31615+ bindex = 0;
31616+ bend = au_sbend(sb);
31617+ } else {
31618+ bindex = au_dbstart(parent);
31619+ bend = au_dbtaildir(parent);
31620+ }
31621+
31622+ for (; bindex <= bend; bindex++) {
31623+ if (parent) {
31624+ h_parent = au_h_dptr(parent, bindex);
5527c038 31625+ if (!h_parent || d_is_negative(h_parent))
392086de
AM
31626+ continue;
31627+ }
1facf9fc 31628+ br = au_sbr(sb, bindex);
31629+ if (au_br_rdonly(br))
31630+ continue;
31631+
31632+ /* sb->s_root for NFS is unreliable */
86dc4139 31633+ h_path.mnt = au_br_mnt(br);
7f207e10
AM
31634+ h_path.dentry = h_path.mnt->mnt_root;
31635+ err = vfs_statfs(&h_path, st);
1facf9fc 31636+ if (unlikely(err)) {
31637+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
31638+ continue;
31639+ }
31640+
31641+ /* when the available size is equal, select the lower one */
31642+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
31643+ || sizeof(b) < sizeof(st->f_bsize));
31644+ b = st->f_bavail * st->f_bsize;
31645+ br->br_wbr->wbr_bytes = b;
31646+ if (b >= bavail) {
31647+ bavail = b;
31648+ mfs->mfs_bindex = bindex;
31649+ mfs->mfs_jiffy = jiffies;
31650+ }
31651+ }
31652+
31653+ mfs->mfsrr_bytes = bavail;
31654+ AuDbg("b%d\n", mfs->mfs_bindex);
31655+ kfree(st);
31656+}
31657+
392086de 31658+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
1facf9fc 31659+{
31660+ int err;
392086de 31661+ struct dentry *parent;
1facf9fc 31662+ struct super_block *sb;
31663+ struct au_wbr_mfs *mfs;
31664+
31665+ err = au_wbr_create_exp(dentry);
31666+ if (err >= 0)
31667+ goto out;
31668+
31669+ sb = dentry->d_sb;
392086de
AM
31670+ parent = NULL;
31671+ if (au_ftest_wbr(flags, PARENT))
31672+ parent = dget_parent(dentry);
1facf9fc 31673+ mfs = &au_sbi(sb)->si_wbr_mfs;
31674+ mutex_lock(&mfs->mfs_lock);
31675+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
31676+ || mfs->mfs_bindex < 0
31677+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
392086de 31678+ au_mfs(dentry, parent);
1facf9fc 31679+ mutex_unlock(&mfs->mfs_lock);
31680+ err = mfs->mfs_bindex;
392086de 31681+ dput(parent);
1facf9fc 31682+
4a4d8108
AM
31683+ if (err >= 0)
31684+ err = au_wbr_nonopq(dentry, err);
31685+
4f0767ce 31686+out:
1facf9fc 31687+ AuDbg("b%d\n", err);
31688+ return err;
31689+}
31690+
31691+static int au_wbr_create_init_mfs(struct super_block *sb)
31692+{
31693+ struct au_wbr_mfs *mfs;
31694+
31695+ mfs = &au_sbi(sb)->si_wbr_mfs;
31696+ mutex_init(&mfs->mfs_lock);
31697+ mfs->mfs_jiffy = 0;
31698+ mfs->mfs_bindex = -EROFS;
31699+
31700+ return 0;
31701+}
31702+
31703+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
31704+{
31705+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
31706+ return 0;
31707+}
31708+
31709+/* ---------------------------------------------------------------------- */
31710+
31711+/* most free space and then round robin */
392086de 31712+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
1facf9fc 31713+{
31714+ int err;
31715+ struct au_wbr_mfs *mfs;
31716+
392086de 31717+ err = au_wbr_create_mfs(dentry, flags);
1facf9fc 31718+ if (err >= 0) {
31719+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 31720+ mutex_lock(&mfs->mfs_lock);
1facf9fc 31721+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
392086de 31722+ err = au_wbr_create_rr(dentry, flags);
dece6358 31723+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 31724+ }
31725+
31726+ AuDbg("b%d\n", err);
31727+ return err;
31728+}
31729+
31730+static int au_wbr_create_init_mfsrr(struct super_block *sb)
31731+{
31732+ int err;
31733+
31734+ au_wbr_create_init_mfs(sb); /* ignore */
31735+ err = au_wbr_create_init_rr(sb);
31736+
31737+ return err;
31738+}
31739+
31740+/* ---------------------------------------------------------------------- */
31741+
31742+/* top down parent and most free space */
392086de 31743+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
1facf9fc 31744+{
31745+ int err, e2;
31746+ unsigned long long b;
31747+ aufs_bindex_t bindex, bstart, bend;
31748+ struct super_block *sb;
31749+ struct dentry *parent, *h_parent;
31750+ struct au_branch *br;
31751+
392086de 31752+ err = au_wbr_create_tdp(dentry, flags);
1facf9fc 31753+ if (unlikely(err < 0))
31754+ goto out;
31755+ parent = dget_parent(dentry);
31756+ bstart = au_dbstart(parent);
31757+ bend = au_dbtaildir(parent);
31758+ if (bstart == bend)
31759+ goto out_parent; /* success */
31760+
392086de 31761+ e2 = au_wbr_create_mfs(dentry, flags);
1facf9fc 31762+ if (e2 < 0)
31763+ goto out_parent; /* success */
31764+
31765+ /* when the available size is equal, select upper one */
31766+ sb = dentry->d_sb;
31767+ br = au_sbr(sb, err);
31768+ b = br->br_wbr->wbr_bytes;
31769+ AuDbg("b%d, %llu\n", err, b);
31770+
31771+ for (bindex = bstart; bindex <= bend; bindex++) {
31772+ h_parent = au_h_dptr(parent, bindex);
5527c038 31773+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31774+ continue;
31775+
31776+ br = au_sbr(sb, bindex);
31777+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
31778+ b = br->br_wbr->wbr_bytes;
31779+ err = bindex;
31780+ AuDbg("b%d, %llu\n", err, b);
31781+ }
31782+ }
31783+
4a4d8108
AM
31784+ if (err >= 0)
31785+ err = au_wbr_nonopq(dentry, err);
31786+
4f0767ce 31787+out_parent:
1facf9fc 31788+ dput(parent);
4f0767ce 31789+out:
1facf9fc 31790+ AuDbg("b%d\n", err);
31791+ return err;
31792+}
31793+
31794+/* ---------------------------------------------------------------------- */
31795+
392086de
AM
31796+/*
31797+ * - top down parent
31798+ * - most free space with parent
31799+ * - most free space round-robin regardless parent
31800+ */
31801+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
31802+{
31803+ int err;
31804+ unsigned long long watermark;
31805+ struct super_block *sb;
31806+ struct au_branch *br;
31807+ struct au_wbr_mfs *mfs;
31808+
31809+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
31810+ if (unlikely(err < 0))
31811+ goto out;
31812+
31813+ sb = dentry->d_sb;
31814+ br = au_sbr(sb, err);
31815+ mfs = &au_sbi(sb)->si_wbr_mfs;
31816+ mutex_lock(&mfs->mfs_lock);
31817+ watermark = mfs->mfsrr_watermark;
31818+ mutex_unlock(&mfs->mfs_lock);
31819+ if (br->br_wbr->wbr_bytes < watermark)
31820+ /* regardless the parent dir */
31821+ err = au_wbr_create_mfsrr(dentry, flags);
31822+
31823+out:
31824+ AuDbg("b%d\n", err);
31825+ return err;
31826+}
31827+
31828+/* ---------------------------------------------------------------------- */
31829+
1facf9fc 31830+/* policies for copyup */
31831+
31832+/* top down parent */
31833+static int au_wbr_copyup_tdp(struct dentry *dentry)
31834+{
392086de 31835+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
1facf9fc 31836+}
31837+
31838+/* bottom up parent */
31839+static int au_wbr_copyup_bup(struct dentry *dentry)
31840+{
31841+ int err;
31842+ aufs_bindex_t bindex, bstart;
31843+ struct dentry *parent, *h_parent;
31844+ struct super_block *sb;
31845+
31846+ err = -EROFS;
31847+ sb = dentry->d_sb;
31848+ parent = dget_parent(dentry);
31849+ bstart = au_dbstart(parent);
31850+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
31851+ h_parent = au_h_dptr(parent, bindex);
5527c038 31852+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31853+ continue;
31854+
31855+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
31856+ err = bindex;
31857+ break;
31858+ }
31859+ }
31860+ dput(parent);
31861+
31862+ /* bottom up here */
31863+ if (unlikely(err < 0))
31864+ err = au_wbr_bu(sb, bstart - 1);
31865+
31866+ AuDbg("b%d\n", err);
31867+ return err;
31868+}
31869+
31870+/* bottom up */
076b876e 31871+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart)
1facf9fc 31872+{
31873+ int err;
31874+
4a4d8108
AM
31875+ err = au_wbr_bu(dentry->d_sb, bstart);
31876+ AuDbg("b%d\n", err);
31877+ if (err > bstart)
31878+ err = au_wbr_nonopq(dentry, err);
1facf9fc 31879+
31880+ AuDbg("b%d\n", err);
31881+ return err;
31882+}
31883+
076b876e
AM
31884+static int au_wbr_copyup_bu(struct dentry *dentry)
31885+{
31886+ int err;
31887+ aufs_bindex_t bstart;
31888+
31889+ bstart = au_dbstart(dentry);
31890+ err = au_wbr_do_copyup_bu(dentry, bstart);
31891+ return err;
31892+}
31893+
1facf9fc 31894+/* ---------------------------------------------------------------------- */
31895+
31896+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
31897+ [AuWbrCopyup_TDP] = {
31898+ .copyup = au_wbr_copyup_tdp
31899+ },
31900+ [AuWbrCopyup_BUP] = {
31901+ .copyup = au_wbr_copyup_bup
31902+ },
31903+ [AuWbrCopyup_BU] = {
31904+ .copyup = au_wbr_copyup_bu
31905+ }
31906+};
31907+
31908+struct au_wbr_create_operations au_wbr_create_ops[] = {
31909+ [AuWbrCreate_TDP] = {
31910+ .create = au_wbr_create_tdp
31911+ },
31912+ [AuWbrCreate_RR] = {
31913+ .create = au_wbr_create_rr,
31914+ .init = au_wbr_create_init_rr
31915+ },
31916+ [AuWbrCreate_MFS] = {
31917+ .create = au_wbr_create_mfs,
31918+ .init = au_wbr_create_init_mfs,
31919+ .fin = au_wbr_create_fin_mfs
31920+ },
31921+ [AuWbrCreate_MFSV] = {
31922+ .create = au_wbr_create_mfs,
31923+ .init = au_wbr_create_init_mfs,
31924+ .fin = au_wbr_create_fin_mfs
31925+ },
31926+ [AuWbrCreate_MFSRR] = {
31927+ .create = au_wbr_create_mfsrr,
31928+ .init = au_wbr_create_init_mfsrr,
31929+ .fin = au_wbr_create_fin_mfs
31930+ },
31931+ [AuWbrCreate_MFSRRV] = {
31932+ .create = au_wbr_create_mfsrr,
31933+ .init = au_wbr_create_init_mfsrr,
31934+ .fin = au_wbr_create_fin_mfs
31935+ },
31936+ [AuWbrCreate_PMFS] = {
31937+ .create = au_wbr_create_pmfs,
31938+ .init = au_wbr_create_init_mfs,
31939+ .fin = au_wbr_create_fin_mfs
31940+ },
31941+ [AuWbrCreate_PMFSV] = {
31942+ .create = au_wbr_create_pmfs,
31943+ .init = au_wbr_create_init_mfs,
31944+ .fin = au_wbr_create_fin_mfs
392086de
AM
31945+ },
31946+ [AuWbrCreate_PMFSRR] = {
31947+ .create = au_wbr_create_pmfsrr,
31948+ .init = au_wbr_create_init_mfsrr,
31949+ .fin = au_wbr_create_fin_mfs
31950+ },
31951+ [AuWbrCreate_PMFSRRV] = {
31952+ .create = au_wbr_create_pmfsrr,
31953+ .init = au_wbr_create_init_mfsrr,
31954+ .fin = au_wbr_create_fin_mfs
1facf9fc 31955+ }
31956+};
7f207e10
AM
31957diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
31958--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 31959+++ linux/fs/aufs/whout.c 2015-09-24 10:47:58.258053165 +0200
5527c038 31960@@ -0,0 +1,1063 @@
1facf9fc 31961+/*
2000de60 31962+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 31963+ *
31964+ * This program, aufs is free software; you can redistribute it and/or modify
31965+ * it under the terms of the GNU General Public License as published by
31966+ * the Free Software Foundation; either version 2 of the License, or
31967+ * (at your option) any later version.
dece6358
AM
31968+ *
31969+ * This program is distributed in the hope that it will be useful,
31970+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31971+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31972+ * GNU General Public License for more details.
31973+ *
31974+ * You should have received a copy of the GNU General Public License
523b37e3 31975+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31976+ */
31977+
31978+/*
31979+ * whiteout for logical deletion and opaque directory
31980+ */
31981+
1facf9fc 31982+#include "aufs.h"
31983+
31984+#define WH_MASK S_IRUGO
31985+
31986+/*
31987+ * If a directory contains this file, then it is opaque. We start with the
31988+ * .wh. flag so that it is blocked by lookup.
31989+ */
0c3ec466
AM
31990+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
31991+ sizeof(AUFS_WH_DIROPQ) - 1);
1facf9fc 31992+
31993+/*
31994+ * generate whiteout name, which is NOT terminated by NULL.
31995+ * @name: original d_name.name
31996+ * @len: original d_name.len
31997+ * @wh: whiteout qstr
31998+ * returns zero when succeeds, otherwise error.
31999+ * succeeded value as wh->name should be freed by kfree().
32000+ */
32001+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
32002+{
32003+ char *p;
32004+
32005+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
32006+ return -ENAMETOOLONG;
32007+
32008+ wh->len = name->len + AUFS_WH_PFX_LEN;
32009+ p = kmalloc(wh->len, GFP_NOFS);
32010+ wh->name = p;
32011+ if (p) {
32012+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32013+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
32014+ /* smp_mb(); */
32015+ return 0;
32016+ }
32017+ return -ENOMEM;
32018+}
32019+
32020+/* ---------------------------------------------------------------------- */
32021+
32022+/*
32023+ * test if the @wh_name exists under @h_parent.
32024+ * @try_sio specifies the necessary of super-io.
32025+ */
076b876e 32026+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
1facf9fc 32027+{
32028+ int err;
32029+ struct dentry *wh_dentry;
1facf9fc 32030+
1facf9fc 32031+ if (!try_sio)
b4510431 32032+ wh_dentry = vfsub_lkup_one(wh_name, h_parent);
1facf9fc 32033+ else
076b876e 32034+ wh_dentry = au_sio_lkup_one(wh_name, h_parent);
1facf9fc 32035+ err = PTR_ERR(wh_dentry);
2000de60
JR
32036+ if (IS_ERR(wh_dentry)) {
32037+ if (err == -ENAMETOOLONG)
32038+ err = 0;
1facf9fc 32039+ goto out;
2000de60 32040+ }
1facf9fc 32041+
32042+ err = 0;
5527c038 32043+ if (d_is_negative(wh_dentry))
1facf9fc 32044+ goto out_wh; /* success */
32045+
32046+ err = 1;
7e9cd9fe 32047+ if (d_is_reg(wh_dentry))
1facf9fc 32048+ goto out_wh; /* success */
32049+
32050+ err = -EIO;
523b37e3 32051+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
5527c038 32052+ wh_dentry, d_inode(wh_dentry)->i_mode);
1facf9fc 32053+
4f0767ce 32054+out_wh:
1facf9fc 32055+ dput(wh_dentry);
4f0767ce 32056+out:
1facf9fc 32057+ return err;
32058+}
32059+
32060+/*
32061+ * test if the @h_dentry sets opaque or not.
32062+ */
076b876e 32063+int au_diropq_test(struct dentry *h_dentry)
1facf9fc 32064+{
32065+ int err;
32066+ struct inode *h_dir;
32067+
5527c038 32068+ h_dir = d_inode(h_dentry);
076b876e 32069+ err = au_wh_test(h_dentry, &diropq_name,
1facf9fc 32070+ au_test_h_perm_sio(h_dir, MAY_EXEC));
32071+ return err;
32072+}
32073+
32074+/*
32075+ * returns a negative dentry whose name is unique and temporary.
32076+ */
32077+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
32078+ struct qstr *prefix)
32079+{
1facf9fc 32080+ struct dentry *dentry;
32081+ int i;
027c5e7a 32082+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 32083+ *name, *p;
027c5e7a 32084+ /* strict atomic_t is unnecessary here */
1facf9fc 32085+ static unsigned short cnt;
32086+ struct qstr qs;
32087+
4a4d8108
AM
32088+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
32089+
1facf9fc 32090+ name = defname;
027c5e7a
AM
32091+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
32092+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 32093+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 32094+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 32095+ goto out;
32096+ dentry = ERR_PTR(-ENOMEM);
32097+ name = kmalloc(qs.len + 1, GFP_NOFS);
32098+ if (unlikely(!name))
32099+ goto out;
32100+ }
32101+
32102+ /* doubly whiteout-ed */
32103+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
32104+ p = name + AUFS_WH_PFX_LEN * 2;
32105+ memcpy(p, prefix->name, prefix->len);
32106+ p += prefix->len;
32107+ *p++ = '.';
4a4d8108 32108+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 32109+
32110+ qs.name = name;
32111+ for (i = 0; i < 3; i++) {
b752ccd1 32112+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
076b876e 32113+ dentry = au_sio_lkup_one(&qs, h_parent);
5527c038 32114+ if (IS_ERR(dentry) || d_is_negative(dentry))
1facf9fc 32115+ goto out_name;
32116+ dput(dentry);
32117+ }
0c3ec466 32118+ /* pr_warn("could not get random name\n"); */
1facf9fc 32119+ dentry = ERR_PTR(-EEXIST);
32120+ AuDbg("%.*s\n", AuLNPair(&qs));
32121+ BUG();
32122+
4f0767ce 32123+out_name:
1facf9fc 32124+ if (name != defname)
32125+ kfree(name);
4f0767ce 32126+out:
4a4d8108 32127+ AuTraceErrPtr(dentry);
1facf9fc 32128+ return dentry;
1facf9fc 32129+}
32130+
32131+/*
32132+ * rename the @h_dentry on @br to the whiteouted temporary name.
32133+ */
32134+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
32135+{
32136+ int err;
32137+ struct path h_path = {
86dc4139 32138+ .mnt = au_br_mnt(br)
1facf9fc 32139+ };
523b37e3 32140+ struct inode *h_dir, *delegated;
1facf9fc 32141+ struct dentry *h_parent;
32142+
32143+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 32144+ h_dir = d_inode(h_parent);
1facf9fc 32145+ IMustLock(h_dir);
32146+
32147+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
32148+ err = PTR_ERR(h_path.dentry);
32149+ if (IS_ERR(h_path.dentry))
32150+ goto out;
32151+
32152+ /* under the same dir, no need to lock_rename() */
523b37e3
AM
32153+ delegated = NULL;
32154+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated);
1facf9fc 32155+ AuTraceErr(err);
523b37e3
AM
32156+ if (unlikely(err == -EWOULDBLOCK)) {
32157+ pr_warn("cannot retry for NFSv4 delegation"
32158+ " for an internal rename\n");
32159+ iput(delegated);
32160+ }
1facf9fc 32161+ dput(h_path.dentry);
32162+
4f0767ce 32163+out:
4a4d8108 32164+ AuTraceErr(err);
1facf9fc 32165+ return err;
32166+}
32167+
32168+/* ---------------------------------------------------------------------- */
32169+/*
32170+ * functions for removing a whiteout
32171+ */
32172+
32173+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
32174+{
523b37e3
AM
32175+ int err, force;
32176+ struct inode *delegated;
1facf9fc 32177+
32178+ /*
32179+ * forces superio when the dir has a sticky bit.
32180+ * this may be a violation of unix fs semantics.
32181+ */
32182+ force = (h_dir->i_mode & S_ISVTX)
5527c038 32183+ && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
523b37e3
AM
32184+ delegated = NULL;
32185+ err = vfsub_unlink(h_dir, h_path, &delegated, force);
32186+ if (unlikely(err == -EWOULDBLOCK)) {
32187+ pr_warn("cannot retry for NFSv4 delegation"
32188+ " for an internal unlink\n");
32189+ iput(delegated);
32190+ }
32191+ return err;
1facf9fc 32192+}
32193+
32194+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
32195+ struct dentry *dentry)
32196+{
32197+ int err;
32198+
32199+ err = do_unlink_wh(h_dir, h_path);
32200+ if (!err && dentry)
32201+ au_set_dbwh(dentry, -1);
32202+
32203+ return err;
32204+}
32205+
32206+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
32207+ struct au_branch *br)
32208+{
32209+ int err;
32210+ struct path h_path = {
86dc4139 32211+ .mnt = au_br_mnt(br)
1facf9fc 32212+ };
32213+
32214+ err = 0;
b4510431 32215+ h_path.dentry = vfsub_lkup_one(wh, h_parent);
1facf9fc 32216+ if (IS_ERR(h_path.dentry))
32217+ err = PTR_ERR(h_path.dentry);
32218+ else {
5527c038
JR
32219+ if (d_is_reg(h_path.dentry))
32220+ err = do_unlink_wh(d_inode(h_parent), &h_path);
1facf9fc 32221+ dput(h_path.dentry);
32222+ }
32223+
32224+ return err;
32225+}
32226+
32227+/* ---------------------------------------------------------------------- */
32228+/*
32229+ * initialize/clean whiteout for a branch
32230+ */
32231+
32232+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
32233+ const int isdir)
32234+{
32235+ int err;
523b37e3 32236+ struct inode *delegated;
1facf9fc 32237+
5527c038 32238+ if (d_is_negative(whpath->dentry))
1facf9fc 32239+ return;
32240+
86dc4139
AM
32241+ if (isdir)
32242+ err = vfsub_rmdir(h_dir, whpath);
523b37e3
AM
32243+ else {
32244+ delegated = NULL;
32245+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
32246+ if (unlikely(err == -EWOULDBLOCK)) {
32247+ pr_warn("cannot retry for NFSv4 delegation"
32248+ " for an internal unlink\n");
32249+ iput(delegated);
32250+ }
32251+ }
1facf9fc 32252+ if (unlikely(err))
523b37e3
AM
32253+ pr_warn("failed removing %pd (%d), ignored.\n",
32254+ whpath->dentry, err);
1facf9fc 32255+}
32256+
32257+static int test_linkable(struct dentry *h_root)
32258+{
5527c038 32259+ struct inode *h_dir = d_inode(h_root);
1facf9fc 32260+
32261+ if (h_dir->i_op->link)
32262+ return 0;
32263+
523b37e3
AM
32264+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
32265+ h_root, au_sbtype(h_root->d_sb));
1facf9fc 32266+ return -ENOSYS;
32267+}
32268+
32269+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
32270+static int au_whdir(struct inode *h_dir, struct path *path)
32271+{
32272+ int err;
32273+
32274+ err = -EEXIST;
5527c038 32275+ if (d_is_negative(path->dentry)) {
1facf9fc 32276+ int mode = S_IRWXU;
32277+
32278+ if (au_test_nfs(path->dentry->d_sb))
32279+ mode |= S_IXUGO;
86dc4139 32280+ err = vfsub_mkdir(h_dir, path, mode);
2000de60 32281+ } else if (d_is_dir(path->dentry))
1facf9fc 32282+ err = 0;
32283+ else
523b37e3 32284+ pr_err("unknown %pd exists\n", path->dentry);
1facf9fc 32285+
32286+ return err;
32287+}
32288+
32289+struct au_wh_base {
32290+ const struct qstr *name;
32291+ struct dentry *dentry;
32292+};
32293+
32294+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
32295+ struct path *h_path)
32296+{
32297+ h_path->dentry = base[AuBrWh_BASE].dentry;
32298+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32299+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32300+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32301+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32302+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32303+}
32304+
32305+/*
32306+ * returns tri-state,
c1595e42 32307+ * minus: error, caller should print the message
1facf9fc 32308+ * zero: succuess
c1595e42 32309+ * plus: error, caller should NOT print the message
1facf9fc 32310+ */
32311+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
32312+ int do_plink, struct au_wh_base base[],
32313+ struct path *h_path)
32314+{
32315+ int err;
32316+ struct inode *h_dir;
32317+
5527c038 32318+ h_dir = d_inode(h_root);
1facf9fc 32319+ h_path->dentry = base[AuBrWh_BASE].dentry;
32320+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32321+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32322+ if (do_plink) {
32323+ err = test_linkable(h_root);
32324+ if (unlikely(err)) {
32325+ err = 1;
32326+ goto out;
32327+ }
32328+
32329+ err = au_whdir(h_dir, h_path);
32330+ if (unlikely(err))
32331+ goto out;
32332+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32333+ } else
32334+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32335+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32336+ err = au_whdir(h_dir, h_path);
32337+ if (unlikely(err))
32338+ goto out;
32339+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32340+
4f0767ce 32341+out:
1facf9fc 32342+ return err;
32343+}
32344+
32345+/*
32346+ * for the moment, aufs supports the branch filesystem which does not support
32347+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
32348+ * copyup failed. finally, such filesystem will not be used as the writable
32349+ * branch.
32350+ *
32351+ * returns tri-state, see above.
32352+ */
32353+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
32354+ int do_plink, struct au_wh_base base[],
32355+ struct path *h_path)
32356+{
32357+ int err;
32358+ struct inode *h_dir;
32359+
1308ab2a 32360+ WbrWhMustWriteLock(wbr);
32361+
1facf9fc 32362+ err = test_linkable(h_root);
32363+ if (unlikely(err)) {
32364+ err = 1;
32365+ goto out;
32366+ }
32367+
32368+ /*
32369+ * todo: should this create be done in /sbin/mount.aufs helper?
32370+ */
32371+ err = -EEXIST;
5527c038
JR
32372+ h_dir = d_inode(h_root);
32373+ if (d_is_negative(base[AuBrWh_BASE].dentry)) {
86dc4139
AM
32374+ h_path->dentry = base[AuBrWh_BASE].dentry;
32375+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
7e9cd9fe 32376+ } else if (d_is_reg(base[AuBrWh_BASE].dentry))
1facf9fc 32377+ err = 0;
32378+ else
523b37e3 32379+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
1facf9fc 32380+ if (unlikely(err))
32381+ goto out;
32382+
32383+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32384+ if (do_plink) {
32385+ err = au_whdir(h_dir, h_path);
32386+ if (unlikely(err))
32387+ goto out;
32388+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32389+ } else
32390+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32391+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
32392+
32393+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32394+ err = au_whdir(h_dir, h_path);
32395+ if (unlikely(err))
32396+ goto out;
32397+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32398+
4f0767ce 32399+out:
1facf9fc 32400+ return err;
32401+}
32402+
32403+/*
32404+ * initialize the whiteout base file/dir for @br.
32405+ */
86dc4139 32406+int au_wh_init(struct au_branch *br, struct super_block *sb)
1facf9fc 32407+{
32408+ int err, i;
32409+ const unsigned char do_plink
32410+ = !!au_opt_test(au_mntflags(sb), PLINK);
1facf9fc 32411+ struct inode *h_dir;
86dc4139
AM
32412+ struct path path = br->br_path;
32413+ struct dentry *h_root = path.dentry;
1facf9fc 32414+ struct au_wbr *wbr = br->br_wbr;
32415+ static const struct qstr base_name[] = {
0c3ec466
AM
32416+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
32417+ sizeof(AUFS_BASE_NAME) - 1),
32418+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
32419+ sizeof(AUFS_PLINKDIR_NAME) - 1),
32420+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
32421+ sizeof(AUFS_ORPHDIR_NAME) - 1)
1facf9fc 32422+ };
32423+ struct au_wh_base base[] = {
32424+ [AuBrWh_BASE] = {
32425+ .name = base_name + AuBrWh_BASE,
32426+ .dentry = NULL
32427+ },
32428+ [AuBrWh_PLINK] = {
32429+ .name = base_name + AuBrWh_PLINK,
32430+ .dentry = NULL
32431+ },
32432+ [AuBrWh_ORPH] = {
32433+ .name = base_name + AuBrWh_ORPH,
32434+ .dentry = NULL
32435+ }
32436+ };
32437+
1308ab2a 32438+ if (wbr)
32439+ WbrWhMustWriteLock(wbr);
1facf9fc 32440+
1facf9fc 32441+ for (i = 0; i < AuBrWh_Last; i++) {
32442+ /* doubly whiteouted */
32443+ struct dentry *d;
32444+
32445+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
32446+ err = PTR_ERR(d);
32447+ if (IS_ERR(d))
32448+ goto out;
32449+
32450+ base[i].dentry = d;
32451+ AuDebugOn(wbr
32452+ && wbr->wbr_wh[i]
32453+ && wbr->wbr_wh[i] != base[i].dentry);
32454+ }
32455+
32456+ if (wbr)
32457+ for (i = 0; i < AuBrWh_Last; i++) {
32458+ dput(wbr->wbr_wh[i]);
32459+ wbr->wbr_wh[i] = NULL;
32460+ }
32461+
32462+ err = 0;
1e00d052 32463+ if (!au_br_writable(br->br_perm)) {
5527c038 32464+ h_dir = d_inode(h_root);
1facf9fc 32465+ au_wh_init_ro(h_dir, base, &path);
1e00d052 32466+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 32467+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
32468+ if (err > 0)
32469+ goto out;
32470+ else if (err)
32471+ goto out_err;
1e00d052 32472+ } else {
1facf9fc 32473+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
32474+ if (err > 0)
32475+ goto out;
32476+ else if (err)
32477+ goto out_err;
1facf9fc 32478+ }
32479+ goto out; /* success */
32480+
4f0767ce 32481+out_err:
523b37e3
AM
32482+ pr_err("an error(%d) on the writable branch %pd(%s)\n",
32483+ err, h_root, au_sbtype(h_root->d_sb));
4f0767ce 32484+out:
1facf9fc 32485+ for (i = 0; i < AuBrWh_Last; i++)
32486+ dput(base[i].dentry);
32487+ return err;
32488+}
32489+
32490+/* ---------------------------------------------------------------------- */
32491+/*
32492+ * whiteouts are all hard-linked usually.
32493+ * when its link count reaches a ceiling, we create a new whiteout base
32494+ * asynchronously.
32495+ */
32496+
32497+struct reinit_br_wh {
32498+ struct super_block *sb;
32499+ struct au_branch *br;
32500+};
32501+
32502+static void reinit_br_wh(void *arg)
32503+{
32504+ int err;
32505+ aufs_bindex_t bindex;
32506+ struct path h_path;
32507+ struct reinit_br_wh *a = arg;
32508+ struct au_wbr *wbr;
523b37e3 32509+ struct inode *dir, *delegated;
1facf9fc 32510+ struct dentry *h_root;
32511+ struct au_hinode *hdir;
32512+
32513+ err = 0;
32514+ wbr = a->br->br_wbr;
32515+ /* big aufs lock */
32516+ si_noflush_write_lock(a->sb);
32517+ if (!au_br_writable(a->br->br_perm))
32518+ goto out;
32519+ bindex = au_br_index(a->sb, a->br->br_id);
32520+ if (unlikely(bindex < 0))
32521+ goto out;
32522+
1308ab2a 32523+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
5527c038 32524+ dir = d_inode(a->sb->s_root);
1facf9fc 32525+ hdir = au_hi(dir, bindex);
32526+ h_root = au_h_dptr(a->sb->s_root, bindex);
86dc4139 32527+ AuDebugOn(h_root != au_br_dentry(a->br));
1facf9fc 32528+
4a4d8108 32529+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 32530+ wbr_wh_write_lock(wbr);
32531+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
32532+ h_root, a->br);
32533+ if (!err) {
86dc4139
AM
32534+ h_path.dentry = wbr->wbr_whbase;
32535+ h_path.mnt = au_br_mnt(a->br);
523b37e3
AM
32536+ delegated = NULL;
32537+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
32538+ /*force*/0);
32539+ if (unlikely(err == -EWOULDBLOCK)) {
32540+ pr_warn("cannot retry for NFSv4 delegation"
32541+ " for an internal unlink\n");
32542+ iput(delegated);
32543+ }
1facf9fc 32544+ } else {
523b37e3 32545+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
1facf9fc 32546+ err = 0;
32547+ }
32548+ dput(wbr->wbr_whbase);
32549+ wbr->wbr_whbase = NULL;
32550+ if (!err)
86dc4139 32551+ err = au_wh_init(a->br, a->sb);
1facf9fc 32552+ wbr_wh_write_unlock(wbr);
4a4d8108 32553+ au_hn_imtx_unlock(hdir);
1308ab2a 32554+ di_read_unlock(a->sb->s_root, AuLock_IR);
076b876e
AM
32555+ if (!err)
32556+ au_fhsm_wrote(a->sb, bindex, /*force*/0);
1facf9fc 32557+
4f0767ce 32558+out:
1facf9fc 32559+ if (wbr)
32560+ atomic_dec(&wbr->wbr_wh_running);
32561+ atomic_dec(&a->br->br_count);
1facf9fc 32562+ si_write_unlock(a->sb);
027c5e7a 32563+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
1facf9fc 32564+ kfree(arg);
32565+ if (unlikely(err))
32566+ AuIOErr("err %d\n", err);
32567+}
32568+
32569+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
32570+{
32571+ int do_dec, wkq_err;
32572+ struct reinit_br_wh *arg;
32573+
32574+ do_dec = 1;
32575+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
32576+ goto out;
32577+
32578+ /* ignore ENOMEM */
32579+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
32580+ if (arg) {
32581+ /*
32582+ * dec(wh_running), kfree(arg) and dec(br_count)
32583+ * in reinit function
32584+ */
32585+ arg->sb = sb;
32586+ arg->br = br;
32587+ atomic_inc(&br->br_count);
53392da6 32588+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 32589+ if (unlikely(wkq_err)) {
32590+ atomic_dec(&br->br_wbr->wbr_wh_running);
32591+ atomic_dec(&br->br_count);
32592+ kfree(arg);
32593+ }
32594+ do_dec = 0;
32595+ }
32596+
4f0767ce 32597+out:
1facf9fc 32598+ if (do_dec)
32599+ atomic_dec(&br->br_wbr->wbr_wh_running);
32600+}
32601+
32602+/* ---------------------------------------------------------------------- */
32603+
32604+/*
32605+ * create the whiteout @wh.
32606+ */
32607+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
32608+ struct dentry *wh)
32609+{
32610+ int err;
32611+ struct path h_path = {
32612+ .dentry = wh
32613+ };
32614+ struct au_branch *br;
32615+ struct au_wbr *wbr;
32616+ struct dentry *h_parent;
523b37e3 32617+ struct inode *h_dir, *delegated;
1facf9fc 32618+
32619+ h_parent = wh->d_parent; /* dir inode is locked */
5527c038 32620+ h_dir = d_inode(h_parent);
1facf9fc 32621+ IMustLock(h_dir);
32622+
32623+ br = au_sbr(sb, bindex);
86dc4139 32624+ h_path.mnt = au_br_mnt(br);
1facf9fc 32625+ wbr = br->br_wbr;
32626+ wbr_wh_read_lock(wbr);
32627+ if (wbr->wbr_whbase) {
523b37e3
AM
32628+ delegated = NULL;
32629+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
32630+ if (unlikely(err == -EWOULDBLOCK)) {
32631+ pr_warn("cannot retry for NFSv4 delegation"
32632+ " for an internal link\n");
32633+ iput(delegated);
32634+ }
1facf9fc 32635+ if (!err || err != -EMLINK)
32636+ goto out;
32637+
32638+ /* link count full. re-initialize br_whbase. */
32639+ kick_reinit_br_wh(sb, br);
32640+ }
32641+
32642+ /* return this error in this context */
b4510431 32643+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
076b876e
AM
32644+ if (!err)
32645+ au_fhsm_wrote(sb, bindex, /*force*/0);
1facf9fc 32646+
4f0767ce 32647+out:
1facf9fc 32648+ wbr_wh_read_unlock(wbr);
32649+ return err;
32650+}
32651+
32652+/* ---------------------------------------------------------------------- */
32653+
32654+/*
32655+ * create or remove the diropq.
32656+ */
32657+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
32658+ unsigned int flags)
32659+{
32660+ struct dentry *opq_dentry, *h_dentry;
32661+ struct super_block *sb;
32662+ struct au_branch *br;
32663+ int err;
32664+
32665+ sb = dentry->d_sb;
32666+ br = au_sbr(sb, bindex);
32667+ h_dentry = au_h_dptr(dentry, bindex);
b4510431 32668+ opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
1facf9fc 32669+ if (IS_ERR(opq_dentry))
32670+ goto out;
32671+
32672+ if (au_ftest_diropq(flags, CREATE)) {
32673+ err = link_or_create_wh(sb, bindex, opq_dentry);
32674+ if (!err) {
32675+ au_set_dbdiropq(dentry, bindex);
32676+ goto out; /* success */
32677+ }
32678+ } else {
32679+ struct path tmp = {
32680+ .dentry = opq_dentry,
86dc4139 32681+ .mnt = au_br_mnt(br)
1facf9fc 32682+ };
5527c038 32683+ err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
1facf9fc 32684+ if (!err)
32685+ au_set_dbdiropq(dentry, -1);
32686+ }
32687+ dput(opq_dentry);
32688+ opq_dentry = ERR_PTR(err);
32689+
4f0767ce 32690+out:
1facf9fc 32691+ return opq_dentry;
32692+}
32693+
32694+struct do_diropq_args {
32695+ struct dentry **errp;
32696+ struct dentry *dentry;
32697+ aufs_bindex_t bindex;
32698+ unsigned int flags;
32699+};
32700+
32701+static void call_do_diropq(void *args)
32702+{
32703+ struct do_diropq_args *a = args;
32704+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
32705+}
32706+
32707+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
32708+ unsigned int flags)
32709+{
32710+ struct dentry *diropq, *h_dentry;
32711+
32712+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 32713+ if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
1facf9fc 32714+ diropq = do_diropq(dentry, bindex, flags);
32715+ else {
32716+ int wkq_err;
32717+ struct do_diropq_args args = {
32718+ .errp = &diropq,
32719+ .dentry = dentry,
32720+ .bindex = bindex,
32721+ .flags = flags
32722+ };
32723+
32724+ wkq_err = au_wkq_wait(call_do_diropq, &args);
32725+ if (unlikely(wkq_err))
32726+ diropq = ERR_PTR(wkq_err);
32727+ }
32728+
32729+ return diropq;
32730+}
32731+
32732+/* ---------------------------------------------------------------------- */
32733+
32734+/*
32735+ * lookup whiteout dentry.
32736+ * @h_parent: lower parent dentry which must exist and be locked
32737+ * @base_name: name of dentry which will be whiteouted
32738+ * returns dentry for whiteout.
32739+ */
32740+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
32741+ struct au_branch *br)
32742+{
32743+ int err;
32744+ struct qstr wh_name;
32745+ struct dentry *wh_dentry;
32746+
32747+ err = au_wh_name_alloc(&wh_name, base_name);
32748+ wh_dentry = ERR_PTR(err);
32749+ if (!err) {
b4510431 32750+ wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
1facf9fc 32751+ kfree(wh_name.name);
32752+ }
32753+ return wh_dentry;
32754+}
32755+
32756+/*
32757+ * link/create a whiteout for @dentry on @bindex.
32758+ */
32759+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
32760+ struct dentry *h_parent)
32761+{
32762+ struct dentry *wh_dentry;
32763+ struct super_block *sb;
32764+ int err;
32765+
32766+ sb = dentry->d_sb;
32767+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
5527c038 32768+ if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) {
1facf9fc 32769+ err = link_or_create_wh(sb, bindex, wh_dentry);
076b876e 32770+ if (!err) {
1facf9fc 32771+ au_set_dbwh(dentry, bindex);
076b876e
AM
32772+ au_fhsm_wrote(sb, bindex, /*force*/0);
32773+ } else {
1facf9fc 32774+ dput(wh_dentry);
32775+ wh_dentry = ERR_PTR(err);
32776+ }
32777+ }
32778+
32779+ return wh_dentry;
32780+}
32781+
32782+/* ---------------------------------------------------------------------- */
32783+
32784+/* Delete all whiteouts in this directory on branch bindex. */
32785+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
32786+ aufs_bindex_t bindex, struct au_branch *br)
32787+{
32788+ int err;
32789+ unsigned long ul, n;
32790+ struct qstr wh_name;
32791+ char *p;
32792+ struct hlist_head *head;
c06a8ce3 32793+ struct au_vdir_wh *pos;
1facf9fc 32794+ struct au_vdir_destr *str;
32795+
32796+ err = -ENOMEM;
537831f9 32797+ p = (void *)__get_free_page(GFP_NOFS);
1facf9fc 32798+ wh_name.name = p;
32799+ if (unlikely(!wh_name.name))
32800+ goto out;
32801+
32802+ err = 0;
32803+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32804+ p += AUFS_WH_PFX_LEN;
32805+ n = whlist->nh_num;
32806+ head = whlist->nh_head;
32807+ for (ul = 0; !err && ul < n; ul++, head++) {
c06a8ce3
AM
32808+ hlist_for_each_entry(pos, head, wh_hash) {
32809+ if (pos->wh_bindex != bindex)
1facf9fc 32810+ continue;
32811+
c06a8ce3 32812+ str = &pos->wh_str;
1facf9fc 32813+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
32814+ memcpy(p, str->name, str->len);
32815+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
32816+ err = unlink_wh_name(h_dentry, &wh_name, br);
32817+ if (!err)
32818+ continue;
32819+ break;
32820+ }
32821+ AuIOErr("whiteout name too long %.*s\n",
32822+ str->len, str->name);
32823+ err = -EIO;
32824+ break;
32825+ }
32826+ }
537831f9 32827+ free_page((unsigned long)wh_name.name);
1facf9fc 32828+
4f0767ce 32829+out:
1facf9fc 32830+ return err;
32831+}
32832+
32833+struct del_wh_children_args {
32834+ int *errp;
32835+ struct dentry *h_dentry;
1308ab2a 32836+ struct au_nhash *whlist;
1facf9fc 32837+ aufs_bindex_t bindex;
32838+ struct au_branch *br;
32839+};
32840+
32841+static void call_del_wh_children(void *args)
32842+{
32843+ struct del_wh_children_args *a = args;
1308ab2a 32844+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 32845+}
32846+
32847+/* ---------------------------------------------------------------------- */
32848+
32849+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
32850+{
32851+ struct au_whtmp_rmdir *whtmp;
dece6358 32852+ int err;
1308ab2a 32853+ unsigned int rdhash;
dece6358
AM
32854+
32855+ SiMustAnyLock(sb);
1facf9fc 32856+
32857+ whtmp = kmalloc(sizeof(*whtmp), gfp);
dece6358
AM
32858+ if (unlikely(!whtmp)) {
32859+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 32860+ goto out;
dece6358 32861+ }
1facf9fc 32862+
32863+ whtmp->dir = NULL;
027c5e7a 32864+ whtmp->br = NULL;
1facf9fc 32865+ whtmp->wh_dentry = NULL;
1308ab2a 32866+ /* no estimation for dir size */
32867+ rdhash = au_sbi(sb)->si_rdhash;
32868+ if (!rdhash)
32869+ rdhash = AUFS_RDHASH_DEF;
32870+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
32871+ if (unlikely(err)) {
32872+ kfree(whtmp);
32873+ whtmp = ERR_PTR(err);
32874+ }
dece6358 32875+
4f0767ce 32876+out:
dece6358 32877+ return whtmp;
1facf9fc 32878+}
32879+
32880+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
32881+{
027c5e7a
AM
32882+ if (whtmp->br)
32883+ atomic_dec(&whtmp->br->br_count);
1facf9fc 32884+ dput(whtmp->wh_dentry);
32885+ iput(whtmp->dir);
dece6358 32886+ au_nhash_wh_free(&whtmp->whlist);
1facf9fc 32887+ kfree(whtmp);
32888+}
32889+
32890+/*
32891+ * rmdir the whiteouted temporary named dir @wh_dentry.
32892+ * @whlist: whiteouted children.
32893+ */
32894+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
32895+ struct dentry *wh_dentry, struct au_nhash *whlist)
32896+{
32897+ int err;
2000de60 32898+ unsigned int h_nlink;
1facf9fc 32899+ struct path h_tmp;
32900+ struct inode *wh_inode, *h_dir;
32901+ struct au_branch *br;
32902+
5527c038 32903+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
1facf9fc 32904+ IMustLock(h_dir);
32905+
32906+ br = au_sbr(dir->i_sb, bindex);
5527c038 32907+ wh_inode = d_inode(wh_dentry);
1facf9fc 32908+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
32909+
32910+ /*
32911+ * someone else might change some whiteouts while we were sleeping.
32912+ * it means this whlist may have an obsoleted entry.
32913+ */
32914+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
32915+ err = del_wh_children(wh_dentry, whlist, bindex, br);
32916+ else {
32917+ int wkq_err;
32918+ struct del_wh_children_args args = {
32919+ .errp = &err,
32920+ .h_dentry = wh_dentry,
1308ab2a 32921+ .whlist = whlist,
1facf9fc 32922+ .bindex = bindex,
32923+ .br = br
32924+ };
32925+
32926+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
32927+ if (unlikely(wkq_err))
32928+ err = wkq_err;
32929+ }
32930+ mutex_unlock(&wh_inode->i_mutex);
32931+
32932+ if (!err) {
32933+ h_tmp.dentry = wh_dentry;
86dc4139 32934+ h_tmp.mnt = au_br_mnt(br);
2000de60 32935+ h_nlink = h_dir->i_nlink;
1facf9fc 32936+ err = vfsub_rmdir(h_dir, &h_tmp);
2000de60
JR
32937+ /* some fs doesn't change the parent nlink in some cases */
32938+ h_nlink -= h_dir->i_nlink;
1facf9fc 32939+ }
32940+
32941+ if (!err) {
32942+ if (au_ibstart(dir) == bindex) {
7f207e10 32943+ /* todo: dir->i_mutex is necessary */
1facf9fc 32944+ au_cpup_attr_timesizes(dir);
2000de60
JR
32945+ if (h_nlink)
32946+ vfsub_drop_nlink(dir);
1facf9fc 32947+ }
32948+ return 0; /* success */
32949+ }
32950+
523b37e3 32951+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
1facf9fc 32952+ return err;
32953+}
32954+
32955+static void call_rmdir_whtmp(void *args)
32956+{
32957+ int err;
e49829fe 32958+ aufs_bindex_t bindex;
1facf9fc 32959+ struct au_whtmp_rmdir *a = args;
32960+ struct super_block *sb;
32961+ struct dentry *h_parent;
32962+ struct inode *h_dir;
1facf9fc 32963+ struct au_hinode *hdir;
32964+
32965+ /* rmdir by nfsd may cause deadlock with this i_mutex */
32966+ /* mutex_lock(&a->dir->i_mutex); */
e49829fe 32967+ err = -EROFS;
1facf9fc 32968+ sb = a->dir->i_sb;
e49829fe
JR
32969+ si_read_lock(sb, !AuLock_FLUSH);
32970+ if (!au_br_writable(a->br->br_perm))
32971+ goto out;
32972+ bindex = au_br_index(sb, a->br->br_id);
32973+ if (unlikely(bindex < 0))
1facf9fc 32974+ goto out;
32975+
32976+ err = -EIO;
1facf9fc 32977+ ii_write_lock_parent(a->dir);
32978+ h_parent = dget_parent(a->wh_dentry);
5527c038 32979+ h_dir = d_inode(h_parent);
e49829fe 32980+ hdir = au_hi(a->dir, bindex);
86dc4139
AM
32981+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
32982+ if (unlikely(err))
32983+ goto out_mnt;
4a4d8108 32984+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
32985+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
32986+ a->br);
86dc4139
AM
32987+ if (!err)
32988+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
4a4d8108 32989+ au_hn_imtx_unlock(hdir);
86dc4139
AM
32990+ vfsub_mnt_drop_write(au_br_mnt(a->br));
32991+
32992+out_mnt:
1facf9fc 32993+ dput(h_parent);
32994+ ii_write_unlock(a->dir);
4f0767ce 32995+out:
1facf9fc 32996+ /* mutex_unlock(&a->dir->i_mutex); */
1facf9fc 32997+ au_whtmp_rmdir_free(a);
027c5e7a
AM
32998+ si_read_unlock(sb);
32999+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33000+ if (unlikely(err))
33001+ AuIOErr("err %d\n", err);
33002+}
33003+
33004+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33005+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
33006+{
33007+ int wkq_err;
e49829fe 33008+ struct super_block *sb;
1facf9fc 33009+
33010+ IMustLock(dir);
33011+
33012+ /* all post-process will be done in call_rmdir_whtmp(). */
e49829fe 33013+ sb = dir->i_sb;
1facf9fc 33014+ args->dir = au_igrab(dir);
e49829fe
JR
33015+ args->br = au_sbr(sb, bindex);
33016+ atomic_inc(&args->br->br_count);
1facf9fc 33017+ args->wh_dentry = dget(wh_dentry);
53392da6 33018+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 33019+ if (unlikely(wkq_err)) {
523b37e3 33020+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
1facf9fc 33021+ au_whtmp_rmdir_free(args);
33022+ }
33023+}
7f207e10
AM
33024diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
33025--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 33026+++ linux/fs/aufs/whout.h 2015-09-24 10:47:58.258053165 +0200
076b876e 33027@@ -0,0 +1,85 @@
1facf9fc 33028+/*
2000de60 33029+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 33030+ *
33031+ * This program, aufs is free software; you can redistribute it and/or modify
33032+ * it under the terms of the GNU General Public License as published by
33033+ * the Free Software Foundation; either version 2 of the License, or
33034+ * (at your option) any later version.
dece6358
AM
33035+ *
33036+ * This program is distributed in the hope that it will be useful,
33037+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33038+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33039+ * GNU General Public License for more details.
33040+ *
33041+ * You should have received a copy of the GNU General Public License
523b37e3 33042+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33043+ */
33044+
33045+/*
33046+ * whiteout for logical deletion and opaque directory
33047+ */
33048+
33049+#ifndef __AUFS_WHOUT_H__
33050+#define __AUFS_WHOUT_H__
33051+
33052+#ifdef __KERNEL__
33053+
1facf9fc 33054+#include "dir.h"
33055+
33056+/* whout.c */
33057+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
076b876e
AM
33058+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
33059+int au_diropq_test(struct dentry *h_dentry);
7e9cd9fe 33060+struct au_branch;
1facf9fc 33061+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
33062+ struct qstr *prefix);
33063+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
33064+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
33065+ struct dentry *dentry);
86dc4139 33066+int au_wh_init(struct au_branch *br, struct super_block *sb);
1facf9fc 33067+
33068+/* diropq flags */
33069+#define AuDiropq_CREATE 1
33070+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
33071+#define au_fset_diropq(flags, name) \
33072+ do { (flags) |= AuDiropq_##name; } while (0)
33073+#define au_fclr_diropq(flags, name) \
33074+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 33075+
33076+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33077+ unsigned int flags);
33078+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33079+ struct au_branch *br);
33080+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33081+ struct dentry *h_parent);
33082+
33083+/* real rmdir for the whiteout-ed dir */
33084+struct au_whtmp_rmdir {
33085+ struct inode *dir;
e49829fe 33086+ struct au_branch *br;
1facf9fc 33087+ struct dentry *wh_dentry;
dece6358 33088+ struct au_nhash whlist;
1facf9fc 33089+};
33090+
33091+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
33092+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
33093+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33094+ struct dentry *wh_dentry, struct au_nhash *whlist);
33095+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33096+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
33097+
33098+/* ---------------------------------------------------------------------- */
33099+
33100+static inline struct dentry *au_diropq_create(struct dentry *dentry,
33101+ aufs_bindex_t bindex)
33102+{
33103+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
33104+}
33105+
33106+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
33107+{
33108+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
33109+}
33110+
33111+#endif /* __KERNEL__ */
33112+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
33113diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
33114--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 33115+++ linux/fs/aufs/wkq.c 2015-09-24 10:47:58.258053165 +0200
38d290e6 33116@@ -0,0 +1,213 @@
1facf9fc 33117+/*
2000de60 33118+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 33119+ *
33120+ * This program, aufs is free software; you can redistribute it and/or modify
33121+ * it under the terms of the GNU General Public License as published by
33122+ * the Free Software Foundation; either version 2 of the License, or
33123+ * (at your option) any later version.
dece6358
AM
33124+ *
33125+ * This program is distributed in the hope that it will be useful,
33126+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33127+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33128+ * GNU General Public License for more details.
33129+ *
33130+ * You should have received a copy of the GNU General Public License
523b37e3 33131+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33132+ */
33133+
33134+/*
33135+ * workqueue for asynchronous/super-io operations
33136+ * todo: try new credential scheme
33137+ */
33138+
dece6358 33139+#include <linux/module.h>
1facf9fc 33140+#include "aufs.h"
33141+
9dbd164d 33142+/* internal workqueue named AUFS_WKQ_NAME */
b752ccd1 33143+
9dbd164d 33144+static struct workqueue_struct *au_wkq;
1facf9fc 33145+
33146+struct au_wkinfo {
33147+ struct work_struct wk;
7f207e10 33148+ struct kobject *kobj;
1facf9fc 33149+
33150+ unsigned int flags; /* see wkq.h */
33151+
33152+ au_wkq_func_t func;
33153+ void *args;
33154+
1facf9fc 33155+ struct completion *comp;
33156+};
33157+
33158+/* ---------------------------------------------------------------------- */
33159+
1facf9fc 33160+static void wkq_func(struct work_struct *wk)
33161+{
33162+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
33163+
2dfbb274 33164+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
7f207e10
AM
33165+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
33166+
1facf9fc 33167+ wkinfo->func(wkinfo->args);
1facf9fc 33168+ if (au_ftest_wkq(wkinfo->flags, WAIT))
33169+ complete(wkinfo->comp);
33170+ else {
7f207e10 33171+ kobject_put(wkinfo->kobj);
9dbd164d 33172+ module_put(THIS_MODULE); /* todo: ?? */
1facf9fc 33173+ kfree(wkinfo);
33174+ }
33175+}
33176+
33177+/*
33178+ * Since struct completion is large, try allocating it dynamically.
33179+ */
c2b27bf2 33180+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
1facf9fc 33181+#define AuWkqCompDeclare(name) struct completion *comp = NULL
33182+
33183+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33184+{
33185+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
33186+ if (*comp) {
33187+ init_completion(*comp);
33188+ wkinfo->comp = *comp;
33189+ return 0;
33190+ }
33191+ return -ENOMEM;
33192+}
33193+
33194+static void au_wkq_comp_free(struct completion *comp)
33195+{
33196+ kfree(comp);
33197+}
33198+
33199+#else
33200+
33201+/* no braces */
33202+#define AuWkqCompDeclare(name) \
33203+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
33204+ struct completion *comp = &_ ## name
33205+
33206+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33207+{
33208+ wkinfo->comp = *comp;
33209+ return 0;
33210+}
33211+
33212+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
33213+{
33214+ /* empty */
33215+}
33216+#endif /* 4KSTACKS */
33217+
53392da6 33218+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 33219+{
53392da6
AM
33220+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
33221+ if (au_wkq_test()) {
38d290e6
JR
33222+ AuWarn1("wkq from wkq, unless silly-rename on NFS,"
33223+ " due to a dead dir by UDBA?\n");
53392da6
AM
33224+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
33225+ }
33226+ } else
33227+ au_dbg_verify_kthread();
33228+
33229+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 33230+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
9dbd164d 33231+ queue_work(au_wkq, &wkinfo->wk);
4a4d8108
AM
33232+ } else {
33233+ INIT_WORK(&wkinfo->wk, wkq_func);
33234+ schedule_work(&wkinfo->wk);
33235+ }
1facf9fc 33236+}
33237+
7f207e10
AM
33238+/*
33239+ * Be careful. It is easy to make deadlock happen.
33240+ * processA: lock, wkq and wait
33241+ * processB: wkq and wait, lock in wkq
33242+ * --> deadlock
33243+ */
b752ccd1 33244+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 33245+{
33246+ int err;
33247+ AuWkqCompDeclare(comp);
33248+ struct au_wkinfo wkinfo = {
b752ccd1 33249+ .flags = flags,
1facf9fc 33250+ .func = func,
33251+ .args = args
33252+ };
33253+
33254+ err = au_wkq_comp_alloc(&wkinfo, &comp);
33255+ if (!err) {
53392da6 33256+ au_wkq_run(&wkinfo);
1facf9fc 33257+ /* no timeout, no interrupt */
33258+ wait_for_completion(wkinfo.comp);
33259+ au_wkq_comp_free(comp);
4a4d8108 33260+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 33261+ }
33262+
33263+ return err;
33264+
33265+}
33266+
027c5e7a
AM
33267+/*
33268+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
33269+ * problem in a concurrent umounting.
33270+ */
53392da6
AM
33271+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33272+ unsigned int flags)
1facf9fc 33273+{
33274+ int err;
33275+ struct au_wkinfo *wkinfo;
33276+
33277+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
33278+
33279+ /*
33280+ * wkq_func() must free this wkinfo.
33281+ * it highly depends upon the implementation of workqueue.
33282+ */
33283+ err = 0;
33284+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
33285+ if (wkinfo) {
7f207e10 33286+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 33287+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 33288+ wkinfo->func = func;
33289+ wkinfo->args = args;
33290+ wkinfo->comp = NULL;
7f207e10 33291+ kobject_get(wkinfo->kobj);
9dbd164d 33292+ __module_get(THIS_MODULE); /* todo: ?? */
1facf9fc 33293+
53392da6 33294+ au_wkq_run(wkinfo);
1facf9fc 33295+ } else {
33296+ err = -ENOMEM;
e49829fe 33297+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33298+ }
33299+
33300+ return err;
33301+}
33302+
33303+/* ---------------------------------------------------------------------- */
33304+
33305+void au_nwt_init(struct au_nowait_tasks *nwt)
33306+{
33307+ atomic_set(&nwt->nw_len, 0);
4a4d8108 33308+ /* smp_mb(); */ /* atomic_set */
1facf9fc 33309+ init_waitqueue_head(&nwt->nw_wq);
33310+}
33311+
33312+void au_wkq_fin(void)
33313+{
9dbd164d 33314+ destroy_workqueue(au_wkq);
1facf9fc 33315+}
33316+
33317+int __init au_wkq_init(void)
33318+{
9dbd164d 33319+ int err;
b752ccd1
AM
33320+
33321+ err = 0;
86dc4139 33322+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
9dbd164d
AM
33323+ if (IS_ERR(au_wkq))
33324+ err = PTR_ERR(au_wkq);
33325+ else if (!au_wkq)
33326+ err = -ENOMEM;
b752ccd1
AM
33327+
33328+ return err;
1facf9fc 33329+}
7f207e10
AM
33330diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
33331--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 33332+++ linux/fs/aufs/wkq.h 2015-09-24 10:47:58.258053165 +0200
523b37e3 33333@@ -0,0 +1,91 @@
1facf9fc 33334+/*
2000de60 33335+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 33336+ *
33337+ * This program, aufs is free software; you can redistribute it and/or modify
33338+ * it under the terms of the GNU General Public License as published by
33339+ * the Free Software Foundation; either version 2 of the License, or
33340+ * (at your option) any later version.
dece6358
AM
33341+ *
33342+ * This program is distributed in the hope that it will be useful,
33343+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33344+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33345+ * GNU General Public License for more details.
33346+ *
33347+ * You should have received a copy of the GNU General Public License
523b37e3 33348+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33349+ */
33350+
33351+/*
33352+ * workqueue for asynchronous/super-io operations
33353+ * todo: try new credentials management scheme
33354+ */
33355+
33356+#ifndef __AUFS_WKQ_H__
33357+#define __AUFS_WKQ_H__
33358+
33359+#ifdef __KERNEL__
33360+
dece6358
AM
33361+struct super_block;
33362+
1facf9fc 33363+/* ---------------------------------------------------------------------- */
33364+
33365+/*
33366+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
33367+ */
33368+struct au_nowait_tasks {
33369+ atomic_t nw_len;
33370+ wait_queue_head_t nw_wq;
33371+};
33372+
33373+/* ---------------------------------------------------------------------- */
33374+
33375+typedef void (*au_wkq_func_t)(void *args);
33376+
33377+/* wkq flags */
33378+#define AuWkq_WAIT 1
9dbd164d 33379+#define AuWkq_NEST (1 << 1)
1facf9fc 33380+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
33381+#define au_fset_wkq(flags, name) \
33382+ do { (flags) |= AuWkq_##name; } while (0)
33383+#define au_fclr_wkq(flags, name) \
33384+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 33385+
9dbd164d
AM
33386+#ifndef CONFIG_AUFS_HNOTIFY
33387+#undef AuWkq_NEST
33388+#define AuWkq_NEST 0
33389+#endif
33390+
1facf9fc 33391+/* wkq.c */
b752ccd1 33392+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
33393+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33394+ unsigned int flags);
1facf9fc 33395+void au_nwt_init(struct au_nowait_tasks *nwt);
33396+int __init au_wkq_init(void);
33397+void au_wkq_fin(void);
33398+
33399+/* ---------------------------------------------------------------------- */
33400+
53392da6
AM
33401+static inline int au_wkq_test(void)
33402+{
33403+ return current->flags & PF_WQ_WORKER;
33404+}
33405+
b752ccd1 33406+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 33407+{
b752ccd1 33408+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 33409+}
33410+
33411+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
33412+{
e49829fe 33413+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 33414+ wake_up_all(&nwt->nw_wq);
33415+}
33416+
33417+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
33418+{
33419+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
33420+ return 0;
33421+}
33422+
33423+#endif /* __KERNEL__ */
33424+#endif /* __AUFS_WKQ_H__ */
c1595e42
JR
33425diff -urN /usr/share/empty/fs/aufs/xattr.c linux/fs/aufs/xattr.c
33426--- /usr/share/empty/fs/aufs/xattr.c 1970-01-01 01:00:00.000000000 +0100
c2c0f25c 33427+++ linux/fs/aufs/xattr.c 2015-09-24 10:47:58.258053165 +0200
b912730e 33428@@ -0,0 +1,344 @@
c1595e42 33429+/*
2000de60 33430+ * Copyright (C) 2014-2015 Junjiro R. Okajima
c1595e42
JR
33431+ *
33432+ * This program, aufs is free software; you can redistribute it and/or modify
33433+ * it under the terms of the GNU General Public License as published by
33434+ * the Free Software Foundation; either version 2 of the License, or
33435+ * (at your option) any later version.
33436+ *
33437+ * This program is distributed in the hope that it will be useful,
33438+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33439+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33440+ * GNU General Public License for more details.
33441+ *
33442+ * You should have received a copy of the GNU General Public License
33443+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
33444+ */
33445+
33446+/*
33447+ * handling xattr functions
33448+ */
33449+
33450+#include <linux/xattr.h>
33451+#include "aufs.h"
33452+
33453+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
33454+{
33455+ if (!ignore_flags)
33456+ goto out;
33457+ switch (err) {
33458+ case -ENOMEM:
33459+ case -EDQUOT:
33460+ goto out;
33461+ }
33462+
33463+ if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
33464+ err = 0;
33465+ goto out;
33466+ }
33467+
33468+#define cmp(brattr, prefix) do { \
33469+ if (!strncmp(name, XATTR_##prefix##_PREFIX, \
33470+ XATTR_##prefix##_PREFIX_LEN)) { \
33471+ if (ignore_flags & AuBrAttr_ICEX_##brattr) \
33472+ err = 0; \
33473+ goto out; \
33474+ } \
33475+ } while (0)
33476+
33477+ cmp(SEC, SECURITY);
33478+ cmp(SYS, SYSTEM);
33479+ cmp(TR, TRUSTED);
33480+ cmp(USR, USER);
33481+#undef cmp
33482+
33483+ if (ignore_flags & AuBrAttr_ICEX_OTH)
33484+ err = 0;
33485+
33486+out:
33487+ return err;
33488+}
33489+
33490+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
33491+
33492+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe
AM
33493+ char *name, char **buf, unsigned int ignore_flags,
33494+ unsigned int verbose)
c1595e42
JR
33495+{
33496+ int err;
33497+ ssize_t ssz;
33498+ struct inode *h_idst;
33499+
33500+ ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
33501+ err = ssz;
33502+ if (unlikely(err <= 0)) {
c1595e42
JR
33503+ if (err == -ENODATA
33504+ || (err == -EOPNOTSUPP
b912730e 33505+ && ((ignore_flags & au_xattr_out_of_list)
5527c038 33506+ || (au_test_nfs_noacl(d_inode(h_src))
b912730e
AM
33507+ && (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
33508+ || !strcmp(name,
33509+ XATTR_NAME_POSIX_ACL_DEFAULT))))
33510+ ))
c1595e42 33511+ err = 0;
b912730e
AM
33512+ if (err && (verbose || au_debug_test()))
33513+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
33514+ goto out;
33515+ }
33516+
33517+ /* unlock it temporarily */
5527c038 33518+ h_idst = d_inode(h_dst);
c1595e42
JR
33519+ mutex_unlock(&h_idst->i_mutex);
33520+ err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
33521+ mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
33522+ if (unlikely(err)) {
7e9cd9fe
AM
33523+ if (verbose || au_debug_test())
33524+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
33525+ err = au_xattr_ignore(err, name, ignore_flags);
33526+ }
33527+
33528+out:
33529+ return err;
33530+}
33531+
7e9cd9fe
AM
33532+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
33533+ unsigned int verbose)
c1595e42
JR
33534+{
33535+ int err, unlocked, acl_access, acl_default;
33536+ ssize_t ssz;
33537+ struct inode *h_isrc, *h_idst;
33538+ char *value, *p, *o, *e;
33539+
33540+ /* try to block updates to the source inode while we reference it */
7e9cd9fe 33541+ /* there should not be a parent-child relationship between them */
5527c038
JR
33542+ h_isrc = d_inode(h_src);
33543+ h_idst = d_inode(h_dst);
c1595e42
JR
33544+ mutex_unlock(&h_idst->i_mutex);
33545+ mutex_lock_nested(&h_isrc->i_mutex, AuLsc_I_CHILD);
33546+ mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
33547+ unlocked = 0;
33548+
33549+ /* some filesystems don't list POSIX ACL, for example tmpfs */
33550+ ssz = vfs_listxattr(h_src, NULL, 0);
33551+ err = ssz;
33552+ if (unlikely(err < 0)) {
33553+ AuTraceErr(err);
33554+ if (err == -ENODATA
33555+ || err == -EOPNOTSUPP)
33556+ err = 0; /* ignore */
33557+ goto out;
33558+ }
33559+
33560+ err = 0;
33561+ p = NULL;
33562+ o = NULL;
33563+ if (ssz) {
33564+ err = -ENOMEM;
33565+ p = kmalloc(ssz, GFP_NOFS);
33566+ o = p;
33567+ if (unlikely(!p))
33568+ goto out;
33569+ err = vfs_listxattr(h_src, p, ssz);
33570+ }
33571+ mutex_unlock(&h_isrc->i_mutex);
33572+ unlocked = 1;
33573+ AuDbg("err %d, ssz %zd\n", err, ssz);
33574+ if (unlikely(err < 0))
33575+ goto out_free;
33576+
33577+ err = 0;
33578+ e = p + ssz;
33579+ value = NULL;
33580+ acl_access = 0;
33581+ acl_default = 0;
33582+ while (!err && p < e) {
33583+ acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
33584+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
33585+ acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
33586+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
33587+ - 1);
7e9cd9fe
AM
33588+ err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
33589+ verbose);
c1595e42
JR
33590+ p += strlen(p) + 1;
33591+ }
33592+ AuTraceErr(err);
33593+ ignore_flags |= au_xattr_out_of_list;
33594+ if (!err && !acl_access) {
33595+ err = au_do_cpup_xattr(h_dst, h_src,
33596+ XATTR_NAME_POSIX_ACL_ACCESS, &value,
7e9cd9fe 33597+ ignore_flags, verbose);
c1595e42
JR
33598+ AuTraceErr(err);
33599+ }
33600+ if (!err && !acl_default) {
33601+ err = au_do_cpup_xattr(h_dst, h_src,
33602+ XATTR_NAME_POSIX_ACL_DEFAULT, &value,
7e9cd9fe 33603+ ignore_flags, verbose);
c1595e42
JR
33604+ AuTraceErr(err);
33605+ }
33606+
33607+ kfree(value);
33608+
33609+out_free:
33610+ kfree(o);
33611+out:
33612+ if (!unlocked)
33613+ mutex_unlock(&h_isrc->i_mutex);
33614+ AuTraceErr(err);
33615+ return err;
33616+}
33617+
33618+/* ---------------------------------------------------------------------- */
33619+
33620+enum {
33621+ AU_XATTR_LIST,
33622+ AU_XATTR_GET
33623+};
33624+
33625+struct au_lgxattr {
33626+ int type;
33627+ union {
33628+ struct {
33629+ char *list;
33630+ size_t size;
33631+ } list;
33632+ struct {
33633+ const char *name;
33634+ void *value;
33635+ size_t size;
33636+ } get;
33637+ } u;
33638+};
33639+
33640+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
33641+{
33642+ ssize_t err;
33643+ struct path h_path;
33644+ struct super_block *sb;
33645+
33646+ sb = dentry->d_sb;
33647+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
33648+ if (unlikely(err))
33649+ goto out;
33650+ err = au_h_path_getattr(dentry, /*force*/1, &h_path);
33651+ if (unlikely(err))
33652+ goto out_si;
33653+ if (unlikely(!h_path.dentry))
33654+ /* illegally overlapped or something */
33655+ goto out_di; /* pretending success */
33656+
33657+ /* always topmost entry only */
33658+ switch (arg->type) {
33659+ case AU_XATTR_LIST:
33660+ err = vfs_listxattr(h_path.dentry,
33661+ arg->u.list.list, arg->u.list.size);
33662+ break;
33663+ case AU_XATTR_GET:
33664+ err = vfs_getxattr(h_path.dentry,
33665+ arg->u.get.name, arg->u.get.value,
33666+ arg->u.get.size);
33667+ break;
33668+ }
33669+
33670+out_di:
33671+ di_read_unlock(dentry, AuLock_IR);
33672+out_si:
33673+ si_read_unlock(sb);
33674+out:
33675+ AuTraceErr(err);
33676+ return err;
33677+}
33678+
33679+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
33680+{
33681+ struct au_lgxattr arg = {
33682+ .type = AU_XATTR_LIST,
33683+ .u.list = {
33684+ .list = list,
33685+ .size = size
33686+ },
33687+ };
33688+
33689+ return au_lgxattr(dentry, &arg);
33690+}
33691+
33692+ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
33693+ size_t size)
33694+{
33695+ struct au_lgxattr arg = {
33696+ .type = AU_XATTR_GET,
33697+ .u.get = {
33698+ .name = name,
33699+ .value = value,
33700+ .size = size
33701+ },
33702+ };
33703+
33704+ return au_lgxattr(dentry, &arg);
33705+}
33706+
33707+int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
33708+ size_t size, int flags)
33709+{
33710+ struct au_srxattr arg = {
33711+ .type = AU_XATTR_SET,
33712+ .u.set = {
33713+ .name = name,
33714+ .value = value,
33715+ .size = size,
33716+ .flags = flags
33717+ },
33718+ };
33719+
33720+ return au_srxattr(dentry, &arg);
33721+}
33722+
33723+int aufs_removexattr(struct dentry *dentry, const char *name)
33724+{
33725+ struct au_srxattr arg = {
33726+ .type = AU_XATTR_REMOVE,
33727+ .u.remove = {
33728+ .name = name
33729+ },
33730+ };
33731+
33732+ return au_srxattr(dentry, &arg);
33733+}
33734+
33735+/* ---------------------------------------------------------------------- */
33736+
33737+#if 0
33738+static size_t au_xattr_list(struct dentry *dentry, char *list, size_t list_size,
33739+ const char *name, size_t name_len, int type)
33740+{
33741+ return aufs_listxattr(dentry, list, list_size);
33742+}
33743+
33744+static int au_xattr_get(struct dentry *dentry, const char *name, void *buffer,
33745+ size_t size, int type)
33746+{
33747+ return aufs_getxattr(dentry, name, buffer, size);
33748+}
33749+
33750+static int au_xattr_set(struct dentry *dentry, const char *name,
33751+ const void *value, size_t size, int flags, int type)
33752+{
33753+ return aufs_setxattr(dentry, name, value, size, flags);
33754+}
33755+
33756+static const struct xattr_handler au_xattr_handler = {
33757+ /* no prefix, no flags */
33758+ .list = au_xattr_list,
33759+ .get = au_xattr_get,
33760+ .set = au_xattr_set
33761+ /* why no remove? */
33762+};
33763+
33764+static const struct xattr_handler *au_xattr_handlers[] = {
33765+ &au_xattr_handler
33766+};
33767+
33768+void au_xattr_init(struct super_block *sb)
33769+{
33770+ /* sb->s_xattr = au_xattr_handlers; */
33771+}
33772+#endif
7f207e10
AM
33773diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
33774--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
79b8bda9
AM
33775+++ linux/fs/aufs/xino.c 2015-11-11 17:21:46.922197217 +0100
33776@@ -0,0 +1,1296 @@
1facf9fc 33777+/*
2000de60 33778+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 33779+ *
33780+ * This program, aufs is free software; you can redistribute it and/or modify
33781+ * it under the terms of the GNU General Public License as published by
33782+ * the Free Software Foundation; either version 2 of the License, or
33783+ * (at your option) any later version.
dece6358
AM
33784+ *
33785+ * This program is distributed in the hope that it will be useful,
33786+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33787+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33788+ * GNU General Public License for more details.
33789+ *
33790+ * You should have received a copy of the GNU General Public License
523b37e3 33791+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33792+ */
33793+
33794+/*
33795+ * external inode number translation table and bitmap
33796+ */
33797+
33798+#include <linux/seq_file.h>
392086de 33799+#include <linux/statfs.h>
1facf9fc 33800+#include "aufs.h"
33801+
9dbd164d 33802+/* todo: unnecessary to support mmap_sem since kernel-space? */
5527c038 33803+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 33804+ loff_t *pos)
33805+{
33806+ ssize_t err;
33807+ mm_segment_t oldfs;
b752ccd1
AM
33808+ union {
33809+ void *k;
33810+ char __user *u;
33811+ } buf;
1facf9fc 33812+
b752ccd1 33813+ buf.k = kbuf;
1facf9fc 33814+ oldfs = get_fs();
33815+ set_fs(KERNEL_DS);
33816+ do {
33817+ /* todo: signal_pending? */
b752ccd1 33818+ err = func(file, buf.u, size, pos);
1facf9fc 33819+ } while (err == -EAGAIN || err == -EINTR);
33820+ set_fs(oldfs);
33821+
33822+#if 0 /* reserved for future use */
33823+ if (err > 0)
2000de60 33824+ fsnotify_access(file->f_path.dentry);
1facf9fc 33825+#endif
33826+
33827+ return err;
33828+}
33829+
33830+/* ---------------------------------------------------------------------- */
33831+
5527c038 33832+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
1facf9fc 33833+ size_t size, loff_t *pos)
33834+{
33835+ ssize_t err;
33836+ mm_segment_t oldfs;
b752ccd1
AM
33837+ union {
33838+ void *k;
33839+ const char __user *u;
33840+ } buf;
1facf9fc 33841+
b752ccd1 33842+ buf.k = kbuf;
1facf9fc 33843+ oldfs = get_fs();
33844+ set_fs(KERNEL_DS);
1facf9fc 33845+ do {
33846+ /* todo: signal_pending? */
b752ccd1 33847+ err = func(file, buf.u, size, pos);
1facf9fc 33848+ } while (err == -EAGAIN || err == -EINTR);
1facf9fc 33849+ set_fs(oldfs);
33850+
33851+#if 0 /* reserved for future use */
33852+ if (err > 0)
2000de60 33853+ fsnotify_modify(file->f_path.dentry);
1facf9fc 33854+#endif
33855+
33856+ return err;
33857+}
33858+
33859+struct do_xino_fwrite_args {
33860+ ssize_t *errp;
5527c038 33861+ vfs_writef_t func;
1facf9fc 33862+ struct file *file;
33863+ void *buf;
33864+ size_t size;
33865+ loff_t *pos;
33866+};
33867+
33868+static void call_do_xino_fwrite(void *args)
33869+{
33870+ struct do_xino_fwrite_args *a = args;
33871+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
33872+}
33873+
5527c038
JR
33874+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
33875+ size_t size, loff_t *pos)
1facf9fc 33876+{
33877+ ssize_t err;
33878+
33879+ /* todo: signal block and no wkq? */
b752ccd1
AM
33880+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
33881+ lockdep_off();
33882+ err = do_xino_fwrite(func, file, buf, size, pos);
33883+ lockdep_on();
33884+ } else {
33885+ /*
33886+ * it breaks RLIMIT_FSIZE and normal user's limit,
33887+ * users should care about quota and real 'filesystem full.'
33888+ */
1facf9fc 33889+ int wkq_err;
33890+ struct do_xino_fwrite_args args = {
33891+ .errp = &err,
33892+ .func = func,
33893+ .file = file,
33894+ .buf = buf,
33895+ .size = size,
33896+ .pos = pos
33897+ };
33898+
33899+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
33900+ if (unlikely(wkq_err))
33901+ err = wkq_err;
b752ccd1 33902+ }
1facf9fc 33903+
33904+ return err;
33905+}
33906+
33907+/* ---------------------------------------------------------------------- */
33908+
33909+/*
33910+ * create a new xinofile at the same place/path as @base_file.
33911+ */
33912+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
33913+{
33914+ struct file *file;
4a4d8108 33915+ struct dentry *base, *parent;
523b37e3 33916+ struct inode *dir, *delegated;
1facf9fc 33917+ struct qstr *name;
1308ab2a 33918+ struct path path;
4a4d8108 33919+ int err;
1facf9fc 33920+
2000de60 33921+ base = base_file->f_path.dentry;
1facf9fc 33922+ parent = base->d_parent; /* dir inode is locked */
5527c038 33923+ dir = d_inode(parent);
1facf9fc 33924+ IMustLock(dir);
33925+
33926+ file = ERR_PTR(-EINVAL);
33927+ name = &base->d_name;
4a4d8108
AM
33928+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
33929+ if (IS_ERR(path.dentry)) {
33930+ file = (void *)path.dentry;
523b37e3
AM
33931+ pr_err("%pd lookup err %ld\n",
33932+ base, PTR_ERR(path.dentry));
1facf9fc 33933+ goto out;
33934+ }
33935+
33936+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 33937+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 33938+ if (unlikely(err)) {
33939+ file = ERR_PTR(err);
523b37e3 33940+ pr_err("%pd create err %d\n", base, err);
1facf9fc 33941+ goto out_dput;
33942+ }
33943+
c06a8ce3 33944+ path.mnt = base_file->f_path.mnt;
4a4d8108 33945+ file = vfsub_dentry_open(&path,
7f207e10 33946+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 33947+ /* | __FMODE_NONOTIFY */);
1facf9fc 33948+ if (IS_ERR(file)) {
523b37e3 33949+ pr_err("%pd open err %ld\n", base, PTR_ERR(file));
1facf9fc 33950+ goto out_dput;
33951+ }
33952+
523b37e3
AM
33953+ delegated = NULL;
33954+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
33955+ if (unlikely(err == -EWOULDBLOCK)) {
33956+ pr_warn("cannot retry for NFSv4 delegation"
33957+ " for an internal unlink\n");
33958+ iput(delegated);
33959+ }
1facf9fc 33960+ if (unlikely(err)) {
523b37e3 33961+ pr_err("%pd unlink err %d\n", base, err);
1facf9fc 33962+ goto out_fput;
33963+ }
33964+
33965+ if (copy_src) {
33966+ /* no one can touch copy_src xino */
c06a8ce3 33967+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
1facf9fc 33968+ if (unlikely(err)) {
523b37e3 33969+ pr_err("%pd copy err %d\n", base, err);
1facf9fc 33970+ goto out_fput;
33971+ }
33972+ }
33973+ goto out_dput; /* success */
33974+
4f0767ce 33975+out_fput:
1facf9fc 33976+ fput(file);
33977+ file = ERR_PTR(err);
4f0767ce 33978+out_dput:
4a4d8108 33979+ dput(path.dentry);
4f0767ce 33980+out:
1facf9fc 33981+ return file;
33982+}
33983+
33984+struct au_xino_lock_dir {
33985+ struct au_hinode *hdir;
33986+ struct dentry *parent;
33987+ struct mutex *mtx;
33988+};
33989+
33990+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
33991+ struct au_xino_lock_dir *ldir)
33992+{
33993+ aufs_bindex_t brid, bindex;
33994+
33995+ ldir->hdir = NULL;
33996+ bindex = -1;
33997+ brid = au_xino_brid(sb);
33998+ if (brid >= 0)
33999+ bindex = au_br_index(sb, brid);
34000+ if (bindex >= 0) {
5527c038 34001+ ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
4a4d8108 34002+ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 34003+ } else {
2000de60 34004+ ldir->parent = dget_parent(xino->f_path.dentry);
5527c038 34005+ ldir->mtx = &d_inode(ldir->parent)->i_mutex;
1facf9fc 34006+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
34007+ }
34008+}
34009+
34010+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
34011+{
34012+ if (ldir->hdir)
4a4d8108 34013+ au_hn_imtx_unlock(ldir->hdir);
1facf9fc 34014+ else {
34015+ mutex_unlock(ldir->mtx);
34016+ dput(ldir->parent);
34017+ }
34018+}
34019+
34020+/* ---------------------------------------------------------------------- */
34021+
34022+/* truncate xino files asynchronously */
34023+
34024+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
34025+{
34026+ int err;
392086de
AM
34027+ unsigned long jiffy;
34028+ blkcnt_t blocks;
1facf9fc 34029+ aufs_bindex_t bi, bend;
392086de 34030+ struct kstatfs *st;
1facf9fc 34031+ struct au_branch *br;
34032+ struct file *new_xino, *file;
34033+ struct super_block *h_sb;
34034+ struct au_xino_lock_dir ldir;
34035+
392086de
AM
34036+ err = -ENOMEM;
34037+ st = kzalloc(sizeof(*st), GFP_NOFS);
34038+ if (unlikely(!st))
34039+ goto out;
34040+
1facf9fc 34041+ err = -EINVAL;
34042+ bend = au_sbend(sb);
34043+ if (unlikely(bindex < 0 || bend < bindex))
392086de 34044+ goto out_st;
1facf9fc 34045+ br = au_sbr(sb, bindex);
34046+ file = br->br_xino.xi_file;
34047+ if (!file)
392086de
AM
34048+ goto out_st;
34049+
34050+ err = vfs_statfs(&file->f_path, st);
34051+ if (unlikely(err))
34052+ AuErr1("statfs err %d, ignored\n", err);
34053+ jiffy = jiffies;
34054+ blocks = file_inode(file)->i_blocks;
34055+ pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34056+ bindex, (u64)blocks, st->f_bfree, st->f_blocks);
1facf9fc 34057+
34058+ au_xino_lock_dir(sb, file, &ldir);
34059+ /* mnt_want_write() is unnecessary here */
34060+ new_xino = au_xino_create2(file, file);
34061+ au_xino_unlock_dir(&ldir);
34062+ err = PTR_ERR(new_xino);
392086de
AM
34063+ if (IS_ERR(new_xino)) {
34064+ pr_err("err %d, ignored\n", err);
34065+ goto out_st;
34066+ }
1facf9fc 34067+ err = 0;
34068+ fput(file);
34069+ br->br_xino.xi_file = new_xino;
34070+
86dc4139 34071+ h_sb = au_br_sb(br);
1facf9fc 34072+ for (bi = 0; bi <= bend; bi++) {
34073+ if (unlikely(bi == bindex))
34074+ continue;
34075+ br = au_sbr(sb, bi);
86dc4139 34076+ if (au_br_sb(br) != h_sb)
1facf9fc 34077+ continue;
34078+
34079+ fput(br->br_xino.xi_file);
34080+ br->br_xino.xi_file = new_xino;
34081+ get_file(new_xino);
34082+ }
34083+
392086de
AM
34084+ err = vfs_statfs(&new_xino->f_path, st);
34085+ if (!err) {
34086+ pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34087+ bindex, (u64)file_inode(new_xino)->i_blocks,
34088+ st->f_bfree, st->f_blocks);
34089+ if (file_inode(new_xino)->i_blocks < blocks)
34090+ au_sbi(sb)->si_xino_jiffy = jiffy;
34091+ } else
34092+ AuErr1("statfs err %d, ignored\n", err);
34093+
34094+out_st:
34095+ kfree(st);
4f0767ce 34096+out:
1facf9fc 34097+ return err;
34098+}
34099+
34100+struct xino_do_trunc_args {
34101+ struct super_block *sb;
34102+ struct au_branch *br;
34103+};
34104+
34105+static void xino_do_trunc(void *_args)
34106+{
34107+ struct xino_do_trunc_args *args = _args;
34108+ struct super_block *sb;
34109+ struct au_branch *br;
34110+ struct inode *dir;
34111+ int err;
34112+ aufs_bindex_t bindex;
34113+
34114+ err = 0;
34115+ sb = args->sb;
5527c038 34116+ dir = d_inode(sb->s_root);
1facf9fc 34117+ br = args->br;
34118+
34119+ si_noflush_write_lock(sb);
34120+ ii_read_lock_parent(dir);
34121+ bindex = au_br_index(sb, br->br_id);
34122+ err = au_xino_trunc(sb, bindex);
1facf9fc 34123+ ii_read_unlock(dir);
34124+ if (unlikely(err))
392086de 34125+ pr_warn("err b%d, (%d)\n", bindex, err);
1facf9fc 34126+ atomic_dec(&br->br_xino_running);
34127+ atomic_dec(&br->br_count);
1facf9fc 34128+ si_write_unlock(sb);
027c5e7a 34129+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 34130+ kfree(args);
34131+}
34132+
392086de
AM
34133+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
34134+{
34135+ int err;
34136+ struct kstatfs st;
34137+ struct au_sbinfo *sbinfo;
34138+
34139+ /* todo: si_xino_expire and the ratio should be customizable */
34140+ sbinfo = au_sbi(sb);
34141+ if (time_before(jiffies,
34142+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
34143+ return 0;
34144+
34145+ /* truncation border */
34146+ err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
34147+ if (unlikely(err)) {
34148+ AuErr1("statfs err %d, ignored\n", err);
34149+ return 0;
34150+ }
34151+ if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
34152+ return 0;
34153+
34154+ return 1;
34155+}
34156+
1facf9fc 34157+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
34158+{
34159+ struct xino_do_trunc_args *args;
34160+ int wkq_err;
34161+
392086de 34162+ if (!xino_trunc_test(sb, br))
1facf9fc 34163+ return;
34164+
34165+ if (atomic_inc_return(&br->br_xino_running) > 1)
34166+ goto out;
34167+
34168+ /* lock and kfree() will be called in xino_do_trunc() */
34169+ args = kmalloc(sizeof(*args), GFP_NOFS);
34170+ if (unlikely(!args)) {
34171+ AuErr1("no memory\n");
34172+ goto out_args;
34173+ }
34174+
e49829fe 34175+ atomic_inc(&br->br_count);
1facf9fc 34176+ args->sb = sb;
34177+ args->br = br;
53392da6 34178+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 34179+ if (!wkq_err)
34180+ return; /* success */
34181+
4a4d8108 34182+ pr_err("wkq %d\n", wkq_err);
e49829fe 34183+ atomic_dec(&br->br_count);
1facf9fc 34184+
4f0767ce 34185+out_args:
1facf9fc 34186+ kfree(args);
4f0767ce 34187+out:
e49829fe 34188+ atomic_dec(&br->br_xino_running);
1facf9fc 34189+}
34190+
34191+/* ---------------------------------------------------------------------- */
34192+
5527c038 34193+static int au_xino_do_write(vfs_writef_t write, struct file *file,
1facf9fc 34194+ ino_t h_ino, ino_t ino)
34195+{
34196+ loff_t pos;
34197+ ssize_t sz;
34198+
34199+ pos = h_ino;
34200+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
34201+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34202+ return -EFBIG;
34203+ }
34204+ pos *= sizeof(ino);
34205+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
34206+ if (sz == sizeof(ino))
34207+ return 0; /* success */
34208+
34209+ AuIOErr("write failed (%zd)\n", sz);
34210+ return -EIO;
34211+}
34212+
34213+/*
34214+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
34215+ * at the position of @h_ino.
34216+ * even if @ino is zero, it is written to the xinofile and means no entry.
34217+ * if the size of the xino file on a specific filesystem exceeds the watermark,
34218+ * try truncating it.
34219+ */
34220+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34221+ ino_t ino)
34222+{
34223+ int err;
34224+ unsigned int mnt_flags;
34225+ struct au_branch *br;
34226+
34227+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
34228+ || ((loff_t)-1) > 0);
dece6358 34229+ SiMustAnyLock(sb);
1facf9fc 34230+
34231+ mnt_flags = au_mntflags(sb);
34232+ if (!au_opt_test(mnt_flags, XINO))
34233+ return 0;
34234+
34235+ br = au_sbr(sb, bindex);
34236+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34237+ h_ino, ino);
34238+ if (!err) {
34239+ if (au_opt_test(mnt_flags, TRUNC_XINO)
86dc4139 34240+ && au_test_fs_trunc_xino(au_br_sb(br)))
1facf9fc 34241+ xino_try_trunc(sb, br);
34242+ return 0; /* success */
34243+ }
34244+
34245+ AuIOErr("write failed (%d)\n", err);
34246+ return -EIO;
34247+}
34248+
34249+/* ---------------------------------------------------------------------- */
34250+
34251+/* aufs inode number bitmap */
34252+
34253+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
34254+static ino_t xib_calc_ino(unsigned long pindex, int bit)
34255+{
34256+ ino_t ino;
34257+
34258+ AuDebugOn(bit < 0 || page_bits <= bit);
34259+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
34260+ return ino;
34261+}
34262+
34263+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
34264+{
34265+ AuDebugOn(ino < AUFS_FIRST_INO);
34266+ ino -= AUFS_FIRST_INO;
34267+ *pindex = ino / page_bits;
34268+ *bit = ino % page_bits;
34269+}
34270+
34271+static int xib_pindex(struct super_block *sb, unsigned long pindex)
34272+{
34273+ int err;
34274+ loff_t pos;
34275+ ssize_t sz;
34276+ struct au_sbinfo *sbinfo;
34277+ struct file *xib;
34278+ unsigned long *p;
34279+
34280+ sbinfo = au_sbi(sb);
34281+ MtxMustLock(&sbinfo->si_xib_mtx);
34282+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
34283+ || !au_opt_test(sbinfo->si_mntflags, XINO));
34284+
34285+ if (pindex == sbinfo->si_xib_last_pindex)
34286+ return 0;
34287+
34288+ xib = sbinfo->si_xib;
34289+ p = sbinfo->si_xib_buf;
34290+ pos = sbinfo->si_xib_last_pindex;
34291+ pos *= PAGE_SIZE;
34292+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34293+ if (unlikely(sz != PAGE_SIZE))
34294+ goto out;
34295+
34296+ pos = pindex;
34297+ pos *= PAGE_SIZE;
c06a8ce3 34298+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
1facf9fc 34299+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
34300+ else {
34301+ memset(p, 0, PAGE_SIZE);
34302+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34303+ }
34304+ if (sz == PAGE_SIZE) {
34305+ sbinfo->si_xib_last_pindex = pindex;
34306+ return 0; /* success */
34307+ }
34308+
4f0767ce 34309+out:
b752ccd1
AM
34310+ AuIOErr1("write failed (%zd)\n", sz);
34311+ err = sz;
34312+ if (sz >= 0)
34313+ err = -EIO;
34314+ return err;
34315+}
34316+
34317+/* ---------------------------------------------------------------------- */
34318+
34319+static void au_xib_clear_bit(struct inode *inode)
34320+{
34321+ int err, bit;
34322+ unsigned long pindex;
34323+ struct super_block *sb;
34324+ struct au_sbinfo *sbinfo;
34325+
34326+ AuDebugOn(inode->i_nlink);
34327+
34328+ sb = inode->i_sb;
34329+ xib_calc_bit(inode->i_ino, &pindex, &bit);
34330+ AuDebugOn(page_bits <= bit);
34331+ sbinfo = au_sbi(sb);
34332+ mutex_lock(&sbinfo->si_xib_mtx);
34333+ err = xib_pindex(sb, pindex);
34334+ if (!err) {
34335+ clear_bit(bit, sbinfo->si_xib_buf);
34336+ sbinfo->si_xib_next_bit = bit;
34337+ }
34338+ mutex_unlock(&sbinfo->si_xib_mtx);
34339+}
34340+
34341+/* for s_op->delete_inode() */
34342+void au_xino_delete_inode(struct inode *inode, const int unlinked)
34343+{
34344+ int err;
34345+ unsigned int mnt_flags;
34346+ aufs_bindex_t bindex, bend, bi;
34347+ unsigned char try_trunc;
34348+ struct au_iinfo *iinfo;
34349+ struct super_block *sb;
34350+ struct au_hinode *hi;
34351+ struct inode *h_inode;
34352+ struct au_branch *br;
5527c038 34353+ vfs_writef_t xwrite;
b752ccd1
AM
34354+
34355+ sb = inode->i_sb;
34356+ mnt_flags = au_mntflags(sb);
34357+ if (!au_opt_test(mnt_flags, XINO)
34358+ || inode->i_ino == AUFS_ROOT_INO)
34359+ return;
34360+
34361+ if (unlinked) {
34362+ au_xigen_inc(inode);
34363+ au_xib_clear_bit(inode);
34364+ }
34365+
34366+ iinfo = au_ii(inode);
34367+ if (!iinfo)
34368+ return;
1facf9fc 34369+
b752ccd1
AM
34370+ bindex = iinfo->ii_bstart;
34371+ if (bindex < 0)
34372+ return;
1facf9fc 34373+
b752ccd1
AM
34374+ xwrite = au_sbi(sb)->si_xwrite;
34375+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
34376+ hi = iinfo->ii_hinode + bindex;
34377+ bend = iinfo->ii_bend;
34378+ for (; bindex <= bend; bindex++, hi++) {
34379+ h_inode = hi->hi_inode;
34380+ if (!h_inode
34381+ || (!unlinked && h_inode->i_nlink))
34382+ continue;
1facf9fc 34383+
b752ccd1
AM
34384+ /* inode may not be revalidated */
34385+ bi = au_br_index(sb, hi->hi_id);
34386+ if (bi < 0)
34387+ continue;
1facf9fc 34388+
b752ccd1
AM
34389+ br = au_sbr(sb, bi);
34390+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
34391+ h_inode->i_ino, /*ino*/0);
34392+ if (!err && try_trunc
86dc4139 34393+ && au_test_fs_trunc_xino(au_br_sb(br)))
b752ccd1 34394+ xino_try_trunc(sb, br);
1facf9fc 34395+ }
1facf9fc 34396+}
34397+
34398+/* get an unused inode number from bitmap */
34399+ino_t au_xino_new_ino(struct super_block *sb)
34400+{
34401+ ino_t ino;
34402+ unsigned long *p, pindex, ul, pend;
34403+ struct au_sbinfo *sbinfo;
34404+ struct file *file;
34405+ int free_bit, err;
34406+
34407+ if (!au_opt_test(au_mntflags(sb), XINO))
34408+ return iunique(sb, AUFS_FIRST_INO);
34409+
34410+ sbinfo = au_sbi(sb);
34411+ mutex_lock(&sbinfo->si_xib_mtx);
34412+ p = sbinfo->si_xib_buf;
34413+ free_bit = sbinfo->si_xib_next_bit;
34414+ if (free_bit < page_bits && !test_bit(free_bit, p))
34415+ goto out; /* success */
34416+ free_bit = find_first_zero_bit(p, page_bits);
34417+ if (free_bit < page_bits)
34418+ goto out; /* success */
34419+
34420+ pindex = sbinfo->si_xib_last_pindex;
34421+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
34422+ err = xib_pindex(sb, ul);
34423+ if (unlikely(err))
34424+ goto out_err;
34425+ free_bit = find_first_zero_bit(p, page_bits);
34426+ if (free_bit < page_bits)
34427+ goto out; /* success */
34428+ }
34429+
34430+ file = sbinfo->si_xib;
c06a8ce3 34431+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
1facf9fc 34432+ for (ul = pindex + 1; ul <= pend; ul++) {
34433+ err = xib_pindex(sb, ul);
34434+ if (unlikely(err))
34435+ goto out_err;
34436+ free_bit = find_first_zero_bit(p, page_bits);
34437+ if (free_bit < page_bits)
34438+ goto out; /* success */
34439+ }
34440+ BUG();
34441+
4f0767ce 34442+out:
1facf9fc 34443+ set_bit(free_bit, p);
7f207e10 34444+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 34445+ pindex = sbinfo->si_xib_last_pindex;
34446+ mutex_unlock(&sbinfo->si_xib_mtx);
34447+ ino = xib_calc_ino(pindex, free_bit);
34448+ AuDbg("i%lu\n", (unsigned long)ino);
34449+ return ino;
4f0767ce 34450+out_err:
1facf9fc 34451+ mutex_unlock(&sbinfo->si_xib_mtx);
34452+ AuDbg("i0\n");
34453+ return 0;
34454+}
34455+
34456+/*
34457+ * read @ino from xinofile for the specified branch{@sb, @bindex}
34458+ * at the position of @h_ino.
34459+ * if @ino does not exist, *@ino is left as zero and zero (success) is returned.
34460+ */
34461+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34462+ ino_t *ino)
34463+{
34464+ int err;
34465+ ssize_t sz;
34466+ loff_t pos;
34467+ struct file *file;
34468+ struct au_sbinfo *sbinfo;
34469+
34470+ *ino = 0;
34471+ if (!au_opt_test(au_mntflags(sb), XINO))
34472+ return 0; /* no xino */
34473+
34474+ err = 0;
34475+ sbinfo = au_sbi(sb);
34476+ pos = h_ino;
34477+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
34478+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34479+ return -EFBIG;
34480+ }
34481+ pos *= sizeof(*ino);
34482+
34483+ file = au_sbr(sb, bindex)->br_xino.xi_file;
c06a8ce3 34484+ if (vfsub_f_size_read(file) < pos + sizeof(*ino))
1facf9fc 34485+ return 0; /* no ino */
34486+
34487+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
34488+ if (sz == sizeof(*ino))
34489+ return 0; /* success */
34490+
34491+ err = sz;
34492+ if (unlikely(sz >= 0)) {
34493+ err = -EIO;
34494+ AuIOErr("xino read error (%zd)\n", sz);
34495+ }
34496+
34497+ return err;
34498+}
34499+
34500+/* ---------------------------------------------------------------------- */
34501+
34502+/* create and set a new xino file */
34503+
34504+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
34505+{
34506+ struct file *file;
34507+ struct dentry *h_parent, *d;
b912730e 34508+ struct inode *h_dir, *inode;
1facf9fc 34509+ int err;
34510+
34511+ /*
34512+ * at mount-time, and the xino file is the default path,
4a4d8108 34513+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 34514+ * when a user specified the xino, we cannot get au_hdir to be ignored.
34515+ */
7f207e10 34516+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 34517+ /* | __FMODE_NONOTIFY */,
1facf9fc 34518+ S_IRUGO | S_IWUGO);
34519+ if (IS_ERR(file)) {
34520+ if (!silent)
4a4d8108 34521+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 34522+ return file;
34523+ }
34524+
34525+ /* keep file count */
b912730e
AM
34526+ err = 0;
34527+ inode = file_inode(file);
2000de60 34528+ h_parent = dget_parent(file->f_path.dentry);
5527c038 34529+ h_dir = d_inode(h_parent);
1facf9fc 34530+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
34531+ /* mnt_want_write() is unnecessary here */
523b37e3 34532+ /* no delegation since it is just created */
b912730e
AM
34533+ if (inode->i_nlink)
34534+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
34535+ /*force*/0);
1facf9fc 34536+ mutex_unlock(&h_dir->i_mutex);
34537+ dput(h_parent);
34538+ if (unlikely(err)) {
34539+ if (!silent)
4a4d8108 34540+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 34541+ goto out;
34542+ }
34543+
34544+ err = -EINVAL;
2000de60 34545+ d = file->f_path.dentry;
1facf9fc 34546+ if (unlikely(sb == d->d_sb)) {
34547+ if (!silent)
4a4d8108 34548+ pr_err("%s must be outside\n", fname);
1facf9fc 34549+ goto out;
34550+ }
34551+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
34552+ if (!silent)
4a4d8108
AM
34553+ pr_err("xino doesn't support %s(%s)\n",
34554+ fname, au_sbtype(d->d_sb));
1facf9fc 34555+ goto out;
34556+ }
34557+ return file; /* success */
34558+
4f0767ce 34559+out:
1facf9fc 34560+ fput(file);
34561+ file = ERR_PTR(err);
34562+ return file;
34563+}
34564+
34565+/*
34566+ * find another branch which is on the same filesystem as the specified
34567+ * branch{@btgt}. search until @bend.
34568+ */
34569+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
34570+ aufs_bindex_t bend)
34571+{
34572+ aufs_bindex_t bindex;
34573+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
34574+
34575+ for (bindex = 0; bindex < btgt; bindex++)
34576+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
34577+ return bindex;
34578+ for (bindex++; bindex <= bend; bindex++)
34579+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
34580+ return bindex;
34581+ return -1;
34582+}
34583+
34584+/* ---------------------------------------------------------------------- */
34585+
34586+/*
34587+ * initialize the xinofile for the specified branch @br
34588+ * at the place/path where @base_file indicates.
34589+ * test whether another branch is on the same filesystem or not,
34590+ * if @do_test is true.
34591+ */
34592+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
34593+ struct file *base_file, int do_test)
34594+{
34595+ int err;
34596+ ino_t ino;
34597+ aufs_bindex_t bend, bindex;
34598+ struct au_branch *shared_br, *b;
34599+ struct file *file;
34600+ struct super_block *tgt_sb;
34601+
34602+ shared_br = NULL;
34603+ bend = au_sbend(sb);
34604+ if (do_test) {
86dc4139 34605+ tgt_sb = au_br_sb(br);
1facf9fc 34606+ for (bindex = 0; bindex <= bend; bindex++) {
34607+ b = au_sbr(sb, bindex);
86dc4139 34608+ if (tgt_sb == au_br_sb(b)) {
1facf9fc 34609+ shared_br = b;
34610+ break;
34611+ }
34612+ }
34613+ }
34614+
34615+ if (!shared_br || !shared_br->br_xino.xi_file) {
34616+ struct au_xino_lock_dir ldir;
34617+
34618+ au_xino_lock_dir(sb, base_file, &ldir);
34619+ /* mnt_want_write() is unnecessary here */
34620+ file = au_xino_create2(base_file, NULL);
34621+ au_xino_unlock_dir(&ldir);
34622+ err = PTR_ERR(file);
34623+ if (IS_ERR(file))
34624+ goto out;
34625+ br->br_xino.xi_file = file;
34626+ } else {
34627+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
34628+ get_file(br->br_xino.xi_file);
34629+ }
34630+
34631+ ino = AUFS_ROOT_INO;
34632+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34633+ h_ino, ino);
b752ccd1
AM
34634+ if (unlikely(err)) {
34635+ fput(br->br_xino.xi_file);
34636+ br->br_xino.xi_file = NULL;
34637+ }
1facf9fc 34638+
4f0767ce 34639+out:
1facf9fc 34640+ return err;
34641+}
34642+
34643+/* ---------------------------------------------------------------------- */
34644+
34645+/* truncate a xino bitmap file */
34646+
34647+/* todo: slow */
34648+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
34649+{
34650+ int err, bit;
34651+ ssize_t sz;
34652+ unsigned long pindex;
34653+ loff_t pos, pend;
34654+ struct au_sbinfo *sbinfo;
5527c038 34655+ vfs_readf_t func;
1facf9fc 34656+ ino_t *ino;
34657+ unsigned long *p;
34658+
34659+ err = 0;
34660+ sbinfo = au_sbi(sb);
dece6358 34661+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 34662+ p = sbinfo->si_xib_buf;
34663+ func = sbinfo->si_xread;
c06a8ce3 34664+ pend = vfsub_f_size_read(file);
1facf9fc 34665+ pos = 0;
34666+ while (pos < pend) {
34667+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
34668+ err = sz;
34669+ if (unlikely(sz <= 0))
34670+ goto out;
34671+
34672+ err = 0;
34673+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
34674+ if (unlikely(*ino < AUFS_FIRST_INO))
34675+ continue;
34676+
34677+ xib_calc_bit(*ino, &pindex, &bit);
34678+ AuDebugOn(page_bits <= bit);
34679+ err = xib_pindex(sb, pindex);
34680+ if (!err)
34681+ set_bit(bit, p);
34682+ else
34683+ goto out;
34684+ }
34685+ }
34686+
4f0767ce 34687+out:
1facf9fc 34688+ return err;
34689+}
34690+
34691+static int xib_restore(struct super_block *sb)
34692+{
34693+ int err;
34694+ aufs_bindex_t bindex, bend;
34695+ void *page;
34696+
34697+ err = -ENOMEM;
34698+ page = (void *)__get_free_page(GFP_NOFS);
34699+ if (unlikely(!page))
34700+ goto out;
34701+
34702+ err = 0;
34703+ bend = au_sbend(sb);
34704+ for (bindex = 0; !err && bindex <= bend; bindex++)
34705+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
34706+ err = do_xib_restore
34707+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
34708+ else
34709+ AuDbg("b%d\n", bindex);
34710+ free_page((unsigned long)page);
34711+
4f0767ce 34712+out:
1facf9fc 34713+ return err;
34714+}
34715+
34716+int au_xib_trunc(struct super_block *sb)
34717+{
34718+ int err;
34719+ ssize_t sz;
34720+ loff_t pos;
34721+ struct au_xino_lock_dir ldir;
34722+ struct au_sbinfo *sbinfo;
34723+ unsigned long *p;
34724+ struct file *file;
34725+
dece6358
AM
34726+ SiMustWriteLock(sb);
34727+
1facf9fc 34728+ err = 0;
34729+ sbinfo = au_sbi(sb);
34730+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
34731+ goto out;
34732+
34733+ file = sbinfo->si_xib;
c06a8ce3 34734+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
1facf9fc 34735+ goto out;
34736+
34737+ au_xino_lock_dir(sb, file, &ldir);
34738+ /* mnt_want_write() is unnecessary here */
34739+ file = au_xino_create2(sbinfo->si_xib, NULL);
34740+ au_xino_unlock_dir(&ldir);
34741+ err = PTR_ERR(file);
34742+ if (IS_ERR(file))
34743+ goto out;
34744+ fput(sbinfo->si_xib);
34745+ sbinfo->si_xib = file;
34746+
34747+ p = sbinfo->si_xib_buf;
34748+ memset(p, 0, PAGE_SIZE);
34749+ pos = 0;
34750+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
34751+ if (unlikely(sz != PAGE_SIZE)) {
34752+ err = sz;
34753+ AuIOErr("err %d\n", err);
34754+ if (sz >= 0)
34755+ err = -EIO;
34756+ goto out;
34757+ }
34758+
34759+ mutex_lock(&sbinfo->si_xib_mtx);
34760+ /* mnt_want_write() is unnecessary here */
34761+ err = xib_restore(sb);
34762+ mutex_unlock(&sbinfo->si_xib_mtx);
34763+
34764+out:
34765+ return err;
34766+}
34767+
34768+/* ---------------------------------------------------------------------- */
34769+
34770+/*
34771+ * xino mount option handlers
34772+ */
1facf9fc 34773+
34774+/* xino bitmap */
34775+static void xino_clear_xib(struct super_block *sb)
34776+{
34777+ struct au_sbinfo *sbinfo;
34778+
dece6358
AM
34779+ SiMustWriteLock(sb);
34780+
1facf9fc 34781+ sbinfo = au_sbi(sb);
34782+ sbinfo->si_xread = NULL;
34783+ sbinfo->si_xwrite = NULL;
34784+ if (sbinfo->si_xib)
34785+ fput(sbinfo->si_xib);
34786+ sbinfo->si_xib = NULL;
34787+ free_page((unsigned long)sbinfo->si_xib_buf);
34788+ sbinfo->si_xib_buf = NULL;
34789+}
34790+
34791+static int au_xino_set_xib(struct super_block *sb, struct file *base)
34792+{
34793+ int err;
34794+ loff_t pos;
34795+ struct au_sbinfo *sbinfo;
34796+ struct file *file;
34797+
dece6358
AM
34798+ SiMustWriteLock(sb);
34799+
1facf9fc 34800+ sbinfo = au_sbi(sb);
34801+ file = au_xino_create2(base, sbinfo->si_xib);
34802+ err = PTR_ERR(file);
34803+ if (IS_ERR(file))
34804+ goto out;
34805+ if (sbinfo->si_xib)
34806+ fput(sbinfo->si_xib);
34807+ sbinfo->si_xib = file;
5527c038
JR
34808+ sbinfo->si_xread = vfs_readf(file);
34809+ sbinfo->si_xwrite = vfs_writef(file);
1facf9fc 34810+
34811+ err = -ENOMEM;
34812+ if (!sbinfo->si_xib_buf)
34813+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
34814+ if (unlikely(!sbinfo->si_xib_buf))
34815+ goto out_unset;
34816+
34817+ sbinfo->si_xib_last_pindex = 0;
34818+ sbinfo->si_xib_next_bit = 0;
c06a8ce3 34819+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
1facf9fc 34820+ pos = 0;
34821+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
34822+ PAGE_SIZE, &pos);
34823+ if (unlikely(err != PAGE_SIZE))
34824+ goto out_free;
34825+ }
34826+ err = 0;
34827+ goto out; /* success */
34828+
4f0767ce 34829+out_free:
1facf9fc 34830+ free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
34831+ sbinfo->si_xib_buf = NULL;
34832+ if (err >= 0)
34833+ err = -EIO;
4f0767ce 34834+out_unset:
b752ccd1
AM
34835+ fput(sbinfo->si_xib);
34836+ sbinfo->si_xib = NULL;
34837+ sbinfo->si_xread = NULL;
34838+ sbinfo->si_xwrite = NULL;
4f0767ce 34839+out:
b752ccd1 34840+ return err;
1facf9fc 34841+}
34842+
b752ccd1
AM
34843+/* xino for each branch */
34844+static void xino_clear_br(struct super_block *sb)
34845+{
34846+ aufs_bindex_t bindex, bend;
34847+ struct au_branch *br;
1facf9fc 34848+
b752ccd1
AM
34849+ bend = au_sbend(sb);
34850+ for (bindex = 0; bindex <= bend; bindex++) {
34851+ br = au_sbr(sb, bindex);
34852+ if (!br || !br->br_xino.xi_file)
34853+ continue;
34854+
34855+ fput(br->br_xino.xi_file);
34856+ br->br_xino.xi_file = NULL;
34857+ }
34858+}
34859+
34860+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 34861+{
34862+ int err;
b752ccd1
AM
34863+ ino_t ino;
34864+ aufs_bindex_t bindex, bend, bshared;
34865+ struct {
34866+ struct file *old, *new;
34867+ } *fpair, *p;
34868+ struct au_branch *br;
34869+ struct inode *inode;
5527c038 34870+ vfs_writef_t writef;
1facf9fc 34871+
b752ccd1
AM
34872+ SiMustWriteLock(sb);
34873+
34874+ err = -ENOMEM;
34875+ bend = au_sbend(sb);
34876+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
34877+ if (unlikely(!fpair))
1facf9fc 34878+ goto out;
34879+
5527c038 34880+ inode = d_inode(sb->s_root);
b752ccd1
AM
34881+ ino = AUFS_ROOT_INO;
34882+ writef = au_sbi(sb)->si_xwrite;
34883+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
34884+ br = au_sbr(sb, bindex);
34885+ bshared = is_sb_shared(sb, bindex, bindex - 1);
34886+ if (bshared >= 0) {
34887+ /* shared xino */
34888+ *p = fpair[bshared];
34889+ get_file(p->new);
34890+ }
34891+
34892+ if (!p->new) {
34893+ /* new xino */
34894+ p->old = br->br_xino.xi_file;
34895+ p->new = au_xino_create2(base, br->br_xino.xi_file);
34896+ err = PTR_ERR(p->new);
34897+ if (IS_ERR(p->new)) {
34898+ p->new = NULL;
34899+ goto out_pair;
34900+ }
34901+ }
34902+
34903+ err = au_xino_do_write(writef, p->new,
34904+ au_h_iptr(inode, bindex)->i_ino, ino);
34905+ if (unlikely(err))
34906+ goto out_pair;
34907+ }
34908+
34909+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
34910+ br = au_sbr(sb, bindex);
34911+ if (br->br_xino.xi_file)
34912+ fput(br->br_xino.xi_file);
34913+ get_file(p->new);
34914+ br->br_xino.xi_file = p->new;
34915+ }
1facf9fc 34916+
4f0767ce 34917+out_pair:
b752ccd1
AM
34918+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
34919+ if (p->new)
34920+ fput(p->new);
34921+ else
34922+ break;
34923+ kfree(fpair);
4f0767ce 34924+out:
1facf9fc 34925+ return err;
34926+}
b752ccd1
AM
34927+
34928+void au_xino_clr(struct super_block *sb)
34929+{
34930+ struct au_sbinfo *sbinfo;
34931+
34932+ au_xigen_clr(sb);
34933+ xino_clear_xib(sb);
34934+ xino_clear_br(sb);
34935+ sbinfo = au_sbi(sb);
34936+ /* lvalue, do not call au_mntflags() */
34937+ au_opt_clr(sbinfo->si_mntflags, XINO);
34938+}
34939+
34940+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
34941+{
34942+ int err, skip;
34943+ struct dentry *parent, *cur_parent;
34944+ struct qstr *dname, *cur_name;
34945+ struct file *cur_xino;
34946+ struct inode *dir;
34947+ struct au_sbinfo *sbinfo;
34948+
34949+ SiMustWriteLock(sb);
34950+
34951+ err = 0;
34952+ sbinfo = au_sbi(sb);
2000de60 34953+ parent = dget_parent(xino->file->f_path.dentry);
b752ccd1
AM
34954+ if (remount) {
34955+ skip = 0;
2000de60 34956+ dname = &xino->file->f_path.dentry->d_name;
b752ccd1
AM
34957+ cur_xino = sbinfo->si_xib;
34958+ if (cur_xino) {
2000de60
JR
34959+ cur_parent = dget_parent(cur_xino->f_path.dentry);
34960+ cur_name = &cur_xino->f_path.dentry->d_name;
b752ccd1 34961+ skip = (cur_parent == parent
38d290e6 34962+ && au_qstreq(dname, cur_name));
b752ccd1
AM
34963+ dput(cur_parent);
34964+ }
34965+ if (skip)
34966+ goto out;
34967+ }
34968+
34969+ au_opt_set(sbinfo->si_mntflags, XINO);
5527c038 34970+ dir = d_inode(parent);
b752ccd1
AM
34971+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
34972+ /* mnt_want_write() is unnecessary here */
34973+ err = au_xino_set_xib(sb, xino->file);
34974+ if (!err)
34975+ err = au_xigen_set(sb, xino->file);
34976+ if (!err)
34977+ err = au_xino_set_br(sb, xino->file);
34978+ mutex_unlock(&dir->i_mutex);
34979+ if (!err)
34980+ goto out; /* success */
34981+
34982+ /* reset all */
34983+ AuIOErr("failed creating xino(%d).\n", err);
c1595e42
JR
34984+ au_xigen_clr(sb);
34985+ xino_clear_xib(sb);
b752ccd1 34986+
4f0767ce 34987+out:
b752ccd1
AM
34988+ dput(parent);
34989+ return err;
34990+}
34991+
34992+/* ---------------------------------------------------------------------- */
34993+
34994+/*
34995+ * create a xinofile at the default place/path.
34996+ */
34997+struct file *au_xino_def(struct super_block *sb)
34998+{
34999+ struct file *file;
35000+ char *page, *p;
35001+ struct au_branch *br;
35002+ struct super_block *h_sb;
35003+ struct path path;
35004+ aufs_bindex_t bend, bindex, bwr;
35005+
35006+ br = NULL;
35007+ bend = au_sbend(sb);
35008+ bwr = -1;
35009+ for (bindex = 0; bindex <= bend; bindex++) {
35010+ br = au_sbr(sb, bindex);
35011+ if (au_br_writable(br->br_perm)
86dc4139 35012+ && !au_test_fs_bad_xino(au_br_sb(br))) {
b752ccd1
AM
35013+ bwr = bindex;
35014+ break;
35015+ }
35016+ }
35017+
7f207e10
AM
35018+ if (bwr >= 0) {
35019+ file = ERR_PTR(-ENOMEM);
537831f9 35020+ page = (void *)__get_free_page(GFP_NOFS);
7f207e10
AM
35021+ if (unlikely(!page))
35022+ goto out;
86dc4139 35023+ path.mnt = au_br_mnt(br);
7f207e10
AM
35024+ path.dentry = au_h_dptr(sb->s_root, bwr);
35025+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
35026+ file = (void *)p;
35027+ if (!IS_ERR(p)) {
35028+ strcat(p, "/" AUFS_XINO_FNAME);
35029+ AuDbg("%s\n", p);
35030+ file = au_xino_create(sb, p, /*silent*/0);
35031+ if (!IS_ERR(file))
35032+ au_xino_brid_set(sb, br->br_id);
35033+ }
537831f9 35034+ free_page((unsigned long)page);
7f207e10
AM
35035+ } else {
35036+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
35037+ if (IS_ERR(file))
35038+ goto out;
2000de60 35039+ h_sb = file->f_path.dentry->d_sb;
7f207e10
AM
35040+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
35041+ pr_err("xino doesn't support %s(%s)\n",
35042+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
35043+ fput(file);
35044+ file = ERR_PTR(-EINVAL);
35045+ }
35046+ if (!IS_ERR(file))
35047+ au_xino_brid_set(sb, -1);
35048+ }
0c5527e5 35049+
7f207e10
AM
35050+out:
35051+ return file;
35052+}
35053+
35054+/* ---------------------------------------------------------------------- */
35055+
35056+int au_xino_path(struct seq_file *seq, struct file *file)
35057+{
35058+ int err;
35059+
35060+ err = au_seq_path(seq, &file->f_path);
79b8bda9 35061+ if (unlikely(err))
7f207e10
AM
35062+ goto out;
35063+
7f207e10
AM
35064+#define Deleted "\\040(deleted)"
35065+ seq->count -= sizeof(Deleted) - 1;
35066+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
35067+ sizeof(Deleted) - 1));
35068+#undef Deleted
35069+
35070+out:
35071+ return err;
35072+}
537831f9
AM
35073diff -urN /usr/share/empty/include/uapi/linux/aufs_type.h linux/include/uapi/linux/aufs_type.h
35074--- /usr/share/empty/include/uapi/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
b95c5147 35075+++ linux/include/uapi/linux/aufs_type.h 2015-12-10 17:59:16.856166891 +0100
c1595e42 35076@@ -0,0 +1,419 @@
7f207e10 35077+/*
2000de60 35078+ * Copyright (C) 2005-2015 Junjiro R. Okajima
7f207e10
AM
35079+ *
35080+ * This program, aufs is free software; you can redistribute it and/or modify
35081+ * it under the terms of the GNU General Public License as published by
35082+ * the Free Software Foundation; either version 2 of the License, or
35083+ * (at your option) any later version.
35084+ *
35085+ * This program is distributed in the hope that it will be useful,
35086+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35087+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35088+ * GNU General Public License for more details.
35089+ *
35090+ * You should have received a copy of the GNU General Public License
523b37e3 35091+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
35092+ */
35093+
35094+#ifndef __AUFS_TYPE_H__
35095+#define __AUFS_TYPE_H__
35096+
f6c5ef8b
AM
35097+#define AUFS_NAME "aufs"
35098+
9dbd164d 35099+#ifdef __KERNEL__
f6c5ef8b
AM
35100+/*
35101+ * define it before including all other headers.
35102+ * sched.h may use pr_* macros before defining "current", so define the
35103+ * no-current version first, and re-define later.
35104+ */
35105+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
35106+#include <linux/sched.h>
35107+#undef pr_fmt
a2a7ad62
AM
35108+#define pr_fmt(fmt) \
35109+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
35110+ (int)sizeof(current->comm), current->comm, current->pid
9dbd164d
AM
35111+#else
35112+#include <stdint.h>
35113+#include <sys/types.h>
f6c5ef8b 35114+#endif /* __KERNEL__ */
7f207e10 35115+
f6c5ef8b
AM
35116+#include <linux/limits.h>
35117+
b95c5147 35118+#define AUFS_VERSION "4.3-20151116"
7f207e10
AM
35119+
35120+/* todo? move this to linux-2.6.19/include/magic.h */
35121+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
35122+
35123+/* ---------------------------------------------------------------------- */
35124+
35125+#ifdef CONFIG_AUFS_BRANCH_MAX_127
9dbd164d 35126+typedef int8_t aufs_bindex_t;
7f207e10
AM
35127+#define AUFS_BRANCH_MAX 127
35128+#else
9dbd164d 35129+typedef int16_t aufs_bindex_t;
7f207e10
AM
35130+#ifdef CONFIG_AUFS_BRANCH_MAX_511
35131+#define AUFS_BRANCH_MAX 511
35132+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
35133+#define AUFS_BRANCH_MAX 1023
35134+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
35135+#define AUFS_BRANCH_MAX 32767
35136+#endif
35137+#endif
35138+
35139+#ifdef __KERNEL__
35140+#ifndef AUFS_BRANCH_MAX
35141+#error unknown CONFIG_AUFS_BRANCH_MAX value
35142+#endif
35143+#endif /* __KERNEL__ */
35144+
35145+/* ---------------------------------------------------------------------- */
35146+
7f207e10
AM
35147+#define AUFS_FSTYPE AUFS_NAME
35148+
35149+#define AUFS_ROOT_INO 2
35150+#define AUFS_FIRST_INO 11
35151+
35152+#define AUFS_WH_PFX ".wh."
35153+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
35154+#define AUFS_WH_TMP_LEN 4
86dc4139 35155+/* a limit for rmdir/rename a dir and copyup */
7f207e10
AM
35156+#define AUFS_MAX_NAMELEN (NAME_MAX \
35157+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
35158+ - 1 /* dot */\
35159+ - AUFS_WH_TMP_LEN) /* hex */
35160+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
35161+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
392086de
AM
35162+#define AUFS_XINO_DEF_SEC 30 /* seconds */
35163+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
7f207e10
AM
35164+#define AUFS_DIRWH_DEF 3
35165+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 35166+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
35167+#define AUFS_RDBLK_DEF 512 /* bytes */
35168+#define AUFS_RDHASH_DEF 32
35169+#define AUFS_WKQ_NAME AUFS_NAME "d"
027c5e7a
AM
35170+#define AUFS_MFS_DEF_SEC 30 /* seconds */
35171+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
076b876e 35172+#define AUFS_FHSM_CACHE_DEF_SEC 30 /* seconds */
86dc4139 35173+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
7f207e10
AM
35174+
35175+/* pseudo-link maintenance under /proc */
35176+#define AUFS_PLINK_MAINT_NAME "plink_maint"
35177+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
35178+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
35179+
35180+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
35181+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
35182+
35183+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
35184+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
35185+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
35186+
35187+/* doubly whiteouted */
35188+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
35189+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
35190+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
35191+
1e00d052 35192+/* branch permissions and attributes */
7f207e10
AM
35193+#define AUFS_BRPERM_RW "rw"
35194+#define AUFS_BRPERM_RO "ro"
35195+#define AUFS_BRPERM_RR "rr"
076b876e
AM
35196+#define AUFS_BRATTR_COO_REG "coo_reg"
35197+#define AUFS_BRATTR_COO_ALL "coo_all"
35198+#define AUFS_BRATTR_FHSM "fhsm"
35199+#define AUFS_BRATTR_UNPIN "unpin"
c1595e42
JR
35200+#define AUFS_BRATTR_ICEX "icex"
35201+#define AUFS_BRATTR_ICEX_SEC "icexsec"
35202+#define AUFS_BRATTR_ICEX_SYS "icexsys"
35203+#define AUFS_BRATTR_ICEX_TR "icextr"
35204+#define AUFS_BRATTR_ICEX_USR "icexusr"
35205+#define AUFS_BRATTR_ICEX_OTH "icexoth"
1e00d052
AM
35206+#define AUFS_BRRATTR_WH "wh"
35207+#define AUFS_BRWATTR_NLWH "nolwh"
076b876e
AM
35208+#define AUFS_BRWATTR_MOO "moo"
35209+
35210+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
35211+#define AuBrPerm_RO (1 << 1) /* readonly */
35212+#define AuBrPerm_RR (1 << 2) /* natively readonly */
35213+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
35214+
35215+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */
35216+#define AuBrAttr_COO_ALL (1 << 4)
35217+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
35218+
35219+#define AuBrAttr_FHSM (1 << 5) /* file-based hsm */
35220+#define AuBrAttr_UNPIN (1 << 6) /* rename-able top dir of
c1595e42
JR
35221+ branch. meaningless since
35222+ linux-3.18-rc1 */
35223+
35224+/* ignore error in copying XATTR */
35225+#define AuBrAttr_ICEX_SEC (1 << 7)
35226+#define AuBrAttr_ICEX_SYS (1 << 8)
35227+#define AuBrAttr_ICEX_TR (1 << 9)
35228+#define AuBrAttr_ICEX_USR (1 << 10)
35229+#define AuBrAttr_ICEX_OTH (1 << 11)
35230+#define AuBrAttr_ICEX (AuBrAttr_ICEX_SEC \
35231+ | AuBrAttr_ICEX_SYS \
35232+ | AuBrAttr_ICEX_TR \
35233+ | AuBrAttr_ICEX_USR \
35234+ | AuBrAttr_ICEX_OTH)
35235+
35236+#define AuBrRAttr_WH (1 << 12) /* whiteout-able */
076b876e
AM
35237+#define AuBrRAttr_Mask AuBrRAttr_WH
35238+
c1595e42
JR
35239+#define AuBrWAttr_NoLinkWH (1 << 13) /* un-hardlinkable whiteouts */
35240+#define AuBrWAttr_MOO (1 << 14) /* move-up on open */
076b876e
AM
35241+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
35242+
35243+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | AuBrWAttr_MOO)
35244+
c1595e42 35245+/* #warning test userspace */
076b876e
AM
35246+#ifdef __KERNEL__
35247+#ifndef CONFIG_AUFS_FHSM
35248+#undef AuBrAttr_FHSM
35249+#define AuBrAttr_FHSM 0
35250+#endif
c1595e42
JR
35251+#ifndef CONFIG_AUFS_XATTR
35252+#undef AuBrAttr_ICEX
35253+#define AuBrAttr_ICEX 0
35254+#undef AuBrAttr_ICEX_SEC
35255+#define AuBrAttr_ICEX_SEC 0
35256+#undef AuBrAttr_ICEX_SYS
35257+#define AuBrAttr_ICEX_SYS 0
35258+#undef AuBrAttr_ICEX_TR
35259+#define AuBrAttr_ICEX_TR 0
35260+#undef AuBrAttr_ICEX_USR
35261+#define AuBrAttr_ICEX_USR 0
35262+#undef AuBrAttr_ICEX_OTH
35263+#define AuBrAttr_ICEX_OTH 0
35264+#endif
076b876e
AM
35265+#endif
35266+
35267+/* the longest combination */
c1595e42
JR
35268+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR don't affect here */
35269+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \
35270+ "+" AUFS_BRATTR_COO_REG \
35271+ "+" AUFS_BRATTR_FHSM \
35272+ "+" AUFS_BRATTR_UNPIN \
7e9cd9fe
AM
35273+ "+" AUFS_BRATTR_ICEX_SEC \
35274+ "+" AUFS_BRATTR_ICEX_SYS \
35275+ "+" AUFS_BRATTR_ICEX_USR \
35276+ "+" AUFS_BRATTR_ICEX_OTH \
076b876e
AM
35277+ "+" AUFS_BRWATTR_NLWH)
35278+
35279+typedef struct {
35280+ char a[AuBrPermStrSz];
35281+} au_br_perm_str_t;
35282+
35283+static inline int au_br_writable(int brperm)
35284+{
35285+ return brperm & AuBrPerm_RW;
35286+}
35287+
35288+static inline int au_br_whable(int brperm)
35289+{
35290+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
35291+}
35292+
35293+static inline int au_br_wh_linkable(int brperm)
35294+{
35295+ return !(brperm & AuBrWAttr_NoLinkWH);
35296+}
35297+
35298+static inline int au_br_cmoo(int brperm)
35299+{
35300+ return brperm & AuBrAttr_CMOO_Mask;
35301+}
35302+
35303+static inline int au_br_fhsm(int brperm)
35304+{
35305+ return brperm & AuBrAttr_FHSM;
35306+}
7f207e10
AM
35307+
35308+/* ---------------------------------------------------------------------- */
35309+
35310+/* ioctl */
35311+enum {
35312+ /* readdir in userspace */
35313+ AuCtl_RDU,
35314+ AuCtl_RDU_INO,
35315+
076b876e
AM
35316+ AuCtl_WBR_FD, /* pathconf wrapper */
35317+ AuCtl_IBUSY, /* busy inode */
35318+ AuCtl_MVDOWN, /* move-down */
35319+ AuCtl_BR, /* info about branches */
35320+ AuCtl_FHSM_FD /* connection for fhsm */
7f207e10
AM
35321+};
35322+
35323+/* borrowed from linux/include/linux/kernel.h */
35324+#ifndef ALIGN
35325+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
35326+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
35327+#endif
35328+
35329+/* borrowed from linux/include/linux/compiler-gcc3.h */
35330+#ifndef __aligned
35331+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
35332+#endif
35333+
35334+#ifdef __KERNEL__
35335+#ifndef __packed
7f207e10
AM
35336+#define __packed __attribute__((packed))
35337+#endif
53392da6 35338+#endif
7f207e10
AM
35339+
35340+struct au_rdu_cookie {
9dbd164d
AM
35341+ uint64_t h_pos;
35342+ int16_t bindex;
35343+ uint8_t flags;
35344+ uint8_t pad;
35345+ uint32_t generation;
7f207e10
AM
35346+} __aligned(8);
35347+
35348+struct au_rdu_ent {
9dbd164d
AM
35349+ uint64_t ino;
35350+ int16_t bindex;
35351+ uint8_t type;
35352+ uint8_t nlen;
35353+ uint8_t wh;
7f207e10
AM
35354+ char name[0];
35355+} __aligned(8);
35356+
35357+static inline int au_rdu_len(int nlen)
35358+{
35359+ /* include the terminating NUL */
35360+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
9dbd164d 35361+ sizeof(uint64_t));
7f207e10
AM
35362+}
35363+
35364+union au_rdu_ent_ul {
35365+ struct au_rdu_ent __user *e;
9dbd164d 35366+ uint64_t ul;
7f207e10
AM
35367+};
35368+
35369+enum {
35370+ AufsCtlRduV_SZ,
35371+ AufsCtlRduV_End
35372+};
35373+
35374+struct aufs_rdu {
35375+ /* input */
35376+ union {
9dbd164d
AM
35377+ uint64_t sz; /* AuCtl_RDU */
35378+ uint64_t nent; /* AuCtl_RDU_INO */
7f207e10
AM
35379+ };
35380+ union au_rdu_ent_ul ent;
9dbd164d 35381+ uint16_t verify[AufsCtlRduV_End];
7f207e10
AM
35382+
35383+ /* input/output */
9dbd164d 35384+ uint32_t blk;
7f207e10
AM
35385+
35386+ /* output */
35387+ union au_rdu_ent_ul tail;
35388+ /* number of entries which were added in a single call */
9dbd164d
AM
35389+ uint64_t rent;
35390+ uint8_t full;
35391+ uint8_t shwh;
7f207e10
AM
35392+
35393+ struct au_rdu_cookie cookie;
35394+} __aligned(8);
35395+
1e00d052
AM
35396+/* ---------------------------------------------------------------------- */
35397+
35398+struct aufs_wbr_fd {
9dbd164d
AM
35399+ uint32_t oflags;
35400+ int16_t brid;
1e00d052
AM
35401+} __aligned(8);
35402+
35403+/* ---------------------------------------------------------------------- */
35404+
027c5e7a 35405+struct aufs_ibusy {
9dbd164d
AM
35406+ uint64_t ino, h_ino;
35407+ int16_t bindex;
027c5e7a
AM
35408+} __aligned(8);
35409+
1e00d052
AM
35410+/* ---------------------------------------------------------------------- */
35411+
392086de
AM
35412+/* error code for move-down */
35413+/* the actual message strings are implemented in aufs-util.git */
35414+enum {
35415+ EAU_MVDOWN_OPAQUE = 1,
35416+ EAU_MVDOWN_WHITEOUT,
35417+ EAU_MVDOWN_UPPER,
35418+ EAU_MVDOWN_BOTTOM,
35419+ EAU_MVDOWN_NOUPPER,
35420+ EAU_MVDOWN_NOLOWERBR,
35421+ EAU_Last
35422+};
35423+
c2b27bf2 35424+/* flags for move-down */
392086de
AM
35425+#define AUFS_MVDOWN_DMSG 1
35426+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */
35427+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */
35428+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */
35429+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */
35430+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */
35431+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */
35432+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */
35433+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */
076b876e
AM
35434+#define AUFS_MVDOWN_FHSM_LOWER (1 << 9) /* find fhsm attr for lower */
35435+#define AUFS_MVDOWN_STFS (1 << 10) /* req. stfs */
35436+#define AUFS_MVDOWN_STFS_FAILED (1 << 11) /* output: stfs is unusable */
35437+#define AUFS_MVDOWN_BOTTOM (1 << 12) /* output: no more lowers */
c2b27bf2 35438+
076b876e 35439+/* index for move-down */
392086de
AM
35440+enum {
35441+ AUFS_MVDOWN_UPPER,
35442+ AUFS_MVDOWN_LOWER,
35443+ AUFS_MVDOWN_NARRAY
35444+};
35445+
076b876e
AM
35446+/*
35447+ * additional info of move-down
35448+ * number of free blocks and inodes.
35449+ * subset of struct kstatfs, but smaller and always 64bit.
35450+ */
35451+struct aufs_stfs {
35452+ uint64_t f_blocks;
35453+ uint64_t f_bavail;
35454+ uint64_t f_files;
35455+ uint64_t f_ffree;
35456+};
35457+
35458+struct aufs_stbr {
35459+ int16_t brid; /* optional input */
35460+ int16_t bindex; /* output */
35461+ struct aufs_stfs stfs; /* output when AUFS_MVDOWN_STFS set */
35462+} __aligned(8);
35463+
c2b27bf2 35464+struct aufs_mvdown {
076b876e
AM
35465+ uint32_t flags; /* input/output */
35466+ struct aufs_stbr stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
35467+ int8_t au_errno; /* output */
35468+} __aligned(8);
35469+
35470+/* ---------------------------------------------------------------------- */
35471+
35472+union aufs_brinfo {
35473+ /* PATH_MAX may differ between kernel-space and user-space */
35474+ char _spacer[4096];
392086de 35475+ struct {
076b876e
AM
35476+ int16_t id;
35477+ int perm;
35478+ char path[0];
35479+ };
c2b27bf2
AM
35480+} __aligned(8);
35481+
35482+/* ---------------------------------------------------------------------- */
35483+
7f207e10
AM
35484+#define AuCtlType 'A'
35485+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
35486+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1e00d052
AM
35487+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
35488+ struct aufs_wbr_fd)
027c5e7a 35489+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
392086de
AM
35490+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \
35491+ struct aufs_mvdown)
076b876e
AM
35492+#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
35493+#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int)
7f207e10
AM
35494+
35495+#endif /* __AUFS_TYPE_H__ */
79b8bda9 35496aufs4.3 loopback patch
5527c038
JR
35497
35498diff --git a/drivers/block/loop.c b/drivers/block/loop.c
79b8bda9 35499index 291ec9e..1b8190d 100644
5527c038
JR
35500--- a/drivers/block/loop.c
35501+++ b/drivers/block/loop.c
c2c0f25c 35502@@ -417,7 +417,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
5527c038
JR
35503 }
35504
35505 struct switch_request {
35506- struct file *file;
35507+ struct file *file, *virt_file;
35508 struct completion wait;
35509 };
35510
c2c0f25c 35511@@ -437,6 +437,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
5527c038
JR
35512 mapping = file->f_mapping;
35513 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
35514 lo->lo_backing_file = file;
35515+ lo->lo_backing_virt_file = p->virt_file;
35516 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
35517 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
35518 lo->old_gfp_mask = mapping_gfp_mask(mapping);
c2c0f25c 35519@@ -448,11 +449,13 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
5527c038
JR
35520 * First it needs to flush existing IO, it does this by sending a magic
35521 * BIO down the pipe. The completion of this BIO does the actual switch.
35522 */
35523-static int loop_switch(struct loop_device *lo, struct file *file)
35524+static int loop_switch(struct loop_device *lo, struct file *file,
35525+ struct file *virt_file)
35526 {
35527 struct switch_request w;
35528
35529 w.file = file;
35530+ w.virt_file = virt_file;
35531
35532 /* freeze queue and wait for completion of scheduled requests */
35533 blk_mq_freeze_queue(lo->lo_queue);
c2c0f25c 35534@@ -471,7 +474,16 @@ static int loop_switch(struct loop_device *lo, struct file *file)
5527c038
JR
35535 */
35536 static int loop_flush(struct loop_device *lo)
35537 {
35538- return loop_switch(lo, NULL);
35539+ return loop_switch(lo, NULL, NULL);
35540+}
35541+
35542+static struct file *loop_real_file(struct file *file)
35543+{
35544+ struct file *f = NULL;
35545+
35546+ if (file->f_path.dentry->d_sb->s_op->real_loop)
35547+ f = file->f_path.dentry->d_sb->s_op->real_loop(file);
35548+ return f;
35549 }
35550
c2c0f25c
AM
35551 static void loop_reread_partitions(struct loop_device *lo,
35552@@ -508,6 +520,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
35553 unsigned int arg)
35554 {
35555 struct file *file, *old_file;
35556+ struct file *f, *virt_file = NULL, *old_virt_file;
35557 struct inode *inode;
35558 int error;
35559
c2c0f25c 35560@@ -524,9 +537,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
35561 file = fget(arg);
35562 if (!file)
35563 goto out;
35564+ f = loop_real_file(file);
35565+ if (f) {
35566+ virt_file = file;
35567+ file = f;
35568+ get_file(file);
35569+ }
35570
35571 inode = file->f_mapping->host;
35572 old_file = lo->lo_backing_file;
35573+ old_virt_file = lo->lo_backing_virt_file;
35574
35575 error = -EINVAL;
35576
c2c0f25c 35577@@ -538,17 +558,21 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
35578 goto out_putf;
35579
35580 /* and ... switch */
35581- error = loop_switch(lo, file);
35582+ error = loop_switch(lo, file, virt_file);
35583 if (error)
35584 goto out_putf;
35585
35586 fput(old_file);
35587+ if (old_virt_file)
35588+ fput(old_virt_file);
35589 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
c2c0f25c 35590 loop_reread_partitions(lo, bdev);
5527c038
JR
35591 return 0;
35592
35593 out_putf:
35594 fput(file);
35595+ if (virt_file)
35596+ fput(virt_file);
35597 out:
35598 return error;
35599 }
c2c0f25c 35600@@ -709,7 +733,7 @@ static void loop_config_discard(struct loop_device *lo)
5527c038
JR
35601 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
35602 struct block_device *bdev, unsigned int arg)
35603 {
35604- struct file *file, *f;
35605+ struct file *file, *f, *virt_file = NULL;
35606 struct inode *inode;
35607 struct address_space *mapping;
35608 unsigned lo_blocksize;
c2c0f25c 35609@@ -724,6 +748,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
35610 file = fget(arg);
35611 if (!file)
35612 goto out;
35613+ f = loop_real_file(file);
35614+ if (f) {
35615+ virt_file = file;
35616+ file = f;
35617+ get_file(file);
35618+ }
35619
35620 error = -EBUSY;
35621 if (lo->lo_state != Lo_unbound)
c2c0f25c 35622@@ -778,6 +808,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
35623 lo->lo_device = bdev;
35624 lo->lo_flags = lo_flags;
35625 lo->lo_backing_file = file;
35626+ lo->lo_backing_virt_file = virt_file;
35627 lo->transfer = NULL;
35628 lo->ioctl = NULL;
35629 lo->lo_sizelimit = 0;
c2c0f25c 35630@@ -809,6 +840,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
35631
35632 out_putf:
35633 fput(file);
35634+ if (virt_file)
35635+ fput(virt_file);
35636 out:
35637 /* This is safe: open() is still holding a reference. */
35638 module_put(THIS_MODULE);
c2c0f25c 35639@@ -855,6 +888,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
5527c038
JR
35640 static int loop_clr_fd(struct loop_device *lo)
35641 {
35642 struct file *filp = lo->lo_backing_file;
35643+ struct file *virt_filp = lo->lo_backing_virt_file;
35644 gfp_t gfp = lo->old_gfp_mask;
35645 struct block_device *bdev = lo->lo_device;
35646
c2c0f25c 35647@@ -886,6 +920,7 @@ static int loop_clr_fd(struct loop_device *lo)
5527c038
JR
35648 spin_lock_irq(&lo->lo_lock);
35649 lo->lo_state = Lo_rundown;
35650 lo->lo_backing_file = NULL;
35651+ lo->lo_backing_virt_file = NULL;
35652 spin_unlock_irq(&lo->lo_lock);
35653
35654 loop_release_xfer(lo);
c2c0f25c 35655@@ -931,6 +966,8 @@ static int loop_clr_fd(struct loop_device *lo)
5527c038
JR
35656 * bd_mutex which is usually taken before lo_ctl_mutex.
35657 */
35658 fput(filp);
35659+ if (virt_filp)
35660+ fput(virt_filp);
35661 return 0;
35662 }
35663
35664diff --git a/drivers/block/loop.h b/drivers/block/loop.h
c2c0f25c 35665index 25e8997..93b6fce 100644
5527c038
JR
35666--- a/drivers/block/loop.h
35667+++ b/drivers/block/loop.h
35668@@ -46,7 +46,7 @@ struct loop_device {
35669 int (*ioctl)(struct loop_device *, int cmd,
35670 unsigned long arg);
35671
35672- struct file * lo_backing_file;
35673+ struct file * lo_backing_file, *lo_backing_virt_file;
35674 struct block_device *lo_device;
35675 unsigned lo_blocksize;
35676 void *key_data;
35677diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
35678index 91c2ce7..d4ee5a7 100644
35679--- a/fs/aufs/f_op.c
35680+++ b/fs/aufs/f_op.c
35681@@ -389,7 +389,7 @@ static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
35682 if (IS_ERR(h_file))
35683 goto out;
35684
35685- if (au_test_loopback_kthread()) {
35686+ if (0 && au_test_loopback_kthread()) {
35687 au_warn_loopback(h_file->f_path.dentry->d_sb);
35688 if (file->f_mapping != h_file->f_mapping) {
35689 file->f_mapping = h_file->f_mapping;
35690diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
79b8bda9 35691index f324758..4555e7b 100644
5527c038
JR
35692--- a/fs/aufs/loop.c
35693+++ b/fs/aufs/loop.c
79b8bda9
AM
35694@@ -131,3 +131,19 @@ void au_loopback_fin(void)
35695 symbol_put(loop_backing_file);
5527c038
JR
35696 kfree(au_warn_loopback_array);
35697 }
35698+
35699+/* ---------------------------------------------------------------------- */
35700+
35701+/* support the loopback block device inside aufs */
35702+
35703+struct file *aufs_real_loop(struct file *file)
35704+{
35705+ struct file *f;
35706+
35707+ BUG_ON(!au_test_aufs(file->f_path.dentry->d_sb));
35708+ fi_read_lock(file);
35709+ f = au_hf_top(file);
35710+ fi_read_unlock(file);
35711+ AuDebugOn(!f);
35712+ return f;
35713+}
35714diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
35715index 6d9864d..3322557 100644
35716--- a/fs/aufs/loop.h
35717+++ b/fs/aufs/loop.h
35718@@ -25,7 +25,11 @@ void au_warn_loopback(struct super_block *h_sb);
35719
35720 int au_loopback_init(void);
35721 void au_loopback_fin(void);
35722+
35723+struct file *aufs_real_loop(struct file *file);
35724 #else
35725+AuStub(struct file *, loop_backing_file, return NULL)
35726+
35727 AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
35728 struct dentry *h_adding)
35729 AuStubInt0(au_test_loopback_kthread, void)
35730@@ -33,6 +37,8 @@ AuStubVoid(au_warn_loopback, struct super_block *h_sb)
35731
35732 AuStubInt0(au_loopback_init, void)
35733 AuStubVoid(au_loopback_fin, void)
35734+
35735+AuStub(struct file *, aufs_real_loop, return NULL, struct file *file)
35736 #endif /* BLK_DEV_LOOP */
35737
35738 #endif /* __KERNEL__ */
35739diff --git a/fs/aufs/super.c b/fs/aufs/super.c
b95c5147 35740index 3fe10d3..2f32d58 100644
5527c038
JR
35741--- a/fs/aufs/super.c
35742+++ b/fs/aufs/super.c
b95c5147 35743@@ -841,7 +841,10 @@ static const struct super_operations aufs_sop = {
5527c038
JR
35744 .statfs = aufs_statfs,
35745 .put_super = aufs_put_super,
35746 .sync_fs = aufs_sync_fs,
35747- .remount_fs = aufs_remount_fs
35748+ .remount_fs = aufs_remount_fs,
35749+#ifdef CONFIG_AUFS_BDEV_LOOP
35750+ .real_loop = aufs_real_loop
35751+#endif
35752 };
35753
35754 /* ---------------------------------------------------------------------- */
35755diff --git a/include/linux/fs.h b/include/linux/fs.h
79b8bda9 35756index fabd9d7a..90174cf 100644
5527c038
JR
35757--- a/include/linux/fs.h
35758+++ b/include/linux/fs.h
79b8bda9 35759@@ -1734,6 +1734,10 @@ struct super_operations {
5527c038
JR
35760 struct shrink_control *);
35761 long (*free_cached_objects)(struct super_block *,
35762 struct shrink_control *);
35763+#if defined(CONFIG_BLK_DEV_LOOP) || defined(CONFIG_BLK_DEV_LOOP_MODULE)
35764+ /* and aufs */
35765+ struct file *(*real_loop)(struct file *);
35766+#endif
35767 };
35768
35769 /*
This page took 5.87428 seconds and 4 git commands to generate.